{ "best_metric": null, "best_model_checkpoint": null, "epoch": 1.0, "eval_steps": 500, "global_step": 2446, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0, "learning_rate": 2.702702702702703e-07, "loss": 3.914, "step": 1 }, { "epoch": 0.0, "learning_rate": 5.405405405405406e-07, "loss": 3.7247, "step": 2 }, { "epoch": 0.0, "learning_rate": 8.108108108108109e-07, "loss": 3.8179, "step": 3 }, { "epoch": 0.0, "learning_rate": 1.0810810810810812e-06, "loss": 3.9695, "step": 4 }, { "epoch": 0.0, "learning_rate": 1.3513513513513515e-06, "loss": 4.035, "step": 5 }, { "epoch": 0.0, "learning_rate": 1.6216216216216219e-06, "loss": 3.7208, "step": 6 }, { "epoch": 0.0, "learning_rate": 1.8918918918918922e-06, "loss": 3.6515, "step": 7 }, { "epoch": 0.0, "learning_rate": 2.1621621621621623e-06, "loss": 3.4713, "step": 8 }, { "epoch": 0.0, "learning_rate": 2.432432432432433e-06, "loss": 3.5122, "step": 9 }, { "epoch": 0.0, "learning_rate": 2.702702702702703e-06, "loss": 2.5384, "step": 10 }, { "epoch": 0.0, "learning_rate": 2.9729729729729736e-06, "loss": 2.6081, "step": 11 }, { "epoch": 0.0, "learning_rate": 3.2432432432432437e-06, "loss": 2.3291, "step": 12 }, { "epoch": 0.01, "learning_rate": 3.513513513513514e-06, "loss": 1.8066, "step": 13 }, { "epoch": 0.01, "learning_rate": 3.7837837837837844e-06, "loss": 1.5921, "step": 14 }, { "epoch": 0.01, "learning_rate": 4.0540540540540545e-06, "loss": 1.4388, "step": 15 }, { "epoch": 0.01, "learning_rate": 4.324324324324325e-06, "loss": 1.4086, "step": 16 }, { "epoch": 0.01, "learning_rate": 4.594594594594596e-06, "loss": 1.21, "step": 17 }, { "epoch": 0.01, "learning_rate": 4.864864864864866e-06, "loss": 1.0597, "step": 18 }, { "epoch": 0.01, "learning_rate": 5.135135135135135e-06, "loss": 1.0664, "step": 19 }, { "epoch": 0.01, "learning_rate": 5.405405405405406e-06, "loss": 1.1034, "step": 20 }, { "epoch": 0.01, "learning_rate": 5.675675675675676e-06, "loss": 1.0144, "step": 21 }, { "epoch": 0.01, "learning_rate": 5.945945945945947e-06, "loss": 0.9312, "step": 22 }, { "epoch": 0.01, "learning_rate": 6.2162162162162164e-06, "loss": 0.966, "step": 23 }, { "epoch": 0.01, "learning_rate": 6.486486486486487e-06, "loss": 0.8463, "step": 24 }, { "epoch": 0.01, "learning_rate": 6.7567567567567575e-06, "loss": 0.9202, "step": 25 }, { "epoch": 0.01, "learning_rate": 7.027027027027028e-06, "loss": 0.9266, "step": 26 }, { "epoch": 0.01, "learning_rate": 7.297297297297298e-06, "loss": 0.8965, "step": 27 }, { "epoch": 0.01, "learning_rate": 7.567567567567569e-06, "loss": 0.8654, "step": 28 }, { "epoch": 0.01, "learning_rate": 7.837837837837838e-06, "loss": 0.8189, "step": 29 }, { "epoch": 0.01, "learning_rate": 8.108108108108109e-06, "loss": 0.8193, "step": 30 }, { "epoch": 0.01, "learning_rate": 8.378378378378378e-06, "loss": 0.8061, "step": 31 }, { "epoch": 0.01, "learning_rate": 8.64864864864865e-06, "loss": 0.8261, "step": 32 }, { "epoch": 0.01, "learning_rate": 8.91891891891892e-06, "loss": 0.8132, "step": 33 }, { "epoch": 0.01, "learning_rate": 9.189189189189191e-06, "loss": 0.778, "step": 34 }, { "epoch": 0.01, "learning_rate": 9.45945945945946e-06, "loss": 0.8129, "step": 35 }, { "epoch": 0.01, "learning_rate": 9.729729729729732e-06, "loss": 0.7442, "step": 36 }, { "epoch": 0.02, "learning_rate": 1e-05, "loss": 0.7654, "step": 37 }, { "epoch": 0.02, "learning_rate": 1.027027027027027e-05, "loss": 0.8042, "step": 38 }, { "epoch": 0.02, "learning_rate": 1.0540540540540541e-05, "loss": 0.7447, "step": 39 }, { "epoch": 0.02, "learning_rate": 1.0810810810810812e-05, "loss": 0.8085, "step": 40 }, { "epoch": 0.02, "learning_rate": 1.1081081081081081e-05, "loss": 0.6754, "step": 41 }, { "epoch": 0.02, "learning_rate": 1.1351351351351352e-05, "loss": 0.7993, "step": 42 }, { "epoch": 0.02, "learning_rate": 1.1621621621621622e-05, "loss": 0.7228, "step": 43 }, { "epoch": 0.02, "learning_rate": 1.1891891891891894e-05, "loss": 0.7996, "step": 44 }, { "epoch": 0.02, "learning_rate": 1.2162162162162164e-05, "loss": 0.7621, "step": 45 }, { "epoch": 0.02, "learning_rate": 1.2432432432432433e-05, "loss": 0.7324, "step": 46 }, { "epoch": 0.02, "learning_rate": 1.2702702702702702e-05, "loss": 0.7214, "step": 47 }, { "epoch": 0.02, "learning_rate": 1.2972972972972975e-05, "loss": 0.7916, "step": 48 }, { "epoch": 0.02, "learning_rate": 1.3243243243243244e-05, "loss": 0.7493, "step": 49 }, { "epoch": 0.02, "learning_rate": 1.3513513513513515e-05, "loss": 0.7094, "step": 50 }, { "epoch": 0.02, "learning_rate": 1.3783783783783784e-05, "loss": 0.6915, "step": 51 }, { "epoch": 0.02, "learning_rate": 1.4054054054054055e-05, "loss": 0.7377, "step": 52 }, { "epoch": 0.02, "learning_rate": 1.4324324324324326e-05, "loss": 0.7305, "step": 53 }, { "epoch": 0.02, "learning_rate": 1.4594594594594596e-05, "loss": 0.736, "step": 54 }, { "epoch": 0.02, "learning_rate": 1.4864864864864865e-05, "loss": 0.7118, "step": 55 }, { "epoch": 0.02, "learning_rate": 1.5135135135135138e-05, "loss": 0.7813, "step": 56 }, { "epoch": 0.02, "learning_rate": 1.540540540540541e-05, "loss": 0.7843, "step": 57 }, { "epoch": 0.02, "learning_rate": 1.5675675675675676e-05, "loss": 0.7459, "step": 58 }, { "epoch": 0.02, "learning_rate": 1.5945945945945947e-05, "loss": 0.7001, "step": 59 }, { "epoch": 0.02, "learning_rate": 1.6216216216216218e-05, "loss": 0.7714, "step": 60 }, { "epoch": 0.02, "learning_rate": 1.648648648648649e-05, "loss": 0.7398, "step": 61 }, { "epoch": 0.03, "learning_rate": 1.6756756756756757e-05, "loss": 0.7837, "step": 62 }, { "epoch": 0.03, "learning_rate": 1.7027027027027028e-05, "loss": 0.7814, "step": 63 }, { "epoch": 0.03, "learning_rate": 1.72972972972973e-05, "loss": 0.7091, "step": 64 }, { "epoch": 0.03, "learning_rate": 1.756756756756757e-05, "loss": 0.7132, "step": 65 }, { "epoch": 0.03, "learning_rate": 1.783783783783784e-05, "loss": 0.7662, "step": 66 }, { "epoch": 0.03, "learning_rate": 1.8108108108108108e-05, "loss": 0.7957, "step": 67 }, { "epoch": 0.03, "learning_rate": 1.8378378378378383e-05, "loss": 0.8157, "step": 68 }, { "epoch": 0.03, "learning_rate": 1.864864864864865e-05, "loss": 0.6874, "step": 69 }, { "epoch": 0.03, "learning_rate": 1.891891891891892e-05, "loss": 0.6407, "step": 70 }, { "epoch": 0.03, "learning_rate": 1.918918918918919e-05, "loss": 0.7226, "step": 71 }, { "epoch": 0.03, "learning_rate": 1.9459459459459463e-05, "loss": 0.7492, "step": 72 }, { "epoch": 0.03, "learning_rate": 1.972972972972973e-05, "loss": 0.6559, "step": 73 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.7014, "step": 74 }, { "epoch": 0.03, "learning_rate": 1.9999991229177605e-05, "loss": 0.7113, "step": 75 }, { "epoch": 0.03, "learning_rate": 1.9999964916725805e-05, "loss": 0.7679, "step": 76 }, { "epoch": 0.03, "learning_rate": 1.9999921062690757e-05, "loss": 0.7724, "step": 77 }, { "epoch": 0.03, "learning_rate": 1.9999859667149386e-05, "loss": 0.6777, "step": 78 }, { "epoch": 0.03, "learning_rate": 1.9999780730209394e-05, "loss": 0.7554, "step": 79 }, { "epoch": 0.03, "learning_rate": 1.9999684252009243e-05, "loss": 0.6715, "step": 80 }, { "epoch": 0.03, "learning_rate": 1.9999570232718174e-05, "loss": 0.7803, "step": 81 }, { "epoch": 0.03, "learning_rate": 1.9999438672536202e-05, "loss": 0.7306, "step": 82 }, { "epoch": 0.03, "learning_rate": 1.9999289571694097e-05, "loss": 0.7824, "step": 83 }, { "epoch": 0.03, "learning_rate": 1.999912293045341e-05, "loss": 0.7659, "step": 84 }, { "epoch": 0.03, "learning_rate": 1.9998938749106455e-05, "loss": 0.8113, "step": 85 }, { "epoch": 0.04, "learning_rate": 1.9998737027976323e-05, "loss": 0.7753, "step": 86 }, { "epoch": 0.04, "learning_rate": 1.9998517767416856e-05, "loss": 0.7205, "step": 87 }, { "epoch": 0.04, "learning_rate": 1.999828096781268e-05, "loss": 0.7603, "step": 88 }, { "epoch": 0.04, "learning_rate": 1.9998026629579178e-05, "loss": 0.7418, "step": 89 }, { "epoch": 0.04, "learning_rate": 1.99977547531625e-05, "loss": 0.7534, "step": 90 }, { "epoch": 0.04, "learning_rate": 1.9997465339039568e-05, "loss": 0.7847, "step": 91 }, { "epoch": 0.04, "learning_rate": 1.9997158387718057e-05, "loss": 0.7377, "step": 92 }, { "epoch": 0.04, "learning_rate": 1.9996833899736406e-05, "loss": 0.7227, "step": 93 }, { "epoch": 0.04, "learning_rate": 1.9996491875663833e-05, "loss": 0.7066, "step": 94 }, { "epoch": 0.04, "learning_rate": 1.999613231610029e-05, "loss": 0.7835, "step": 95 }, { "epoch": 0.04, "learning_rate": 1.999575522167651e-05, "loss": 0.7709, "step": 96 }, { "epoch": 0.04, "learning_rate": 1.9995360593053983e-05, "loss": 0.6646, "step": 97 }, { "epoch": 0.04, "learning_rate": 1.9994948430924944e-05, "loss": 0.7428, "step": 98 }, { "epoch": 0.04, "learning_rate": 1.99945187360124e-05, "loss": 0.637, "step": 99 }, { "epoch": 0.04, "learning_rate": 1.9994071509070104e-05, "loss": 0.7413, "step": 100 }, { "epoch": 0.04, "learning_rate": 1.9993606750882566e-05, "loss": 0.7932, "step": 101 }, { "epoch": 0.04, "learning_rate": 1.9993124462265045e-05, "loss": 0.7157, "step": 102 }, { "epoch": 0.04, "learning_rate": 1.999262464406356e-05, "loss": 0.7333, "step": 103 }, { "epoch": 0.04, "learning_rate": 1.9992107297154872e-05, "loss": 0.7356, "step": 104 }, { "epoch": 0.04, "learning_rate": 1.999157242244649e-05, "loss": 0.7008, "step": 105 }, { "epoch": 0.04, "learning_rate": 1.9991020020876676e-05, "loss": 0.7209, "step": 106 }, { "epoch": 0.04, "learning_rate": 1.9990450093414437e-05, "loss": 0.736, "step": 107 }, { "epoch": 0.04, "learning_rate": 1.9989862641059504e-05, "loss": 0.7011, "step": 108 }, { "epoch": 0.04, "learning_rate": 1.9989257664842382e-05, "loss": 0.6757, "step": 109 }, { "epoch": 0.04, "learning_rate": 1.9988635165824293e-05, "loss": 0.6541, "step": 110 }, { "epoch": 0.05, "learning_rate": 1.99879951450972e-05, "loss": 0.6673, "step": 111 }, { "epoch": 0.05, "learning_rate": 1.9987337603783806e-05, "loss": 0.7241, "step": 112 }, { "epoch": 0.05, "learning_rate": 1.9986662543037548e-05, "loss": 0.7017, "step": 113 }, { "epoch": 0.05, "learning_rate": 1.998596996404259e-05, "loss": 0.6831, "step": 114 }, { "epoch": 0.05, "learning_rate": 1.9985259868013836e-05, "loss": 0.7272, "step": 115 }, { "epoch": 0.05, "learning_rate": 1.9984532256196905e-05, "loss": 0.7158, "step": 116 }, { "epoch": 0.05, "learning_rate": 1.998378712986815e-05, "loss": 0.7135, "step": 117 }, { "epoch": 0.05, "learning_rate": 1.9983024490334645e-05, "loss": 0.697, "step": 118 }, { "epoch": 0.05, "learning_rate": 1.9982244338934186e-05, "loss": 0.7524, "step": 119 }, { "epoch": 0.05, "learning_rate": 1.998144667703529e-05, "loss": 0.6382, "step": 120 }, { "epoch": 0.05, "learning_rate": 1.998063150603718e-05, "loss": 0.7254, "step": 121 }, { "epoch": 0.05, "learning_rate": 1.99797988273698e-05, "loss": 0.7586, "step": 122 }, { "epoch": 0.05, "learning_rate": 1.9978948642493817e-05, "loss": 0.7567, "step": 123 }, { "epoch": 0.05, "learning_rate": 1.997808095290058e-05, "loss": 0.761, "step": 124 }, { "epoch": 0.05, "learning_rate": 1.997719576011217e-05, "loss": 0.6922, "step": 125 }, { "epoch": 0.05, "learning_rate": 1.9976293065681355e-05, "loss": 0.8705, "step": 126 }, { "epoch": 0.05, "learning_rate": 1.9975372871191613e-05, "loss": 0.8465, "step": 127 }, { "epoch": 0.05, "learning_rate": 1.9974435178257114e-05, "loss": 0.6818, "step": 128 }, { "epoch": 0.05, "learning_rate": 1.9973479988522727e-05, "loss": 0.7871, "step": 129 }, { "epoch": 0.05, "learning_rate": 1.997250730366401e-05, "loss": 0.689, "step": 130 }, { "epoch": 0.05, "learning_rate": 1.9971517125387215e-05, "loss": 0.7687, "step": 131 }, { "epoch": 0.05, "learning_rate": 1.997050945542928e-05, "loss": 0.7739, "step": 132 }, { "epoch": 0.05, "learning_rate": 1.9969484295557814e-05, "loss": 0.6592, "step": 133 }, { "epoch": 0.05, "learning_rate": 1.9968441647571124e-05, "loss": 0.7345, "step": 134 }, { "epoch": 0.06, "learning_rate": 1.9967381513298188e-05, "loss": 0.699, "step": 135 }, { "epoch": 0.06, "learning_rate": 1.9966303894598645e-05, "loss": 0.6508, "step": 136 }, { "epoch": 0.06, "learning_rate": 1.996520879336283e-05, "loss": 0.7698, "step": 137 }, { "epoch": 0.06, "learning_rate": 1.996409621151172e-05, "loss": 0.6685, "step": 138 }, { "epoch": 0.06, "learning_rate": 1.996296615099697e-05, "loss": 0.7438, "step": 139 }, { "epoch": 0.06, "learning_rate": 1.9961818613800892e-05, "loss": 0.7239, "step": 140 }, { "epoch": 0.06, "learning_rate": 1.9960653601936454e-05, "loss": 0.7671, "step": 141 }, { "epoch": 0.06, "learning_rate": 1.995947111744728e-05, "loss": 0.6988, "step": 142 }, { "epoch": 0.06, "learning_rate": 1.995827116240764e-05, "loss": 0.6864, "step": 143 }, { "epoch": 0.06, "learning_rate": 1.995705373892246e-05, "loss": 0.7423, "step": 144 }, { "epoch": 0.06, "learning_rate": 1.995581884912729e-05, "loss": 0.7609, "step": 145 }, { "epoch": 0.06, "learning_rate": 1.9954566495188333e-05, "loss": 0.7873, "step": 146 }, { "epoch": 0.06, "learning_rate": 1.995329667930243e-05, "loss": 0.6349, "step": 147 }, { "epoch": 0.06, "learning_rate": 1.995200940369704e-05, "loss": 0.7028, "step": 148 }, { "epoch": 0.06, "learning_rate": 1.9950704670630258e-05, "loss": 0.661, "step": 149 }, { "epoch": 0.06, "learning_rate": 1.9949382482390803e-05, "loss": 0.7158, "step": 150 }, { "epoch": 0.06, "learning_rate": 1.994804284129801e-05, "loss": 0.6674, "step": 151 }, { "epoch": 0.06, "learning_rate": 1.994668574970183e-05, "loss": 0.7478, "step": 152 }, { "epoch": 0.06, "learning_rate": 1.9945311209982822e-05, "loss": 0.7583, "step": 153 }, { "epoch": 0.06, "learning_rate": 1.9943919224552154e-05, "loss": 0.7372, "step": 154 }, { "epoch": 0.06, "learning_rate": 1.99425097958516e-05, "loss": 0.7122, "step": 155 }, { "epoch": 0.06, "learning_rate": 1.994108292635353e-05, "loss": 0.7396, "step": 156 }, { "epoch": 0.06, "learning_rate": 1.9939638618560906e-05, "loss": 0.6849, "step": 157 }, { "epoch": 0.06, "learning_rate": 1.9938176875007284e-05, "loss": 0.643, "step": 158 }, { "epoch": 0.07, "learning_rate": 1.99366976982568e-05, "loss": 0.7633, "step": 159 }, { "epoch": 0.07, "learning_rate": 1.9935201090904177e-05, "loss": 0.6894, "step": 160 }, { "epoch": 0.07, "learning_rate": 1.9933687055574705e-05, "loss": 0.8241, "step": 161 }, { "epoch": 0.07, "learning_rate": 1.993215559492426e-05, "loss": 0.7478, "step": 162 }, { "epoch": 0.07, "learning_rate": 1.9930606711639266e-05, "loss": 0.7617, "step": 163 }, { "epoch": 0.07, "learning_rate": 1.992904040843672e-05, "loss": 0.6509, "step": 164 }, { "epoch": 0.07, "learning_rate": 1.992745668806418e-05, "loss": 0.7458, "step": 165 }, { "epoch": 0.07, "learning_rate": 1.9925855553299755e-05, "loss": 0.7561, "step": 166 }, { "epoch": 0.07, "learning_rate": 1.992423700695209e-05, "loss": 0.7045, "step": 167 }, { "epoch": 0.07, "learning_rate": 1.9922601051860386e-05, "loss": 0.7136, "step": 168 }, { "epoch": 0.07, "learning_rate": 1.9920947690894376e-05, "loss": 0.6977, "step": 169 }, { "epoch": 0.07, "learning_rate": 1.991927692695433e-05, "loss": 0.7501, "step": 170 }, { "epoch": 0.07, "learning_rate": 1.9917588762971037e-05, "loss": 0.7366, "step": 171 }, { "epoch": 0.07, "learning_rate": 1.9915883201905824e-05, "loss": 0.6958, "step": 172 }, { "epoch": 0.07, "learning_rate": 1.9914160246750517e-05, "loss": 0.6859, "step": 173 }, { "epoch": 0.07, "learning_rate": 1.9912419900527467e-05, "loss": 0.7127, "step": 174 }, { "epoch": 0.07, "learning_rate": 1.9910662166289523e-05, "loss": 0.7156, "step": 175 }, { "epoch": 0.07, "learning_rate": 1.9908887047120046e-05, "loss": 0.7304, "step": 176 }, { "epoch": 0.07, "learning_rate": 1.990709454613288e-05, "loss": 0.6756, "step": 177 }, { "epoch": 0.07, "learning_rate": 1.9905284666472374e-05, "loss": 0.6849, "step": 178 }, { "epoch": 0.07, "learning_rate": 1.9903457411313347e-05, "loss": 0.6512, "step": 179 }, { "epoch": 0.07, "learning_rate": 1.9901612783861114e-05, "loss": 0.669, "step": 180 }, { "epoch": 0.07, "learning_rate": 1.9899750787351445e-05, "loss": 0.6937, "step": 181 }, { "epoch": 0.07, "learning_rate": 1.9897871425050598e-05, "loss": 0.7683, "step": 182 }, { "epoch": 0.07, "learning_rate": 1.989597470025528e-05, "loss": 0.7368, "step": 183 }, { "epoch": 0.08, "learning_rate": 1.989406061629265e-05, "loss": 0.785, "step": 184 }, { "epoch": 0.08, "learning_rate": 1.989212917652034e-05, "loss": 0.6766, "step": 185 }, { "epoch": 0.08, "learning_rate": 1.9890180384326404e-05, "loss": 0.642, "step": 186 }, { "epoch": 0.08, "learning_rate": 1.9888214243129348e-05, "loss": 0.6693, "step": 187 }, { "epoch": 0.08, "learning_rate": 1.9886230756378102e-05, "loss": 0.7124, "step": 188 }, { "epoch": 0.08, "learning_rate": 1.9884229927552036e-05, "loss": 0.7384, "step": 189 }, { "epoch": 0.08, "learning_rate": 1.9882211760160924e-05, "loss": 0.7166, "step": 190 }, { "epoch": 0.08, "learning_rate": 1.9880176257744968e-05, "loss": 0.6555, "step": 191 }, { "epoch": 0.08, "learning_rate": 1.9878123423874777e-05, "loss": 0.7438, "step": 192 }, { "epoch": 0.08, "learning_rate": 1.987605326215135e-05, "loss": 0.666, "step": 193 }, { "epoch": 0.08, "learning_rate": 1.9873965776206103e-05, "loss": 0.7085, "step": 194 }, { "epoch": 0.08, "learning_rate": 1.9871860969700824e-05, "loss": 0.7651, "step": 195 }, { "epoch": 0.08, "learning_rate": 1.9869738846327685e-05, "loss": 0.7598, "step": 196 }, { "epoch": 0.08, "learning_rate": 1.9867599409809248e-05, "loss": 0.6693, "step": 197 }, { "epoch": 0.08, "learning_rate": 1.986544266389843e-05, "loss": 0.6429, "step": 198 }, { "epoch": 0.08, "learning_rate": 1.9863268612378525e-05, "loss": 0.6636, "step": 199 }, { "epoch": 0.08, "learning_rate": 1.986107725906317e-05, "loss": 0.6937, "step": 200 }, { "epoch": 0.08, "learning_rate": 1.985886860779636e-05, "loss": 0.6555, "step": 201 }, { "epoch": 0.08, "learning_rate": 1.9856642662452437e-05, "loss": 0.7356, "step": 202 }, { "epoch": 0.08, "learning_rate": 1.9854399426936072e-05, "loss": 0.7173, "step": 203 }, { "epoch": 0.08, "learning_rate": 1.985213890518227e-05, "loss": 0.6426, "step": 204 }, { "epoch": 0.08, "learning_rate": 1.984986110115636e-05, "loss": 0.7005, "step": 205 }, { "epoch": 0.08, "learning_rate": 1.984756601885398e-05, "loss": 0.6916, "step": 206 }, { "epoch": 0.08, "learning_rate": 1.9845253662301085e-05, "loss": 0.7192, "step": 207 }, { "epoch": 0.09, "learning_rate": 1.984292403555393e-05, "loss": 0.7189, "step": 208 }, { "epoch": 0.09, "learning_rate": 1.984057714269906e-05, "loss": 0.6656, "step": 209 }, { "epoch": 0.09, "learning_rate": 1.9838212987853312e-05, "loss": 0.729, "step": 210 }, { "epoch": 0.09, "learning_rate": 1.9835831575163803e-05, "loss": 0.7722, "step": 211 }, { "epoch": 0.09, "learning_rate": 1.9833432908807927e-05, "loss": 0.724, "step": 212 }, { "epoch": 0.09, "learning_rate": 1.9831016992993334e-05, "loss": 0.7056, "step": 213 }, { "epoch": 0.09, "learning_rate": 1.9828583831957935e-05, "loss": 0.6936, "step": 214 }, { "epoch": 0.09, "learning_rate": 1.98261334299699e-05, "loss": 0.7692, "step": 215 }, { "epoch": 0.09, "learning_rate": 1.9823665791327635e-05, "loss": 0.686, "step": 216 }, { "epoch": 0.09, "learning_rate": 1.9821180920359788e-05, "loss": 0.7909, "step": 217 }, { "epoch": 0.09, "learning_rate": 1.9818678821425227e-05, "loss": 0.7024, "step": 218 }, { "epoch": 0.09, "learning_rate": 1.9816159498913044e-05, "loss": 0.6972, "step": 219 }, { "epoch": 0.09, "learning_rate": 1.981362295724255e-05, "loss": 0.7299, "step": 220 }, { "epoch": 0.09, "learning_rate": 1.981106920086325e-05, "loss": 0.7137, "step": 221 }, { "epoch": 0.09, "learning_rate": 1.980849823425486e-05, "loss": 0.6248, "step": 222 }, { "epoch": 0.09, "learning_rate": 1.9805910061927274e-05, "loss": 0.7232, "step": 223 }, { "epoch": 0.09, "learning_rate": 1.9803304688420568e-05, "loss": 0.705, "step": 224 }, { "epoch": 0.09, "learning_rate": 1.9800682118305006e-05, "loss": 0.6893, "step": 225 }, { "epoch": 0.09, "learning_rate": 1.9798042356181e-05, "loss": 0.7827, "step": 226 }, { "epoch": 0.09, "learning_rate": 1.9795385406679125e-05, "loss": 0.6216, "step": 227 }, { "epoch": 0.09, "learning_rate": 1.9792711274460116e-05, "loss": 0.6915, "step": 228 }, { "epoch": 0.09, "learning_rate": 1.979001996421483e-05, "loss": 0.743, "step": 229 }, { "epoch": 0.09, "learning_rate": 1.978731148066428e-05, "loss": 0.7098, "step": 230 }, { "epoch": 0.09, "learning_rate": 1.978458582855958e-05, "loss": 0.6684, "step": 231 }, { "epoch": 0.09, "learning_rate": 1.978184301268198e-05, "loss": 0.6526, "step": 232 }, { "epoch": 0.1, "learning_rate": 1.9779083037842825e-05, "loss": 0.7063, "step": 233 }, { "epoch": 0.1, "learning_rate": 1.977630590888357e-05, "loss": 0.6826, "step": 234 }, { "epoch": 0.1, "learning_rate": 1.977351163067575e-05, "loss": 0.7599, "step": 235 }, { "epoch": 0.1, "learning_rate": 1.9770700208120996e-05, "loss": 0.7276, "step": 236 }, { "epoch": 0.1, "learning_rate": 1.9767871646150998e-05, "loss": 0.6454, "step": 237 }, { "epoch": 0.1, "learning_rate": 1.9765025949727526e-05, "loss": 0.7429, "step": 238 }, { "epoch": 0.1, "learning_rate": 1.976216312384239e-05, "loss": 0.6716, "step": 239 }, { "epoch": 0.1, "learning_rate": 1.975928317351747e-05, "loss": 0.7179, "step": 240 }, { "epoch": 0.1, "learning_rate": 1.975638610380466e-05, "loss": 0.7334, "step": 241 }, { "epoch": 0.1, "learning_rate": 1.975347191978591e-05, "loss": 0.7534, "step": 242 }, { "epoch": 0.1, "learning_rate": 1.9750540626573164e-05, "loss": 0.7452, "step": 243 }, { "epoch": 0.1, "learning_rate": 1.97475922293084e-05, "loss": 0.7468, "step": 244 }, { "epoch": 0.1, "learning_rate": 1.9744626733163593e-05, "loss": 0.6564, "step": 245 }, { "epoch": 0.1, "learning_rate": 1.9741644143340707e-05, "loss": 0.662, "step": 246 }, { "epoch": 0.1, "learning_rate": 1.9738644465071698e-05, "loss": 0.7549, "step": 247 }, { "epoch": 0.1, "learning_rate": 1.9735627703618494e-05, "loss": 0.6778, "step": 248 }, { "epoch": 0.1, "learning_rate": 1.9732593864272994e-05, "loss": 0.7245, "step": 249 }, { "epoch": 0.1, "learning_rate": 1.9729542952357045e-05, "loss": 0.7622, "step": 250 }, { "epoch": 0.1, "learning_rate": 1.9726474973222453e-05, "loss": 0.7587, "step": 251 }, { "epoch": 0.1, "learning_rate": 1.9723389932250955e-05, "loss": 0.7281, "step": 252 }, { "epoch": 0.1, "learning_rate": 1.9720287834854222e-05, "loss": 0.6908, "step": 253 }, { "epoch": 0.1, "learning_rate": 1.9717168686473845e-05, "loss": 0.6357, "step": 254 }, { "epoch": 0.1, "learning_rate": 1.971403249258132e-05, "loss": 0.6483, "step": 255 }, { "epoch": 0.1, "learning_rate": 1.9710879258678045e-05, "loss": 0.7361, "step": 256 }, { "epoch": 0.11, "learning_rate": 1.970770899029532e-05, "loss": 0.7461, "step": 257 }, { "epoch": 0.11, "learning_rate": 1.9704521692994305e-05, "loss": 0.6225, "step": 258 }, { "epoch": 0.11, "learning_rate": 1.9701317372366055e-05, "loss": 0.7378, "step": 259 }, { "epoch": 0.11, "learning_rate": 1.969809603403147e-05, "loss": 0.6854, "step": 260 }, { "epoch": 0.11, "learning_rate": 1.9694857683641304e-05, "loss": 0.7554, "step": 261 }, { "epoch": 0.11, "learning_rate": 1.969160232687616e-05, "loss": 0.6839, "step": 262 }, { "epoch": 0.11, "learning_rate": 1.9688329969446473e-05, "loss": 0.7053, "step": 263 }, { "epoch": 0.11, "learning_rate": 1.968504061709249e-05, "loss": 0.7743, "step": 264 }, { "epoch": 0.11, "learning_rate": 1.9681734275584278e-05, "loss": 0.7763, "step": 265 }, { "epoch": 0.11, "learning_rate": 1.96784109507217e-05, "loss": 0.7409, "step": 266 }, { "epoch": 0.11, "learning_rate": 1.9675070648334426e-05, "loss": 0.6643, "step": 267 }, { "epoch": 0.11, "learning_rate": 1.9671713374281883e-05, "loss": 0.698, "step": 268 }, { "epoch": 0.11, "learning_rate": 1.966833913445329e-05, "loss": 0.7453, "step": 269 }, { "epoch": 0.11, "learning_rate": 1.9664947934767614e-05, "loss": 0.6386, "step": 270 }, { "epoch": 0.11, "learning_rate": 1.9661539781173582e-05, "loss": 0.7071, "step": 271 }, { "epoch": 0.11, "learning_rate": 1.965811467964965e-05, "loss": 0.6803, "step": 272 }, { "epoch": 0.11, "learning_rate": 1.9654672636204014e-05, "loss": 0.6056, "step": 273 }, { "epoch": 0.11, "learning_rate": 1.965121365687458e-05, "loss": 0.6341, "step": 274 }, { "epoch": 0.11, "learning_rate": 1.9647737747728972e-05, "loss": 0.6479, "step": 275 }, { "epoch": 0.11, "learning_rate": 1.9644244914864502e-05, "loss": 0.7001, "step": 276 }, { "epoch": 0.11, "learning_rate": 1.9640735164408176e-05, "loss": 0.7345, "step": 277 }, { "epoch": 0.11, "learning_rate": 1.9637208502516673e-05, "loss": 0.7122, "step": 278 }, { "epoch": 0.11, "learning_rate": 1.9633664935376335e-05, "loss": 0.6863, "step": 279 }, { "epoch": 0.11, "learning_rate": 1.9630104469203165e-05, "loss": 0.6755, "step": 280 }, { "epoch": 0.11, "learning_rate": 1.9626527110242808e-05, "loss": 0.7279, "step": 281 }, { "epoch": 0.12, "learning_rate": 1.9622932864770538e-05, "loss": 0.7658, "step": 282 }, { "epoch": 0.12, "learning_rate": 1.9619321739091247e-05, "loss": 0.8178, "step": 283 }, { "epoch": 0.12, "learning_rate": 1.9615693739539452e-05, "loss": 0.7593, "step": 284 }, { "epoch": 0.12, "learning_rate": 1.961204887247926e-05, "loss": 0.7031, "step": 285 }, { "epoch": 0.12, "learning_rate": 1.9608387144304363e-05, "loss": 0.7344, "step": 286 }, { "epoch": 0.12, "learning_rate": 1.9604708561438033e-05, "loss": 0.7371, "step": 287 }, { "epoch": 0.12, "learning_rate": 1.960101313033312e-05, "loss": 0.6481, "step": 288 }, { "epoch": 0.12, "learning_rate": 1.9597300857472e-05, "loss": 0.7669, "step": 289 }, { "epoch": 0.12, "learning_rate": 1.959357174936663e-05, "loss": 0.7682, "step": 290 }, { "epoch": 0.12, "learning_rate": 1.9589825812558468e-05, "loss": 0.6921, "step": 291 }, { "epoch": 0.12, "learning_rate": 1.95860630536185e-05, "loss": 0.7272, "step": 292 }, { "epoch": 0.12, "learning_rate": 1.9582283479147237e-05, "loss": 0.6797, "step": 293 }, { "epoch": 0.12, "learning_rate": 1.9578487095774666e-05, "loss": 0.6798, "step": 294 }, { "epoch": 0.12, "learning_rate": 1.9574673910160263e-05, "loss": 0.6218, "step": 295 }, { "epoch": 0.12, "learning_rate": 1.957084392899299e-05, "loss": 0.6786, "step": 296 }, { "epoch": 0.12, "learning_rate": 1.9566997158991265e-05, "loss": 0.6568, "step": 297 }, { "epoch": 0.12, "learning_rate": 1.956313360690295e-05, "loss": 0.6483, "step": 298 }, { "epoch": 0.12, "learning_rate": 1.9559253279505354e-05, "loss": 0.6623, "step": 299 }, { "epoch": 0.12, "learning_rate": 1.955535618360521e-05, "loss": 0.6965, "step": 300 }, { "epoch": 0.12, "learning_rate": 1.9551442326038664e-05, "loss": 0.7012, "step": 301 }, { "epoch": 0.12, "learning_rate": 1.9547511713671264e-05, "loss": 0.6993, "step": 302 }, { "epoch": 0.12, "learning_rate": 1.9543564353397953e-05, "loss": 0.6872, "step": 303 }, { "epoch": 0.12, "learning_rate": 1.953960025214305e-05, "loss": 0.6712, "step": 304 }, { "epoch": 0.12, "learning_rate": 1.953561941686024e-05, "loss": 0.7013, "step": 305 }, { "epoch": 0.13, "learning_rate": 1.9531621854532562e-05, "loss": 0.7142, "step": 306 }, { "epoch": 0.13, "learning_rate": 1.95276075721724e-05, "loss": 0.6824, "step": 307 }, { "epoch": 0.13, "learning_rate": 1.9523576576821463e-05, "loss": 0.7435, "step": 308 }, { "epoch": 0.13, "learning_rate": 1.9519528875550783e-05, "loss": 0.731, "step": 309 }, { "epoch": 0.13, "learning_rate": 1.9515464475460692e-05, "loss": 0.6508, "step": 310 }, { "epoch": 0.13, "learning_rate": 1.951138338368082e-05, "loss": 0.77, "step": 311 }, { "epoch": 0.13, "learning_rate": 1.9507285607370065e-05, "loss": 0.666, "step": 312 }, { "epoch": 0.13, "learning_rate": 1.9503171153716606e-05, "loss": 0.6168, "step": 313 }, { "epoch": 0.13, "learning_rate": 1.949904002993787e-05, "loss": 0.6891, "step": 314 }, { "epoch": 0.13, "learning_rate": 1.949489224328053e-05, "loss": 0.658, "step": 315 }, { "epoch": 0.13, "learning_rate": 1.9490727801020485e-05, "loss": 0.698, "step": 316 }, { "epoch": 0.13, "learning_rate": 1.9486546710462847e-05, "loss": 0.7834, "step": 317 }, { "epoch": 0.13, "learning_rate": 1.9482348978941947e-05, "loss": 0.7289, "step": 318 }, { "epoch": 0.13, "learning_rate": 1.9478134613821286e-05, "loss": 0.6546, "step": 319 }, { "epoch": 0.13, "learning_rate": 1.9473903622493554e-05, "loss": 0.6534, "step": 320 }, { "epoch": 0.13, "learning_rate": 1.9469656012380617e-05, "loss": 0.7529, "step": 321 }, { "epoch": 0.13, "learning_rate": 1.946539179093347e-05, "loss": 0.7308, "step": 322 }, { "epoch": 0.13, "learning_rate": 1.946111096563226e-05, "loss": 0.7221, "step": 323 }, { "epoch": 0.13, "learning_rate": 1.945681354398627e-05, "loss": 0.6508, "step": 324 }, { "epoch": 0.13, "learning_rate": 1.945249953353387e-05, "loss": 0.6877, "step": 325 }, { "epoch": 0.13, "learning_rate": 1.944816894184255e-05, "loss": 0.7452, "step": 326 }, { "epoch": 0.13, "learning_rate": 1.9443821776508885e-05, "loss": 0.6759, "step": 327 }, { "epoch": 0.13, "learning_rate": 1.943945804515851e-05, "loss": 0.6791, "step": 328 }, { "epoch": 0.13, "learning_rate": 1.9435077755446124e-05, "loss": 0.7525, "step": 329 }, { "epoch": 0.13, "learning_rate": 1.9430680915055492e-05, "loss": 0.7387, "step": 330 }, { "epoch": 0.14, "learning_rate": 1.942626753169938e-05, "loss": 0.6926, "step": 331 }, { "epoch": 0.14, "learning_rate": 1.9421837613119597e-05, "loss": 0.7244, "step": 332 }, { "epoch": 0.14, "learning_rate": 1.9417391167086946e-05, "loss": 0.709, "step": 333 }, { "epoch": 0.14, "learning_rate": 1.941292820140122e-05, "loss": 0.674, "step": 334 }, { "epoch": 0.14, "learning_rate": 1.9408448723891203e-05, "loss": 0.6841, "step": 335 }, { "epoch": 0.14, "learning_rate": 1.940395274241463e-05, "loss": 0.7307, "step": 336 }, { "epoch": 0.14, "learning_rate": 1.9399440264858192e-05, "loss": 0.7572, "step": 337 }, { "epoch": 0.14, "learning_rate": 1.9394911299137522e-05, "loss": 0.7091, "step": 338 }, { "epoch": 0.14, "learning_rate": 1.9390365853197163e-05, "loss": 0.6941, "step": 339 }, { "epoch": 0.14, "learning_rate": 1.938580393501058e-05, "loss": 0.705, "step": 340 }, { "epoch": 0.14, "learning_rate": 1.938122555258013e-05, "loss": 0.6568, "step": 341 }, { "epoch": 0.14, "learning_rate": 1.9376630713937043e-05, "loss": 0.6852, "step": 342 }, { "epoch": 0.14, "learning_rate": 1.9372019427141424e-05, "loss": 0.7676, "step": 343 }, { "epoch": 0.14, "learning_rate": 1.9367391700282228e-05, "loss": 0.6645, "step": 344 }, { "epoch": 0.14, "learning_rate": 1.9362747541477255e-05, "loss": 0.6581, "step": 345 }, { "epoch": 0.14, "learning_rate": 1.9358086958873116e-05, "loss": 0.6912, "step": 346 }, { "epoch": 0.14, "learning_rate": 1.935340996064524e-05, "loss": 0.5606, "step": 347 }, { "epoch": 0.14, "learning_rate": 1.9348716554997854e-05, "loss": 0.6755, "step": 348 }, { "epoch": 0.14, "learning_rate": 1.9344006750163962e-05, "loss": 0.7071, "step": 349 }, { "epoch": 0.14, "learning_rate": 1.9339280554405336e-05, "loss": 0.6998, "step": 350 }, { "epoch": 0.14, "learning_rate": 1.93345379760125e-05, "loss": 0.6462, "step": 351 }, { "epoch": 0.14, "learning_rate": 1.9329779023304724e-05, "loss": 0.6674, "step": 352 }, { "epoch": 0.14, "learning_rate": 1.9325003704629982e-05, "loss": 0.756, "step": 353 }, { "epoch": 0.14, "learning_rate": 1.9320212028364976e-05, "loss": 0.7485, "step": 354 }, { "epoch": 0.15, "learning_rate": 1.9315404002915093e-05, "loss": 0.6598, "step": 355 }, { "epoch": 0.15, "learning_rate": 1.9310579636714402e-05, "loss": 0.7027, "step": 356 }, { "epoch": 0.15, "learning_rate": 1.930573893822563e-05, "loss": 0.6232, "step": 357 }, { "epoch": 0.15, "learning_rate": 1.9300881915940163e-05, "loss": 0.7015, "step": 358 }, { "epoch": 0.15, "learning_rate": 1.9296008578378015e-05, "loss": 0.8247, "step": 359 }, { "epoch": 0.15, "learning_rate": 1.929111893408782e-05, "loss": 0.7135, "step": 360 }, { "epoch": 0.15, "learning_rate": 1.9286212991646823e-05, "loss": 0.6709, "step": 361 }, { "epoch": 0.15, "learning_rate": 1.928129075966085e-05, "loss": 0.6455, "step": 362 }, { "epoch": 0.15, "learning_rate": 1.9276352246764305e-05, "loss": 0.6988, "step": 363 }, { "epoch": 0.15, "learning_rate": 1.9271397461620154e-05, "loss": 0.6794, "step": 364 }, { "epoch": 0.15, "learning_rate": 1.9266426412919905e-05, "loss": 0.7772, "step": 365 }, { "epoch": 0.15, "learning_rate": 1.9261439109383594e-05, "loss": 0.6561, "step": 366 }, { "epoch": 0.15, "learning_rate": 1.925643555975977e-05, "loss": 0.6804, "step": 367 }, { "epoch": 0.15, "learning_rate": 1.925141577282549e-05, "loss": 0.7263, "step": 368 }, { "epoch": 0.15, "learning_rate": 1.924637975738628e-05, "loss": 0.7349, "step": 369 }, { "epoch": 0.15, "learning_rate": 1.9241327522276133e-05, "loss": 0.6582, "step": 370 }, { "epoch": 0.15, "learning_rate": 1.923625907635751e-05, "loss": 0.625, "step": 371 }, { "epoch": 0.15, "learning_rate": 1.92311744285213e-05, "loss": 0.6996, "step": 372 }, { "epoch": 0.15, "learning_rate": 1.9226073587686805e-05, "loss": 0.7124, "step": 373 }, { "epoch": 0.15, "learning_rate": 1.922095656280174e-05, "loss": 0.7546, "step": 374 }, { "epoch": 0.15, "learning_rate": 1.921582336284221e-05, "loss": 0.7235, "step": 375 }, { "epoch": 0.15, "learning_rate": 1.9210673996812694e-05, "loss": 0.7486, "step": 376 }, { "epoch": 0.15, "learning_rate": 1.920550847374602e-05, "loss": 0.7058, "step": 377 }, { "epoch": 0.15, "learning_rate": 1.9200326802703374e-05, "loss": 0.7482, "step": 378 }, { "epoch": 0.15, "learning_rate": 1.919512899277425e-05, "loss": 0.6735, "step": 379 }, { "epoch": 0.16, "learning_rate": 1.9189915053076472e-05, "loss": 0.6708, "step": 380 }, { "epoch": 0.16, "learning_rate": 1.9184684992756142e-05, "loss": 0.675, "step": 381 }, { "epoch": 0.16, "learning_rate": 1.9179438820987645e-05, "loss": 0.6281, "step": 382 }, { "epoch": 0.16, "learning_rate": 1.917417654697363e-05, "loss": 0.6828, "step": 383 }, { "epoch": 0.16, "learning_rate": 1.9168898179944994e-05, "loss": 0.6829, "step": 384 }, { "epoch": 0.16, "learning_rate": 1.9163603729160854e-05, "loss": 0.7762, "step": 385 }, { "epoch": 0.16, "learning_rate": 1.9158293203908552e-05, "loss": 0.6461, "step": 386 }, { "epoch": 0.16, "learning_rate": 1.9152966613503627e-05, "loss": 0.6883, "step": 387 }, { "epoch": 0.16, "learning_rate": 1.914762396728979e-05, "loss": 0.7082, "step": 388 }, { "epoch": 0.16, "learning_rate": 1.914226527463892e-05, "loss": 0.7298, "step": 389 }, { "epoch": 0.16, "learning_rate": 1.9136890544951046e-05, "loss": 0.6838, "step": 390 }, { "epoch": 0.16, "learning_rate": 1.9131499787654334e-05, "loss": 0.6955, "step": 391 }, { "epoch": 0.16, "learning_rate": 1.912609301220505e-05, "loss": 0.6945, "step": 392 }, { "epoch": 0.16, "learning_rate": 1.912067022808757e-05, "loss": 0.6849, "step": 393 }, { "epoch": 0.16, "learning_rate": 1.9115231444814356e-05, "loss": 0.672, "step": 394 }, { "epoch": 0.16, "learning_rate": 1.910977667192592e-05, "loss": 0.7544, "step": 395 }, { "epoch": 0.16, "learning_rate": 1.9104305918990832e-05, "loss": 0.6818, "step": 396 }, { "epoch": 0.16, "learning_rate": 1.9098819195605697e-05, "loss": 0.6401, "step": 397 }, { "epoch": 0.16, "learning_rate": 1.9093316511395128e-05, "loss": 0.8019, "step": 398 }, { "epoch": 0.16, "learning_rate": 1.908779787601174e-05, "loss": 0.6541, "step": 399 }, { "epoch": 0.16, "learning_rate": 1.908226329913612e-05, "loss": 0.7466, "step": 400 }, { "epoch": 0.16, "learning_rate": 1.907671279047683e-05, "loss": 0.6918, "step": 401 }, { "epoch": 0.16, "learning_rate": 1.9071146359770384e-05, "loss": 0.6416, "step": 402 }, { "epoch": 0.16, "learning_rate": 1.9065564016781204e-05, "loss": 0.7059, "step": 403 }, { "epoch": 0.17, "learning_rate": 1.9059965771301644e-05, "loss": 0.6869, "step": 404 }, { "epoch": 0.17, "learning_rate": 1.9054351633151945e-05, "loss": 0.6925, "step": 405 }, { "epoch": 0.17, "learning_rate": 1.9048721612180232e-05, "loss": 0.6839, "step": 406 }, { "epoch": 0.17, "learning_rate": 1.9043075718262485e-05, "loss": 0.7487, "step": 407 }, { "epoch": 0.17, "learning_rate": 1.9037413961302534e-05, "loss": 0.6022, "step": 408 }, { "epoch": 0.17, "learning_rate": 1.9031736351232025e-05, "loss": 0.6931, "step": 409 }, { "epoch": 0.17, "learning_rate": 1.9026042898010428e-05, "loss": 0.6718, "step": 410 }, { "epoch": 0.17, "learning_rate": 1.9020333611624993e-05, "loss": 0.7025, "step": 411 }, { "epoch": 0.17, "learning_rate": 1.9014608502090744e-05, "loss": 0.6595, "step": 412 }, { "epoch": 0.17, "learning_rate": 1.9008867579450472e-05, "loss": 0.6941, "step": 413 }, { "epoch": 0.17, "learning_rate": 1.9003110853774694e-05, "loss": 0.7194, "step": 414 }, { "epoch": 0.17, "learning_rate": 1.8997338335161656e-05, "loss": 0.7182, "step": 415 }, { "epoch": 0.17, "learning_rate": 1.89915500337373e-05, "loss": 0.6861, "step": 416 }, { "epoch": 0.17, "learning_rate": 1.8985745959655268e-05, "loss": 0.7109, "step": 417 }, { "epoch": 0.17, "learning_rate": 1.8979926123096858e-05, "loss": 0.686, "step": 418 }, { "epoch": 0.17, "learning_rate": 1.8974090534271013e-05, "loss": 0.6813, "step": 419 }, { "epoch": 0.17, "learning_rate": 1.896823920341432e-05, "loss": 0.6497, "step": 420 }, { "epoch": 0.17, "learning_rate": 1.8962372140790984e-05, "loss": 0.6652, "step": 421 }, { "epoch": 0.17, "learning_rate": 1.895648935669278e-05, "loss": 0.6616, "step": 422 }, { "epoch": 0.17, "learning_rate": 1.8950590861439098e-05, "loss": 0.6319, "step": 423 }, { "epoch": 0.17, "learning_rate": 1.8944676665376858e-05, "loss": 0.6901, "step": 424 }, { "epoch": 0.17, "learning_rate": 1.8938746778880535e-05, "loss": 0.6906, "step": 425 }, { "epoch": 0.17, "learning_rate": 1.8932801212352124e-05, "loss": 0.6569, "step": 426 }, { "epoch": 0.17, "learning_rate": 1.8926839976221128e-05, "loss": 0.6184, "step": 427 }, { "epoch": 0.17, "learning_rate": 1.8920863080944534e-05, "loss": 0.6378, "step": 428 }, { "epoch": 0.18, "learning_rate": 1.8914870537006805e-05, "loss": 0.6651, "step": 429 }, { "epoch": 0.18, "learning_rate": 1.8908862354919843e-05, "loss": 0.7356, "step": 430 }, { "epoch": 0.18, "learning_rate": 1.8902838545222987e-05, "loss": 0.6437, "step": 431 }, { "epoch": 0.18, "learning_rate": 1.8896799118482995e-05, "loss": 0.7163, "step": 432 }, { "epoch": 0.18, "learning_rate": 1.889074408529401e-05, "loss": 0.6933, "step": 433 }, { "epoch": 0.18, "learning_rate": 1.888467345627756e-05, "loss": 0.7284, "step": 434 }, { "epoch": 0.18, "learning_rate": 1.887858724208252e-05, "loss": 0.6641, "step": 435 }, { "epoch": 0.18, "learning_rate": 1.8872485453385124e-05, "loss": 0.6717, "step": 436 }, { "epoch": 0.18, "learning_rate": 1.88663681008889e-05, "loss": 0.7557, "step": 437 }, { "epoch": 0.18, "learning_rate": 1.8860235195324695e-05, "loss": 0.707, "step": 438 }, { "epoch": 0.18, "learning_rate": 1.8854086747450636e-05, "loss": 0.6398, "step": 439 }, { "epoch": 0.18, "learning_rate": 1.8847922768052105e-05, "loss": 0.7226, "step": 440 }, { "epoch": 0.18, "learning_rate": 1.8841743267941746e-05, "loss": 0.6508, "step": 441 }, { "epoch": 0.18, "learning_rate": 1.8835548257959413e-05, "loss": 0.7045, "step": 442 }, { "epoch": 0.18, "learning_rate": 1.882933774897217e-05, "loss": 0.7008, "step": 443 }, { "epoch": 0.18, "learning_rate": 1.8823111751874277e-05, "loss": 0.7322, "step": 444 }, { "epoch": 0.18, "learning_rate": 1.8816870277587155e-05, "loss": 0.7538, "step": 445 }, { "epoch": 0.18, "learning_rate": 1.881061333705937e-05, "loss": 0.6776, "step": 446 }, { "epoch": 0.18, "learning_rate": 1.8804340941266638e-05, "loss": 0.6401, "step": 447 }, { "epoch": 0.18, "learning_rate": 1.879805310121176e-05, "loss": 0.6541, "step": 448 }, { "epoch": 0.18, "learning_rate": 1.879174982792465e-05, "loss": 0.8082, "step": 449 }, { "epoch": 0.18, "learning_rate": 1.8785431132462278e-05, "loss": 0.5973, "step": 450 }, { "epoch": 0.18, "learning_rate": 1.8779097025908684e-05, "loss": 0.6512, "step": 451 }, { "epoch": 0.18, "learning_rate": 1.8772747519374927e-05, "loss": 0.7673, "step": 452 }, { "epoch": 0.19, "learning_rate": 1.8766382623999094e-05, "loss": 0.8369, "step": 453 }, { "epoch": 0.19, "learning_rate": 1.8760002350946244e-05, "loss": 0.6954, "step": 454 }, { "epoch": 0.19, "learning_rate": 1.875360671140844e-05, "loss": 0.7501, "step": 455 }, { "epoch": 0.19, "learning_rate": 1.8747195716604675e-05, "loss": 0.6257, "step": 456 }, { "epoch": 0.19, "learning_rate": 1.8740769377780893e-05, "loss": 0.821, "step": 457 }, { "epoch": 0.19, "learning_rate": 1.873432770620995e-05, "loss": 0.6851, "step": 458 }, { "epoch": 0.19, "learning_rate": 1.8727870713191593e-05, "loss": 0.673, "step": 459 }, { "epoch": 0.19, "learning_rate": 1.872139841005246e-05, "loss": 0.74, "step": 460 }, { "epoch": 0.19, "learning_rate": 1.8714910808146024e-05, "loss": 0.807, "step": 461 }, { "epoch": 0.19, "learning_rate": 1.8708407918852608e-05, "loss": 0.6937, "step": 462 }, { "epoch": 0.19, "learning_rate": 1.8701889753579356e-05, "loss": 0.6517, "step": 463 }, { "epoch": 0.19, "learning_rate": 1.8695356323760197e-05, "loss": 0.7829, "step": 464 }, { "epoch": 0.19, "learning_rate": 1.868880764085584e-05, "loss": 0.7234, "step": 465 }, { "epoch": 0.19, "learning_rate": 1.8682243716353754e-05, "loss": 0.7447, "step": 466 }, { "epoch": 0.19, "learning_rate": 1.8675664561768144e-05, "loss": 0.6787, "step": 467 }, { "epoch": 0.19, "learning_rate": 1.8669070188639924e-05, "loss": 0.6403, "step": 468 }, { "epoch": 0.19, "learning_rate": 1.866246060853672e-05, "loss": 0.6881, "step": 469 }, { "epoch": 0.19, "learning_rate": 1.8655835833052808e-05, "loss": 0.7213, "step": 470 }, { "epoch": 0.19, "learning_rate": 1.8649195873809143e-05, "loss": 0.7371, "step": 471 }, { "epoch": 0.19, "learning_rate": 1.8642540742453302e-05, "loss": 0.6393, "step": 472 }, { "epoch": 0.19, "learning_rate": 1.863587045065949e-05, "loss": 0.5962, "step": 473 }, { "epoch": 0.19, "learning_rate": 1.8629185010128478e-05, "loss": 0.6924, "step": 474 }, { "epoch": 0.19, "learning_rate": 1.862248443258764e-05, "loss": 0.618, "step": 475 }, { "epoch": 0.19, "learning_rate": 1.8615768729790893e-05, "loss": 0.6643, "step": 476 }, { "epoch": 0.2, "learning_rate": 1.8609037913518676e-05, "loss": 0.7445, "step": 477 }, { "epoch": 0.2, "learning_rate": 1.8602291995577957e-05, "loss": 0.7095, "step": 478 }, { "epoch": 0.2, "learning_rate": 1.8595530987802177e-05, "loss": 0.6985, "step": 479 }, { "epoch": 0.2, "learning_rate": 1.8588754902051262e-05, "loss": 0.7014, "step": 480 }, { "epoch": 0.2, "learning_rate": 1.8581963750211577e-05, "loss": 0.7189, "step": 481 }, { "epoch": 0.2, "learning_rate": 1.857515754419592e-05, "loss": 0.719, "step": 482 }, { "epoch": 0.2, "learning_rate": 1.8568336295943498e-05, "loss": 0.6508, "step": 483 }, { "epoch": 0.2, "learning_rate": 1.8561500017419902e-05, "loss": 0.7039, "step": 484 }, { "epoch": 0.2, "learning_rate": 1.8554648720617086e-05, "loss": 0.7891, "step": 485 }, { "epoch": 0.2, "learning_rate": 1.8547782417553355e-05, "loss": 0.7368, "step": 486 }, { "epoch": 0.2, "learning_rate": 1.8540901120273332e-05, "loss": 0.6893, "step": 487 }, { "epoch": 0.2, "learning_rate": 1.8534004840847943e-05, "loss": 0.6616, "step": 488 }, { "epoch": 0.2, "learning_rate": 1.8527093591374397e-05, "loss": 0.7322, "step": 489 }, { "epoch": 0.2, "learning_rate": 1.8520167383976168e-05, "loss": 0.749, "step": 490 }, { "epoch": 0.2, "learning_rate": 1.8513226230802958e-05, "loss": 0.6647, "step": 491 }, { "epoch": 0.2, "learning_rate": 1.850627014403069e-05, "loss": 0.6962, "step": 492 }, { "epoch": 0.2, "learning_rate": 1.8499299135861488e-05, "loss": 0.6822, "step": 493 }, { "epoch": 0.2, "learning_rate": 1.849231321852364e-05, "loss": 0.6693, "step": 494 }, { "epoch": 0.2, "learning_rate": 1.8485312404271604e-05, "loss": 0.6608, "step": 495 }, { "epoch": 0.2, "learning_rate": 1.8478296705385953e-05, "loss": 0.6914, "step": 496 }, { "epoch": 0.2, "learning_rate": 1.8471266134173377e-05, "loss": 0.6816, "step": 497 }, { "epoch": 0.2, "learning_rate": 1.8464220702966656e-05, "loss": 0.7194, "step": 498 }, { "epoch": 0.2, "learning_rate": 1.8457160424124637e-05, "loss": 0.6852, "step": 499 }, { "epoch": 0.2, "learning_rate": 1.8450085310032206e-05, "loss": 0.6765, "step": 500 }, { "epoch": 0.2, "learning_rate": 1.8442995373100282e-05, "loss": 0.751, "step": 501 }, { "epoch": 0.21, "learning_rate": 1.8435890625765776e-05, "loss": 0.7049, "step": 502 }, { "epoch": 0.21, "learning_rate": 1.8428771080491582e-05, "loss": 0.6654, "step": 503 }, { "epoch": 0.21, "learning_rate": 1.8421636749766563e-05, "loss": 0.7253, "step": 504 }, { "epoch": 0.21, "learning_rate": 1.8414487646105496e-05, "loss": 0.7771, "step": 505 }, { "epoch": 0.21, "learning_rate": 1.8407323782049093e-05, "loss": 0.7697, "step": 506 }, { "epoch": 0.21, "learning_rate": 1.840014517016395e-05, "loss": 0.7785, "step": 507 }, { "epoch": 0.21, "learning_rate": 1.8392951823042525e-05, "loss": 0.6777, "step": 508 }, { "epoch": 0.21, "learning_rate": 1.8385743753303144e-05, "loss": 0.6817, "step": 509 }, { "epoch": 0.21, "learning_rate": 1.8378520973589937e-05, "loss": 0.6524, "step": 510 }, { "epoch": 0.21, "learning_rate": 1.837128349657285e-05, "loss": 0.663, "step": 511 }, { "epoch": 0.21, "learning_rate": 1.8364031334947612e-05, "loss": 0.5611, "step": 512 }, { "epoch": 0.21, "learning_rate": 1.8356764501435704e-05, "loss": 0.6298, "step": 513 }, { "epoch": 0.21, "learning_rate": 1.8349483008784346e-05, "loss": 0.6959, "step": 514 }, { "epoch": 0.21, "learning_rate": 1.8342186869766475e-05, "loss": 0.6743, "step": 515 }, { "epoch": 0.21, "learning_rate": 1.833487609718072e-05, "loss": 0.7166, "step": 516 }, { "epoch": 0.21, "learning_rate": 1.832755070385138e-05, "loss": 0.7456, "step": 517 }, { "epoch": 0.21, "learning_rate": 1.8320210702628397e-05, "loss": 0.6953, "step": 518 }, { "epoch": 0.21, "learning_rate": 1.8312856106387343e-05, "loss": 0.7597, "step": 519 }, { "epoch": 0.21, "learning_rate": 1.8305486928029383e-05, "loss": 0.7405, "step": 520 }, { "epoch": 0.21, "learning_rate": 1.8298103180481276e-05, "loss": 0.6394, "step": 521 }, { "epoch": 0.21, "learning_rate": 1.8290704876695325e-05, "loss": 0.6471, "step": 522 }, { "epoch": 0.21, "learning_rate": 1.828329202964937e-05, "loss": 0.7252, "step": 523 }, { "epoch": 0.21, "learning_rate": 1.8275864652346772e-05, "loss": 0.6842, "step": 524 }, { "epoch": 0.21, "learning_rate": 1.8268422757816366e-05, "loss": 0.673, "step": 525 }, { "epoch": 0.22, "learning_rate": 1.826096635911246e-05, "loss": 0.6223, "step": 526 }, { "epoch": 0.22, "learning_rate": 1.8253495469314803e-05, "loss": 0.7223, "step": 527 }, { "epoch": 0.22, "learning_rate": 1.8246010101528566e-05, "loss": 0.7398, "step": 528 }, { "epoch": 0.22, "learning_rate": 1.8238510268884316e-05, "loss": 0.7027, "step": 529 }, { "epoch": 0.22, "learning_rate": 1.823099598453799e-05, "loss": 0.7069, "step": 530 }, { "epoch": 0.22, "learning_rate": 1.8223467261670885e-05, "loss": 0.6365, "step": 531 }, { "epoch": 0.22, "learning_rate": 1.8215924113489613e-05, "loss": 0.6707, "step": 532 }, { "epoch": 0.22, "learning_rate": 1.8208366553226095e-05, "loss": 0.7026, "step": 533 }, { "epoch": 0.22, "learning_rate": 1.820079459413754e-05, "loss": 0.6887, "step": 534 }, { "epoch": 0.22, "learning_rate": 1.8193208249506408e-05, "loss": 0.638, "step": 535 }, { "epoch": 0.22, "learning_rate": 1.8185607532640396e-05, "loss": 0.638, "step": 536 }, { "epoch": 0.22, "learning_rate": 1.817799245687241e-05, "loss": 0.7042, "step": 537 }, { "epoch": 0.22, "learning_rate": 1.8170363035560544e-05, "loss": 0.6734, "step": 538 }, { "epoch": 0.22, "learning_rate": 1.8162719282088064e-05, "loss": 0.6363, "step": 539 }, { "epoch": 0.22, "learning_rate": 1.8155061209863368e-05, "loss": 0.6991, "step": 540 }, { "epoch": 0.22, "learning_rate": 1.814738883231997e-05, "loss": 0.7348, "step": 541 }, { "epoch": 0.22, "learning_rate": 1.8139702162916485e-05, "loss": 0.8031, "step": 542 }, { "epoch": 0.22, "learning_rate": 1.8132001215136595e-05, "loss": 0.6623, "step": 543 }, { "epoch": 0.22, "learning_rate": 1.8124286002489034e-05, "loss": 0.6902, "step": 544 }, { "epoch": 0.22, "learning_rate": 1.8116556538507547e-05, "loss": 0.6894, "step": 545 }, { "epoch": 0.22, "learning_rate": 1.8108812836750887e-05, "loss": 0.6904, "step": 546 }, { "epoch": 0.22, "learning_rate": 1.810105491080278e-05, "loss": 0.6872, "step": 547 }, { "epoch": 0.22, "learning_rate": 1.8093282774271908e-05, "loss": 0.6051, "step": 548 }, { "epoch": 0.22, "learning_rate": 1.8085496440791874e-05, "loss": 0.6974, "step": 549 }, { "epoch": 0.22, "learning_rate": 1.807769592402119e-05, "loss": 0.6279, "step": 550 }, { "epoch": 0.23, "learning_rate": 1.806988123764324e-05, "loss": 0.6083, "step": 551 }, { "epoch": 0.23, "learning_rate": 1.8062052395366275e-05, "loss": 0.7348, "step": 552 }, { "epoch": 0.23, "learning_rate": 1.805420941092337e-05, "loss": 0.6704, "step": 553 }, { "epoch": 0.23, "learning_rate": 1.8046352298072408e-05, "loss": 0.7317, "step": 554 }, { "epoch": 0.23, "learning_rate": 1.8038481070596057e-05, "loss": 0.6518, "step": 555 }, { "epoch": 0.23, "learning_rate": 1.803059574230175e-05, "loss": 0.6152, "step": 556 }, { "epoch": 0.23, "learning_rate": 1.8022696327021645e-05, "loss": 0.6646, "step": 557 }, { "epoch": 0.23, "learning_rate": 1.8014782838612616e-05, "loss": 0.6791, "step": 558 }, { "epoch": 0.23, "learning_rate": 1.8006855290956226e-05, "loss": 0.6175, "step": 559 }, { "epoch": 0.23, "learning_rate": 1.7998913697958693e-05, "loss": 0.6689, "step": 560 }, { "epoch": 0.23, "learning_rate": 1.7990958073550882e-05, "loss": 0.6883, "step": 561 }, { "epoch": 0.23, "learning_rate": 1.7982988431688266e-05, "loss": 0.726, "step": 562 }, { "epoch": 0.23, "learning_rate": 1.79750047863509e-05, "loss": 0.7097, "step": 563 }, { "epoch": 0.23, "learning_rate": 1.7967007151543425e-05, "loss": 0.6863, "step": 564 }, { "epoch": 0.23, "learning_rate": 1.7958995541294997e-05, "loss": 0.6354, "step": 565 }, { "epoch": 0.23, "learning_rate": 1.7950969969659303e-05, "loss": 0.5956, "step": 566 }, { "epoch": 0.23, "learning_rate": 1.7942930450714515e-05, "loss": 0.6322, "step": 567 }, { "epoch": 0.23, "learning_rate": 1.7934876998563263e-05, "loss": 0.6341, "step": 568 }, { "epoch": 0.23, "learning_rate": 1.7926809627332642e-05, "loss": 0.6763, "step": 569 }, { "epoch": 0.23, "learning_rate": 1.7918728351174136e-05, "loss": 0.6754, "step": 570 }, { "epoch": 0.23, "learning_rate": 1.7910633184263643e-05, "loss": 0.6639, "step": 571 }, { "epoch": 0.23, "learning_rate": 1.790252414080141e-05, "loss": 0.7094, "step": 572 }, { "epoch": 0.23, "learning_rate": 1.7894401235012028e-05, "loss": 0.7198, "step": 573 }, { "epoch": 0.23, "learning_rate": 1.788626448114442e-05, "loss": 0.6438, "step": 574 }, { "epoch": 0.24, "learning_rate": 1.7878113893471786e-05, "loss": 0.6869, "step": 575 }, { "epoch": 0.24, "learning_rate": 1.7869949486291604e-05, "loss": 0.668, "step": 576 }, { "epoch": 0.24, "learning_rate": 1.7861771273925576e-05, "loss": 0.7005, "step": 577 }, { "epoch": 0.24, "learning_rate": 1.7853579270719635e-05, "loss": 0.7021, "step": 578 }, { "epoch": 0.24, "learning_rate": 1.7845373491043905e-05, "loss": 0.6996, "step": 579 }, { "epoch": 0.24, "learning_rate": 1.7837153949292674e-05, "loss": 0.6826, "step": 580 }, { "epoch": 0.24, "learning_rate": 1.7828920659884364e-05, "loss": 0.7017, "step": 581 }, { "epoch": 0.24, "learning_rate": 1.782067363726153e-05, "loss": 0.7321, "step": 582 }, { "epoch": 0.24, "learning_rate": 1.7812412895890792e-05, "loss": 0.6876, "step": 583 }, { "epoch": 0.24, "learning_rate": 1.7804138450262862e-05, "loss": 0.6382, "step": 584 }, { "epoch": 0.24, "learning_rate": 1.779585031489247e-05, "loss": 0.6685, "step": 585 }, { "epoch": 0.24, "learning_rate": 1.7787548504318372e-05, "loss": 0.6965, "step": 586 }, { "epoch": 0.24, "learning_rate": 1.7779233033103306e-05, "loss": 0.6898, "step": 587 }, { "epoch": 0.24, "learning_rate": 1.7770903915833986e-05, "loss": 0.7186, "step": 588 }, { "epoch": 0.24, "learning_rate": 1.7762561167121042e-05, "loss": 0.5961, "step": 589 }, { "epoch": 0.24, "learning_rate": 1.775420480159903e-05, "loss": 0.706, "step": 590 }, { "epoch": 0.24, "learning_rate": 1.7745834833926395e-05, "loss": 0.6752, "step": 591 }, { "epoch": 0.24, "learning_rate": 1.7737451278785435e-05, "loss": 0.5933, "step": 592 }, { "epoch": 0.24, "learning_rate": 1.772905415088228e-05, "loss": 0.6891, "step": 593 }, { "epoch": 0.24, "learning_rate": 1.772064346494688e-05, "loss": 0.7117, "step": 594 }, { "epoch": 0.24, "learning_rate": 1.7712219235732954e-05, "loss": 0.6617, "step": 595 }, { "epoch": 0.24, "learning_rate": 1.7703781478017995e-05, "loss": 0.6343, "step": 596 }, { "epoch": 0.24, "learning_rate": 1.769533020660321e-05, "loss": 0.5853, "step": 597 }, { "epoch": 0.24, "learning_rate": 1.768686543631352e-05, "loss": 0.6899, "step": 598 }, { "epoch": 0.24, "learning_rate": 1.767838718199753e-05, "loss": 0.6233, "step": 599 }, { "epoch": 0.25, "learning_rate": 1.7669895458527487e-05, "loss": 0.7243, "step": 600 }, { "epoch": 0.25, "learning_rate": 1.766139028079927e-05, "loss": 0.6416, "step": 601 }, { "epoch": 0.25, "learning_rate": 1.765287166373237e-05, "loss": 0.6862, "step": 602 }, { "epoch": 0.25, "learning_rate": 1.7644339622269827e-05, "loss": 0.6542, "step": 603 }, { "epoch": 0.25, "learning_rate": 1.7635794171378257e-05, "loss": 0.7114, "step": 604 }, { "epoch": 0.25, "learning_rate": 1.762723532604778e-05, "loss": 0.8002, "step": 605 }, { "epoch": 0.25, "learning_rate": 1.761866310129202e-05, "loss": 0.6922, "step": 606 }, { "epoch": 0.25, "learning_rate": 1.7610077512148073e-05, "loss": 0.7517, "step": 607 }, { "epoch": 0.25, "learning_rate": 1.760147857367647e-05, "loss": 0.6715, "step": 608 }, { "epoch": 0.25, "learning_rate": 1.7592866300961163e-05, "loss": 0.6672, "step": 609 }, { "epoch": 0.25, "learning_rate": 1.7584240709109498e-05, "loss": 0.695, "step": 610 }, { "epoch": 0.25, "learning_rate": 1.757560181325218e-05, "loss": 0.6148, "step": 611 }, { "epoch": 0.25, "learning_rate": 1.7566949628543252e-05, "loss": 0.6443, "step": 612 }, { "epoch": 0.25, "learning_rate": 1.7558284170160073e-05, "loss": 0.7007, "step": 613 }, { "epoch": 0.25, "learning_rate": 1.754960545330328e-05, "loss": 0.6778, "step": 614 }, { "epoch": 0.25, "learning_rate": 1.754091349319677e-05, "loss": 0.6917, "step": 615 }, { "epoch": 0.25, "learning_rate": 1.753220830508767e-05, "loss": 0.656, "step": 616 }, { "epoch": 0.25, "learning_rate": 1.7523489904246312e-05, "loss": 0.71, "step": 617 }, { "epoch": 0.25, "learning_rate": 1.7514758305966206e-05, "loss": 0.668, "step": 618 }, { "epoch": 0.25, "learning_rate": 1.7506013525564012e-05, "loss": 0.6859, "step": 619 }, { "epoch": 0.25, "learning_rate": 1.7497255578379514e-05, "loss": 0.7139, "step": 620 }, { "epoch": 0.25, "learning_rate": 1.7488484479775585e-05, "loss": 0.6734, "step": 621 }, { "epoch": 0.25, "learning_rate": 1.7479700245138184e-05, "loss": 0.6442, "step": 622 }, { "epoch": 0.25, "learning_rate": 1.7470902889876295e-05, "loss": 0.6452, "step": 623 }, { "epoch": 0.26, "learning_rate": 1.746209242942193e-05, "loss": 0.6672, "step": 624 }, { "epoch": 0.26, "learning_rate": 1.745326887923009e-05, "loss": 0.6883, "step": 625 }, { "epoch": 0.26, "learning_rate": 1.7444432254778725e-05, "loss": 0.7069, "step": 626 }, { "epoch": 0.26, "learning_rate": 1.7435582571568736e-05, "loss": 0.7285, "step": 627 }, { "epoch": 0.26, "learning_rate": 1.7426719845123914e-05, "loss": 0.6627, "step": 628 }, { "epoch": 0.26, "learning_rate": 1.7417844090990947e-05, "loss": 0.667, "step": 629 }, { "epoch": 0.26, "learning_rate": 1.7408955324739363e-05, "loss": 0.6565, "step": 630 }, { "epoch": 0.26, "learning_rate": 1.7400053561961523e-05, "loss": 0.7884, "step": 631 }, { "epoch": 0.26, "learning_rate": 1.7391138818272578e-05, "loss": 0.6268, "step": 632 }, { "epoch": 0.26, "learning_rate": 1.738221110931046e-05, "loss": 0.7447, "step": 633 }, { "epoch": 0.26, "learning_rate": 1.737327045073584e-05, "loss": 0.7412, "step": 634 }, { "epoch": 0.26, "learning_rate": 1.73643168582321e-05, "loss": 0.6906, "step": 635 }, { "epoch": 0.26, "learning_rate": 1.7355350347505312e-05, "loss": 0.6412, "step": 636 }, { "epoch": 0.26, "learning_rate": 1.7346370934284214e-05, "loss": 0.715, "step": 637 }, { "epoch": 0.26, "learning_rate": 1.7337378634320173e-05, "loss": 0.7714, "step": 638 }, { "epoch": 0.26, "learning_rate": 1.7328373463387166e-05, "loss": 0.6954, "step": 639 }, { "epoch": 0.26, "learning_rate": 1.7319355437281737e-05, "loss": 0.6106, "step": 640 }, { "epoch": 0.26, "learning_rate": 1.731032457182299e-05, "loss": 0.6845, "step": 641 }, { "epoch": 0.26, "learning_rate": 1.730128088285255e-05, "loss": 0.7755, "step": 642 }, { "epoch": 0.26, "learning_rate": 1.7292224386234534e-05, "loss": 0.5845, "step": 643 }, { "epoch": 0.26, "learning_rate": 1.7283155097855525e-05, "loss": 0.674, "step": 644 }, { "epoch": 0.26, "learning_rate": 1.727407303362455e-05, "loss": 0.6503, "step": 645 }, { "epoch": 0.26, "learning_rate": 1.7264978209473035e-05, "loss": 0.6771, "step": 646 }, { "epoch": 0.26, "learning_rate": 1.725587064135481e-05, "loss": 0.6127, "step": 647 }, { "epoch": 0.26, "learning_rate": 1.724675034524604e-05, "loss": 0.7146, "step": 648 }, { "epoch": 0.27, "learning_rate": 1.7237617337145224e-05, "loss": 0.6379, "step": 649 }, { "epoch": 0.27, "learning_rate": 1.7228471633073164e-05, "loss": 0.6424, "step": 650 }, { "epoch": 0.27, "learning_rate": 1.721931324907293e-05, "loss": 0.6366, "step": 651 }, { "epoch": 0.27, "learning_rate": 1.7210142201209825e-05, "loss": 0.712, "step": 652 }, { "epoch": 0.27, "learning_rate": 1.7200958505571386e-05, "loss": 0.6458, "step": 653 }, { "epoch": 0.27, "learning_rate": 1.719176217826732e-05, "loss": 0.67, "step": 654 }, { "epoch": 0.27, "learning_rate": 1.71825532354295e-05, "loss": 0.6944, "step": 655 }, { "epoch": 0.27, "learning_rate": 1.7173331693211922e-05, "loss": 0.7089, "step": 656 }, { "epoch": 0.27, "learning_rate": 1.7164097567790693e-05, "loss": 0.6424, "step": 657 }, { "epoch": 0.27, "learning_rate": 1.7154850875363987e-05, "loss": 0.6755, "step": 658 }, { "epoch": 0.27, "learning_rate": 1.7145591632152025e-05, "loss": 0.6924, "step": 659 }, { "epoch": 0.27, "learning_rate": 1.7136319854397037e-05, "loss": 0.6625, "step": 660 }, { "epoch": 0.27, "learning_rate": 1.712703555836325e-05, "loss": 0.6687, "step": 661 }, { "epoch": 0.27, "learning_rate": 1.7117738760336846e-05, "loss": 0.6443, "step": 662 }, { "epoch": 0.27, "learning_rate": 1.7108429476625937e-05, "loss": 0.7328, "step": 663 }, { "epoch": 0.27, "learning_rate": 1.7099107723560537e-05, "loss": 0.5872, "step": 664 }, { "epoch": 0.27, "learning_rate": 1.708977351749254e-05, "loss": 0.6595, "step": 665 }, { "epoch": 0.27, "learning_rate": 1.7080426874795666e-05, "loss": 0.6828, "step": 666 }, { "epoch": 0.27, "learning_rate": 1.7071067811865477e-05, "loss": 0.6646, "step": 667 }, { "epoch": 0.27, "learning_rate": 1.7061696345119304e-05, "loss": 0.7373, "step": 668 }, { "epoch": 0.27, "learning_rate": 1.7052312490996237e-05, "loss": 0.7879, "step": 669 }, { "epoch": 0.27, "learning_rate": 1.7042916265957107e-05, "loss": 0.6031, "step": 670 }, { "epoch": 0.27, "learning_rate": 1.703350768648443e-05, "loss": 0.6827, "step": 671 }, { "epoch": 0.27, "learning_rate": 1.702408676908241e-05, "loss": 0.6262, "step": 672 }, { "epoch": 0.28, "learning_rate": 1.701465353027688e-05, "loss": 0.7102, "step": 673 }, { "epoch": 0.28, "learning_rate": 1.7005207986615293e-05, "loss": 0.6267, "step": 674 }, { "epoch": 0.28, "learning_rate": 1.699575015466669e-05, "loss": 0.7388, "step": 675 }, { "epoch": 0.28, "learning_rate": 1.698628005102166e-05, "loss": 0.7303, "step": 676 }, { "epoch": 0.28, "learning_rate": 1.6976797692292325e-05, "loss": 0.6721, "step": 677 }, { "epoch": 0.28, "learning_rate": 1.6967303095112297e-05, "loss": 0.6575, "step": 678 }, { "epoch": 0.28, "learning_rate": 1.695779627613667e-05, "loss": 0.7179, "step": 679 }, { "epoch": 0.28, "learning_rate": 1.6948277252041957e-05, "loss": 0.6756, "step": 680 }, { "epoch": 0.28, "learning_rate": 1.69387460395261e-05, "loss": 0.6721, "step": 681 }, { "epoch": 0.28, "learning_rate": 1.6929202655308414e-05, "loss": 0.6859, "step": 682 }, { "epoch": 0.28, "learning_rate": 1.691964711612956e-05, "loss": 0.6197, "step": 683 }, { "epoch": 0.28, "learning_rate": 1.691007943875153e-05, "loss": 0.6698, "step": 684 }, { "epoch": 0.28, "learning_rate": 1.6900499639957596e-05, "loss": 0.6647, "step": 685 }, { "epoch": 0.28, "learning_rate": 1.689090773655231e-05, "loss": 0.7312, "step": 686 }, { "epoch": 0.28, "learning_rate": 1.688130374536144e-05, "loss": 0.6452, "step": 687 }, { "epoch": 0.28, "learning_rate": 1.6871687683231975e-05, "loss": 0.6965, "step": 688 }, { "epoch": 0.28, "learning_rate": 1.686205956703206e-05, "loss": 0.6312, "step": 689 }, { "epoch": 0.28, "learning_rate": 1.6852419413651003e-05, "loss": 0.6391, "step": 690 }, { "epoch": 0.28, "learning_rate": 1.6842767239999214e-05, "loss": 0.7419, "step": 691 }, { "epoch": 0.28, "learning_rate": 1.6833103063008194e-05, "loss": 0.6088, "step": 692 }, { "epoch": 0.28, "learning_rate": 1.6823426899630498e-05, "loss": 0.625, "step": 693 }, { "epoch": 0.28, "learning_rate": 1.681373876683971e-05, "loss": 0.7497, "step": 694 }, { "epoch": 0.28, "learning_rate": 1.680403868163041e-05, "loss": 0.7308, "step": 695 }, { "epoch": 0.28, "learning_rate": 1.6794326661018136e-05, "loss": 0.698, "step": 696 }, { "epoch": 0.28, "learning_rate": 1.6784602722039376e-05, "loss": 0.6865, "step": 697 }, { "epoch": 0.29, "learning_rate": 1.6774866881751518e-05, "loss": 0.7627, "step": 698 }, { "epoch": 0.29, "learning_rate": 1.6765119157232824e-05, "loss": 0.6541, "step": 699 }, { "epoch": 0.29, "learning_rate": 1.6755359565582408e-05, "loss": 0.6987, "step": 700 }, { "epoch": 0.29, "learning_rate": 1.6745588123920197e-05, "loss": 0.6267, "step": 701 }, { "epoch": 0.29, "learning_rate": 1.6735804849386914e-05, "loss": 0.6864, "step": 702 }, { "epoch": 0.29, "learning_rate": 1.6726009759144023e-05, "loss": 0.6408, "step": 703 }, { "epoch": 0.29, "learning_rate": 1.6716202870373726e-05, "loss": 0.7263, "step": 704 }, { "epoch": 0.29, "learning_rate": 1.670638420027892e-05, "loss": 0.6588, "step": 705 }, { "epoch": 0.29, "learning_rate": 1.6696553766083167e-05, "loss": 0.7192, "step": 706 }, { "epoch": 0.29, "learning_rate": 1.668671158503067e-05, "loss": 0.7439, "step": 707 }, { "epoch": 0.29, "learning_rate": 1.667685767438622e-05, "loss": 0.6251, "step": 708 }, { "epoch": 0.29, "learning_rate": 1.6666992051435215e-05, "loss": 0.5684, "step": 709 }, { "epoch": 0.29, "learning_rate": 1.6657114733483564e-05, "loss": 0.7041, "step": 710 }, { "epoch": 0.29, "learning_rate": 1.6647225737857716e-05, "loss": 0.6618, "step": 711 }, { "epoch": 0.29, "learning_rate": 1.6637325081904595e-05, "loss": 0.6626, "step": 712 }, { "epoch": 0.29, "learning_rate": 1.662741278299158e-05, "loss": 0.7749, "step": 713 }, { "epoch": 0.29, "learning_rate": 1.6617488858506478e-05, "loss": 0.6481, "step": 714 }, { "epoch": 0.29, "learning_rate": 1.6607553325857473e-05, "loss": 0.6862, "step": 715 }, { "epoch": 0.29, "learning_rate": 1.659760620247313e-05, "loss": 0.5548, "step": 716 }, { "epoch": 0.29, "learning_rate": 1.6587647505802342e-05, "loss": 0.6451, "step": 717 }, { "epoch": 0.29, "learning_rate": 1.65776772533143e-05, "loss": 0.6363, "step": 718 }, { "epoch": 0.29, "learning_rate": 1.6567695462498465e-05, "loss": 0.631, "step": 719 }, { "epoch": 0.29, "learning_rate": 1.6557702150864538e-05, "loss": 0.6345, "step": 720 }, { "epoch": 0.29, "learning_rate": 1.6547697335942438e-05, "loss": 0.71, "step": 721 }, { "epoch": 0.3, "learning_rate": 1.6537681035282247e-05, "loss": 0.6811, "step": 722 }, { "epoch": 0.3, "learning_rate": 1.652765326645421e-05, "loss": 0.6662, "step": 723 }, { "epoch": 0.3, "learning_rate": 1.6517614047048683e-05, "loss": 0.651, "step": 724 }, { "epoch": 0.3, "learning_rate": 1.6507563394676106e-05, "loss": 0.6302, "step": 725 }, { "epoch": 0.3, "learning_rate": 1.6497501326966974e-05, "loss": 0.7978, "step": 726 }, { "epoch": 0.3, "learning_rate": 1.6487427861571815e-05, "loss": 0.6405, "step": 727 }, { "epoch": 0.3, "learning_rate": 1.6477343016161138e-05, "loss": 0.645, "step": 728 }, { "epoch": 0.3, "learning_rate": 1.646724680842543e-05, "loss": 0.6541, "step": 729 }, { "epoch": 0.3, "learning_rate": 1.6457139256075084e-05, "loss": 0.6866, "step": 730 }, { "epoch": 0.3, "learning_rate": 1.6447020376840423e-05, "loss": 0.6497, "step": 731 }, { "epoch": 0.3, "learning_rate": 1.6436890188471622e-05, "loss": 0.6977, "step": 732 }, { "epoch": 0.3, "learning_rate": 1.6426748708738696e-05, "loss": 0.6914, "step": 733 }, { "epoch": 0.3, "learning_rate": 1.6416595955431468e-05, "loss": 0.6886, "step": 734 }, { "epoch": 0.3, "learning_rate": 1.640643194635954e-05, "loss": 0.671, "step": 735 }, { "epoch": 0.3, "learning_rate": 1.6396256699352252e-05, "loss": 0.7131, "step": 736 }, { "epoch": 0.3, "learning_rate": 1.6386070232258667e-05, "loss": 0.6452, "step": 737 }, { "epoch": 0.3, "learning_rate": 1.6375872562947516e-05, "loss": 0.6659, "step": 738 }, { "epoch": 0.3, "learning_rate": 1.6365663709307193e-05, "loss": 0.6776, "step": 739 }, { "epoch": 0.3, "learning_rate": 1.635544368924571e-05, "loss": 0.6921, "step": 740 }, { "epoch": 0.3, "learning_rate": 1.634521252069065e-05, "loss": 0.7061, "step": 741 }, { "epoch": 0.3, "learning_rate": 1.6334970221589182e-05, "loss": 0.6026, "step": 742 }, { "epoch": 0.3, "learning_rate": 1.632471680990797e-05, "loss": 0.6983, "step": 743 }, { "epoch": 0.3, "learning_rate": 1.6314452303633193e-05, "loss": 0.6635, "step": 744 }, { "epoch": 0.3, "learning_rate": 1.6304176720770482e-05, "loss": 0.6981, "step": 745 }, { "epoch": 0.3, "learning_rate": 1.6293890079344892e-05, "loss": 0.662, "step": 746 }, { "epoch": 0.31, "learning_rate": 1.6283592397400895e-05, "loss": 0.6246, "step": 747 }, { "epoch": 0.31, "learning_rate": 1.6273283693002312e-05, "loss": 0.6972, "step": 748 }, { "epoch": 0.31, "learning_rate": 1.6262963984232307e-05, "loss": 0.6705, "step": 749 }, { "epoch": 0.31, "learning_rate": 1.625263328919335e-05, "loss": 0.6856, "step": 750 }, { "epoch": 0.31, "learning_rate": 1.624229162600717e-05, "loss": 0.6791, "step": 751 }, { "epoch": 0.31, "learning_rate": 1.6231939012814758e-05, "loss": 0.7502, "step": 752 }, { "epoch": 0.31, "learning_rate": 1.6221575467776292e-05, "loss": 0.7194, "step": 753 }, { "epoch": 0.31, "learning_rate": 1.6211201009071134e-05, "loss": 0.7124, "step": 754 }, { "epoch": 0.31, "learning_rate": 1.6200815654897798e-05, "loss": 0.6352, "step": 755 }, { "epoch": 0.31, "learning_rate": 1.6190419423473897e-05, "loss": 0.6864, "step": 756 }, { "epoch": 0.31, "learning_rate": 1.6180012333036133e-05, "loss": 0.687, "step": 757 }, { "epoch": 0.31, "learning_rate": 1.6169594401840255e-05, "loss": 0.6668, "step": 758 }, { "epoch": 0.31, "learning_rate": 1.6159165648161026e-05, "loss": 0.6435, "step": 759 }, { "epoch": 0.31, "learning_rate": 1.6148726090292196e-05, "loss": 0.5814, "step": 760 }, { "epoch": 0.31, "learning_rate": 1.6138275746546467e-05, "loss": 0.6166, "step": 761 }, { "epoch": 0.31, "learning_rate": 1.6127814635255462e-05, "loss": 0.6986, "step": 762 }, { "epoch": 0.31, "learning_rate": 1.6117342774769687e-05, "loss": 0.6934, "step": 763 }, { "epoch": 0.31, "learning_rate": 1.6106860183458514e-05, "loss": 0.7462, "step": 764 }, { "epoch": 0.31, "learning_rate": 1.6096366879710127e-05, "loss": 0.6056, "step": 765 }, { "epoch": 0.31, "learning_rate": 1.608586288193151e-05, "loss": 0.7394, "step": 766 }, { "epoch": 0.31, "learning_rate": 1.6075348208548395e-05, "loss": 0.683, "step": 767 }, { "epoch": 0.31, "learning_rate": 1.6064822878005262e-05, "loss": 0.7596, "step": 768 }, { "epoch": 0.31, "learning_rate": 1.605428690876526e-05, "loss": 0.6305, "step": 769 }, { "epoch": 0.31, "learning_rate": 1.6043740319310218e-05, "loss": 0.6892, "step": 770 }, { "epoch": 0.32, "learning_rate": 1.6033183128140585e-05, "loss": 0.6026, "step": 771 }, { "epoch": 0.32, "learning_rate": 1.602261535377542e-05, "loss": 0.7555, "step": 772 }, { "epoch": 0.32, "learning_rate": 1.6012037014752322e-05, "loss": 0.683, "step": 773 }, { "epoch": 0.32, "learning_rate": 1.600144812962745e-05, "loss": 0.6646, "step": 774 }, { "epoch": 0.32, "learning_rate": 1.5990848716975447e-05, "loss": 0.6627, "step": 775 }, { "epoch": 0.32, "learning_rate": 1.5980238795389424e-05, "loss": 0.6583, "step": 776 }, { "epoch": 0.32, "learning_rate": 1.5969618383480926e-05, "loss": 0.6748, "step": 777 }, { "epoch": 0.32, "learning_rate": 1.595898749987991e-05, "loss": 0.6598, "step": 778 }, { "epoch": 0.32, "learning_rate": 1.5948346163234694e-05, "loss": 0.7278, "step": 779 }, { "epoch": 0.32, "learning_rate": 1.5937694392211923e-05, "loss": 0.6913, "step": 780 }, { "epoch": 0.32, "learning_rate": 1.5927032205496565e-05, "loss": 0.6754, "step": 781 }, { "epoch": 0.32, "learning_rate": 1.5916359621791847e-05, "loss": 0.6236, "step": 782 }, { "epoch": 0.32, "learning_rate": 1.5905676659819232e-05, "loss": 0.6992, "step": 783 }, { "epoch": 0.32, "learning_rate": 1.5894983338318396e-05, "loss": 0.6583, "step": 784 }, { "epoch": 0.32, "learning_rate": 1.5884279676047186e-05, "loss": 0.6383, "step": 785 }, { "epoch": 0.32, "learning_rate": 1.587356569178158e-05, "loss": 0.7256, "step": 786 }, { "epoch": 0.32, "learning_rate": 1.5862841404315675e-05, "loss": 0.6713, "step": 787 }, { "epoch": 0.32, "learning_rate": 1.585210683246163e-05, "loss": 0.7189, "step": 788 }, { "epoch": 0.32, "learning_rate": 1.5841361995049655e-05, "loss": 0.6596, "step": 789 }, { "epoch": 0.32, "learning_rate": 1.5830606910927956e-05, "loss": 0.7346, "step": 790 }, { "epoch": 0.32, "learning_rate": 1.5819841598962722e-05, "loss": 0.8004, "step": 791 }, { "epoch": 0.32, "learning_rate": 1.5809066078038082e-05, "loss": 0.6389, "step": 792 }, { "epoch": 0.32, "learning_rate": 1.5798280367056072e-05, "loss": 0.6678, "step": 793 }, { "epoch": 0.32, "learning_rate": 1.57874844849366e-05, "loss": 0.6333, "step": 794 }, { "epoch": 0.33, "learning_rate": 1.5776678450617426e-05, "loss": 0.6448, "step": 795 }, { "epoch": 0.33, "learning_rate": 1.5765862283054105e-05, "loss": 0.6582, "step": 796 }, { "epoch": 0.33, "learning_rate": 1.5755036001219974e-05, "loss": 0.777, "step": 797 }, { "epoch": 0.33, "learning_rate": 1.5744199624106115e-05, "loss": 0.7066, "step": 798 }, { "epoch": 0.33, "learning_rate": 1.5733353170721316e-05, "loss": 0.7088, "step": 799 }, { "epoch": 0.33, "learning_rate": 1.572249666009204e-05, "loss": 0.7351, "step": 800 }, { "epoch": 0.33, "learning_rate": 1.571163011126239e-05, "loss": 0.7026, "step": 801 }, { "epoch": 0.33, "learning_rate": 1.570075354329408e-05, "loss": 0.6308, "step": 802 }, { "epoch": 0.33, "learning_rate": 1.5689866975266404e-05, "loss": 0.6274, "step": 803 }, { "epoch": 0.33, "learning_rate": 1.5678970426276186e-05, "loss": 0.623, "step": 804 }, { "epoch": 0.33, "learning_rate": 1.5668063915437768e-05, "loss": 0.6752, "step": 805 }, { "epoch": 0.33, "learning_rate": 1.5657147461882965e-05, "loss": 0.7502, "step": 806 }, { "epoch": 0.33, "learning_rate": 1.5646221084761032e-05, "loss": 0.6367, "step": 807 }, { "epoch": 0.33, "learning_rate": 1.5635284803238632e-05, "loss": 0.5682, "step": 808 }, { "epoch": 0.33, "learning_rate": 1.56243386364998e-05, "loss": 0.6257, "step": 809 }, { "epoch": 0.33, "learning_rate": 1.5613382603745918e-05, "loss": 0.6056, "step": 810 }, { "epoch": 0.33, "learning_rate": 1.560241672419566e-05, "loss": 0.6451, "step": 811 }, { "epoch": 0.33, "learning_rate": 1.559144101708499e-05, "loss": 0.6364, "step": 812 }, { "epoch": 0.33, "learning_rate": 1.55804555016671e-05, "loss": 0.6185, "step": 813 }, { "epoch": 0.33, "learning_rate": 1.556946019721239e-05, "loss": 0.6231, "step": 814 }, { "epoch": 0.33, "learning_rate": 1.555845512300844e-05, "loss": 0.6298, "step": 815 }, { "epoch": 0.33, "learning_rate": 1.5547440298359948e-05, "loss": 0.7487, "step": 816 }, { "epoch": 0.33, "learning_rate": 1.5536415742588737e-05, "loss": 0.6154, "step": 817 }, { "epoch": 0.33, "learning_rate": 1.5525381475033692e-05, "loss": 0.62, "step": 818 }, { "epoch": 0.33, "learning_rate": 1.551433751505073e-05, "loss": 0.7088, "step": 819 }, { "epoch": 0.34, "learning_rate": 1.550328388201277e-05, "loss": 0.7701, "step": 820 }, { "epoch": 0.34, "learning_rate": 1.549222059530971e-05, "loss": 0.7539, "step": 821 }, { "epoch": 0.34, "learning_rate": 1.5481147674348366e-05, "loss": 0.6378, "step": 822 }, { "epoch": 0.34, "learning_rate": 1.547006513855247e-05, "loss": 0.7523, "step": 823 }, { "epoch": 0.34, "learning_rate": 1.545897300736261e-05, "loss": 0.6113, "step": 824 }, { "epoch": 0.34, "learning_rate": 1.5447871300236207e-05, "loss": 0.6059, "step": 825 }, { "epoch": 0.34, "learning_rate": 1.5436760036647485e-05, "loss": 0.6869, "step": 826 }, { "epoch": 0.34, "learning_rate": 1.5425639236087425e-05, "loss": 0.6053, "step": 827 }, { "epoch": 0.34, "learning_rate": 1.541450891806374e-05, "loss": 0.6317, "step": 828 }, { "epoch": 0.34, "learning_rate": 1.5403369102100837e-05, "loss": 0.6418, "step": 829 }, { "epoch": 0.34, "learning_rate": 1.539221980773979e-05, "loss": 0.6198, "step": 830 }, { "epoch": 0.34, "learning_rate": 1.5381061054538294e-05, "loss": 0.7284, "step": 831 }, { "epoch": 0.34, "learning_rate": 1.5369892862070636e-05, "loss": 0.7141, "step": 832 }, { "epoch": 0.34, "learning_rate": 1.5358715249927663e-05, "loss": 0.6593, "step": 833 }, { "epoch": 0.34, "learning_rate": 1.5347528237716742e-05, "loss": 0.6567, "step": 834 }, { "epoch": 0.34, "learning_rate": 1.533633184506174e-05, "loss": 0.7172, "step": 835 }, { "epoch": 0.34, "learning_rate": 1.5325126091602965e-05, "loss": 0.6276, "step": 836 }, { "epoch": 0.34, "learning_rate": 1.5313910996997156e-05, "loss": 0.6423, "step": 837 }, { "epoch": 0.34, "learning_rate": 1.5302686580917428e-05, "loss": 0.7415, "step": 838 }, { "epoch": 0.34, "learning_rate": 1.5291452863053257e-05, "loss": 0.6518, "step": 839 }, { "epoch": 0.34, "learning_rate": 1.528020986311043e-05, "loss": 0.7427, "step": 840 }, { "epoch": 0.34, "learning_rate": 1.526895760081102e-05, "loss": 0.6607, "step": 841 }, { "epoch": 0.34, "learning_rate": 1.525769609589335e-05, "loss": 0.6617, "step": 842 }, { "epoch": 0.34, "learning_rate": 1.5246425368111944e-05, "loss": 0.7278, "step": 843 }, { "epoch": 0.35, "learning_rate": 1.523514543723751e-05, "loss": 0.6573, "step": 844 }, { "epoch": 0.35, "learning_rate": 1.5223856323056909e-05, "loss": 0.6599, "step": 845 }, { "epoch": 0.35, "learning_rate": 1.5212558045373106e-05, "loss": 0.6769, "step": 846 }, { "epoch": 0.35, "learning_rate": 1.5201250624005133e-05, "loss": 0.7313, "step": 847 }, { "epoch": 0.35, "learning_rate": 1.5189934078788069e-05, "loss": 0.5937, "step": 848 }, { "epoch": 0.35, "learning_rate": 1.5178608429572996e-05, "loss": 0.7483, "step": 849 }, { "epoch": 0.35, "learning_rate": 1.5167273696226965e-05, "loss": 0.6856, "step": 850 }, { "epoch": 0.35, "learning_rate": 1.5155929898632959e-05, "loss": 0.5891, "step": 851 }, { "epoch": 0.35, "learning_rate": 1.5144577056689872e-05, "loss": 0.659, "step": 852 }, { "epoch": 0.35, "learning_rate": 1.513321519031245e-05, "loss": 0.7016, "step": 853 }, { "epoch": 0.35, "learning_rate": 1.512184431943128e-05, "loss": 0.6616, "step": 854 }, { "epoch": 0.35, "learning_rate": 1.5110464463992736e-05, "loss": 0.71, "step": 855 }, { "epoch": 0.35, "learning_rate": 1.5099075643958959e-05, "loss": 0.705, "step": 856 }, { "epoch": 0.35, "learning_rate": 1.5087677879307811e-05, "loss": 0.6308, "step": 857 }, { "epoch": 0.35, "learning_rate": 1.5076271190032845e-05, "loss": 0.6952, "step": 858 }, { "epoch": 0.35, "learning_rate": 1.5064855596143271e-05, "loss": 0.6808, "step": 859 }, { "epoch": 0.35, "learning_rate": 1.5053431117663922e-05, "loss": 0.6553, "step": 860 }, { "epoch": 0.35, "learning_rate": 1.5041997774635206e-05, "loss": 0.6436, "step": 861 }, { "epoch": 0.35, "learning_rate": 1.5030555587113091e-05, "loss": 0.6478, "step": 862 }, { "epoch": 0.35, "learning_rate": 1.5019104575169055e-05, "loss": 0.6779, "step": 863 }, { "epoch": 0.35, "learning_rate": 1.5007644758890059e-05, "loss": 0.672, "step": 864 }, { "epoch": 0.35, "learning_rate": 1.49961761583785e-05, "loss": 0.6943, "step": 865 }, { "epoch": 0.35, "learning_rate": 1.4984698793752193e-05, "loss": 0.6371, "step": 866 }, { "epoch": 0.35, "learning_rate": 1.4973212685144324e-05, "loss": 0.6548, "step": 867 }, { "epoch": 0.35, "learning_rate": 1.4961717852703417e-05, "loss": 0.7141, "step": 868 }, { "epoch": 0.36, "learning_rate": 1.4950214316593295e-05, "loss": 0.6136, "step": 869 }, { "epoch": 0.36, "learning_rate": 1.4938702096993057e-05, "loss": 0.7227, "step": 870 }, { "epoch": 0.36, "learning_rate": 1.4927181214097028e-05, "loss": 0.6827, "step": 871 }, { "epoch": 0.36, "learning_rate": 1.4915651688114733e-05, "loss": 0.7099, "step": 872 }, { "epoch": 0.36, "learning_rate": 1.4904113539270852e-05, "loss": 0.6496, "step": 873 }, { "epoch": 0.36, "learning_rate": 1.48925667878052e-05, "loss": 0.6346, "step": 874 }, { "epoch": 0.36, "learning_rate": 1.4881011453972675e-05, "loss": 0.6683, "step": 875 }, { "epoch": 0.36, "learning_rate": 1.486944755804324e-05, "loss": 0.6045, "step": 876 }, { "epoch": 0.36, "learning_rate": 1.4857875120301866e-05, "loss": 0.6831, "step": 877 }, { "epoch": 0.36, "learning_rate": 1.484629416104851e-05, "loss": 0.561, "step": 878 }, { "epoch": 0.36, "learning_rate": 1.4834704700598084e-05, "loss": 0.6503, "step": 879 }, { "epoch": 0.36, "learning_rate": 1.4823106759280404e-05, "loss": 0.6729, "step": 880 }, { "epoch": 0.36, "learning_rate": 1.4811500357440166e-05, "loss": 0.6476, "step": 881 }, { "epoch": 0.36, "learning_rate": 1.4799885515436912e-05, "loss": 0.6282, "step": 882 }, { "epoch": 0.36, "learning_rate": 1.4788262253644983e-05, "loss": 0.6954, "step": 883 }, { "epoch": 0.36, "learning_rate": 1.4776630592453492e-05, "loss": 0.6332, "step": 884 }, { "epoch": 0.36, "learning_rate": 1.4764990552266287e-05, "loss": 0.6766, "step": 885 }, { "epoch": 0.36, "learning_rate": 1.4753342153501913e-05, "loss": 0.692, "step": 886 }, { "epoch": 0.36, "learning_rate": 1.4741685416593574e-05, "loss": 0.6567, "step": 887 }, { "epoch": 0.36, "learning_rate": 1.4730020361989108e-05, "loss": 0.631, "step": 888 }, { "epoch": 0.36, "learning_rate": 1.4718347010150936e-05, "loss": 0.6719, "step": 889 }, { "epoch": 0.36, "learning_rate": 1.470666538155604e-05, "loss": 0.6715, "step": 890 }, { "epoch": 0.36, "learning_rate": 1.4694975496695918e-05, "loss": 0.6915, "step": 891 }, { "epoch": 0.36, "learning_rate": 1.4683277376076548e-05, "loss": 0.7192, "step": 892 }, { "epoch": 0.37, "learning_rate": 1.467157104021836e-05, "loss": 0.6995, "step": 893 }, { "epoch": 0.37, "learning_rate": 1.4659856509656194e-05, "loss": 0.6105, "step": 894 }, { "epoch": 0.37, "learning_rate": 1.4648133804939257e-05, "loss": 0.6545, "step": 895 }, { "epoch": 0.37, "learning_rate": 1.4636402946631108e-05, "loss": 0.6753, "step": 896 }, { "epoch": 0.37, "learning_rate": 1.4624663955309597e-05, "loss": 0.6135, "step": 897 }, { "epoch": 0.37, "learning_rate": 1.4612916851566851e-05, "loss": 0.6606, "step": 898 }, { "epoch": 0.37, "learning_rate": 1.4601161656009218e-05, "loss": 0.569, "step": 899 }, { "epoch": 0.37, "learning_rate": 1.4589398389257246e-05, "loss": 0.7102, "step": 900 }, { "epoch": 0.37, "learning_rate": 1.4577627071945642e-05, "loss": 0.6408, "step": 901 }, { "epoch": 0.37, "learning_rate": 1.4565847724723225e-05, "loss": 0.6975, "step": 902 }, { "epoch": 0.37, "learning_rate": 1.4554060368252916e-05, "loss": 0.6743, "step": 903 }, { "epoch": 0.37, "learning_rate": 1.454226502321167e-05, "loss": 0.6644, "step": 904 }, { "epoch": 0.37, "learning_rate": 1.4530461710290467e-05, "loss": 0.6601, "step": 905 }, { "epoch": 0.37, "learning_rate": 1.4518650450194261e-05, "loss": 0.6432, "step": 906 }, { "epoch": 0.37, "learning_rate": 1.4506831263641939e-05, "loss": 0.6417, "step": 907 }, { "epoch": 0.37, "learning_rate": 1.4495004171366302e-05, "loss": 0.6346, "step": 908 }, { "epoch": 0.37, "learning_rate": 1.4483169194114014e-05, "loss": 0.6178, "step": 909 }, { "epoch": 0.37, "learning_rate": 1.4471326352645573e-05, "loss": 0.7311, "step": 910 }, { "epoch": 0.37, "learning_rate": 1.445947566773527e-05, "loss": 0.7122, "step": 911 }, { "epoch": 0.37, "learning_rate": 1.4447617160171154e-05, "loss": 0.6745, "step": 912 }, { "epoch": 0.37, "learning_rate": 1.4435750850755001e-05, "loss": 0.7504, "step": 913 }, { "epoch": 0.37, "learning_rate": 1.4423876760302266e-05, "loss": 0.7315, "step": 914 }, { "epoch": 0.37, "learning_rate": 1.4411994909642059e-05, "loss": 0.7143, "step": 915 }, { "epoch": 0.37, "learning_rate": 1.4400105319617102e-05, "loss": 0.7352, "step": 916 }, { "epoch": 0.37, "learning_rate": 1.4388208011083688e-05, "loss": 0.6552, "step": 917 }, { "epoch": 0.38, "learning_rate": 1.4376303004911654e-05, "loss": 0.6561, "step": 918 }, { "epoch": 0.38, "learning_rate": 1.4364390321984334e-05, "loss": 0.6391, "step": 919 }, { "epoch": 0.38, "learning_rate": 1.4352469983198542e-05, "loss": 0.5917, "step": 920 }, { "epoch": 0.38, "learning_rate": 1.4340542009464513e-05, "loss": 0.7887, "step": 921 }, { "epoch": 0.38, "learning_rate": 1.4328606421705868e-05, "loss": 0.7109, "step": 922 }, { "epoch": 0.38, "learning_rate": 1.4316663240859595e-05, "loss": 0.6414, "step": 923 }, { "epoch": 0.38, "learning_rate": 1.4304712487875999e-05, "loss": 0.6761, "step": 924 }, { "epoch": 0.38, "learning_rate": 1.429275418371866e-05, "loss": 0.6634, "step": 925 }, { "epoch": 0.38, "learning_rate": 1.4280788349364414e-05, "loss": 0.6568, "step": 926 }, { "epoch": 0.38, "learning_rate": 1.4268815005803305e-05, "loss": 0.7302, "step": 927 }, { "epoch": 0.38, "learning_rate": 1.4256834174038545e-05, "loss": 0.6097, "step": 928 }, { "epoch": 0.38, "learning_rate": 1.4244845875086486e-05, "loss": 0.7543, "step": 929 }, { "epoch": 0.38, "learning_rate": 1.4232850129976573e-05, "loss": 0.7901, "step": 930 }, { "epoch": 0.38, "learning_rate": 1.4220846959751315e-05, "loss": 0.6433, "step": 931 }, { "epoch": 0.38, "learning_rate": 1.420883638546625e-05, "loss": 0.6497, "step": 932 }, { "epoch": 0.38, "learning_rate": 1.41968184281899e-05, "loss": 0.7194, "step": 933 }, { "epoch": 0.38, "learning_rate": 1.4184793109003734e-05, "loss": 0.5924, "step": 934 }, { "epoch": 0.38, "learning_rate": 1.4172760449002148e-05, "loss": 0.6818, "step": 935 }, { "epoch": 0.38, "learning_rate": 1.4160720469292402e-05, "loss": 0.6699, "step": 936 }, { "epoch": 0.38, "learning_rate": 1.41486731909946e-05, "loss": 0.6364, "step": 937 }, { "epoch": 0.38, "learning_rate": 1.4136618635241655e-05, "loss": 0.6482, "step": 938 }, { "epoch": 0.38, "learning_rate": 1.4124556823179234e-05, "loss": 0.6275, "step": 939 }, { "epoch": 0.38, "learning_rate": 1.4112487775965741e-05, "loss": 0.7372, "step": 940 }, { "epoch": 0.38, "learning_rate": 1.410041151477227e-05, "loss": 0.6012, "step": 941 }, { "epoch": 0.39, "learning_rate": 1.4088328060782573e-05, "loss": 0.7349, "step": 942 }, { "epoch": 0.39, "learning_rate": 1.4076237435193011e-05, "loss": 0.6688, "step": 943 }, { "epoch": 0.39, "learning_rate": 1.4064139659212534e-05, "loss": 0.7441, "step": 944 }, { "epoch": 0.39, "learning_rate": 1.4052034754062626e-05, "loss": 0.7159, "step": 945 }, { "epoch": 0.39, "learning_rate": 1.403992274097729e-05, "loss": 0.6244, "step": 946 }, { "epoch": 0.39, "learning_rate": 1.402780364120298e-05, "loss": 0.6445, "step": 947 }, { "epoch": 0.39, "learning_rate": 1.4015677475998595e-05, "loss": 0.6686, "step": 948 }, { "epoch": 0.39, "learning_rate": 1.4003544266635419e-05, "loss": 0.6924, "step": 949 }, { "epoch": 0.39, "learning_rate": 1.3991404034397102e-05, "loss": 0.6661, "step": 950 }, { "epoch": 0.39, "learning_rate": 1.3979256800579605e-05, "loss": 0.6329, "step": 951 }, { "epoch": 0.39, "learning_rate": 1.3967102586491179e-05, "loss": 0.716, "step": 952 }, { "epoch": 0.39, "learning_rate": 1.3954941413452309e-05, "loss": 0.7043, "step": 953 }, { "epoch": 0.39, "learning_rate": 1.3942773302795697e-05, "loss": 0.6532, "step": 954 }, { "epoch": 0.39, "learning_rate": 1.3930598275866205e-05, "loss": 0.6309, "step": 955 }, { "epoch": 0.39, "learning_rate": 1.3918416354020836e-05, "loss": 0.5948, "step": 956 }, { "epoch": 0.39, "learning_rate": 1.3906227558628684e-05, "loss": 0.6993, "step": 957 }, { "epoch": 0.39, "learning_rate": 1.3894031911070904e-05, "loss": 0.6112, "step": 958 }, { "epoch": 0.39, "learning_rate": 1.3881829432740665e-05, "loss": 0.6158, "step": 959 }, { "epoch": 0.39, "learning_rate": 1.3869620145043123e-05, "loss": 0.6842, "step": 960 }, { "epoch": 0.39, "learning_rate": 1.3857404069395373e-05, "loss": 0.6148, "step": 961 }, { "epoch": 0.39, "learning_rate": 1.3845181227226423e-05, "loss": 0.6307, "step": 962 }, { "epoch": 0.39, "learning_rate": 1.383295163997715e-05, "loss": 0.729, "step": 963 }, { "epoch": 0.39, "learning_rate": 1.3820715329100256e-05, "loss": 0.6533, "step": 964 }, { "epoch": 0.39, "learning_rate": 1.3808472316060251e-05, "loss": 0.6902, "step": 965 }, { "epoch": 0.39, "learning_rate": 1.3796222622333389e-05, "loss": 0.6106, "step": 966 }, { "epoch": 0.4, "learning_rate": 1.3783966269407647e-05, "loss": 0.6843, "step": 967 }, { "epoch": 0.4, "learning_rate": 1.3771703278782689e-05, "loss": 0.6475, "step": 968 }, { "epoch": 0.4, "learning_rate": 1.375943367196981e-05, "loss": 0.6113, "step": 969 }, { "epoch": 0.4, "learning_rate": 1.3747157470491923e-05, "loss": 0.617, "step": 970 }, { "epoch": 0.4, "learning_rate": 1.3734874695883504e-05, "loss": 0.6739, "step": 971 }, { "epoch": 0.4, "learning_rate": 1.372258536969056e-05, "loss": 0.5792, "step": 972 }, { "epoch": 0.4, "learning_rate": 1.371028951347059e-05, "loss": 0.6993, "step": 973 }, { "epoch": 0.4, "learning_rate": 1.3697987148792546e-05, "loss": 0.689, "step": 974 }, { "epoch": 0.4, "learning_rate": 1.3685678297236805e-05, "loss": 0.6088, "step": 975 }, { "epoch": 0.4, "learning_rate": 1.3673362980395115e-05, "loss": 0.6996, "step": 976 }, { "epoch": 0.4, "learning_rate": 1.3661041219870563e-05, "loss": 0.6402, "step": 977 }, { "epoch": 0.4, "learning_rate": 1.364871303727755e-05, "loss": 0.6131, "step": 978 }, { "epoch": 0.4, "learning_rate": 1.3636378454241728e-05, "loss": 0.6024, "step": 979 }, { "epoch": 0.4, "learning_rate": 1.3624037492399991e-05, "loss": 0.6856, "step": 980 }, { "epoch": 0.4, "learning_rate": 1.3611690173400414e-05, "loss": 0.6063, "step": 981 }, { "epoch": 0.4, "learning_rate": 1.3599336518902228e-05, "loss": 0.6971, "step": 982 }, { "epoch": 0.4, "learning_rate": 1.358697655057577e-05, "loss": 0.6286, "step": 983 }, { "epoch": 0.4, "learning_rate": 1.3574610290102462e-05, "loss": 0.6562, "step": 984 }, { "epoch": 0.4, "learning_rate": 1.3562237759174755e-05, "loss": 0.6645, "step": 985 }, { "epoch": 0.4, "learning_rate": 1.3549858979496104e-05, "loss": 0.656, "step": 986 }, { "epoch": 0.4, "learning_rate": 1.3537473972780924e-05, "loss": 0.6358, "step": 987 }, { "epoch": 0.4, "learning_rate": 1.3525082760754557e-05, "loss": 0.612, "step": 988 }, { "epoch": 0.4, "learning_rate": 1.3512685365153226e-05, "loss": 0.6456, "step": 989 }, { "epoch": 0.4, "learning_rate": 1.3500281807724003e-05, "loss": 0.6466, "step": 990 }, { "epoch": 0.41, "learning_rate": 1.3487872110224765e-05, "loss": 0.6977, "step": 991 }, { "epoch": 0.41, "learning_rate": 1.3475456294424163e-05, "loss": 0.6613, "step": 992 }, { "epoch": 0.41, "learning_rate": 1.3463034382101581e-05, "loss": 0.6815, "step": 993 }, { "epoch": 0.41, "learning_rate": 1.3450606395047094e-05, "loss": 0.6862, "step": 994 }, { "epoch": 0.41, "learning_rate": 1.343817235506144e-05, "loss": 0.5749, "step": 995 }, { "epoch": 0.41, "learning_rate": 1.3425732283955968e-05, "loss": 0.6317, "step": 996 }, { "epoch": 0.41, "learning_rate": 1.341328620355261e-05, "loss": 0.6255, "step": 997 }, { "epoch": 0.41, "learning_rate": 1.3400834135683836e-05, "loss": 0.6544, "step": 998 }, { "epoch": 0.41, "learning_rate": 1.3388376102192622e-05, "loss": 0.6195, "step": 999 }, { "epoch": 0.41, "learning_rate": 1.3375912124932406e-05, "loss": 0.7076, "step": 1000 }, { "epoch": 0.41, "learning_rate": 1.3363442225767055e-05, "loss": 0.6262, "step": 1001 }, { "epoch": 0.41, "learning_rate": 1.3350966426570825e-05, "loss": 0.5793, "step": 1002 }, { "epoch": 0.41, "learning_rate": 1.333848474922832e-05, "loss": 0.7112, "step": 1003 }, { "epoch": 0.41, "learning_rate": 1.3325997215634457e-05, "loss": 0.6114, "step": 1004 }, { "epoch": 0.41, "learning_rate": 1.3313503847694418e-05, "loss": 0.633, "step": 1005 }, { "epoch": 0.41, "learning_rate": 1.330100466732363e-05, "loss": 0.7571, "step": 1006 }, { "epoch": 0.41, "learning_rate": 1.3288499696447708e-05, "loss": 0.7234, "step": 1007 }, { "epoch": 0.41, "learning_rate": 1.3275988957002429e-05, "loss": 0.648, "step": 1008 }, { "epoch": 0.41, "learning_rate": 1.3263472470933682e-05, "loss": 0.7128, "step": 1009 }, { "epoch": 0.41, "learning_rate": 1.3250950260197455e-05, "loss": 0.6688, "step": 1010 }, { "epoch": 0.41, "learning_rate": 1.3238422346759758e-05, "loss": 0.6612, "step": 1011 }, { "epoch": 0.41, "learning_rate": 1.3225888752596614e-05, "loss": 0.654, "step": 1012 }, { "epoch": 0.41, "learning_rate": 1.3213349499694004e-05, "loss": 0.6246, "step": 1013 }, { "epoch": 0.41, "learning_rate": 1.3200804610047842e-05, "loss": 0.6233, "step": 1014 }, { "epoch": 0.41, "learning_rate": 1.318825410566393e-05, "loss": 0.6381, "step": 1015 }, { "epoch": 0.42, "learning_rate": 1.3175698008557914e-05, "loss": 0.626, "step": 1016 }, { "epoch": 0.42, "learning_rate": 1.3163136340755254e-05, "loss": 0.6816, "step": 1017 }, { "epoch": 0.42, "learning_rate": 1.3150569124291186e-05, "loss": 0.5663, "step": 1018 }, { "epoch": 0.42, "learning_rate": 1.3137996381210672e-05, "loss": 0.7454, "step": 1019 }, { "epoch": 0.42, "learning_rate": 1.3125418133568369e-05, "loss": 0.6833, "step": 1020 }, { "epoch": 0.42, "learning_rate": 1.3112834403428593e-05, "loss": 0.6682, "step": 1021 }, { "epoch": 0.42, "learning_rate": 1.3100245212865279e-05, "loss": 0.5654, "step": 1022 }, { "epoch": 0.42, "learning_rate": 1.3087650583961936e-05, "loss": 0.6166, "step": 1023 }, { "epoch": 0.42, "learning_rate": 1.3075050538811611e-05, "loss": 0.7112, "step": 1024 }, { "epoch": 0.42, "learning_rate": 1.3062445099516863e-05, "loss": 0.7039, "step": 1025 }, { "epoch": 0.42, "learning_rate": 1.3049834288189702e-05, "loss": 0.7618, "step": 1026 }, { "epoch": 0.42, "learning_rate": 1.3037218126951567e-05, "loss": 0.6539, "step": 1027 }, { "epoch": 0.42, "learning_rate": 1.3024596637933277e-05, "loss": 0.6936, "step": 1028 }, { "epoch": 0.42, "learning_rate": 1.3011969843275003e-05, "loss": 0.6558, "step": 1029 }, { "epoch": 0.42, "learning_rate": 1.2999337765126217e-05, "loss": 0.6517, "step": 1030 }, { "epoch": 0.42, "learning_rate": 1.2986700425645663e-05, "loss": 0.6512, "step": 1031 }, { "epoch": 0.42, "learning_rate": 1.2974057847001306e-05, "loss": 0.6663, "step": 1032 }, { "epoch": 0.42, "learning_rate": 1.2961410051370322e-05, "loss": 0.7137, "step": 1033 }, { "epoch": 0.42, "learning_rate": 1.2948757060939019e-05, "loss": 0.5942, "step": 1034 }, { "epoch": 0.42, "learning_rate": 1.293609889790282e-05, "loss": 0.6702, "step": 1035 }, { "epoch": 0.42, "learning_rate": 1.2923435584466228e-05, "loss": 0.7645, "step": 1036 }, { "epoch": 0.42, "learning_rate": 1.2910767142842777e-05, "loss": 0.6491, "step": 1037 }, { "epoch": 0.42, "learning_rate": 1.2898093595254998e-05, "loss": 0.5949, "step": 1038 }, { "epoch": 0.42, "learning_rate": 1.2885414963934374e-05, "loss": 0.5756, "step": 1039 }, { "epoch": 0.43, "learning_rate": 1.2872731271121315e-05, "loss": 0.6602, "step": 1040 }, { "epoch": 0.43, "learning_rate": 1.28600425390651e-05, "loss": 0.6815, "step": 1041 }, { "epoch": 0.43, "learning_rate": 1.2847348790023858e-05, "loss": 0.6687, "step": 1042 }, { "epoch": 0.43, "learning_rate": 1.283465004626451e-05, "loss": 0.5653, "step": 1043 }, { "epoch": 0.43, "learning_rate": 1.2821946330062738e-05, "loss": 0.7003, "step": 1044 }, { "epoch": 0.43, "learning_rate": 1.2809237663702951e-05, "loss": 0.5824, "step": 1045 }, { "epoch": 0.43, "learning_rate": 1.2796524069478243e-05, "loss": 0.7416, "step": 1046 }, { "epoch": 0.43, "learning_rate": 1.2783805569690343e-05, "loss": 0.6764, "step": 1047 }, { "epoch": 0.43, "learning_rate": 1.27710821866496e-05, "loss": 0.6823, "step": 1048 }, { "epoch": 0.43, "learning_rate": 1.275835394267492e-05, "loss": 0.6791, "step": 1049 }, { "epoch": 0.43, "learning_rate": 1.2745620860093726e-05, "loss": 0.6758, "step": 1050 }, { "epoch": 0.43, "learning_rate": 1.273288296124195e-05, "loss": 0.7278, "step": 1051 }, { "epoch": 0.43, "learning_rate": 1.2720140268463958e-05, "loss": 0.568, "step": 1052 }, { "epoch": 0.43, "learning_rate": 1.2707392804112529e-05, "loss": 0.6155, "step": 1053 }, { "epoch": 0.43, "learning_rate": 1.2694640590548808e-05, "loss": 0.6709, "step": 1054 }, { "epoch": 0.43, "learning_rate": 1.2681883650142283e-05, "loss": 0.6508, "step": 1055 }, { "epoch": 0.43, "learning_rate": 1.2669122005270724e-05, "loss": 0.757, "step": 1056 }, { "epoch": 0.43, "learning_rate": 1.265635567832015e-05, "loss": 0.6936, "step": 1057 }, { "epoch": 0.43, "learning_rate": 1.2643584691684802e-05, "loss": 0.6403, "step": 1058 }, { "epoch": 0.43, "learning_rate": 1.2630809067767094e-05, "loss": 0.64, "step": 1059 }, { "epoch": 0.43, "learning_rate": 1.2618028828977563e-05, "loss": 0.6297, "step": 1060 }, { "epoch": 0.43, "learning_rate": 1.2605243997734857e-05, "loss": 0.7047, "step": 1061 }, { "epoch": 0.43, "learning_rate": 1.259245459646567e-05, "loss": 0.6494, "step": 1062 }, { "epoch": 0.43, "learning_rate": 1.2579660647604715e-05, "loss": 0.6567, "step": 1063 }, { "epoch": 0.43, "learning_rate": 1.2566862173594689e-05, "loss": 0.6357, "step": 1064 }, { "epoch": 0.44, "learning_rate": 1.2554059196886212e-05, "loss": 0.6891, "step": 1065 }, { "epoch": 0.44, "learning_rate": 1.2541251739937814e-05, "loss": 0.6514, "step": 1066 }, { "epoch": 0.44, "learning_rate": 1.2528439825215884e-05, "loss": 0.7016, "step": 1067 }, { "epoch": 0.44, "learning_rate": 1.2515623475194623e-05, "loss": 0.5765, "step": 1068 }, { "epoch": 0.44, "learning_rate": 1.2502802712356017e-05, "loss": 0.6344, "step": 1069 }, { "epoch": 0.44, "learning_rate": 1.2489977559189796e-05, "loss": 0.6569, "step": 1070 }, { "epoch": 0.44, "learning_rate": 1.2477148038193392e-05, "loss": 0.7095, "step": 1071 }, { "epoch": 0.44, "learning_rate": 1.2464314171871888e-05, "loss": 0.6326, "step": 1072 }, { "epoch": 0.44, "learning_rate": 1.2451475982737996e-05, "loss": 0.6426, "step": 1073 }, { "epoch": 0.44, "learning_rate": 1.2438633493312016e-05, "loss": 0.666, "step": 1074 }, { "epoch": 0.44, "learning_rate": 1.2425786726121783e-05, "loss": 0.7223, "step": 1075 }, { "epoch": 0.44, "learning_rate": 1.241293570370264e-05, "loss": 0.7513, "step": 1076 }, { "epoch": 0.44, "learning_rate": 1.2400080448597396e-05, "loss": 0.7125, "step": 1077 }, { "epoch": 0.44, "learning_rate": 1.2387220983356283e-05, "loss": 0.6276, "step": 1078 }, { "epoch": 0.44, "learning_rate": 1.2374357330536919e-05, "loss": 0.5754, "step": 1079 }, { "epoch": 0.44, "learning_rate": 1.2361489512704264e-05, "loss": 0.6219, "step": 1080 }, { "epoch": 0.44, "learning_rate": 1.234861755243059e-05, "loss": 0.7364, "step": 1081 }, { "epoch": 0.44, "learning_rate": 1.2335741472295426e-05, "loss": 0.6935, "step": 1082 }, { "epoch": 0.44, "learning_rate": 1.2322861294885542e-05, "loss": 0.6673, "step": 1083 }, { "epoch": 0.44, "learning_rate": 1.230997704279488e-05, "loss": 0.667, "step": 1084 }, { "epoch": 0.44, "learning_rate": 1.2297088738624548e-05, "loss": 0.6083, "step": 1085 }, { "epoch": 0.44, "learning_rate": 1.2284196404982746e-05, "loss": 0.6952, "step": 1086 }, { "epoch": 0.44, "learning_rate": 1.2271300064484746e-05, "loss": 0.5991, "step": 1087 }, { "epoch": 0.44, "learning_rate": 1.2258399739752848e-05, "loss": 0.6693, "step": 1088 }, { "epoch": 0.45, "learning_rate": 1.2245495453416352e-05, "loss": 0.6277, "step": 1089 }, { "epoch": 0.45, "learning_rate": 1.223258722811149e-05, "loss": 0.6063, "step": 1090 }, { "epoch": 0.45, "learning_rate": 1.2219675086481418e-05, "loss": 0.6917, "step": 1091 }, { "epoch": 0.45, "learning_rate": 1.2206759051176151e-05, "loss": 0.6114, "step": 1092 }, { "epoch": 0.45, "learning_rate": 1.2193839144852546e-05, "loss": 0.6676, "step": 1093 }, { "epoch": 0.45, "learning_rate": 1.218091539017424e-05, "loss": 0.6606, "step": 1094 }, { "epoch": 0.45, "learning_rate": 1.2167987809811625e-05, "loss": 0.7116, "step": 1095 }, { "epoch": 0.45, "learning_rate": 1.2155056426441803e-05, "loss": 0.624, "step": 1096 }, { "epoch": 0.45, "learning_rate": 1.2142121262748545e-05, "loss": 0.6998, "step": 1097 }, { "epoch": 0.45, "learning_rate": 1.212918234142226e-05, "loss": 0.6688, "step": 1098 }, { "epoch": 0.45, "learning_rate": 1.2116239685159941e-05, "loss": 0.6496, "step": 1099 }, { "epoch": 0.45, "learning_rate": 1.2103293316665137e-05, "loss": 0.6768, "step": 1100 }, { "epoch": 0.45, "learning_rate": 1.2090343258647912e-05, "loss": 0.6663, "step": 1101 }, { "epoch": 0.45, "learning_rate": 1.2077389533824789e-05, "loss": 0.642, "step": 1102 }, { "epoch": 0.45, "learning_rate": 1.2064432164918738e-05, "loss": 0.6956, "step": 1103 }, { "epoch": 0.45, "learning_rate": 1.2051471174659116e-05, "loss": 0.627, "step": 1104 }, { "epoch": 0.45, "learning_rate": 1.2038506585781626e-05, "loss": 0.7477, "step": 1105 }, { "epoch": 0.45, "learning_rate": 1.2025538421028293e-05, "loss": 0.6547, "step": 1106 }, { "epoch": 0.45, "learning_rate": 1.201256670314741e-05, "loss": 0.6947, "step": 1107 }, { "epoch": 0.45, "learning_rate": 1.1999591454893511e-05, "loss": 0.7223, "step": 1108 }, { "epoch": 0.45, "learning_rate": 1.1986612699027305e-05, "loss": 0.5807, "step": 1109 }, { "epoch": 0.45, "learning_rate": 1.1973630458315667e-05, "loss": 0.6823, "step": 1110 }, { "epoch": 0.45, "learning_rate": 1.1960644755531587e-05, "loss": 0.6151, "step": 1111 }, { "epoch": 0.45, "learning_rate": 1.194765561345412e-05, "loss": 0.6339, "step": 1112 }, { "epoch": 0.46, "learning_rate": 1.1934663054868358e-05, "loss": 0.6318, "step": 1113 }, { "epoch": 0.46, "learning_rate": 1.1921667102565384e-05, "loss": 0.6354, "step": 1114 }, { "epoch": 0.46, "learning_rate": 1.1908667779342243e-05, "loss": 0.6517, "step": 1115 }, { "epoch": 0.46, "learning_rate": 1.1895665108001879e-05, "loss": 0.6742, "step": 1116 }, { "epoch": 0.46, "learning_rate": 1.1882659111353118e-05, "loss": 0.6612, "step": 1117 }, { "epoch": 0.46, "learning_rate": 1.1869649812210618e-05, "loss": 0.7326, "step": 1118 }, { "epoch": 0.46, "learning_rate": 1.185663723339483e-05, "loss": 0.6571, "step": 1119 }, { "epoch": 0.46, "learning_rate": 1.1843621397731954e-05, "loss": 0.6196, "step": 1120 }, { "epoch": 0.46, "learning_rate": 1.1830602328053911e-05, "loss": 0.7858, "step": 1121 }, { "epoch": 0.46, "learning_rate": 1.1817580047198287e-05, "loss": 0.7536, "step": 1122 }, { "epoch": 0.46, "learning_rate": 1.180455457800831e-05, "loss": 0.7371, "step": 1123 }, { "epoch": 0.46, "learning_rate": 1.179152594333279e-05, "loss": 0.6093, "step": 1124 }, { "epoch": 0.46, "learning_rate": 1.1778494166026096e-05, "loss": 0.6953, "step": 1125 }, { "epoch": 0.46, "learning_rate": 1.1765459268948111e-05, "loss": 0.5785, "step": 1126 }, { "epoch": 0.46, "learning_rate": 1.1752421274964188e-05, "loss": 0.6453, "step": 1127 }, { "epoch": 0.46, "learning_rate": 1.1739380206945108e-05, "loss": 0.5911, "step": 1128 }, { "epoch": 0.46, "learning_rate": 1.1726336087767054e-05, "loss": 0.6226, "step": 1129 }, { "epoch": 0.46, "learning_rate": 1.1713288940311562e-05, "loss": 0.6772, "step": 1130 }, { "epoch": 0.46, "learning_rate": 1.1700238787465463e-05, "loss": 0.6431, "step": 1131 }, { "epoch": 0.46, "learning_rate": 1.168718565212088e-05, "loss": 0.6819, "step": 1132 }, { "epoch": 0.46, "learning_rate": 1.1674129557175156e-05, "loss": 0.6575, "step": 1133 }, { "epoch": 0.46, "learning_rate": 1.1661070525530827e-05, "loss": 0.6557, "step": 1134 }, { "epoch": 0.46, "learning_rate": 1.1648008580095587e-05, "loss": 0.671, "step": 1135 }, { "epoch": 0.46, "learning_rate": 1.1634943743782235e-05, "loss": 0.6451, "step": 1136 }, { "epoch": 0.46, "learning_rate": 1.1621876039508638e-05, "loss": 0.6994, "step": 1137 }, { "epoch": 0.47, "learning_rate": 1.160880549019771e-05, "loss": 0.6015, "step": 1138 }, { "epoch": 0.47, "learning_rate": 1.1595732118777332e-05, "loss": 0.5878, "step": 1139 }, { "epoch": 0.47, "learning_rate": 1.1582655948180357e-05, "loss": 0.6532, "step": 1140 }, { "epoch": 0.47, "learning_rate": 1.1569577001344532e-05, "loss": 0.7016, "step": 1141 }, { "epoch": 0.47, "learning_rate": 1.1556495301212485e-05, "loss": 0.6226, "step": 1142 }, { "epoch": 0.47, "learning_rate": 1.154341087073167e-05, "loss": 0.7173, "step": 1143 }, { "epoch": 0.47, "learning_rate": 1.1530323732854326e-05, "loss": 0.6313, "step": 1144 }, { "epoch": 0.47, "learning_rate": 1.1517233910537453e-05, "loss": 0.6334, "step": 1145 }, { "epoch": 0.47, "learning_rate": 1.1504141426742744e-05, "loss": 0.6283, "step": 1146 }, { "epoch": 0.47, "learning_rate": 1.1491046304436575e-05, "loss": 0.7231, "step": 1147 }, { "epoch": 0.47, "learning_rate": 1.147794856658994e-05, "loss": 0.5591, "step": 1148 }, { "epoch": 0.47, "learning_rate": 1.146484823617843e-05, "loss": 0.597, "step": 1149 }, { "epoch": 0.47, "learning_rate": 1.1451745336182173e-05, "loss": 0.6484, "step": 1150 }, { "epoch": 0.47, "learning_rate": 1.1438639889585818e-05, "loss": 0.6414, "step": 1151 }, { "epoch": 0.47, "learning_rate": 1.1425531919378469e-05, "loss": 0.6337, "step": 1152 }, { "epoch": 0.47, "learning_rate": 1.1412421448553665e-05, "loss": 0.5903, "step": 1153 }, { "epoch": 0.47, "learning_rate": 1.1399308500109326e-05, "loss": 0.6858, "step": 1154 }, { "epoch": 0.47, "learning_rate": 1.138619309704772e-05, "loss": 0.6119, "step": 1155 }, { "epoch": 0.47, "learning_rate": 1.1373075262375421e-05, "loss": 0.657, "step": 1156 }, { "epoch": 0.47, "learning_rate": 1.1359955019103273e-05, "loss": 0.6326, "step": 1157 }, { "epoch": 0.47, "learning_rate": 1.1346832390246334e-05, "loss": 0.6183, "step": 1158 }, { "epoch": 0.47, "learning_rate": 1.1333707398823856e-05, "loss": 0.68, "step": 1159 }, { "epoch": 0.47, "learning_rate": 1.132058006785924e-05, "loss": 0.6826, "step": 1160 }, { "epoch": 0.47, "learning_rate": 1.1307450420379971e-05, "loss": 0.6321, "step": 1161 }, { "epoch": 0.48, "learning_rate": 1.1294318479417618e-05, "loss": 0.7396, "step": 1162 }, { "epoch": 0.48, "learning_rate": 1.1281184268007766e-05, "loss": 0.6435, "step": 1163 }, { "epoch": 0.48, "learning_rate": 1.1268047809189976e-05, "loss": 0.7927, "step": 1164 }, { "epoch": 0.48, "learning_rate": 1.1254909126007765e-05, "loss": 0.7204, "step": 1165 }, { "epoch": 0.48, "learning_rate": 1.1241768241508537e-05, "loss": 0.7077, "step": 1166 }, { "epoch": 0.48, "learning_rate": 1.1228625178743572e-05, "loss": 0.6306, "step": 1167 }, { "epoch": 0.48, "learning_rate": 1.1215479960767958e-05, "loss": 0.767, "step": 1168 }, { "epoch": 0.48, "learning_rate": 1.1202332610640574e-05, "loss": 0.5404, "step": 1169 }, { "epoch": 0.48, "learning_rate": 1.118918315142403e-05, "loss": 0.6272, "step": 1170 }, { "epoch": 0.48, "learning_rate": 1.1176031606184645e-05, "loss": 0.6591, "step": 1171 }, { "epoch": 0.48, "learning_rate": 1.1162877997992389e-05, "loss": 0.6481, "step": 1172 }, { "epoch": 0.48, "learning_rate": 1.1149722349920855e-05, "loss": 0.6648, "step": 1173 }, { "epoch": 0.48, "learning_rate": 1.1136564685047213e-05, "loss": 0.6006, "step": 1174 }, { "epoch": 0.48, "learning_rate": 1.1123405026452176e-05, "loss": 0.6927, "step": 1175 }, { "epoch": 0.48, "learning_rate": 1.1110243397219945e-05, "loss": 0.6562, "step": 1176 }, { "epoch": 0.48, "learning_rate": 1.1097079820438181e-05, "loss": 0.6103, "step": 1177 }, { "epoch": 0.48, "learning_rate": 1.1083914319197967e-05, "loss": 0.6468, "step": 1178 }, { "epoch": 0.48, "learning_rate": 1.1070746916593756e-05, "loss": 0.6356, "step": 1179 }, { "epoch": 0.48, "learning_rate": 1.1057577635723337e-05, "loss": 0.5703, "step": 1180 }, { "epoch": 0.48, "learning_rate": 1.1044406499687797e-05, "loss": 0.6586, "step": 1181 }, { "epoch": 0.48, "learning_rate": 1.1031233531591471e-05, "loss": 0.7869, "step": 1182 }, { "epoch": 0.48, "learning_rate": 1.1018058754541915e-05, "loss": 0.6808, "step": 1183 }, { "epoch": 0.48, "learning_rate": 1.1004882191649857e-05, "loss": 0.6611, "step": 1184 }, { "epoch": 0.48, "learning_rate": 1.099170386602915e-05, "loss": 0.7283, "step": 1185 }, { "epoch": 0.48, "learning_rate": 1.0978523800796747e-05, "loss": 0.6204, "step": 1186 }, { "epoch": 0.49, "learning_rate": 1.0965342019072654e-05, "loss": 0.664, "step": 1187 }, { "epoch": 0.49, "learning_rate": 1.0952158543979878e-05, "loss": 0.7295, "step": 1188 }, { "epoch": 0.49, "learning_rate": 1.0938973398644407e-05, "loss": 0.7179, "step": 1189 }, { "epoch": 0.49, "learning_rate": 1.0925786606195153e-05, "loss": 0.546, "step": 1190 }, { "epoch": 0.49, "learning_rate": 1.0912598189763919e-05, "loss": 0.6948, "step": 1191 }, { "epoch": 0.49, "learning_rate": 1.0899408172485357e-05, "loss": 0.6008, "step": 1192 }, { "epoch": 0.49, "learning_rate": 1.0886216577496926e-05, "loss": 0.6768, "step": 1193 }, { "epoch": 0.49, "learning_rate": 1.0873023427938855e-05, "loss": 0.6157, "step": 1194 }, { "epoch": 0.49, "learning_rate": 1.0859828746954098e-05, "loss": 0.6702, "step": 1195 }, { "epoch": 0.49, "learning_rate": 1.0846632557688295e-05, "loss": 0.6785, "step": 1196 }, { "epoch": 0.49, "learning_rate": 1.083343488328973e-05, "loss": 0.6163, "step": 1197 }, { "epoch": 0.49, "learning_rate": 1.08202357469093e-05, "loss": 0.6327, "step": 1198 }, { "epoch": 0.49, "learning_rate": 1.0807035171700455e-05, "loss": 0.6351, "step": 1199 }, { "epoch": 0.49, "learning_rate": 1.0793833180819183e-05, "loss": 0.5806, "step": 1200 }, { "epoch": 0.49, "learning_rate": 1.0780629797423938e-05, "loss": 0.714, "step": 1201 }, { "epoch": 0.49, "learning_rate": 1.0767425044675634e-05, "loss": 0.7203, "step": 1202 }, { "epoch": 0.49, "learning_rate": 1.0754218945737575e-05, "loss": 0.701, "step": 1203 }, { "epoch": 0.49, "learning_rate": 1.0741011523775433e-05, "loss": 0.6727, "step": 1204 }, { "epoch": 0.49, "learning_rate": 1.0727802801957198e-05, "loss": 0.5973, "step": 1205 }, { "epoch": 0.49, "learning_rate": 1.0714592803453138e-05, "loss": 0.7249, "step": 1206 }, { "epoch": 0.49, "learning_rate": 1.0701381551435767e-05, "loss": 0.5716, "step": 1207 }, { "epoch": 0.49, "learning_rate": 1.0688169069079793e-05, "loss": 0.6326, "step": 1208 }, { "epoch": 0.49, "learning_rate": 1.067495537956208e-05, "loss": 0.6047, "step": 1209 }, { "epoch": 0.49, "learning_rate": 1.0661740506061616e-05, "loss": 0.617, "step": 1210 }, { "epoch": 0.5, "learning_rate": 1.0648524471759463e-05, "loss": 0.6481, "step": 1211 }, { "epoch": 0.5, "learning_rate": 1.0635307299838715e-05, "loss": 0.5981, "step": 1212 }, { "epoch": 0.5, "learning_rate": 1.062208901348447e-05, "loss": 0.6404, "step": 1213 }, { "epoch": 0.5, "learning_rate": 1.0608869635883776e-05, "loss": 0.7058, "step": 1214 }, { "epoch": 0.5, "learning_rate": 1.0595649190225593e-05, "loss": 0.6684, "step": 1215 }, { "epoch": 0.5, "learning_rate": 1.0582427699700759e-05, "loss": 0.6638, "step": 1216 }, { "epoch": 0.5, "learning_rate": 1.0569205187501943e-05, "loss": 0.7179, "step": 1217 }, { "epoch": 0.5, "learning_rate": 1.0555981676823606e-05, "loss": 0.6787, "step": 1218 }, { "epoch": 0.5, "learning_rate": 1.0542757190861959e-05, "loss": 0.6338, "step": 1219 }, { "epoch": 0.5, "learning_rate": 1.0529531752814928e-05, "loss": 0.7203, "step": 1220 }, { "epoch": 0.5, "learning_rate": 1.0516305385882103e-05, "loss": 0.6431, "step": 1221 }, { "epoch": 0.5, "learning_rate": 1.0503078113264715e-05, "loss": 0.6501, "step": 1222 }, { "epoch": 0.5, "learning_rate": 1.0489849958165567e-05, "loss": 0.678, "step": 1223 }, { "epoch": 0.5, "learning_rate": 1.0476620943789021e-05, "loss": 0.6206, "step": 1224 }, { "epoch": 0.5, "learning_rate": 1.046339109334095e-05, "loss": 0.6784, "step": 1225 }, { "epoch": 0.5, "learning_rate": 1.0450160430028679e-05, "loss": 0.7266, "step": 1226 }, { "epoch": 0.5, "learning_rate": 1.0436928977060973e-05, "loss": 0.6878, "step": 1227 }, { "epoch": 0.5, "learning_rate": 1.0423696757647977e-05, "loss": 0.6823, "step": 1228 }, { "epoch": 0.5, "learning_rate": 1.0410463795001177e-05, "loss": 0.6077, "step": 1229 }, { "epoch": 0.5, "learning_rate": 1.039723011233337e-05, "loss": 0.5919, "step": 1230 }, { "epoch": 0.5, "learning_rate": 1.0383995732858609e-05, "loss": 0.6481, "step": 1231 }, { "epoch": 0.5, "learning_rate": 1.0370760679792173e-05, "loss": 0.653, "step": 1232 }, { "epoch": 0.5, "learning_rate": 1.0357524976350522e-05, "loss": 0.6937, "step": 1233 }, { "epoch": 0.5, "learning_rate": 1.0344288645751257e-05, "loss": 0.6202, "step": 1234 }, { "epoch": 0.5, "learning_rate": 1.0331051711213079e-05, "loss": 0.6429, "step": 1235 }, { "epoch": 0.51, "learning_rate": 1.031781419595575e-05, "loss": 0.5797, "step": 1236 }, { "epoch": 0.51, "learning_rate": 1.0304576123200045e-05, "loss": 0.6533, "step": 1237 }, { "epoch": 0.51, "learning_rate": 1.0291337516167725e-05, "loss": 0.6404, "step": 1238 }, { "epoch": 0.51, "learning_rate": 1.0278098398081482e-05, "loss": 0.6956, "step": 1239 }, { "epoch": 0.51, "learning_rate": 1.0264858792164908e-05, "loss": 0.6668, "step": 1240 }, { "epoch": 0.51, "learning_rate": 1.025161872164245e-05, "loss": 0.6413, "step": 1241 }, { "epoch": 0.51, "learning_rate": 1.0238378209739366e-05, "loss": 0.6431, "step": 1242 }, { "epoch": 0.51, "learning_rate": 1.0225137279681696e-05, "loss": 0.6379, "step": 1243 }, { "epoch": 0.51, "learning_rate": 1.0211895954696204e-05, "loss": 0.652, "step": 1244 }, { "epoch": 0.51, "learning_rate": 1.019865425801036e-05, "loss": 0.6102, "step": 1245 }, { "epoch": 0.51, "learning_rate": 1.0185412212852268e-05, "loss": 0.5827, "step": 1246 }, { "epoch": 0.51, "learning_rate": 1.017216984245066e-05, "loss": 0.6555, "step": 1247 }, { "epoch": 0.51, "learning_rate": 1.0158927170034831e-05, "loss": 0.6318, "step": 1248 }, { "epoch": 0.51, "learning_rate": 1.0145684218834604e-05, "loss": 0.7373, "step": 1249 }, { "epoch": 0.51, "learning_rate": 1.0132441012080296e-05, "loss": 0.6725, "step": 1250 }, { "epoch": 0.51, "learning_rate": 1.011919757300267e-05, "loss": 0.6071, "step": 1251 }, { "epoch": 0.51, "learning_rate": 1.0105953924832894e-05, "loss": 0.65, "step": 1252 }, { "epoch": 0.51, "learning_rate": 1.0092710090802506e-05, "loss": 0.6226, "step": 1253 }, { "epoch": 0.51, "learning_rate": 1.0079466094143373e-05, "loss": 0.5319, "step": 1254 }, { "epoch": 0.51, "learning_rate": 1.0066221958087636e-05, "loss": 0.6565, "step": 1255 }, { "epoch": 0.51, "learning_rate": 1.0052977705867697e-05, "loss": 0.6192, "step": 1256 }, { "epoch": 0.51, "learning_rate": 1.0039733360716145e-05, "loss": 0.6654, "step": 1257 }, { "epoch": 0.51, "learning_rate": 1.0026488945865744e-05, "loss": 0.6046, "step": 1258 }, { "epoch": 0.51, "learning_rate": 1.0013244484549376e-05, "loss": 0.6024, "step": 1259 }, { "epoch": 0.52, "learning_rate": 1e-05, "loss": 0.6731, "step": 1260 }, { "epoch": 0.52, "learning_rate": 9.986755515450625e-06, "loss": 0.7332, "step": 1261 }, { "epoch": 0.52, "learning_rate": 9.973511054134259e-06, "loss": 0.6355, "step": 1262 }, { "epoch": 0.52, "learning_rate": 9.960266639283857e-06, "loss": 0.6642, "step": 1263 }, { "epoch": 0.52, "learning_rate": 9.947022294132306e-06, "loss": 0.6271, "step": 1264 }, { "epoch": 0.52, "learning_rate": 9.933778041912365e-06, "loss": 0.6326, "step": 1265 }, { "epoch": 0.52, "learning_rate": 9.920533905856634e-06, "loss": 0.6505, "step": 1266 }, { "epoch": 0.52, "learning_rate": 9.907289909197496e-06, "loss": 0.7731, "step": 1267 }, { "epoch": 0.52, "learning_rate": 9.894046075167106e-06, "loss": 0.6125, "step": 1268 }, { "epoch": 0.52, "learning_rate": 9.880802426997334e-06, "loss": 0.7185, "step": 1269 }, { "epoch": 0.52, "learning_rate": 9.867558987919704e-06, "loss": 0.7106, "step": 1270 }, { "epoch": 0.52, "learning_rate": 9.854315781165398e-06, "loss": 0.7425, "step": 1271 }, { "epoch": 0.52, "learning_rate": 9.841072829965172e-06, "loss": 0.6497, "step": 1272 }, { "epoch": 0.52, "learning_rate": 9.827830157549344e-06, "loss": 0.5556, "step": 1273 }, { "epoch": 0.52, "learning_rate": 9.814587787147735e-06, "loss": 0.6899, "step": 1274 }, { "epoch": 0.52, "learning_rate": 9.801345741989646e-06, "loss": 0.5726, "step": 1275 }, { "epoch": 0.52, "learning_rate": 9.788104045303797e-06, "loss": 0.6331, "step": 1276 }, { "epoch": 0.52, "learning_rate": 9.774862720318305e-06, "loss": 0.7118, "step": 1277 }, { "epoch": 0.52, "learning_rate": 9.761621790260636e-06, "loss": 0.6878, "step": 1278 }, { "epoch": 0.52, "learning_rate": 9.748381278357554e-06, "loss": 0.6541, "step": 1279 }, { "epoch": 0.52, "learning_rate": 9.735141207835095e-06, "loss": 0.6494, "step": 1280 }, { "epoch": 0.52, "learning_rate": 9.72190160191852e-06, "loss": 0.7121, "step": 1281 }, { "epoch": 0.52, "learning_rate": 9.708662483832279e-06, "loss": 0.6267, "step": 1282 }, { "epoch": 0.52, "learning_rate": 9.695423876799957e-06, "loss": 0.5758, "step": 1283 }, { "epoch": 0.52, "learning_rate": 9.682185804044252e-06, "loss": 0.6203, "step": 1284 }, { "epoch": 0.53, "learning_rate": 9.668948288786923e-06, "loss": 0.6874, "step": 1285 }, { "epoch": 0.53, "learning_rate": 9.655711354248747e-06, "loss": 0.6808, "step": 1286 }, { "epoch": 0.53, "learning_rate": 9.642475023649483e-06, "loss": 0.6903, "step": 1287 }, { "epoch": 0.53, "learning_rate": 9.62923932020783e-06, "loss": 0.6611, "step": 1288 }, { "epoch": 0.53, "learning_rate": 9.616004267141396e-06, "loss": 0.6374, "step": 1289 }, { "epoch": 0.53, "learning_rate": 9.602769887666633e-06, "loss": 0.6496, "step": 1290 }, { "epoch": 0.53, "learning_rate": 9.589536204998823e-06, "loss": 0.7096, "step": 1291 }, { "epoch": 0.53, "learning_rate": 9.576303242352025e-06, "loss": 0.6436, "step": 1292 }, { "epoch": 0.53, "learning_rate": 9.563071022939028e-06, "loss": 0.6056, "step": 1293 }, { "epoch": 0.53, "learning_rate": 9.549839569971323e-06, "loss": 0.5985, "step": 1294 }, { "epoch": 0.53, "learning_rate": 9.536608906659052e-06, "loss": 0.6914, "step": 1295 }, { "epoch": 0.53, "learning_rate": 9.523379056210982e-06, "loss": 0.6994, "step": 1296 }, { "epoch": 0.53, "learning_rate": 9.510150041834436e-06, "loss": 0.6008, "step": 1297 }, { "epoch": 0.53, "learning_rate": 9.496921886735287e-06, "loss": 0.6901, "step": 1298 }, { "epoch": 0.53, "learning_rate": 9.483694614117898e-06, "loss": 0.6402, "step": 1299 }, { "epoch": 0.53, "learning_rate": 9.470468247185076e-06, "loss": 0.6977, "step": 1300 }, { "epoch": 0.53, "learning_rate": 9.457242809138045e-06, "loss": 0.6134, "step": 1301 }, { "epoch": 0.53, "learning_rate": 9.444018323176399e-06, "loss": 0.7395, "step": 1302 }, { "epoch": 0.53, "learning_rate": 9.430794812498062e-06, "loss": 0.6213, "step": 1303 }, { "epoch": 0.53, "learning_rate": 9.417572300299244e-06, "loss": 0.6868, "step": 1304 }, { "epoch": 0.53, "learning_rate": 9.404350809774412e-06, "loss": 0.6937, "step": 1305 }, { "epoch": 0.53, "learning_rate": 9.391130364116226e-06, "loss": 0.6271, "step": 1306 }, { "epoch": 0.53, "learning_rate": 9.377910986515529e-06, "loss": 0.7128, "step": 1307 }, { "epoch": 0.53, "learning_rate": 9.364692700161287e-06, "loss": 0.6465, "step": 1308 }, { "epoch": 0.54, "learning_rate": 9.35147552824054e-06, "loss": 0.635, "step": 1309 }, { "epoch": 0.54, "learning_rate": 9.33825949393839e-06, "loss": 0.699, "step": 1310 }, { "epoch": 0.54, "learning_rate": 9.325044620437924e-06, "loss": 0.7016, "step": 1311 }, { "epoch": 0.54, "learning_rate": 9.311830930920214e-06, "loss": 0.6355, "step": 1312 }, { "epoch": 0.54, "learning_rate": 9.298618448564236e-06, "loss": 0.6209, "step": 1313 }, { "epoch": 0.54, "learning_rate": 9.285407196546862e-06, "loss": 0.6426, "step": 1314 }, { "epoch": 0.54, "learning_rate": 9.272197198042804e-06, "loss": 0.6308, "step": 1315 }, { "epoch": 0.54, "learning_rate": 9.25898847622457e-06, "loss": 0.723, "step": 1316 }, { "epoch": 0.54, "learning_rate": 9.24578105426243e-06, "loss": 0.5931, "step": 1317 }, { "epoch": 0.54, "learning_rate": 9.232574955324369e-06, "loss": 0.7124, "step": 1318 }, { "epoch": 0.54, "learning_rate": 9.219370202576067e-06, "loss": 0.7423, "step": 1319 }, { "epoch": 0.54, "learning_rate": 9.206166819180822e-06, "loss": 0.6254, "step": 1320 }, { "epoch": 0.54, "learning_rate": 9.192964828299545e-06, "loss": 0.5903, "step": 1321 }, { "epoch": 0.54, "learning_rate": 9.179764253090703e-06, "loss": 0.6314, "step": 1322 }, { "epoch": 0.54, "learning_rate": 9.166565116710273e-06, "loss": 0.6325, "step": 1323 }, { "epoch": 0.54, "learning_rate": 9.153367442311712e-06, "loss": 0.6226, "step": 1324 }, { "epoch": 0.54, "learning_rate": 9.140171253045906e-06, "loss": 0.6444, "step": 1325 }, { "epoch": 0.54, "learning_rate": 9.12697657206115e-06, "loss": 0.5342, "step": 1326 }, { "epoch": 0.54, "learning_rate": 9.113783422503076e-06, "loss": 0.6056, "step": 1327 }, { "epoch": 0.54, "learning_rate": 9.100591827514643e-06, "loss": 0.6532, "step": 1328 }, { "epoch": 0.54, "learning_rate": 9.087401810236084e-06, "loss": 0.6605, "step": 1329 }, { "epoch": 0.54, "learning_rate": 9.07421339380485e-06, "loss": 0.5691, "step": 1330 }, { "epoch": 0.54, "learning_rate": 9.061026601355596e-06, "loss": 0.6582, "step": 1331 }, { "epoch": 0.54, "learning_rate": 9.047841456020125e-06, "loss": 0.6211, "step": 1332 }, { "epoch": 0.54, "learning_rate": 9.034657980927351e-06, "loss": 0.5902, "step": 1333 }, { "epoch": 0.55, "learning_rate": 9.021476199203255e-06, "loss": 0.757, "step": 1334 }, { "epoch": 0.55, "learning_rate": 9.008296133970855e-06, "loss": 0.7054, "step": 1335 }, { "epoch": 0.55, "learning_rate": 8.995117808350146e-06, "loss": 0.6219, "step": 1336 }, { "epoch": 0.55, "learning_rate": 8.981941245458086e-06, "loss": 0.6548, "step": 1337 }, { "epoch": 0.55, "learning_rate": 8.968766468408532e-06, "loss": 0.6361, "step": 1338 }, { "epoch": 0.55, "learning_rate": 8.955593500312208e-06, "loss": 0.6198, "step": 1339 }, { "epoch": 0.55, "learning_rate": 8.942422364276668e-06, "loss": 0.682, "step": 1340 }, { "epoch": 0.55, "learning_rate": 8.929253083406248e-06, "loss": 0.6167, "step": 1341 }, { "epoch": 0.55, "learning_rate": 8.916085680802038e-06, "loss": 0.6933, "step": 1342 }, { "epoch": 0.55, "learning_rate": 8.902920179561822e-06, "loss": 0.6746, "step": 1343 }, { "epoch": 0.55, "learning_rate": 8.889756602780059e-06, "loss": 0.6791, "step": 1344 }, { "epoch": 0.55, "learning_rate": 8.876594973547825e-06, "loss": 0.6868, "step": 1345 }, { "epoch": 0.55, "learning_rate": 8.863435314952787e-06, "loss": 0.592, "step": 1346 }, { "epoch": 0.55, "learning_rate": 8.85027765007915e-06, "loss": 0.65, "step": 1347 }, { "epoch": 0.55, "learning_rate": 8.837122002007614e-06, "loss": 0.6359, "step": 1348 }, { "epoch": 0.55, "learning_rate": 8.82396839381536e-06, "loss": 0.6293, "step": 1349 }, { "epoch": 0.55, "learning_rate": 8.810816848575971e-06, "loss": 0.6673, "step": 1350 }, { "epoch": 0.55, "learning_rate": 8.797667389359426e-06, "loss": 0.6763, "step": 1351 }, { "epoch": 0.55, "learning_rate": 8.784520039232044e-06, "loss": 0.5985, "step": 1352 }, { "epoch": 0.55, "learning_rate": 8.771374821256431e-06, "loss": 0.629, "step": 1353 }, { "epoch": 0.55, "learning_rate": 8.758231758491467e-06, "loss": 0.6728, "step": 1354 }, { "epoch": 0.55, "learning_rate": 8.745090873992239e-06, "loss": 0.7148, "step": 1355 }, { "epoch": 0.55, "learning_rate": 8.731952190810029e-06, "loss": 0.6358, "step": 1356 }, { "epoch": 0.55, "learning_rate": 8.718815731992239e-06, "loss": 0.7123, "step": 1357 }, { "epoch": 0.56, "learning_rate": 8.705681520582382e-06, "loss": 0.7215, "step": 1358 }, { "epoch": 0.56, "learning_rate": 8.69254957962003e-06, "loss": 0.5794, "step": 1359 }, { "epoch": 0.56, "learning_rate": 8.679419932140765e-06, "loss": 0.6664, "step": 1360 }, { "epoch": 0.56, "learning_rate": 8.666292601176146e-06, "loss": 0.6564, "step": 1361 }, { "epoch": 0.56, "learning_rate": 8.653167609753667e-06, "loss": 0.7458, "step": 1362 }, { "epoch": 0.56, "learning_rate": 8.640044980896734e-06, "loss": 0.6763, "step": 1363 }, { "epoch": 0.56, "learning_rate": 8.62692473762458e-06, "loss": 0.6906, "step": 1364 }, { "epoch": 0.56, "learning_rate": 8.613806902952286e-06, "loss": 0.6191, "step": 1365 }, { "epoch": 0.56, "learning_rate": 8.600691499890677e-06, "loss": 0.6593, "step": 1366 }, { "epoch": 0.56, "learning_rate": 8.587578551446338e-06, "loss": 0.6647, "step": 1367 }, { "epoch": 0.56, "learning_rate": 8.574468080621533e-06, "loss": 0.6891, "step": 1368 }, { "epoch": 0.56, "learning_rate": 8.561360110414185e-06, "loss": 0.7016, "step": 1369 }, { "epoch": 0.56, "learning_rate": 8.54825466381783e-06, "loss": 0.623, "step": 1370 }, { "epoch": 0.56, "learning_rate": 8.535151763821574e-06, "loss": 0.5997, "step": 1371 }, { "epoch": 0.56, "learning_rate": 8.522051433410064e-06, "loss": 0.6429, "step": 1372 }, { "epoch": 0.56, "learning_rate": 8.508953695563428e-06, "loss": 0.6178, "step": 1373 }, { "epoch": 0.56, "learning_rate": 8.495858573257258e-06, "loss": 0.5654, "step": 1374 }, { "epoch": 0.56, "learning_rate": 8.48276608946255e-06, "loss": 0.6463, "step": 1375 }, { "epoch": 0.56, "learning_rate": 8.469676267145674e-06, "loss": 0.6155, "step": 1376 }, { "epoch": 0.56, "learning_rate": 8.456589129268335e-06, "loss": 0.6585, "step": 1377 }, { "epoch": 0.56, "learning_rate": 8.443504698787517e-06, "loss": 0.6637, "step": 1378 }, { "epoch": 0.56, "learning_rate": 8.430422998655473e-06, "loss": 0.6316, "step": 1379 }, { "epoch": 0.56, "learning_rate": 8.417344051819646e-06, "loss": 0.5966, "step": 1380 }, { "epoch": 0.56, "learning_rate": 8.40426788122267e-06, "loss": 0.6883, "step": 1381 }, { "epoch": 0.57, "learning_rate": 8.391194509802294e-06, "loss": 0.5809, "step": 1382 }, { "epoch": 0.57, "learning_rate": 8.378123960491362e-06, "loss": 0.6205, "step": 1383 }, { "epoch": 0.57, "learning_rate": 8.365056256217772e-06, "loss": 0.6574, "step": 1384 }, { "epoch": 0.57, "learning_rate": 8.351991419904416e-06, "loss": 0.6135, "step": 1385 }, { "epoch": 0.57, "learning_rate": 8.338929474469177e-06, "loss": 0.6732, "step": 1386 }, { "epoch": 0.57, "learning_rate": 8.325870442824848e-06, "loss": 0.571, "step": 1387 }, { "epoch": 0.57, "learning_rate": 8.312814347879121e-06, "loss": 0.6187, "step": 1388 }, { "epoch": 0.57, "learning_rate": 8.299761212534539e-06, "loss": 0.7331, "step": 1389 }, { "epoch": 0.57, "learning_rate": 8.286711059688441e-06, "loss": 0.5907, "step": 1390 }, { "epoch": 0.57, "learning_rate": 8.273663912232948e-06, "loss": 0.6352, "step": 1391 }, { "epoch": 0.57, "learning_rate": 8.260619793054894e-06, "loss": 0.6186, "step": 1392 }, { "epoch": 0.57, "learning_rate": 8.247578725035819e-06, "loss": 0.6895, "step": 1393 }, { "epoch": 0.57, "learning_rate": 8.234540731051892e-06, "loss": 0.5519, "step": 1394 }, { "epoch": 0.57, "learning_rate": 8.221505833973908e-06, "loss": 0.6142, "step": 1395 }, { "epoch": 0.57, "learning_rate": 8.208474056667212e-06, "loss": 0.591, "step": 1396 }, { "epoch": 0.57, "learning_rate": 8.195445421991692e-06, "loss": 0.634, "step": 1397 }, { "epoch": 0.57, "learning_rate": 8.182419952801716e-06, "loss": 0.7016, "step": 1398 }, { "epoch": 0.57, "learning_rate": 8.169397671946092e-06, "loss": 0.6793, "step": 1399 }, { "epoch": 0.57, "learning_rate": 8.15637860226805e-06, "loss": 0.6162, "step": 1400 }, { "epoch": 0.57, "learning_rate": 8.143362766605173e-06, "loss": 0.5626, "step": 1401 }, { "epoch": 0.57, "learning_rate": 8.130350187789387e-06, "loss": 0.6736, "step": 1402 }, { "epoch": 0.57, "learning_rate": 8.117340888646885e-06, "loss": 0.6167, "step": 1403 }, { "epoch": 0.57, "learning_rate": 8.104334891998124e-06, "loss": 0.6183, "step": 1404 }, { "epoch": 0.57, "learning_rate": 8.091332220657759e-06, "loss": 0.6364, "step": 1405 }, { "epoch": 0.57, "learning_rate": 8.078332897434617e-06, "loss": 0.7039, "step": 1406 }, { "epoch": 0.58, "learning_rate": 8.065336945131647e-06, "loss": 0.6088, "step": 1407 }, { "epoch": 0.58, "learning_rate": 8.052344386545882e-06, "loss": 0.5764, "step": 1408 }, { "epoch": 0.58, "learning_rate": 8.039355244468418e-06, "loss": 0.6885, "step": 1409 }, { "epoch": 0.58, "learning_rate": 8.026369541684334e-06, "loss": 0.6421, "step": 1410 }, { "epoch": 0.58, "learning_rate": 8.013387300972698e-06, "loss": 0.7, "step": 1411 }, { "epoch": 0.58, "learning_rate": 8.000408545106492e-06, "loss": 0.6638, "step": 1412 }, { "epoch": 0.58, "learning_rate": 7.987433296852589e-06, "loss": 0.6438, "step": 1413 }, { "epoch": 0.58, "learning_rate": 7.97446157897171e-06, "loss": 0.6526, "step": 1414 }, { "epoch": 0.58, "learning_rate": 7.961493414218377e-06, "loss": 0.6316, "step": 1415 }, { "epoch": 0.58, "learning_rate": 7.948528825340891e-06, "loss": 0.6365, "step": 1416 }, { "epoch": 0.58, "learning_rate": 7.935567835081265e-06, "loss": 0.5808, "step": 1417 }, { "epoch": 0.58, "learning_rate": 7.922610466175213e-06, "loss": 0.6729, "step": 1418 }, { "epoch": 0.58, "learning_rate": 7.909656741352092e-06, "loss": 0.6027, "step": 1419 }, { "epoch": 0.58, "learning_rate": 7.896706683334863e-06, "loss": 0.5762, "step": 1420 }, { "epoch": 0.58, "learning_rate": 7.883760314840064e-06, "loss": 0.6939, "step": 1421 }, { "epoch": 0.58, "learning_rate": 7.870817658577743e-06, "loss": 0.6654, "step": 1422 }, { "epoch": 0.58, "learning_rate": 7.85787873725146e-06, "loss": 0.6737, "step": 1423 }, { "epoch": 0.58, "learning_rate": 7.844943573558202e-06, "loss": 0.5907, "step": 1424 }, { "epoch": 0.58, "learning_rate": 7.83201219018838e-06, "loss": 0.6303, "step": 1425 }, { "epoch": 0.58, "learning_rate": 7.819084609825762e-06, "loss": 0.6821, "step": 1426 }, { "epoch": 0.58, "learning_rate": 7.806160855147456e-06, "loss": 0.6315, "step": 1427 }, { "epoch": 0.58, "learning_rate": 7.793240948823852e-06, "loss": 0.6252, "step": 1428 }, { "epoch": 0.58, "learning_rate": 7.780324913518586e-06, "loss": 0.6126, "step": 1429 }, { "epoch": 0.58, "learning_rate": 7.767412771888515e-06, "loss": 0.5741, "step": 1430 }, { "epoch": 0.59, "learning_rate": 7.754504546583652e-06, "loss": 0.6822, "step": 1431 }, { "epoch": 0.59, "learning_rate": 7.741600260247155e-06, "loss": 0.6312, "step": 1432 }, { "epoch": 0.59, "learning_rate": 7.728699935515257e-06, "loss": 0.6389, "step": 1433 }, { "epoch": 0.59, "learning_rate": 7.715803595017257e-06, "loss": 0.6923, "step": 1434 }, { "epoch": 0.59, "learning_rate": 7.702911261375454e-06, "loss": 0.622, "step": 1435 }, { "epoch": 0.59, "learning_rate": 7.69002295720512e-06, "loss": 0.6477, "step": 1436 }, { "epoch": 0.59, "learning_rate": 7.677138705114463e-06, "loss": 0.6774, "step": 1437 }, { "epoch": 0.59, "learning_rate": 7.664258527704576e-06, "loss": 0.655, "step": 1438 }, { "epoch": 0.59, "learning_rate": 7.651382447569418e-06, "loss": 0.6349, "step": 1439 }, { "epoch": 0.59, "learning_rate": 7.638510487295738e-06, "loss": 0.5454, "step": 1440 }, { "epoch": 0.59, "learning_rate": 7.625642669463084e-06, "loss": 0.6604, "step": 1441 }, { "epoch": 0.59, "learning_rate": 7.61277901664372e-06, "loss": 0.5886, "step": 1442 }, { "epoch": 0.59, "learning_rate": 7.599919551402606e-06, "loss": 0.6668, "step": 1443 }, { "epoch": 0.59, "learning_rate": 7.587064296297364e-06, "loss": 0.6225, "step": 1444 }, { "epoch": 0.59, "learning_rate": 7.574213273878221e-06, "loss": 0.6207, "step": 1445 }, { "epoch": 0.59, "learning_rate": 7.56136650668799e-06, "loss": 0.6111, "step": 1446 }, { "epoch": 0.59, "learning_rate": 7.548524017262007e-06, "loss": 0.7042, "step": 1447 }, { "epoch": 0.59, "learning_rate": 7.535685828128117e-06, "loss": 0.685, "step": 1448 }, { "epoch": 0.59, "learning_rate": 7.52285196180661e-06, "loss": 0.697, "step": 1449 }, { "epoch": 0.59, "learning_rate": 7.510022440810203e-06, "loss": 0.6194, "step": 1450 }, { "epoch": 0.59, "learning_rate": 7.497197287643986e-06, "loss": 0.5635, "step": 1451 }, { "epoch": 0.59, "learning_rate": 7.48437652480538e-06, "loss": 0.6443, "step": 1452 }, { "epoch": 0.59, "learning_rate": 7.471560174784122e-06, "loss": 0.6482, "step": 1453 }, { "epoch": 0.59, "learning_rate": 7.458748260062187e-06, "loss": 0.6442, "step": 1454 }, { "epoch": 0.59, "learning_rate": 7.445940803113792e-06, "loss": 0.6554, "step": 1455 }, { "epoch": 0.6, "learning_rate": 7.433137826405314e-06, "loss": 0.6285, "step": 1456 }, { "epoch": 0.6, "learning_rate": 7.420339352395285e-06, "loss": 0.5656, "step": 1457 }, { "epoch": 0.6, "learning_rate": 7.407545403534334e-06, "loss": 0.7026, "step": 1458 }, { "epoch": 0.6, "learning_rate": 7.394756002265147e-06, "loss": 0.6109, "step": 1459 }, { "epoch": 0.6, "learning_rate": 7.381971171022442e-06, "loss": 0.631, "step": 1460 }, { "epoch": 0.6, "learning_rate": 7.369190932232911e-06, "loss": 0.6284, "step": 1461 }, { "epoch": 0.6, "learning_rate": 7.356415308315201e-06, "loss": 0.6287, "step": 1462 }, { "epoch": 0.6, "learning_rate": 7.343644321679851e-06, "loss": 0.6145, "step": 1463 }, { "epoch": 0.6, "learning_rate": 7.3308779947292776e-06, "loss": 0.5997, "step": 1464 }, { "epoch": 0.6, "learning_rate": 7.318116349857719e-06, "loss": 0.7314, "step": 1465 }, { "epoch": 0.6, "learning_rate": 7.305359409451192e-06, "loss": 0.663, "step": 1466 }, { "epoch": 0.6, "learning_rate": 7.2926071958874765e-06, "loss": 0.6189, "step": 1467 }, { "epoch": 0.6, "learning_rate": 7.279859731536045e-06, "loss": 0.7363, "step": 1468 }, { "epoch": 0.6, "learning_rate": 7.2671170387580534e-06, "loss": 0.6659, "step": 1469 }, { "epoch": 0.6, "learning_rate": 7.2543791399062755e-06, "loss": 0.552, "step": 1470 }, { "epoch": 0.6, "learning_rate": 7.241646057325084e-06, "loss": 0.6937, "step": 1471 }, { "epoch": 0.6, "learning_rate": 7.228917813350404e-06, "loss": 0.6792, "step": 1472 }, { "epoch": 0.6, "learning_rate": 7.216194430309657e-06, "loss": 0.6138, "step": 1473 }, { "epoch": 0.6, "learning_rate": 7.203475930521764e-06, "loss": 0.662, "step": 1474 }, { "epoch": 0.6, "learning_rate": 7.190762336297052e-06, "loss": 0.6375, "step": 1475 }, { "epoch": 0.6, "learning_rate": 7.1780536699372685e-06, "loss": 0.6624, "step": 1476 }, { "epoch": 0.6, "learning_rate": 7.165349953735494e-06, "loss": 0.6535, "step": 1477 }, { "epoch": 0.6, "learning_rate": 7.1526512099761424e-06, "loss": 0.6433, "step": 1478 }, { "epoch": 0.6, "learning_rate": 7.139957460934902e-06, "loss": 0.6151, "step": 1479 }, { "epoch": 0.61, "learning_rate": 7.127268728878687e-06, "loss": 0.6065, "step": 1480 }, { "epoch": 0.61, "learning_rate": 7.11458503606563e-06, "loss": 0.6915, "step": 1481 }, { "epoch": 0.61, "learning_rate": 7.101906404745006e-06, "loss": 0.6012, "step": 1482 }, { "epoch": 0.61, "learning_rate": 7.089232857157228e-06, "loss": 0.6355, "step": 1483 }, { "epoch": 0.61, "learning_rate": 7.076564415533774e-06, "loss": 0.5928, "step": 1484 }, { "epoch": 0.61, "learning_rate": 7.063901102097184e-06, "loss": 0.6511, "step": 1485 }, { "epoch": 0.61, "learning_rate": 7.0512429390609825e-06, "loss": 0.6225, "step": 1486 }, { "epoch": 0.61, "learning_rate": 7.038589948629677e-06, "loss": 0.6154, "step": 1487 }, { "epoch": 0.61, "learning_rate": 7.0259421529986946e-06, "loss": 0.5748, "step": 1488 }, { "epoch": 0.61, "learning_rate": 7.013299574354342e-06, "loss": 0.6038, "step": 1489 }, { "epoch": 0.61, "learning_rate": 7.00066223487379e-06, "loss": 0.6378, "step": 1490 }, { "epoch": 0.61, "learning_rate": 6.9880301567250005e-06, "loss": 0.6833, "step": 1491 }, { "epoch": 0.61, "learning_rate": 6.975403362066727e-06, "loss": 0.6165, "step": 1492 }, { "epoch": 0.61, "learning_rate": 6.962781873048435e-06, "loss": 0.6691, "step": 1493 }, { "epoch": 0.61, "learning_rate": 6.9501657118102994e-06, "loss": 0.5977, "step": 1494 }, { "epoch": 0.61, "learning_rate": 6.93755490048314e-06, "loss": 0.6229, "step": 1495 }, { "epoch": 0.61, "learning_rate": 6.92494946118839e-06, "loss": 0.6642, "step": 1496 }, { "epoch": 0.61, "learning_rate": 6.91234941603807e-06, "loss": 0.6479, "step": 1497 }, { "epoch": 0.61, "learning_rate": 6.899754787134725e-06, "loss": 0.6376, "step": 1498 }, { "epoch": 0.61, "learning_rate": 6.887165596571411e-06, "loss": 0.6789, "step": 1499 }, { "epoch": 0.61, "learning_rate": 6.874581866431633e-06, "loss": 0.7342, "step": 1500 }, { "epoch": 0.61, "learning_rate": 6.86200361878933e-06, "loss": 0.6027, "step": 1501 }, { "epoch": 0.61, "learning_rate": 6.849430875708818e-06, "loss": 0.6349, "step": 1502 }, { "epoch": 0.61, "learning_rate": 6.836863659244746e-06, "loss": 0.6059, "step": 1503 }, { "epoch": 0.61, "learning_rate": 6.82430199144209e-06, "loss": 0.6761, "step": 1504 }, { "epoch": 0.62, "learning_rate": 6.811745894336074e-06, "loss": 0.646, "step": 1505 }, { "epoch": 0.62, "learning_rate": 6.799195389952163e-06, "loss": 0.6818, "step": 1506 }, { "epoch": 0.62, "learning_rate": 6.786650500306e-06, "loss": 0.6167, "step": 1507 }, { "epoch": 0.62, "learning_rate": 6.77411124740339e-06, "loss": 0.7354, "step": 1508 }, { "epoch": 0.62, "learning_rate": 6.7615776532402456e-06, "loss": 0.5956, "step": 1509 }, { "epoch": 0.62, "learning_rate": 6.7490497398025444e-06, "loss": 0.6544, "step": 1510 }, { "epoch": 0.62, "learning_rate": 6.736527529066319e-06, "loss": 0.6957, "step": 1511 }, { "epoch": 0.62, "learning_rate": 6.724011042997576e-06, "loss": 0.6441, "step": 1512 }, { "epoch": 0.62, "learning_rate": 6.7115003035522985e-06, "loss": 0.6593, "step": 1513 }, { "epoch": 0.62, "learning_rate": 6.698995332676375e-06, "loss": 0.642, "step": 1514 }, { "epoch": 0.62, "learning_rate": 6.686496152305586e-06, "loss": 0.6411, "step": 1515 }, { "epoch": 0.62, "learning_rate": 6.674002784365547e-06, "loss": 0.6947, "step": 1516 }, { "epoch": 0.62, "learning_rate": 6.66151525077168e-06, "loss": 0.6317, "step": 1517 }, { "epoch": 0.62, "learning_rate": 6.649033573429178e-06, "loss": 0.5698, "step": 1518 }, { "epoch": 0.62, "learning_rate": 6.6365577742329455e-06, "loss": 0.622, "step": 1519 }, { "epoch": 0.62, "learning_rate": 6.6240878750676e-06, "loss": 0.6135, "step": 1520 }, { "epoch": 0.62, "learning_rate": 6.611623897807382e-06, "loss": 0.7081, "step": 1521 }, { "epoch": 0.62, "learning_rate": 6.5991658643161696e-06, "loss": 0.6611, "step": 1522 }, { "epoch": 0.62, "learning_rate": 6.586713796447392e-06, "loss": 0.5996, "step": 1523 }, { "epoch": 0.62, "learning_rate": 6.574267716044033e-06, "loss": 0.6354, "step": 1524 }, { "epoch": 0.62, "learning_rate": 6.561827644938563e-06, "loss": 0.5704, "step": 1525 }, { "epoch": 0.62, "learning_rate": 6.549393604952906e-06, "loss": 0.6245, "step": 1526 }, { "epoch": 0.62, "learning_rate": 6.536965617898423e-06, "loss": 0.69, "step": 1527 }, { "epoch": 0.62, "learning_rate": 6.524543705575839e-06, "loss": 0.649, "step": 1528 }, { "epoch": 0.63, "learning_rate": 6.512127889775239e-06, "loss": 0.6557, "step": 1529 }, { "epoch": 0.63, "learning_rate": 6.499718192275999e-06, "loss": 0.6709, "step": 1530 }, { "epoch": 0.63, "learning_rate": 6.487314634846774e-06, "loss": 0.6524, "step": 1531 }, { "epoch": 0.63, "learning_rate": 6.474917239245445e-06, "loss": 0.5817, "step": 1532 }, { "epoch": 0.63, "learning_rate": 6.4625260272190775e-06, "loss": 0.7199, "step": 1533 }, { "epoch": 0.63, "learning_rate": 6.450141020503902e-06, "loss": 0.6143, "step": 1534 }, { "epoch": 0.63, "learning_rate": 6.43776224082525e-06, "loss": 0.5836, "step": 1535 }, { "epoch": 0.63, "learning_rate": 6.425389709897543e-06, "loss": 0.5933, "step": 1536 }, { "epoch": 0.63, "learning_rate": 6.4130234494242315e-06, "loss": 0.7129, "step": 1537 }, { "epoch": 0.63, "learning_rate": 6.400663481097774e-06, "loss": 0.6216, "step": 1538 }, { "epoch": 0.63, "learning_rate": 6.388309826599588e-06, "loss": 0.7252, "step": 1539 }, { "epoch": 0.63, "learning_rate": 6.375962507600009e-06, "loss": 0.6145, "step": 1540 }, { "epoch": 0.63, "learning_rate": 6.363621545758276e-06, "loss": 0.6862, "step": 1541 }, { "epoch": 0.63, "learning_rate": 6.3512869627224535e-06, "loss": 0.6897, "step": 1542 }, { "epoch": 0.63, "learning_rate": 6.338958780129441e-06, "loss": 0.6848, "step": 1543 }, { "epoch": 0.63, "learning_rate": 6.326637019604888e-06, "loss": 0.659, "step": 1544 }, { "epoch": 0.63, "learning_rate": 6.314321702763198e-06, "loss": 0.5883, "step": 1545 }, { "epoch": 0.63, "learning_rate": 6.302012851207455e-06, "loss": 0.6645, "step": 1546 }, { "epoch": 0.63, "learning_rate": 6.289710486529412e-06, "loss": 0.6635, "step": 1547 }, { "epoch": 0.63, "learning_rate": 6.277414630309444e-06, "loss": 0.5986, "step": 1548 }, { "epoch": 0.63, "learning_rate": 6.265125304116498e-06, "loss": 0.7029, "step": 1549 }, { "epoch": 0.63, "learning_rate": 6.252842529508081e-06, "loss": 0.6566, "step": 1550 }, { "epoch": 0.63, "learning_rate": 6.240566328030193e-06, "loss": 0.6431, "step": 1551 }, { "epoch": 0.63, "learning_rate": 6.228296721217317e-06, "loss": 0.6174, "step": 1552 }, { "epoch": 0.63, "learning_rate": 6.216033730592357e-06, "loss": 0.6436, "step": 1553 }, { "epoch": 0.64, "learning_rate": 6.2037773776666134e-06, "loss": 0.6042, "step": 1554 }, { "epoch": 0.64, "learning_rate": 6.191527683939753e-06, "loss": 0.648, "step": 1555 }, { "epoch": 0.64, "learning_rate": 6.179284670899745e-06, "loss": 0.6231, "step": 1556 }, { "epoch": 0.64, "learning_rate": 6.167048360022856e-06, "loss": 0.6698, "step": 1557 }, { "epoch": 0.64, "learning_rate": 6.154818772773579e-06, "loss": 0.6573, "step": 1558 }, { "epoch": 0.64, "learning_rate": 6.142595930604631e-06, "loss": 0.7049, "step": 1559 }, { "epoch": 0.64, "learning_rate": 6.130379854956879e-06, "loss": 0.6447, "step": 1560 }, { "epoch": 0.64, "learning_rate": 6.118170567259336e-06, "loss": 0.7026, "step": 1561 }, { "epoch": 0.64, "learning_rate": 6.105968088929098e-06, "loss": 0.6007, "step": 1562 }, { "epoch": 0.64, "learning_rate": 6.093772441371315e-06, "loss": 0.6078, "step": 1563 }, { "epoch": 0.64, "learning_rate": 6.081583645979168e-06, "loss": 0.6222, "step": 1564 }, { "epoch": 0.64, "learning_rate": 6.069401724133796e-06, "loss": 0.6396, "step": 1565 }, { "epoch": 0.64, "learning_rate": 6.057226697204308e-06, "loss": 0.6941, "step": 1566 }, { "epoch": 0.64, "learning_rate": 6.045058586547692e-06, "loss": 0.6709, "step": 1567 }, { "epoch": 0.64, "learning_rate": 6.032897413508822e-06, "loss": 0.6249, "step": 1568 }, { "epoch": 0.64, "learning_rate": 6.0207431994203955e-06, "loss": 0.6003, "step": 1569 }, { "epoch": 0.64, "learning_rate": 6.0085959656028994e-06, "loss": 0.661, "step": 1570 }, { "epoch": 0.64, "learning_rate": 5.9964557333645845e-06, "loss": 0.6845, "step": 1571 }, { "epoch": 0.64, "learning_rate": 5.984322524001409e-06, "loss": 0.6322, "step": 1572 }, { "epoch": 0.64, "learning_rate": 5.972196358797024e-06, "loss": 0.6658, "step": 1573 }, { "epoch": 0.64, "learning_rate": 5.960077259022713e-06, "loss": 0.632, "step": 1574 }, { "epoch": 0.64, "learning_rate": 5.947965245937375e-06, "loss": 0.6772, "step": 1575 }, { "epoch": 0.64, "learning_rate": 5.9358603407874695e-06, "loss": 0.6451, "step": 1576 }, { "epoch": 0.64, "learning_rate": 5.9237625648069895e-06, "loss": 0.6318, "step": 1577 }, { "epoch": 0.65, "learning_rate": 5.9116719392174304e-06, "loss": 0.6824, "step": 1578 }, { "epoch": 0.65, "learning_rate": 5.89958848522773e-06, "loss": 0.6331, "step": 1579 }, { "epoch": 0.65, "learning_rate": 5.887512224034263e-06, "loss": 0.6536, "step": 1580 }, { "epoch": 0.65, "learning_rate": 5.8754431768207694e-06, "loss": 0.6422, "step": 1581 }, { "epoch": 0.65, "learning_rate": 5.8633813647583505e-06, "loss": 0.6291, "step": 1582 }, { "epoch": 0.65, "learning_rate": 5.851326809005402e-06, "loss": 0.6412, "step": 1583 }, { "epoch": 0.65, "learning_rate": 5.8392795307076e-06, "loss": 0.6001, "step": 1584 }, { "epoch": 0.65, "learning_rate": 5.827239550997856e-06, "loss": 0.6229, "step": 1585 }, { "epoch": 0.65, "learning_rate": 5.815206890996267e-06, "loss": 0.6071, "step": 1586 }, { "epoch": 0.65, "learning_rate": 5.803181571810106e-06, "loss": 0.691, "step": 1587 }, { "epoch": 0.65, "learning_rate": 5.791163614533753e-06, "loss": 0.5727, "step": 1588 }, { "epoch": 0.65, "learning_rate": 5.7791530402486884e-06, "loss": 0.6147, "step": 1589 }, { "epoch": 0.65, "learning_rate": 5.76714987002343e-06, "loss": 0.6413, "step": 1590 }, { "epoch": 0.65, "learning_rate": 5.755154124913514e-06, "loss": 0.7356, "step": 1591 }, { "epoch": 0.65, "learning_rate": 5.743165825961454e-06, "loss": 0.6126, "step": 1592 }, { "epoch": 0.65, "learning_rate": 5.731184994196697e-06, "loss": 0.7526, "step": 1593 }, { "epoch": 0.65, "learning_rate": 5.719211650635586e-06, "loss": 0.6431, "step": 1594 }, { "epoch": 0.65, "learning_rate": 5.707245816281345e-06, "loss": 0.5429, "step": 1595 }, { "epoch": 0.65, "learning_rate": 5.695287512124011e-06, "loss": 0.6359, "step": 1596 }, { "epoch": 0.65, "learning_rate": 5.683336759140409e-06, "loss": 0.5654, "step": 1597 }, { "epoch": 0.65, "learning_rate": 5.671393578294133e-06, "loss": 0.6572, "step": 1598 }, { "epoch": 0.65, "learning_rate": 5.659457990535491e-06, "loss": 0.6754, "step": 1599 }, { "epoch": 0.65, "learning_rate": 5.647530016801457e-06, "loss": 0.6868, "step": 1600 }, { "epoch": 0.65, "learning_rate": 5.635609678015668e-06, "loss": 0.647, "step": 1601 }, { "epoch": 0.65, "learning_rate": 5.62369699508835e-06, "loss": 0.5354, "step": 1602 }, { "epoch": 0.66, "learning_rate": 5.611791988916317e-06, "loss": 0.6123, "step": 1603 }, { "epoch": 0.66, "learning_rate": 5.5998946803829e-06, "loss": 0.578, "step": 1604 }, { "epoch": 0.66, "learning_rate": 5.588005090357943e-06, "loss": 0.652, "step": 1605 }, { "epoch": 0.66, "learning_rate": 5.576123239697735e-06, "loss": 0.6212, "step": 1606 }, { "epoch": 0.66, "learning_rate": 5.564249149244998e-06, "loss": 0.7071, "step": 1607 }, { "epoch": 0.66, "learning_rate": 5.552382839828847e-06, "loss": 0.6945, "step": 1608 }, { "epoch": 0.66, "learning_rate": 5.540524332264734e-06, "loss": 0.6102, "step": 1609 }, { "epoch": 0.66, "learning_rate": 5.528673647354432e-06, "loss": 0.6677, "step": 1610 }, { "epoch": 0.66, "learning_rate": 5.516830805885989e-06, "loss": 0.5845, "step": 1611 }, { "epoch": 0.66, "learning_rate": 5.504995828633704e-06, "loss": 0.6303, "step": 1612 }, { "epoch": 0.66, "learning_rate": 5.493168736358063e-06, "loss": 0.6699, "step": 1613 }, { "epoch": 0.66, "learning_rate": 5.481349549805741e-06, "loss": 0.6219, "step": 1614 }, { "epoch": 0.66, "learning_rate": 5.469538289709535e-06, "loss": 0.5898, "step": 1615 }, { "epoch": 0.66, "learning_rate": 5.457734976788331e-06, "loss": 0.6263, "step": 1616 }, { "epoch": 0.66, "learning_rate": 5.445939631747089e-06, "loss": 0.6121, "step": 1617 }, { "epoch": 0.66, "learning_rate": 5.434152275276776e-06, "loss": 0.6207, "step": 1618 }, { "epoch": 0.66, "learning_rate": 5.4223729280543634e-06, "loss": 0.607, "step": 1619 }, { "epoch": 0.66, "learning_rate": 5.410601610742754e-06, "loss": 0.6156, "step": 1620 }, { "epoch": 0.66, "learning_rate": 5.39883834399078e-06, "loss": 0.696, "step": 1621 }, { "epoch": 0.66, "learning_rate": 5.38708314843315e-06, "loss": 0.6141, "step": 1622 }, { "epoch": 0.66, "learning_rate": 5.375336044690405e-06, "loss": 0.6294, "step": 1623 }, { "epoch": 0.66, "learning_rate": 5.363597053368897e-06, "loss": 0.6457, "step": 1624 }, { "epoch": 0.66, "learning_rate": 5.3518661950607465e-06, "loss": 0.5826, "step": 1625 }, { "epoch": 0.66, "learning_rate": 5.340143490343813e-06, "loss": 0.5124, "step": 1626 }, { "epoch": 0.67, "learning_rate": 5.328428959781643e-06, "loss": 0.6587, "step": 1627 }, { "epoch": 0.67, "learning_rate": 5.316722623923454e-06, "loss": 0.5949, "step": 1628 }, { "epoch": 0.67, "learning_rate": 5.305024503304086e-06, "loss": 0.6249, "step": 1629 }, { "epoch": 0.67, "learning_rate": 5.293334618443962e-06, "loss": 0.5911, "step": 1630 }, { "epoch": 0.67, "learning_rate": 5.281652989849067e-06, "loss": 0.6717, "step": 1631 }, { "epoch": 0.67, "learning_rate": 5.269979638010893e-06, "loss": 0.6239, "step": 1632 }, { "epoch": 0.67, "learning_rate": 5.2583145834064295e-06, "loss": 0.669, "step": 1633 }, { "epoch": 0.67, "learning_rate": 5.24665784649809e-06, "loss": 0.6046, "step": 1634 }, { "epoch": 0.67, "learning_rate": 5.235009447733717e-06, "loss": 0.637, "step": 1635 }, { "epoch": 0.67, "learning_rate": 5.223369407546509e-06, "loss": 0.6707, "step": 1636 }, { "epoch": 0.67, "learning_rate": 5.211737746355021e-06, "loss": 0.6737, "step": 1637 }, { "epoch": 0.67, "learning_rate": 5.2001144845630906e-06, "loss": 0.6084, "step": 1638 }, { "epoch": 0.67, "learning_rate": 5.188499642559838e-06, "loss": 0.6288, "step": 1639 }, { "epoch": 0.67, "learning_rate": 5.176893240719602e-06, "loss": 0.6209, "step": 1640 }, { "epoch": 0.67, "learning_rate": 5.165295299401921e-06, "loss": 0.7151, "step": 1641 }, { "epoch": 0.67, "learning_rate": 5.153705838951495e-06, "loss": 0.6241, "step": 1642 }, { "epoch": 0.67, "learning_rate": 5.1421248796981385e-06, "loss": 0.6221, "step": 1643 }, { "epoch": 0.67, "learning_rate": 5.1305524419567595e-06, "loss": 0.6406, "step": 1644 }, { "epoch": 0.67, "learning_rate": 5.1189885460273255e-06, "loss": 0.6736, "step": 1645 }, { "epoch": 0.67, "learning_rate": 5.107433212194801e-06, "loss": 0.6042, "step": 1646 }, { "epoch": 0.67, "learning_rate": 5.095886460729152e-06, "loss": 0.6682, "step": 1647 }, { "epoch": 0.67, "learning_rate": 5.08434831188527e-06, "loss": 0.7017, "step": 1648 }, { "epoch": 0.67, "learning_rate": 5.072818785902975e-06, "loss": 0.6445, "step": 1649 }, { "epoch": 0.67, "learning_rate": 5.061297903006943e-06, "loss": 0.5683, "step": 1650 }, { "epoch": 0.67, "learning_rate": 5.049785683406704e-06, "loss": 0.5783, "step": 1651 }, { "epoch": 0.68, "learning_rate": 5.038282147296585e-06, "loss": 0.613, "step": 1652 }, { "epoch": 0.68, "learning_rate": 5.026787314855679e-06, "loss": 0.539, "step": 1653 }, { "epoch": 0.68, "learning_rate": 5.015301206247813e-06, "loss": 0.6494, "step": 1654 }, { "epoch": 0.68, "learning_rate": 5.003823841621504e-06, "loss": 0.6016, "step": 1655 }, { "epoch": 0.68, "learning_rate": 4.992355241109949e-06, "loss": 0.604, "step": 1656 }, { "epoch": 0.68, "learning_rate": 4.980895424830948e-06, "loss": 0.5705, "step": 1657 }, { "epoch": 0.68, "learning_rate": 4.96944441288691e-06, "loss": 0.6403, "step": 1658 }, { "epoch": 0.68, "learning_rate": 4.958002225364797e-06, "loss": 0.578, "step": 1659 }, { "epoch": 0.68, "learning_rate": 4.94656888233608e-06, "loss": 0.6708, "step": 1660 }, { "epoch": 0.68, "learning_rate": 4.935144403856731e-06, "loss": 0.6255, "step": 1661 }, { "epoch": 0.68, "learning_rate": 4.923728809967156e-06, "loss": 0.629, "step": 1662 }, { "epoch": 0.68, "learning_rate": 4.912322120692194e-06, "loss": 0.6734, "step": 1663 }, { "epoch": 0.68, "learning_rate": 4.900924356041044e-06, "loss": 0.6246, "step": 1664 }, { "epoch": 0.68, "learning_rate": 4.889535536007267e-06, "loss": 0.5585, "step": 1665 }, { "epoch": 0.68, "learning_rate": 4.878155680568721e-06, "loss": 0.5961, "step": 1666 }, { "epoch": 0.68, "learning_rate": 4.866784809687553e-06, "loss": 0.5819, "step": 1667 }, { "epoch": 0.68, "learning_rate": 4.855422943310129e-06, "loss": 0.5051, "step": 1668 }, { "epoch": 0.68, "learning_rate": 4.844070101367043e-06, "loss": 0.6588, "step": 1669 }, { "epoch": 0.68, "learning_rate": 4.832726303773042e-06, "loss": 0.6368, "step": 1670 }, { "epoch": 0.68, "learning_rate": 4.821391570427008e-06, "loss": 0.5931, "step": 1671 }, { "epoch": 0.68, "learning_rate": 4.810065921211936e-06, "loss": 0.7271, "step": 1672 }, { "epoch": 0.68, "learning_rate": 4.79874937599487e-06, "loss": 0.6914, "step": 1673 }, { "epoch": 0.68, "learning_rate": 4.787441954626895e-06, "loss": 0.5705, "step": 1674 }, { "epoch": 0.68, "learning_rate": 4.776143676943093e-06, "loss": 0.6522, "step": 1675 }, { "epoch": 0.69, "learning_rate": 4.764854562762491e-06, "loss": 0.647, "step": 1676 }, { "epoch": 0.69, "learning_rate": 4.753574631888063e-06, "loss": 0.6377, "step": 1677 }, { "epoch": 0.69, "learning_rate": 4.742303904106653e-06, "loss": 0.6902, "step": 1678 }, { "epoch": 0.69, "learning_rate": 4.731042399188981e-06, "loss": 0.7178, "step": 1679 }, { "epoch": 0.69, "learning_rate": 4.719790136889569e-06, "loss": 0.6393, "step": 1680 }, { "epoch": 0.69, "learning_rate": 4.708547136946742e-06, "loss": 0.5724, "step": 1681 }, { "epoch": 0.69, "learning_rate": 4.697313419082573e-06, "loss": 0.6538, "step": 1682 }, { "epoch": 0.69, "learning_rate": 4.6860890030028485e-06, "loss": 0.7321, "step": 1683 }, { "epoch": 0.69, "learning_rate": 4.674873908397039e-06, "loss": 0.6671, "step": 1684 }, { "epoch": 0.69, "learning_rate": 4.663668154938262e-06, "loss": 0.6276, "step": 1685 }, { "epoch": 0.69, "learning_rate": 4.65247176228326e-06, "loss": 0.6324, "step": 1686 }, { "epoch": 0.69, "learning_rate": 4.64128475007234e-06, "loss": 0.6553, "step": 1687 }, { "epoch": 0.69, "learning_rate": 4.630107137929365e-06, "loss": 0.5936, "step": 1688 }, { "epoch": 0.69, "learning_rate": 4.618938945461708e-06, "loss": 0.6397, "step": 1689 }, { "epoch": 0.69, "learning_rate": 4.6077801922602105e-06, "loss": 0.5604, "step": 1690 }, { "epoch": 0.69, "learning_rate": 4.596630897899164e-06, "loss": 0.7081, "step": 1691 }, { "epoch": 0.69, "learning_rate": 4.585491081936263e-06, "loss": 0.6613, "step": 1692 }, { "epoch": 0.69, "learning_rate": 4.57436076391258e-06, "loss": 0.687, "step": 1693 }, { "epoch": 0.69, "learning_rate": 4.563239963352517e-06, "loss": 0.5905, "step": 1694 }, { "epoch": 0.69, "learning_rate": 4.552128699763795e-06, "loss": 0.6323, "step": 1695 }, { "epoch": 0.69, "learning_rate": 4.5410269926373905e-06, "loss": 0.6664, "step": 1696 }, { "epoch": 0.69, "learning_rate": 4.529934861447532e-06, "loss": 0.6958, "step": 1697 }, { "epoch": 0.69, "learning_rate": 4.518852325651638e-06, "loss": 0.6338, "step": 1698 }, { "epoch": 0.69, "learning_rate": 4.507779404690294e-06, "loss": 0.6107, "step": 1699 }, { "epoch": 0.7, "learning_rate": 4.496716117987234e-06, "loss": 0.633, "step": 1700 }, { "epoch": 0.7, "learning_rate": 4.485662484949275e-06, "loss": 0.6524, "step": 1701 }, { "epoch": 0.7, "learning_rate": 4.474618524966313e-06, "loss": 0.6004, "step": 1702 }, { "epoch": 0.7, "learning_rate": 4.463584257411264e-06, "loss": 0.6358, "step": 1703 }, { "epoch": 0.7, "learning_rate": 4.452559701640053e-06, "loss": 0.652, "step": 1704 }, { "epoch": 0.7, "learning_rate": 4.441544876991566e-06, "loss": 0.533, "step": 1705 }, { "epoch": 0.7, "learning_rate": 4.43053980278761e-06, "loss": 0.6784, "step": 1706 }, { "epoch": 0.7, "learning_rate": 4.4195444983329035e-06, "loss": 0.6295, "step": 1707 }, { "epoch": 0.7, "learning_rate": 4.4085589829150125e-06, "loss": 0.5547, "step": 1708 }, { "epoch": 0.7, "learning_rate": 4.397583275804344e-06, "loss": 0.5615, "step": 1709 }, { "epoch": 0.7, "learning_rate": 4.386617396254085e-06, "loss": 0.6918, "step": 1710 }, { "epoch": 0.7, "learning_rate": 4.375661363500201e-06, "loss": 0.5957, "step": 1711 }, { "epoch": 0.7, "learning_rate": 4.364715196761368e-06, "loss": 0.6164, "step": 1712 }, { "epoch": 0.7, "learning_rate": 4.353778915238969e-06, "loss": 0.6334, "step": 1713 }, { "epoch": 0.7, "learning_rate": 4.342852538117039e-06, "loss": 0.6347, "step": 1714 }, { "epoch": 0.7, "learning_rate": 4.331936084562235e-06, "loss": 0.6551, "step": 1715 }, { "epoch": 0.7, "learning_rate": 4.32102957372382e-06, "loss": 0.6247, "step": 1716 }, { "epoch": 0.7, "learning_rate": 4.310133024733602e-06, "loss": 0.6626, "step": 1717 }, { "epoch": 0.7, "learning_rate": 4.299246456705921e-06, "loss": 0.6122, "step": 1718 }, { "epoch": 0.7, "learning_rate": 4.288369888737614e-06, "loss": 0.6568, "step": 1719 }, { "epoch": 0.7, "learning_rate": 4.277503339907961e-06, "loss": 0.7027, "step": 1720 }, { "epoch": 0.7, "learning_rate": 4.266646829278685e-06, "loss": 0.5958, "step": 1721 }, { "epoch": 0.7, "learning_rate": 4.255800375893885e-06, "loss": 0.6179, "step": 1722 }, { "epoch": 0.7, "learning_rate": 4.244963998780028e-06, "loss": 0.6805, "step": 1723 }, { "epoch": 0.7, "learning_rate": 4.234137716945897e-06, "loss": 0.6868, "step": 1724 }, { "epoch": 0.71, "learning_rate": 4.223321549382578e-06, "loss": 0.5573, "step": 1725 }, { "epoch": 0.71, "learning_rate": 4.212515515063399e-06, "loss": 0.597, "step": 1726 }, { "epoch": 0.71, "learning_rate": 4.201719632943931e-06, "loss": 0.5847, "step": 1727 }, { "epoch": 0.71, "learning_rate": 4.1909339219619225e-06, "loss": 0.647, "step": 1728 }, { "epoch": 0.71, "learning_rate": 4.180158401037282e-06, "loss": 0.6489, "step": 1729 }, { "epoch": 0.71, "learning_rate": 4.16939308907205e-06, "loss": 0.651, "step": 1730 }, { "epoch": 0.71, "learning_rate": 4.15863800495035e-06, "loss": 0.6043, "step": 1731 }, { "epoch": 0.71, "learning_rate": 4.147893167538375e-06, "loss": 0.5854, "step": 1732 }, { "epoch": 0.71, "learning_rate": 4.137158595684329e-06, "loss": 0.6114, "step": 1733 }, { "epoch": 0.71, "learning_rate": 4.126434308218421e-06, "loss": 0.6202, "step": 1734 }, { "epoch": 0.71, "learning_rate": 4.115720323952818e-06, "loss": 0.6272, "step": 1735 }, { "epoch": 0.71, "learning_rate": 4.105016661681605e-06, "loss": 0.6163, "step": 1736 }, { "epoch": 0.71, "learning_rate": 4.0943233401807715e-06, "loss": 0.6743, "step": 1737 }, { "epoch": 0.71, "learning_rate": 4.083640378208156e-06, "loss": 0.7581, "step": 1738 }, { "epoch": 0.71, "learning_rate": 4.072967794503437e-06, "loss": 0.6449, "step": 1739 }, { "epoch": 0.71, "learning_rate": 4.0623056077880775e-06, "loss": 0.6043, "step": 1740 }, { "epoch": 0.71, "learning_rate": 4.05165383676531e-06, "loss": 0.6249, "step": 1741 }, { "epoch": 0.71, "learning_rate": 4.04101250012009e-06, "loss": 0.6138, "step": 1742 }, { "epoch": 0.71, "learning_rate": 4.030381616519074e-06, "loss": 0.6771, "step": 1743 }, { "epoch": 0.71, "learning_rate": 4.0197612046105815e-06, "loss": 0.6382, "step": 1744 }, { "epoch": 0.71, "learning_rate": 4.009151283024557e-06, "loss": 0.658, "step": 1745 }, { "epoch": 0.71, "learning_rate": 3.998551870372554e-06, "loss": 0.6129, "step": 1746 }, { "epoch": 0.71, "learning_rate": 3.98796298524768e-06, "loss": 0.6675, "step": 1747 }, { "epoch": 0.71, "learning_rate": 3.977384646224584e-06, "loss": 0.5419, "step": 1748 }, { "epoch": 0.72, "learning_rate": 3.9668168718594155e-06, "loss": 0.6792, "step": 1749 }, { "epoch": 0.72, "learning_rate": 3.956259680689784e-06, "loss": 0.5761, "step": 1750 }, { "epoch": 0.72, "learning_rate": 3.945713091234743e-06, "loss": 0.5577, "step": 1751 }, { "epoch": 0.72, "learning_rate": 3.935177121994741e-06, "loss": 0.5979, "step": 1752 }, { "epoch": 0.72, "learning_rate": 3.924651791451606e-06, "loss": 0.649, "step": 1753 }, { "epoch": 0.72, "learning_rate": 3.9141371180684925e-06, "loss": 0.6042, "step": 1754 }, { "epoch": 0.72, "learning_rate": 3.903633120289876e-06, "loss": 0.5785, "step": 1755 }, { "epoch": 0.72, "learning_rate": 3.893139816541487e-06, "loss": 0.5849, "step": 1756 }, { "epoch": 0.72, "learning_rate": 3.8826572252303145e-06, "loss": 0.6188, "step": 1757 }, { "epoch": 0.72, "learning_rate": 3.872185364744543e-06, "loss": 0.5875, "step": 1758 }, { "epoch": 0.72, "learning_rate": 3.861724253453535e-06, "loss": 0.5596, "step": 1759 }, { "epoch": 0.72, "learning_rate": 3.851273909707809e-06, "loss": 0.6497, "step": 1760 }, { "epoch": 0.72, "learning_rate": 3.840834351838977e-06, "loss": 0.5923, "step": 1761 }, { "epoch": 0.72, "learning_rate": 3.8304055981597495e-06, "loss": 0.668, "step": 1762 }, { "epoch": 0.72, "learning_rate": 3.819987666963869e-06, "loss": 0.5548, "step": 1763 }, { "epoch": 0.72, "learning_rate": 3.809580576526104e-06, "loss": 0.5588, "step": 1764 }, { "epoch": 0.72, "learning_rate": 3.799184345102205e-06, "loss": 0.6508, "step": 1765 }, { "epoch": 0.72, "learning_rate": 3.7887989909288648e-06, "loss": 0.5973, "step": 1766 }, { "epoch": 0.72, "learning_rate": 3.7784245322237113e-06, "loss": 0.5946, "step": 1767 }, { "epoch": 0.72, "learning_rate": 3.7680609871852436e-06, "loss": 0.5996, "step": 1768 }, { "epoch": 0.72, "learning_rate": 3.7577083739928313e-06, "loss": 0.601, "step": 1769 }, { "epoch": 0.72, "learning_rate": 3.7473667108066524e-06, "loss": 0.6741, "step": 1770 }, { "epoch": 0.72, "learning_rate": 3.7370360157676955e-06, "loss": 0.6801, "step": 1771 }, { "epoch": 0.72, "learning_rate": 3.726716306997692e-06, "loss": 0.6208, "step": 1772 }, { "epoch": 0.72, "learning_rate": 3.7164076025991068e-06, "loss": 0.6188, "step": 1773 }, { "epoch": 0.73, "learning_rate": 3.70610992065511e-06, "loss": 0.573, "step": 1774 }, { "epoch": 0.73, "learning_rate": 3.695823279229521e-06, "loss": 0.6968, "step": 1775 }, { "epoch": 0.73, "learning_rate": 3.68554769636681e-06, "loss": 0.6047, "step": 1776 }, { "epoch": 0.73, "learning_rate": 3.6752831900920306e-06, "loss": 0.5545, "step": 1777 }, { "epoch": 0.73, "learning_rate": 3.665029778410819e-06, "loss": 0.6083, "step": 1778 }, { "epoch": 0.73, "learning_rate": 3.6547874793093497e-06, "loss": 0.6485, "step": 1779 }, { "epoch": 0.73, "learning_rate": 3.6445563107542925e-06, "loss": 0.6835, "step": 1780 }, { "epoch": 0.73, "learning_rate": 3.634336290692808e-06, "loss": 0.6244, "step": 1781 }, { "epoch": 0.73, "learning_rate": 3.624127437052484e-06, "loss": 0.6111, "step": 1782 }, { "epoch": 0.73, "learning_rate": 3.6139297677413367e-06, "loss": 0.5409, "step": 1783 }, { "epoch": 0.73, "learning_rate": 3.6037433006477475e-06, "loss": 0.5767, "step": 1784 }, { "epoch": 0.73, "learning_rate": 3.5935680536404626e-06, "loss": 0.6752, "step": 1785 }, { "epoch": 0.73, "learning_rate": 3.5834040445685325e-06, "loss": 0.5394, "step": 1786 }, { "epoch": 0.73, "learning_rate": 3.5732512912613073e-06, "loss": 0.571, "step": 1787 }, { "epoch": 0.73, "learning_rate": 3.5631098115283833e-06, "loss": 0.6408, "step": 1788 }, { "epoch": 0.73, "learning_rate": 3.5529796231595793e-06, "loss": 0.6153, "step": 1789 }, { "epoch": 0.73, "learning_rate": 3.5428607439249197e-06, "loss": 0.6044, "step": 1790 }, { "epoch": 0.73, "learning_rate": 3.532753191574576e-06, "loss": 0.6072, "step": 1791 }, { "epoch": 0.73, "learning_rate": 3.5226569838388647e-06, "loss": 0.6745, "step": 1792 }, { "epoch": 0.73, "learning_rate": 3.5125721384281874e-06, "loss": 0.5946, "step": 1793 }, { "epoch": 0.73, "learning_rate": 3.502498673033026e-06, "loss": 0.5848, "step": 1794 }, { "epoch": 0.73, "learning_rate": 3.4924366053238977e-06, "loss": 0.6502, "step": 1795 }, { "epoch": 0.73, "learning_rate": 3.482385952951318e-06, "loss": 0.6443, "step": 1796 }, { "epoch": 0.73, "learning_rate": 3.472346733545792e-06, "loss": 0.6579, "step": 1797 }, { "epoch": 0.74, "learning_rate": 3.4623189647177533e-06, "loss": 0.5671, "step": 1798 }, { "epoch": 0.74, "learning_rate": 3.4523026640575664e-06, "loss": 0.6066, "step": 1799 }, { "epoch": 0.74, "learning_rate": 3.442297849135462e-06, "loss": 0.6075, "step": 1800 }, { "epoch": 0.74, "learning_rate": 3.4323045375015384e-06, "loss": 0.6898, "step": 1801 }, { "epoch": 0.74, "learning_rate": 3.4223227466857045e-06, "loss": 0.597, "step": 1802 }, { "epoch": 0.74, "learning_rate": 3.4123524941976593e-06, "loss": 0.6686, "step": 1803 }, { "epoch": 0.74, "learning_rate": 3.4023937975268728e-06, "loss": 0.6153, "step": 1804 }, { "epoch": 0.74, "learning_rate": 3.39244667414253e-06, "loss": 0.6611, "step": 1805 }, { "epoch": 0.74, "learning_rate": 3.3825111414935287e-06, "loss": 0.5729, "step": 1806 }, { "epoch": 0.74, "learning_rate": 3.3725872170084193e-06, "loss": 0.6562, "step": 1807 }, { "epoch": 0.74, "learning_rate": 3.3626749180954033e-06, "loss": 0.6539, "step": 1808 }, { "epoch": 0.74, "learning_rate": 3.352774262142284e-06, "loss": 0.6655, "step": 1809 }, { "epoch": 0.74, "learning_rate": 3.342885266516436e-06, "loss": 0.6419, "step": 1810 }, { "epoch": 0.74, "learning_rate": 3.3330079485647894e-06, "loss": 0.6072, "step": 1811 }, { "epoch": 0.74, "learning_rate": 3.3231423256137784e-06, "loss": 0.5955, "step": 1812 }, { "epoch": 0.74, "learning_rate": 3.3132884149693346e-06, "loss": 0.6535, "step": 1813 }, { "epoch": 0.74, "learning_rate": 3.3034462339168317e-06, "loss": 0.6097, "step": 1814 }, { "epoch": 0.74, "learning_rate": 3.2936157997210816e-06, "loss": 0.6541, "step": 1815 }, { "epoch": 0.74, "learning_rate": 3.283797129626274e-06, "loss": 0.5382, "step": 1816 }, { "epoch": 0.74, "learning_rate": 3.27399024085598e-06, "loss": 0.6066, "step": 1817 }, { "epoch": 0.74, "learning_rate": 3.264195150613091e-06, "loss": 0.5851, "step": 1818 }, { "epoch": 0.74, "learning_rate": 3.254411876079803e-06, "loss": 0.6842, "step": 1819 }, { "epoch": 0.74, "learning_rate": 3.244640434417595e-06, "loss": 0.5912, "step": 1820 }, { "epoch": 0.74, "learning_rate": 3.2348808427671784e-06, "loss": 0.6499, "step": 1821 }, { "epoch": 0.74, "learning_rate": 3.2251331182484868e-06, "loss": 0.6182, "step": 1822 }, { "epoch": 0.75, "learning_rate": 3.215397277960626e-06, "loss": 0.6126, "step": 1823 }, { "epoch": 0.75, "learning_rate": 3.205673338981865e-06, "loss": 0.6458, "step": 1824 }, { "epoch": 0.75, "learning_rate": 3.195961318369595e-06, "loss": 0.6305, "step": 1825 }, { "epoch": 0.75, "learning_rate": 3.1862612331602906e-06, "loss": 0.5899, "step": 1826 }, { "epoch": 0.75, "learning_rate": 3.176573100369504e-06, "loss": 0.6442, "step": 1827 }, { "epoch": 0.75, "learning_rate": 3.166896936991808e-06, "loss": 0.7171, "step": 1828 }, { "epoch": 0.75, "learning_rate": 3.157232760000789e-06, "loss": 0.6163, "step": 1829 }, { "epoch": 0.75, "learning_rate": 3.147580586348998e-06, "loss": 0.585, "step": 1830 }, { "epoch": 0.75, "learning_rate": 3.137940432967942e-06, "loss": 0.5771, "step": 1831 }, { "epoch": 0.75, "learning_rate": 3.1283123167680306e-06, "loss": 0.6859, "step": 1832 }, { "epoch": 0.75, "learning_rate": 3.1186962546385613e-06, "loss": 0.5714, "step": 1833 }, { "epoch": 0.75, "learning_rate": 3.1090922634476963e-06, "loss": 0.6438, "step": 1834 }, { "epoch": 0.75, "learning_rate": 3.099500360042407e-06, "loss": 0.6202, "step": 1835 }, { "epoch": 0.75, "learning_rate": 3.089920561248476e-06, "loss": 0.6056, "step": 1836 }, { "epoch": 0.75, "learning_rate": 3.080352883870442e-06, "loss": 0.6483, "step": 1837 }, { "epoch": 0.75, "learning_rate": 3.0707973446915863e-06, "loss": 0.5747, "step": 1838 }, { "epoch": 0.75, "learning_rate": 3.0612539604739e-06, "loss": 0.6485, "step": 1839 }, { "epoch": 0.75, "learning_rate": 3.0517227479580425e-06, "loss": 0.6238, "step": 1840 }, { "epoch": 0.75, "learning_rate": 3.042203723863334e-06, "loss": 0.6091, "step": 1841 }, { "epoch": 0.75, "learning_rate": 3.0326969048877032e-06, "loss": 0.5766, "step": 1842 }, { "epoch": 0.75, "learning_rate": 3.023202307707679e-06, "loss": 0.5843, "step": 1843 }, { "epoch": 0.75, "learning_rate": 3.013719948978342e-06, "loss": 0.6129, "step": 1844 }, { "epoch": 0.75, "learning_rate": 3.0042498453333137e-06, "loss": 0.6563, "step": 1845 }, { "epoch": 0.75, "learning_rate": 2.9947920133847108e-06, "loss": 0.6044, "step": 1846 }, { "epoch": 0.76, "learning_rate": 2.985346469723124e-06, "loss": 0.6976, "step": 1847 }, { "epoch": 0.76, "learning_rate": 2.975913230917595e-06, "loss": 0.6492, "step": 1848 }, { "epoch": 0.76, "learning_rate": 2.9664923135155723e-06, "loss": 0.6004, "step": 1849 }, { "epoch": 0.76, "learning_rate": 2.9570837340428994e-06, "loss": 0.6734, "step": 1850 }, { "epoch": 0.76, "learning_rate": 2.947687509003766e-06, "loss": 0.6337, "step": 1851 }, { "epoch": 0.76, "learning_rate": 2.938303654880702e-06, "loss": 0.5893, "step": 1852 }, { "epoch": 0.76, "learning_rate": 2.9289321881345257e-06, "loss": 0.6212, "step": 1853 }, { "epoch": 0.76, "learning_rate": 2.9195731252043333e-06, "loss": 0.6412, "step": 1854 }, { "epoch": 0.76, "learning_rate": 2.9102264825074657e-06, "loss": 0.6447, "step": 1855 }, { "epoch": 0.76, "learning_rate": 2.900892276439463e-06, "loss": 0.6326, "step": 1856 }, { "epoch": 0.76, "learning_rate": 2.8915705233740653e-06, "loss": 0.639, "step": 1857 }, { "epoch": 0.76, "learning_rate": 2.8822612396631557e-06, "loss": 0.6734, "step": 1858 }, { "epoch": 0.76, "learning_rate": 2.872964441636752e-06, "loss": 0.6496, "step": 1859 }, { "epoch": 0.76, "learning_rate": 2.863680145602963e-06, "loss": 0.6014, "step": 1860 }, { "epoch": 0.76, "learning_rate": 2.854408367847977e-06, "loss": 0.5486, "step": 1861 }, { "epoch": 0.76, "learning_rate": 2.845149124636014e-06, "loss": 0.6532, "step": 1862 }, { "epoch": 0.76, "learning_rate": 2.8359024322093067e-06, "loss": 0.6207, "step": 1863 }, { "epoch": 0.76, "learning_rate": 2.8266683067880807e-06, "loss": 0.5728, "step": 1864 }, { "epoch": 0.76, "learning_rate": 2.817446764570504e-06, "loss": 0.5612, "step": 1865 }, { "epoch": 0.76, "learning_rate": 2.8082378217326843e-06, "loss": 0.6476, "step": 1866 }, { "epoch": 0.76, "learning_rate": 2.799041494428617e-06, "loss": 0.5984, "step": 1867 }, { "epoch": 0.76, "learning_rate": 2.7898577987901786e-06, "loss": 0.7022, "step": 1868 }, { "epoch": 0.76, "learning_rate": 2.7806867509270754e-06, "loss": 0.5719, "step": 1869 }, { "epoch": 0.76, "learning_rate": 2.771528366926837e-06, "loss": 0.6535, "step": 1870 }, { "epoch": 0.76, "learning_rate": 2.762382662854778e-06, "loss": 0.6677, "step": 1871 }, { "epoch": 0.77, "learning_rate": 2.7532496547539623e-06, "loss": 0.6694, "step": 1872 }, { "epoch": 0.77, "learning_rate": 2.7441293586451936e-06, "loss": 0.6706, "step": 1873 }, { "epoch": 0.77, "learning_rate": 2.7350217905269647e-06, "loss": 0.5942, "step": 1874 }, { "epoch": 0.77, "learning_rate": 2.725926966375456e-06, "loss": 0.6493, "step": 1875 }, { "epoch": 0.77, "learning_rate": 2.716844902144481e-06, "loss": 0.6309, "step": 1876 }, { "epoch": 0.77, "learning_rate": 2.707775613765471e-06, "loss": 0.6233, "step": 1877 }, { "epoch": 0.77, "learning_rate": 2.6987191171474548e-06, "loss": 0.6256, "step": 1878 }, { "epoch": 0.77, "learning_rate": 2.689675428177013e-06, "loss": 0.6972, "step": 1879 }, { "epoch": 0.77, "learning_rate": 2.6806445627182686e-06, "loss": 0.6433, "step": 1880 }, { "epoch": 0.77, "learning_rate": 2.671626536612838e-06, "loss": 0.5779, "step": 1881 }, { "epoch": 0.77, "learning_rate": 2.6626213656798295e-06, "loss": 0.6134, "step": 1882 }, { "epoch": 0.77, "learning_rate": 2.6536290657157883e-06, "loss": 0.6828, "step": 1883 }, { "epoch": 0.77, "learning_rate": 2.6446496524946894e-06, "loss": 0.674, "step": 1884 }, { "epoch": 0.77, "learning_rate": 2.635683141767904e-06, "loss": 0.7182, "step": 1885 }, { "epoch": 0.77, "learning_rate": 2.626729549264161e-06, "loss": 0.637, "step": 1886 }, { "epoch": 0.77, "learning_rate": 2.6177888906895398e-06, "loss": 0.6608, "step": 1887 }, { "epoch": 0.77, "learning_rate": 2.608861181727421e-06, "loss": 0.5861, "step": 1888 }, { "epoch": 0.77, "learning_rate": 2.599946438038481e-06, "loss": 0.6304, "step": 1889 }, { "epoch": 0.77, "learning_rate": 2.591044675260641e-06, "loss": 0.5967, "step": 1890 }, { "epoch": 0.77, "learning_rate": 2.5821559090090565e-06, "loss": 0.6824, "step": 1891 }, { "epoch": 0.77, "learning_rate": 2.5732801548760898e-06, "loss": 0.6641, "step": 1892 }, { "epoch": 0.77, "learning_rate": 2.5644174284312686e-06, "loss": 0.5963, "step": 1893 }, { "epoch": 0.77, "learning_rate": 2.5555677452212792e-06, "loss": 0.6095, "step": 1894 }, { "epoch": 0.77, "learning_rate": 2.5467311207699143e-06, "loss": 0.5949, "step": 1895 }, { "epoch": 0.78, "learning_rate": 2.5379075705780733e-06, "loss": 0.6551, "step": 1896 }, { "epoch": 0.78, "learning_rate": 2.5290971101237083e-06, "loss": 0.6472, "step": 1897 }, { "epoch": 0.78, "learning_rate": 2.5202997548618226e-06, "loss": 0.7092, "step": 1898 }, { "epoch": 0.78, "learning_rate": 2.511515520224418e-06, "loss": 0.6603, "step": 1899 }, { "epoch": 0.78, "learning_rate": 2.5027444216204888e-06, "loss": 0.6436, "step": 1900 }, { "epoch": 0.78, "learning_rate": 2.49398647443599e-06, "loss": 0.6595, "step": 1901 }, { "epoch": 0.78, "learning_rate": 2.485241694033793e-06, "loss": 0.6608, "step": 1902 }, { "epoch": 0.78, "learning_rate": 2.476510095753688e-06, "loss": 0.6017, "step": 1903 }, { "epoch": 0.78, "learning_rate": 2.467791694912329e-06, "loss": 0.6715, "step": 1904 }, { "epoch": 0.78, "learning_rate": 2.459086506803231e-06, "loss": 0.614, "step": 1905 }, { "epoch": 0.78, "learning_rate": 2.450394546696723e-06, "loss": 0.6029, "step": 1906 }, { "epoch": 0.78, "learning_rate": 2.441715829839928e-06, "loss": 0.5832, "step": 1907 }, { "epoch": 0.78, "learning_rate": 2.43305037145675e-06, "loss": 0.6692, "step": 1908 }, { "epoch": 0.78, "learning_rate": 2.424398186747823e-06, "loss": 0.6473, "step": 1909 }, { "epoch": 0.78, "learning_rate": 2.415759290890506e-06, "loss": 0.6604, "step": 1910 }, { "epoch": 0.78, "learning_rate": 2.4071336990388396e-06, "loss": 0.6651, "step": 1911 }, { "epoch": 0.78, "learning_rate": 2.3985214263235344e-06, "loss": 0.6874, "step": 1912 }, { "epoch": 0.78, "learning_rate": 2.38992248785193e-06, "loss": 0.567, "step": 1913 }, { "epoch": 0.78, "learning_rate": 2.38133689870798e-06, "loss": 0.6496, "step": 1914 }, { "epoch": 0.78, "learning_rate": 2.3727646739522226e-06, "loss": 0.6692, "step": 1915 }, { "epoch": 0.78, "learning_rate": 2.364205828621745e-06, "loss": 0.5966, "step": 1916 }, { "epoch": 0.78, "learning_rate": 2.3556603777301745e-06, "loss": 0.625, "step": 1917 }, { "epoch": 0.78, "learning_rate": 2.3471283362676334e-06, "loss": 0.6688, "step": 1918 }, { "epoch": 0.78, "learning_rate": 2.3386097192007296e-06, "loss": 0.5881, "step": 1919 }, { "epoch": 0.78, "learning_rate": 2.3301045414725167e-06, "loss": 0.6437, "step": 1920 }, { "epoch": 0.79, "learning_rate": 2.321612818002472e-06, "loss": 0.6213, "step": 1921 }, { "epoch": 0.79, "learning_rate": 2.313134563686482e-06, "loss": 0.6376, "step": 1922 }, { "epoch": 0.79, "learning_rate": 2.304669793396793e-06, "loss": 0.6311, "step": 1923 }, { "epoch": 0.79, "learning_rate": 2.29621852198201e-06, "loss": 0.7239, "step": 1924 }, { "epoch": 0.79, "learning_rate": 2.287780764267047e-06, "loss": 0.577, "step": 1925 }, { "epoch": 0.79, "learning_rate": 2.2793565350531243e-06, "loss": 0.6204, "step": 1926 }, { "epoch": 0.79, "learning_rate": 2.270945849117722e-06, "loss": 0.5755, "step": 1927 }, { "epoch": 0.79, "learning_rate": 2.262548721214569e-06, "loss": 0.6284, "step": 1928 }, { "epoch": 0.79, "learning_rate": 2.254165166073605e-06, "loss": 0.6777, "step": 1929 }, { "epoch": 0.79, "learning_rate": 2.2457951984009684e-06, "loss": 0.5707, "step": 1930 }, { "epoch": 0.79, "learning_rate": 2.237438832878961e-06, "loss": 0.5719, "step": 1931 }, { "epoch": 0.79, "learning_rate": 2.2290960841660157e-06, "loss": 0.675, "step": 1932 }, { "epoch": 0.79, "learning_rate": 2.2207669668966934e-06, "loss": 0.6788, "step": 1933 }, { "epoch": 0.79, "learning_rate": 2.212451495681629e-06, "loss": 0.6523, "step": 1934 }, { "epoch": 0.79, "learning_rate": 2.2041496851075316e-06, "loss": 0.545, "step": 1935 }, { "epoch": 0.79, "learning_rate": 2.1958615497371416e-06, "loss": 0.6307, "step": 1936 }, { "epoch": 0.79, "learning_rate": 2.187587104109208e-06, "loss": 0.608, "step": 1937 }, { "epoch": 0.79, "learning_rate": 2.1793263627384753e-06, "loss": 0.6708, "step": 1938 }, { "epoch": 0.79, "learning_rate": 2.171079340115636e-06, "loss": 0.5998, "step": 1939 }, { "epoch": 0.79, "learning_rate": 2.16284605070733e-06, "loss": 0.673, "step": 1940 }, { "epoch": 0.79, "learning_rate": 2.154626508956097e-06, "loss": 0.57, "step": 1941 }, { "epoch": 0.79, "learning_rate": 2.1464207292803696e-06, "loss": 0.6615, "step": 1942 }, { "epoch": 0.79, "learning_rate": 2.1382287260744283e-06, "loss": 0.6227, "step": 1943 }, { "epoch": 0.79, "learning_rate": 2.130050513708399e-06, "loss": 0.6064, "step": 1944 }, { "epoch": 0.8, "learning_rate": 2.1218861065282137e-06, "loss": 0.672, "step": 1945 }, { "epoch": 0.8, "learning_rate": 2.1137355188555796e-06, "loss": 0.6932, "step": 1946 }, { "epoch": 0.8, "learning_rate": 2.105598764987973e-06, "loss": 0.6351, "step": 1947 }, { "epoch": 0.8, "learning_rate": 2.0974758591985945e-06, "loss": 0.6846, "step": 1948 }, { "epoch": 0.8, "learning_rate": 2.08936681573636e-06, "loss": 0.7049, "step": 1949 }, { "epoch": 0.8, "learning_rate": 2.0812716488258655e-06, "loss": 0.6386, "step": 1950 }, { "epoch": 0.8, "learning_rate": 2.0731903726673596e-06, "loss": 0.6514, "step": 1951 }, { "epoch": 0.8, "learning_rate": 2.0651230014367385e-06, "loss": 0.7024, "step": 1952 }, { "epoch": 0.8, "learning_rate": 2.057069549285491e-06, "loss": 0.6957, "step": 1953 }, { "epoch": 0.8, "learning_rate": 2.0490300303407017e-06, "loss": 0.5995, "step": 1954 }, { "epoch": 0.8, "learning_rate": 2.041004458705006e-06, "loss": 0.6713, "step": 1955 }, { "epoch": 0.8, "learning_rate": 2.0329928484565784e-06, "loss": 0.5666, "step": 1956 }, { "epoch": 0.8, "learning_rate": 2.024995213649099e-06, "loss": 0.5551, "step": 1957 }, { "epoch": 0.8, "learning_rate": 2.01701156831174e-06, "loss": 0.5885, "step": 1958 }, { "epoch": 0.8, "learning_rate": 2.00904192644912e-06, "loss": 0.6711, "step": 1959 }, { "epoch": 0.8, "learning_rate": 2.0010863020413075e-06, "loss": 0.5906, "step": 1960 }, { "epoch": 0.8, "learning_rate": 1.993144709043777e-06, "loss": 0.638, "step": 1961 }, { "epoch": 0.8, "learning_rate": 1.9852171613873837e-06, "loss": 0.5898, "step": 1962 }, { "epoch": 0.8, "learning_rate": 1.977303672978357e-06, "loss": 0.6459, "step": 1963 }, { "epoch": 0.8, "learning_rate": 1.969404257698253e-06, "loss": 0.6811, "step": 1964 }, { "epoch": 0.8, "learning_rate": 1.961518929403944e-06, "loss": 0.6946, "step": 1965 }, { "epoch": 0.8, "learning_rate": 1.9536477019275955e-06, "loss": 0.651, "step": 1966 }, { "epoch": 0.8, "learning_rate": 1.9457905890766325e-06, "loss": 0.5738, "step": 1967 }, { "epoch": 0.8, "learning_rate": 1.9379476046337285e-06, "loss": 0.5757, "step": 1968 }, { "epoch": 0.8, "learning_rate": 1.9301187623567606e-06, "loss": 0.6743, "step": 1969 }, { "epoch": 0.81, "learning_rate": 1.9223040759788138e-06, "loss": 0.6349, "step": 1970 }, { "epoch": 0.81, "learning_rate": 1.9145035592081274e-06, "loss": 0.6481, "step": 1971 }, { "epoch": 0.81, "learning_rate": 1.906717225728094e-06, "loss": 0.5878, "step": 1972 }, { "epoch": 0.81, "learning_rate": 1.8989450891972205e-06, "loss": 0.634, "step": 1973 }, { "epoch": 0.81, "learning_rate": 1.8911871632491153e-06, "loss": 0.6869, "step": 1974 }, { "epoch": 0.81, "learning_rate": 1.8834434614924567e-06, "loss": 0.6659, "step": 1975 }, { "epoch": 0.81, "learning_rate": 1.8757139975109683e-06, "loss": 0.5889, "step": 1976 }, { "epoch": 0.81, "learning_rate": 1.8679987848634063e-06, "loss": 0.5565, "step": 1977 }, { "epoch": 0.81, "learning_rate": 1.8602978370835156e-06, "loss": 0.628, "step": 1978 }, { "epoch": 0.81, "learning_rate": 1.852611167680033e-06, "loss": 0.5637, "step": 1979 }, { "epoch": 0.81, "learning_rate": 1.8449387901366366e-06, "loss": 0.6013, "step": 1980 }, { "epoch": 0.81, "learning_rate": 1.8372807179119366e-06, "loss": 0.6237, "step": 1981 }, { "epoch": 0.81, "learning_rate": 1.8296369644394562e-06, "loss": 0.5399, "step": 1982 }, { "epoch": 0.81, "learning_rate": 1.8220075431275918e-06, "loss": 0.5737, "step": 1983 }, { "epoch": 0.81, "learning_rate": 1.814392467359607e-06, "loss": 0.6142, "step": 1984 }, { "epoch": 0.81, "learning_rate": 1.806791750493594e-06, "loss": 0.6594, "step": 1985 }, { "epoch": 0.81, "learning_rate": 1.799205405862463e-06, "loss": 0.6424, "step": 1986 }, { "epoch": 0.81, "learning_rate": 1.7916334467739083e-06, "loss": 0.6634, "step": 1987 }, { "epoch": 0.81, "learning_rate": 1.7840758865103934e-06, "loss": 0.6226, "step": 1988 }, { "epoch": 0.81, "learning_rate": 1.7765327383291187e-06, "loss": 0.6087, "step": 1989 }, { "epoch": 0.81, "learning_rate": 1.7690040154620092e-06, "loss": 0.5728, "step": 1990 }, { "epoch": 0.81, "learning_rate": 1.7614897311156864e-06, "loss": 0.6584, "step": 1991 }, { "epoch": 0.81, "learning_rate": 1.7539898984714342e-06, "loss": 0.5712, "step": 1992 }, { "epoch": 0.81, "learning_rate": 1.746504530685199e-06, "loss": 0.67, "step": 1993 }, { "epoch": 0.82, "learning_rate": 1.739033640887544e-06, "loss": 0.6612, "step": 1994 }, { "epoch": 0.82, "learning_rate": 1.7315772421836364e-06, "loss": 0.5817, "step": 1995 }, { "epoch": 0.82, "learning_rate": 1.7241353476532307e-06, "loss": 0.641, "step": 1996 }, { "epoch": 0.82, "learning_rate": 1.7167079703506296e-06, "loss": 0.6266, "step": 1997 }, { "epoch": 0.82, "learning_rate": 1.7092951233046795e-06, "loss": 0.6762, "step": 1998 }, { "epoch": 0.82, "learning_rate": 1.701896819518727e-06, "loss": 0.5922, "step": 1999 }, { "epoch": 0.82, "learning_rate": 1.6945130719706205e-06, "loss": 0.6613, "step": 2000 }, { "epoch": 0.82, "learning_rate": 1.6871438936126604e-06, "loss": 0.6853, "step": 2001 }, { "epoch": 0.82, "learning_rate": 1.6797892973716057e-06, "loss": 0.6097, "step": 2002 }, { "epoch": 0.82, "learning_rate": 1.6724492961486206e-06, "loss": 0.6052, "step": 2003 }, { "epoch": 0.82, "learning_rate": 1.665123902819279e-06, "loss": 0.6344, "step": 2004 }, { "epoch": 0.82, "learning_rate": 1.6578131302335255e-06, "loss": 0.6501, "step": 2005 }, { "epoch": 0.82, "learning_rate": 1.6505169912156548e-06, "loss": 0.635, "step": 2006 }, { "epoch": 0.82, "learning_rate": 1.6432354985642984e-06, "loss": 0.6982, "step": 2007 }, { "epoch": 0.82, "learning_rate": 1.6359686650523888e-06, "loss": 0.6389, "step": 2008 }, { "epoch": 0.82, "learning_rate": 1.6287165034271503e-06, "loss": 0.6041, "step": 2009 }, { "epoch": 0.82, "learning_rate": 1.6214790264100666e-06, "loss": 0.6301, "step": 2010 }, { "epoch": 0.82, "learning_rate": 1.614256246696858e-06, "loss": 0.6172, "step": 2011 }, { "epoch": 0.82, "learning_rate": 1.6070481769574753e-06, "loss": 0.5996, "step": 2012 }, { "epoch": 0.82, "learning_rate": 1.5998548298360527e-06, "loss": 0.6409, "step": 2013 }, { "epoch": 0.82, "learning_rate": 1.5926762179509093e-06, "loss": 0.6465, "step": 2014 }, { "epoch": 0.82, "learning_rate": 1.585512353894505e-06, "loss": 0.546, "step": 2015 }, { "epoch": 0.82, "learning_rate": 1.5783632502334411e-06, "loss": 0.577, "step": 2016 }, { "epoch": 0.82, "learning_rate": 1.5712289195084185e-06, "loss": 0.6104, "step": 2017 }, { "epoch": 0.83, "learning_rate": 1.5641093742342284e-06, "loss": 0.5518, "step": 2018 }, { "epoch": 0.83, "learning_rate": 1.5570046268997209e-06, "loss": 0.6396, "step": 2019 }, { "epoch": 0.83, "learning_rate": 1.5499146899677942e-06, "loss": 0.5909, "step": 2020 }, { "epoch": 0.83, "learning_rate": 1.5428395758753655e-06, "loss": 0.7107, "step": 2021 }, { "epoch": 0.83, "learning_rate": 1.535779297033344e-06, "loss": 0.5169, "step": 2022 }, { "epoch": 0.83, "learning_rate": 1.528733865826625e-06, "loss": 0.5648, "step": 2023 }, { "epoch": 0.83, "learning_rate": 1.521703294614052e-06, "loss": 0.6138, "step": 2024 }, { "epoch": 0.83, "learning_rate": 1.5146875957284012e-06, "loss": 0.5918, "step": 2025 }, { "epoch": 0.83, "learning_rate": 1.5076867814763629e-06, "loss": 0.5838, "step": 2026 }, { "epoch": 0.83, "learning_rate": 1.5007008641385168e-06, "loss": 0.6687, "step": 2027 }, { "epoch": 0.83, "learning_rate": 1.4937298559693136e-06, "loss": 0.6345, "step": 2028 }, { "epoch": 0.83, "learning_rate": 1.4867737691970441e-06, "loss": 0.5792, "step": 2029 }, { "epoch": 0.83, "learning_rate": 1.4798326160238342e-06, "loss": 0.6309, "step": 2030 }, { "epoch": 0.83, "learning_rate": 1.4729064086256017e-06, "loss": 0.5896, "step": 2031 }, { "epoch": 0.83, "learning_rate": 1.4659951591520593e-06, "loss": 0.6591, "step": 2032 }, { "epoch": 0.83, "learning_rate": 1.4590988797266704e-06, "loss": 0.5904, "step": 2033 }, { "epoch": 0.83, "learning_rate": 1.4522175824466456e-06, "loss": 0.6664, "step": 2034 }, { "epoch": 0.83, "learning_rate": 1.445351279382915e-06, "loss": 0.614, "step": 2035 }, { "epoch": 0.83, "learning_rate": 1.4384999825800984e-06, "loss": 0.6194, "step": 2036 }, { "epoch": 0.83, "learning_rate": 1.4316637040565029e-06, "loss": 0.6533, "step": 2037 }, { "epoch": 0.83, "learning_rate": 1.4248424558040819e-06, "loss": 0.7002, "step": 2038 }, { "epoch": 0.83, "learning_rate": 1.4180362497884247e-06, "loss": 0.5797, "step": 2039 }, { "epoch": 0.83, "learning_rate": 1.4112450979487412e-06, "loss": 0.653, "step": 2040 }, { "epoch": 0.83, "learning_rate": 1.4044690121978244e-06, "loss": 0.6399, "step": 2041 }, { "epoch": 0.83, "learning_rate": 1.397708004422047e-06, "loss": 0.6636, "step": 2042 }, { "epoch": 0.84, "learning_rate": 1.3909620864813246e-06, "loss": 0.7017, "step": 2043 }, { "epoch": 0.84, "learning_rate": 1.384231270209111e-06, "loss": 0.5476, "step": 2044 }, { "epoch": 0.84, "learning_rate": 1.3775155674123598e-06, "loss": 0.617, "step": 2045 }, { "epoch": 0.84, "learning_rate": 1.370814989871525e-06, "loss": 0.6658, "step": 2046 }, { "epoch": 0.84, "learning_rate": 1.364129549340516e-06, "loss": 0.6601, "step": 2047 }, { "epoch": 0.84, "learning_rate": 1.3574592575466995e-06, "loss": 0.586, "step": 2048 }, { "epoch": 0.84, "learning_rate": 1.350804126190859e-06, "loss": 0.5854, "step": 2049 }, { "epoch": 0.84, "learning_rate": 1.344164166947194e-06, "loss": 0.6498, "step": 2050 }, { "epoch": 0.84, "learning_rate": 1.337539391463285e-06, "loss": 0.6421, "step": 2051 }, { "epoch": 0.84, "learning_rate": 1.3309298113600755e-06, "loss": 0.6372, "step": 2052 }, { "epoch": 0.84, "learning_rate": 1.3243354382318585e-06, "loss": 0.6794, "step": 2053 }, { "epoch": 0.84, "learning_rate": 1.3177562836462487e-06, "loss": 0.5993, "step": 2054 }, { "epoch": 0.84, "learning_rate": 1.3111923591441643e-06, "loss": 0.5806, "step": 2055 }, { "epoch": 0.84, "learning_rate": 1.3046436762398073e-06, "loss": 0.6225, "step": 2056 }, { "epoch": 0.84, "learning_rate": 1.2981102464206463e-06, "loss": 0.6666, "step": 2057 }, { "epoch": 0.84, "learning_rate": 1.2915920811473937e-06, "loss": 0.684, "step": 2058 }, { "epoch": 0.84, "learning_rate": 1.2850891918539787e-06, "loss": 0.6658, "step": 2059 }, { "epoch": 0.84, "learning_rate": 1.2786015899475445e-06, "loss": 0.6397, "step": 2060 }, { "epoch": 0.84, "learning_rate": 1.2721292868084068e-06, "loss": 0.6532, "step": 2061 }, { "epoch": 0.84, "learning_rate": 1.2656722937900534e-06, "loss": 0.5878, "step": 2062 }, { "epoch": 0.84, "learning_rate": 1.2592306222191086e-06, "loss": 0.6735, "step": 2063 }, { "epoch": 0.84, "learning_rate": 1.2528042833953269e-06, "loss": 0.6792, "step": 2064 }, { "epoch": 0.84, "learning_rate": 1.2463932885915643e-06, "loss": 0.6204, "step": 2065 }, { "epoch": 0.84, "learning_rate": 1.2399976490537557e-06, "loss": 0.611, "step": 2066 }, { "epoch": 0.85, "learning_rate": 1.2336173760009096e-06, "loss": 0.6381, "step": 2067 }, { "epoch": 0.85, "learning_rate": 1.227252480625074e-06, "loss": 0.6973, "step": 2068 }, { "epoch": 0.85, "learning_rate": 1.220902974091317e-06, "loss": 0.5756, "step": 2069 }, { "epoch": 0.85, "learning_rate": 1.2145688675377243e-06, "loss": 0.6774, "step": 2070 }, { "epoch": 0.85, "learning_rate": 1.2082501720753538e-06, "loss": 0.6484, "step": 2071 }, { "epoch": 0.85, "learning_rate": 1.2019468987882433e-06, "loss": 0.6147, "step": 2072 }, { "epoch": 0.85, "learning_rate": 1.195659058733366e-06, "loss": 0.6161, "step": 2073 }, { "epoch": 0.85, "learning_rate": 1.1893866629406315e-06, "loss": 0.6277, "step": 2074 }, { "epoch": 0.85, "learning_rate": 1.1831297224128491e-06, "loss": 0.593, "step": 2075 }, { "epoch": 0.85, "learning_rate": 1.176888248125726e-06, "loss": 0.5932, "step": 2076 }, { "epoch": 0.85, "learning_rate": 1.170662251027831e-06, "loss": 0.5988, "step": 2077 }, { "epoch": 0.85, "learning_rate": 1.164451742040591e-06, "loss": 0.5786, "step": 2078 }, { "epoch": 0.85, "learning_rate": 1.1582567320582561e-06, "loss": 0.6803, "step": 2079 }, { "epoch": 0.85, "learning_rate": 1.1520772319478945e-06, "loss": 0.6807, "step": 2080 }, { "epoch": 0.85, "learning_rate": 1.1459132525493677e-06, "loss": 0.5992, "step": 2081 }, { "epoch": 0.85, "learning_rate": 1.1397648046753062e-06, "loss": 0.6366, "step": 2082 }, { "epoch": 0.85, "learning_rate": 1.1336318991111028e-06, "loss": 0.6556, "step": 2083 }, { "epoch": 0.85, "learning_rate": 1.1275145466148807e-06, "loss": 0.6221, "step": 2084 }, { "epoch": 0.85, "learning_rate": 1.1214127579174804e-06, "loss": 0.5919, "step": 2085 }, { "epoch": 0.85, "learning_rate": 1.1153265437224437e-06, "loss": 0.6734, "step": 2086 }, { "epoch": 0.85, "learning_rate": 1.1092559147059912e-06, "loss": 0.5559, "step": 2087 }, { "epoch": 0.85, "learning_rate": 1.1032008815170082e-06, "loss": 0.6266, "step": 2088 }, { "epoch": 0.85, "learning_rate": 1.0971614547770138e-06, "loss": 0.6294, "step": 2089 }, { "epoch": 0.85, "learning_rate": 1.0911376450801603e-06, "loss": 0.578, "step": 2090 }, { "epoch": 0.85, "learning_rate": 1.0851294629931962e-06, "loss": 0.565, "step": 2091 }, { "epoch": 0.86, "learning_rate": 1.0791369190554658e-06, "loss": 0.6215, "step": 2092 }, { "epoch": 0.86, "learning_rate": 1.0731600237788731e-06, "loss": 0.5673, "step": 2093 }, { "epoch": 0.86, "learning_rate": 1.0671987876478763e-06, "loss": 0.6209, "step": 2094 }, { "epoch": 0.86, "learning_rate": 1.061253221119467e-06, "loss": 0.6993, "step": 2095 }, { "epoch": 0.86, "learning_rate": 1.055323334623143e-06, "loss": 0.6755, "step": 2096 }, { "epoch": 0.86, "learning_rate": 1.0494091385609029e-06, "loss": 0.6167, "step": 2097 }, { "epoch": 0.86, "learning_rate": 1.0435106433072195e-06, "loss": 0.6113, "step": 2098 }, { "epoch": 0.86, "learning_rate": 1.0376278592090217e-06, "loss": 0.624, "step": 2099 }, { "epoch": 0.86, "learning_rate": 1.0317607965856802e-06, "loss": 0.6265, "step": 2100 }, { "epoch": 0.86, "learning_rate": 1.0259094657289893e-06, "loss": 0.6008, "step": 2101 }, { "epoch": 0.86, "learning_rate": 1.020073876903147e-06, "loss": 0.6696, "step": 2102 }, { "epoch": 0.86, "learning_rate": 1.0142540403447321e-06, "loss": 0.6099, "step": 2103 }, { "epoch": 0.86, "learning_rate": 1.0084499662627e-06, "loss": 0.6025, "step": 2104 }, { "epoch": 0.86, "learning_rate": 1.0026616648383468e-06, "loss": 0.6235, "step": 2105 }, { "epoch": 0.86, "learning_rate": 9.968891462253084e-07, "loss": 0.6619, "step": 2106 }, { "epoch": 0.86, "learning_rate": 9.911324205495298e-07, "loss": 0.5741, "step": 2107 }, { "epoch": 0.86, "learning_rate": 9.853914979092571e-07, "loss": 0.6666, "step": 2108 }, { "epoch": 0.86, "learning_rate": 9.79666388375009e-07, "loss": 0.6399, "step": 2109 }, { "epoch": 0.86, "learning_rate": 9.739571019895721e-07, "loss": 0.6142, "step": 2110 }, { "epoch": 0.86, "learning_rate": 9.682636487679753e-07, "loss": 0.5909, "step": 2111 }, { "epoch": 0.86, "learning_rate": 9.625860386974705e-07, "loss": 0.6401, "step": 2112 }, { "epoch": 0.86, "learning_rate": 9.569242817375169e-07, "loss": 0.6571, "step": 2113 }, { "epoch": 0.86, "learning_rate": 9.512783878197706e-07, "loss": 0.6035, "step": 2114 }, { "epoch": 0.86, "learning_rate": 9.456483668480587e-07, "loss": 0.6043, "step": 2115 }, { "epoch": 0.87, "learning_rate": 9.400342286983599e-07, "loss": 0.6343, "step": 2116 }, { "epoch": 0.87, "learning_rate": 9.344359832187999e-07, "loss": 0.5704, "step": 2117 }, { "epoch": 0.87, "learning_rate": 9.288536402296211e-07, "loss": 0.669, "step": 2118 }, { "epoch": 0.87, "learning_rate": 9.232872095231693e-07, "loss": 0.6577, "step": 2119 }, { "epoch": 0.87, "learning_rate": 9.177367008638838e-07, "loss": 0.6304, "step": 2120 }, { "epoch": 0.87, "learning_rate": 9.12202123988265e-07, "loss": 0.5941, "step": 2121 }, { "epoch": 0.87, "learning_rate": 9.066834886048748e-07, "loss": 0.5584, "step": 2122 }, { "epoch": 0.87, "learning_rate": 9.011808043943038e-07, "loss": 0.5959, "step": 2123 }, { "epoch": 0.87, "learning_rate": 8.956940810091674e-07, "loss": 0.6213, "step": 2124 }, { "epoch": 0.87, "learning_rate": 8.902233280740824e-07, "loss": 0.606, "step": 2125 }, { "epoch": 0.87, "learning_rate": 8.847685551856455e-07, "loss": 0.6536, "step": 2126 }, { "epoch": 0.87, "learning_rate": 8.793297719124294e-07, "loss": 0.5667, "step": 2127 }, { "epoch": 0.87, "learning_rate": 8.739069877949524e-07, "loss": 0.6364, "step": 2128 }, { "epoch": 0.87, "learning_rate": 8.685002123456699e-07, "loss": 0.5998, "step": 2129 }, { "epoch": 0.87, "learning_rate": 8.631094550489538e-07, "loss": 0.6742, "step": 2130 }, { "epoch": 0.87, "learning_rate": 8.577347253610813e-07, "loss": 0.5554, "step": 2131 }, { "epoch": 0.87, "learning_rate": 8.523760327102126e-07, "loss": 0.5789, "step": 2132 }, { "epoch": 0.87, "learning_rate": 8.470333864963742e-07, "loss": 0.6365, "step": 2133 }, { "epoch": 0.87, "learning_rate": 8.417067960914482e-07, "loss": 0.6097, "step": 2134 }, { "epoch": 0.87, "learning_rate": 8.363962708391482e-07, "loss": 0.6142, "step": 2135 }, { "epoch": 0.87, "learning_rate": 8.311018200550114e-07, "loss": 0.6367, "step": 2136 }, { "epoch": 0.87, "learning_rate": 8.25823453026372e-07, "loss": 0.6714, "step": 2137 }, { "epoch": 0.87, "learning_rate": 8.20561179012358e-07, "loss": 0.5737, "step": 2138 }, { "epoch": 0.87, "learning_rate": 8.153150072438598e-07, "loss": 0.6305, "step": 2139 }, { "epoch": 0.87, "learning_rate": 8.100849469235272e-07, "loss": 0.6203, "step": 2140 }, { "epoch": 0.88, "learning_rate": 8.048710072257481e-07, "loss": 0.68, "step": 2141 }, { "epoch": 0.88, "learning_rate": 7.996731972966287e-07, "loss": 0.6501, "step": 2142 }, { "epoch": 0.88, "learning_rate": 7.944915262539798e-07, "loss": 0.671, "step": 2143 }, { "epoch": 0.88, "learning_rate": 7.893260031873096e-07, "loss": 0.6335, "step": 2144 }, { "epoch": 0.88, "learning_rate": 7.841766371577919e-07, "loss": 0.5948, "step": 2145 }, { "epoch": 0.88, "learning_rate": 7.790434371982625e-07, "loss": 0.6684, "step": 2146 }, { "epoch": 0.88, "learning_rate": 7.739264123131973e-07, "loss": 0.5923, "step": 2147 }, { "epoch": 0.88, "learning_rate": 7.68825571478703e-07, "loss": 0.6126, "step": 2148 }, { "epoch": 0.88, "learning_rate": 7.637409236424887e-07, "loss": 0.602, "step": 2149 }, { "epoch": 0.88, "learning_rate": 7.586724777238686e-07, "loss": 0.5567, "step": 2150 }, { "epoch": 0.88, "learning_rate": 7.53620242613724e-07, "loss": 0.6304, "step": 2151 }, { "epoch": 0.88, "learning_rate": 7.485842271745125e-07, "loss": 0.6843, "step": 2152 }, { "epoch": 0.88, "learning_rate": 7.435644402402298e-07, "loss": 0.6658, "step": 2153 }, { "epoch": 0.88, "learning_rate": 7.385608906164077e-07, "loss": 0.6084, "step": 2154 }, { "epoch": 0.88, "learning_rate": 7.335735870800975e-07, "loss": 0.5483, "step": 2155 }, { "epoch": 0.88, "learning_rate": 7.28602538379849e-07, "loss": 0.6574, "step": 2156 }, { "epoch": 0.88, "learning_rate": 7.236477532356978e-07, "loss": 0.6799, "step": 2157 }, { "epoch": 0.88, "learning_rate": 7.187092403391549e-07, "loss": 0.5712, "step": 2158 }, { "epoch": 0.88, "learning_rate": 7.137870083531818e-07, "loss": 0.6346, "step": 2159 }, { "epoch": 0.88, "learning_rate": 7.088810659121815e-07, "loss": 0.5653, "step": 2160 }, { "epoch": 0.88, "learning_rate": 7.039914216219867e-07, "loss": 0.6856, "step": 2161 }, { "epoch": 0.88, "learning_rate": 6.991180840598388e-07, "loss": 0.6299, "step": 2162 }, { "epoch": 0.88, "learning_rate": 6.942610617743706e-07, "loss": 0.7086, "step": 2163 }, { "epoch": 0.88, "learning_rate": 6.894203632856e-07, "loss": 0.6392, "step": 2164 }, { "epoch": 0.89, "learning_rate": 6.845959970849059e-07, "loss": 0.6112, "step": 2165 }, { "epoch": 0.89, "learning_rate": 6.797879716350242e-07, "loss": 0.6383, "step": 2166 }, { "epoch": 0.89, "learning_rate": 6.749962953700184e-07, "loss": 0.5758, "step": 2167 }, { "epoch": 0.89, "learning_rate": 6.702209766952794e-07, "loss": 0.5865, "step": 2168 }, { "epoch": 0.89, "learning_rate": 6.654620239874987e-07, "loss": 0.5631, "step": 2169 }, { "epoch": 0.89, "learning_rate": 6.607194455946641e-07, "loss": 0.6528, "step": 2170 }, { "epoch": 0.89, "learning_rate": 6.5599324983604e-07, "loss": 0.6998, "step": 2171 }, { "epoch": 0.89, "learning_rate": 6.512834450021488e-07, "loss": 0.5728, "step": 2172 }, { "epoch": 0.89, "learning_rate": 6.465900393547641e-07, "loss": 0.612, "step": 2173 }, { "epoch": 0.89, "learning_rate": 6.419130411268881e-07, "loss": 0.6587, "step": 2174 }, { "epoch": 0.89, "learning_rate": 6.37252458522748e-07, "loss": 0.6191, "step": 2175 }, { "epoch": 0.89, "learning_rate": 6.326082997177718e-07, "loss": 0.6054, "step": 2176 }, { "epoch": 0.89, "learning_rate": 6.279805728585764e-07, "loss": 0.6761, "step": 2177 }, { "epoch": 0.89, "learning_rate": 6.233692860629592e-07, "loss": 0.6088, "step": 2178 }, { "epoch": 0.89, "learning_rate": 6.187744474198721e-07, "loss": 0.628, "step": 2179 }, { "epoch": 0.89, "learning_rate": 6.141960649894208e-07, "loss": 0.5644, "step": 2180 }, { "epoch": 0.89, "learning_rate": 6.096341468028377e-07, "loss": 0.5896, "step": 2181 }, { "epoch": 0.89, "learning_rate": 6.050887008624817e-07, "loss": 0.6135, "step": 2182 }, { "epoch": 0.89, "learning_rate": 6.005597351418091e-07, "loss": 0.5854, "step": 2183 }, { "epoch": 0.89, "learning_rate": 5.960472575853715e-07, "loss": 0.5639, "step": 2184 }, { "epoch": 0.89, "learning_rate": 5.915512761087993e-07, "loss": 0.6133, "step": 2185 }, { "epoch": 0.89, "learning_rate": 5.870717985987817e-07, "loss": 0.6259, "step": 2186 }, { "epoch": 0.89, "learning_rate": 5.826088329130575e-07, "loss": 0.6688, "step": 2187 }, { "epoch": 0.89, "learning_rate": 5.781623868804054e-07, "loss": 0.5743, "step": 2188 }, { "epoch": 0.89, "learning_rate": 5.737324683006218e-07, "loss": 0.6643, "step": 2189 }, { "epoch": 0.9, "learning_rate": 5.693190849445107e-07, "loss": 0.6536, "step": 2190 }, { "epoch": 0.9, "learning_rate": 5.649222445538749e-07, "loss": 0.6991, "step": 2191 }, { "epoch": 0.9, "learning_rate": 5.605419548414947e-07, "loss": 0.5986, "step": 2192 }, { "epoch": 0.9, "learning_rate": 5.561782234911195e-07, "loss": 0.6193, "step": 2193 }, { "epoch": 0.9, "learning_rate": 5.518310581574515e-07, "loss": 0.6578, "step": 2194 }, { "epoch": 0.9, "learning_rate": 5.475004664661321e-07, "loss": 0.6087, "step": 2195 }, { "epoch": 0.9, "learning_rate": 5.431864560137334e-07, "loss": 0.6861, "step": 2196 }, { "epoch": 0.9, "learning_rate": 5.388890343677389e-07, "loss": 0.5871, "step": 2197 }, { "epoch": 0.9, "learning_rate": 5.346082090665328e-07, "loss": 0.5883, "step": 2198 }, { "epoch": 0.9, "learning_rate": 5.303439876193861e-07, "loss": 0.6302, "step": 2199 }, { "epoch": 0.9, "learning_rate": 5.260963775064443e-07, "loss": 0.5928, "step": 2200 }, { "epoch": 0.9, "learning_rate": 5.218653861787171e-07, "loss": 0.6021, "step": 2201 }, { "epoch": 0.9, "learning_rate": 5.176510210580577e-07, "loss": 0.6564, "step": 2202 }, { "epoch": 0.9, "learning_rate": 5.134532895371546e-07, "loss": 0.6093, "step": 2203 }, { "epoch": 0.9, "learning_rate": 5.09272198979519e-07, "loss": 0.5523, "step": 2204 }, { "epoch": 0.9, "learning_rate": 5.051077567194729e-07, "loss": 0.6599, "step": 2205 }, { "epoch": 0.9, "learning_rate": 5.009599700621315e-07, "loss": 0.6436, "step": 2206 }, { "epoch": 0.9, "learning_rate": 4.968288462833959e-07, "loss": 0.6187, "step": 2207 }, { "epoch": 0.9, "learning_rate": 4.927143926299383e-07, "loss": 0.6571, "step": 2208 }, { "epoch": 0.9, "learning_rate": 4.88616616319183e-07, "loss": 0.5974, "step": 2209 }, { "epoch": 0.9, "learning_rate": 4.84535524539309e-07, "loss": 0.6324, "step": 2210 }, { "epoch": 0.9, "learning_rate": 4.804711244492166e-07, "loss": 0.5792, "step": 2211 }, { "epoch": 0.9, "learning_rate": 4.764234231785381e-07, "loss": 0.5577, "step": 2212 }, { "epoch": 0.9, "learning_rate": 4.723924278276015e-07, "loss": 0.6164, "step": 2213 }, { "epoch": 0.91, "learning_rate": 4.6837814546743853e-07, "loss": 0.6528, "step": 2214 }, { "epoch": 0.91, "learning_rate": 4.6438058313976295e-07, "loss": 0.699, "step": 2215 }, { "epoch": 0.91, "learning_rate": 4.603997478569533e-07, "loss": 0.6786, "step": 2216 }, { "epoch": 0.91, "learning_rate": 4.5643564660205033e-07, "loss": 0.6093, "step": 2217 }, { "epoch": 0.91, "learning_rate": 4.5248828632873944e-07, "loss": 0.7176, "step": 2218 }, { "epoch": 0.91, "learning_rate": 4.4855767396134064e-07, "loss": 0.5165, "step": 2219 }, { "epoch": 0.91, "learning_rate": 4.44643816394793e-07, "loss": 0.5428, "step": 2220 }, { "epoch": 0.91, "learning_rate": 4.4074672049464695e-07, "loss": 0.5496, "step": 2221 }, { "epoch": 0.91, "learning_rate": 4.3686639309705183e-07, "loss": 0.6485, "step": 2222 }, { "epoch": 0.91, "learning_rate": 4.3300284100873615e-07, "loss": 0.5744, "step": 2223 }, { "epoch": 0.91, "learning_rate": 4.2915607100701084e-07, "loss": 0.7151, "step": 2224 }, { "epoch": 0.91, "learning_rate": 4.253260898397382e-07, "loss": 0.5757, "step": 2225 }, { "epoch": 0.91, "learning_rate": 4.215129042253385e-07, "loss": 0.6434, "step": 2226 }, { "epoch": 0.91, "learning_rate": 4.177165208527645e-07, "loss": 0.5767, "step": 2227 }, { "epoch": 0.91, "learning_rate": 4.139369463814991e-07, "loss": 0.622, "step": 2228 }, { "epoch": 0.91, "learning_rate": 4.1017418744153437e-07, "loss": 0.7016, "step": 2229 }, { "epoch": 0.91, "learning_rate": 4.064282506333728e-07, "loss": 0.6253, "step": 2230 }, { "epoch": 0.91, "learning_rate": 4.0269914252799914e-07, "loss": 0.6492, "step": 2231 }, { "epoch": 0.91, "learning_rate": 3.989868696668864e-07, "loss": 0.5724, "step": 2232 }, { "epoch": 0.91, "learning_rate": 3.9529143856196995e-07, "loss": 0.6454, "step": 2233 }, { "epoch": 0.91, "learning_rate": 3.9161285569564114e-07, "loss": 0.7064, "step": 2234 }, { "epoch": 0.91, "learning_rate": 3.8795112752074483e-07, "loss": 0.5768, "step": 2235 }, { "epoch": 0.91, "learning_rate": 3.843062604605496e-07, "loss": 0.5388, "step": 2236 }, { "epoch": 0.91, "learning_rate": 3.8067826090875336e-07, "loss": 0.5691, "step": 2237 }, { "epoch": 0.91, "learning_rate": 3.7706713522946637e-07, "loss": 0.6014, "step": 2238 }, { "epoch": 0.92, "learning_rate": 3.7347288975719374e-07, "loss": 0.6078, "step": 2239 }, { "epoch": 0.92, "learning_rate": 3.698955307968355e-07, "loss": 0.5974, "step": 2240 }, { "epoch": 0.92, "learning_rate": 3.663350646236663e-07, "loss": 0.5968, "step": 2241 }, { "epoch": 0.92, "learning_rate": 3.627914974833302e-07, "loss": 0.5903, "step": 2242 }, { "epoch": 0.92, "learning_rate": 3.59264835591826e-07, "loss": 0.6221, "step": 2243 }, { "epoch": 0.92, "learning_rate": 3.557550851354985e-07, "loss": 0.6264, "step": 2244 }, { "epoch": 0.92, "learning_rate": 3.522622522710306e-07, "loss": 0.6753, "step": 2245 }, { "epoch": 0.92, "learning_rate": 3.4878634312542124e-07, "loss": 0.6668, "step": 2246 }, { "epoch": 0.92, "learning_rate": 3.4532736379598974e-07, "loss": 0.625, "step": 2247 }, { "epoch": 0.92, "learning_rate": 3.418853203503525e-07, "loss": 0.5118, "step": 2248 }, { "epoch": 0.92, "learning_rate": 3.384602188264219e-07, "loss": 0.5872, "step": 2249 }, { "epoch": 0.92, "learning_rate": 3.350520652323863e-07, "loss": 0.617, "step": 2250 }, { "epoch": 0.92, "learning_rate": 3.316608655467113e-07, "loss": 0.62, "step": 2251 }, { "epoch": 0.92, "learning_rate": 3.282866257181183e-07, "loss": 0.7399, "step": 2252 }, { "epoch": 0.92, "learning_rate": 3.2492935166557606e-07, "loss": 0.6344, "step": 2253 }, { "epoch": 0.92, "learning_rate": 3.2158904927829913e-07, "loss": 0.6004, "step": 2254 }, { "epoch": 0.92, "learning_rate": 3.18265724415725e-07, "loss": 0.6416, "step": 2255 }, { "epoch": 0.92, "learning_rate": 3.149593829075137e-07, "loss": 0.5987, "step": 2256 }, { "epoch": 0.92, "learning_rate": 3.116700305535303e-07, "loss": 0.6007, "step": 2257 }, { "epoch": 0.92, "learning_rate": 3.083976731238414e-07, "loss": 0.6365, "step": 2258 }, { "epoch": 0.92, "learning_rate": 3.0514231635869863e-07, "loss": 0.6236, "step": 2259 }, { "epoch": 0.92, "learning_rate": 3.019039659685341e-07, "loss": 0.5542, "step": 2260 }, { "epoch": 0.92, "learning_rate": 2.9868262763394717e-07, "loss": 0.5988, "step": 2261 }, { "epoch": 0.92, "learning_rate": 2.9547830700569545e-07, "loss": 0.5921, "step": 2262 }, { "epoch": 0.93, "learning_rate": 2.922910097046838e-07, "loss": 0.628, "step": 2263 }, { "epoch": 0.93, "learning_rate": 2.8912074132195545e-07, "loss": 0.6705, "step": 2264 }, { "epoch": 0.93, "learning_rate": 2.8596750741868295e-07, "loss": 0.6937, "step": 2265 }, { "epoch": 0.93, "learning_rate": 2.8283131352615733e-07, "loss": 0.6698, "step": 2266 }, { "epoch": 0.93, "learning_rate": 2.7971216514577906e-07, "loss": 0.599, "step": 2267 }, { "epoch": 0.93, "learning_rate": 2.7661006774904687e-07, "loss": 0.5504, "step": 2268 }, { "epoch": 0.93, "learning_rate": 2.735250267775491e-07, "loss": 0.6754, "step": 2269 }, { "epoch": 0.93, "learning_rate": 2.7045704764295686e-07, "loss": 0.6503, "step": 2270 }, { "epoch": 0.93, "learning_rate": 2.674061357270086e-07, "loss": 0.6587, "step": 2271 }, { "epoch": 0.93, "learning_rate": 2.6437229638150655e-07, "loss": 0.6321, "step": 2272 }, { "epoch": 0.93, "learning_rate": 2.613555349283026e-07, "loss": 0.6236, "step": 2273 }, { "epoch": 0.93, "learning_rate": 2.5835585665929365e-07, "loss": 0.6365, "step": 2274 }, { "epoch": 0.93, "learning_rate": 2.553732668364095e-07, "loss": 0.6323, "step": 2275 }, { "epoch": 0.93, "learning_rate": 2.524077706916028e-07, "loss": 0.6625, "step": 2276 }, { "epoch": 0.93, "learning_rate": 2.4945937342684024e-07, "loss": 0.5559, "step": 2277 }, { "epoch": 0.93, "learning_rate": 2.4652808021409567e-07, "loss": 0.704, "step": 2278 }, { "epoch": 0.93, "learning_rate": 2.4361389619534157e-07, "loss": 0.6453, "step": 2279 }, { "epoch": 0.93, "learning_rate": 2.4071682648253437e-07, "loss": 0.6299, "step": 2280 }, { "epoch": 0.93, "learning_rate": 2.3783687615760998e-07, "loss": 0.604, "step": 2281 }, { "epoch": 0.93, "learning_rate": 2.3497405027247845e-07, "loss": 0.5761, "step": 2282 }, { "epoch": 0.93, "learning_rate": 2.3212835384900268e-07, "loss": 0.6042, "step": 2283 }, { "epoch": 0.93, "learning_rate": 2.292997918790063e-07, "loss": 0.5963, "step": 2284 }, { "epoch": 0.93, "learning_rate": 2.2648836932425032e-07, "loss": 0.6466, "step": 2285 }, { "epoch": 0.93, "learning_rate": 2.2369409111643204e-07, "loss": 0.6451, "step": 2286 }, { "epoch": 0.93, "learning_rate": 2.2091696215717606e-07, "loss": 0.6075, "step": 2287 }, { "epoch": 0.94, "learning_rate": 2.1815698731802227e-07, "loss": 0.6647, "step": 2288 }, { "epoch": 0.94, "learning_rate": 2.154141714404212e-07, "loss": 0.6471, "step": 2289 }, { "epoch": 0.94, "learning_rate": 2.126885193357231e-07, "loss": 0.5999, "step": 2290 }, { "epoch": 0.94, "learning_rate": 2.0998003578517e-07, "loss": 0.7041, "step": 2291 }, { "epoch": 0.94, "learning_rate": 2.07288725539887e-07, "loss": 0.5889, "step": 2292 }, { "epoch": 0.94, "learning_rate": 2.0461459332087653e-07, "loss": 0.6521, "step": 2293 }, { "epoch": 0.94, "learning_rate": 2.0195764381900295e-07, "loss": 0.6011, "step": 2294 }, { "epoch": 0.94, "learning_rate": 1.99317881694997e-07, "loss": 0.7214, "step": 2295 }, { "epoch": 0.94, "learning_rate": 1.9669531157943233e-07, "loss": 0.6214, "step": 2296 }, { "epoch": 0.94, "learning_rate": 1.9408993807272903e-07, "loss": 0.7617, "step": 2297 }, { "epoch": 0.94, "learning_rate": 1.9150176574514233e-07, "loss": 0.6108, "step": 2298 }, { "epoch": 0.94, "learning_rate": 1.889307991367506e-07, "loss": 0.6425, "step": 2299 }, { "epoch": 0.94, "learning_rate": 1.8637704275745294e-07, "loss": 0.657, "step": 2300 }, { "epoch": 0.94, "learning_rate": 1.8384050108695707e-07, "loss": 0.5848, "step": 2301 }, { "epoch": 0.94, "learning_rate": 1.8132117857477593e-07, "loss": 0.5859, "step": 2302 }, { "epoch": 0.94, "learning_rate": 1.788190796402134e-07, "loss": 0.6068, "step": 2303 }, { "epoch": 0.94, "learning_rate": 1.7633420867236518e-07, "loss": 0.5742, "step": 2304 }, { "epoch": 0.94, "learning_rate": 1.738665700301012e-07, "loss": 0.5427, "step": 2305 }, { "epoch": 0.94, "learning_rate": 1.7141616804206784e-07, "loss": 0.6511, "step": 2306 }, { "epoch": 0.94, "learning_rate": 1.6898300700667115e-07, "loss": 0.5694, "step": 2307 }, { "epoch": 0.94, "learning_rate": 1.665670911920758e-07, "loss": 0.6221, "step": 2308 }, { "epoch": 0.94, "learning_rate": 1.6416842483619743e-07, "loss": 0.6609, "step": 2309 }, { "epoch": 0.94, "learning_rate": 1.6178701214669023e-07, "loss": 0.5805, "step": 2310 }, { "epoch": 0.94, "learning_rate": 1.5942285730094265e-07, "loss": 0.6571, "step": 2311 }, { "epoch": 0.95, "learning_rate": 1.5707596444607288e-07, "loss": 0.5605, "step": 2312 }, { "epoch": 0.95, "learning_rate": 1.5474633769891667e-07, "loss": 0.6185, "step": 2313 }, { "epoch": 0.95, "learning_rate": 1.5243398114602182e-07, "loss": 0.6843, "step": 2314 }, { "epoch": 0.95, "learning_rate": 1.5013889884364253e-07, "loss": 0.7244, "step": 2315 }, { "epoch": 0.95, "learning_rate": 1.4786109481773058e-07, "loss": 0.5748, "step": 2316 }, { "epoch": 0.95, "learning_rate": 1.4560057306392872e-07, "loss": 0.5641, "step": 2317 }, { "epoch": 0.95, "learning_rate": 1.4335733754756388e-07, "loss": 0.6369, "step": 2318 }, { "epoch": 0.95, "learning_rate": 1.4113139220364058e-07, "loss": 0.644, "step": 2319 }, { "epoch": 0.95, "learning_rate": 1.3892274093683323e-07, "loss": 0.6212, "step": 2320 }, { "epoch": 0.95, "learning_rate": 1.367313876214782e-07, "loss": 0.6529, "step": 2321 }, { "epoch": 0.95, "learning_rate": 1.345573361015706e-07, "loss": 0.6405, "step": 2322 }, { "epoch": 0.95, "learning_rate": 1.3240059019075546e-07, "loss": 0.5578, "step": 2323 }, { "epoch": 0.95, "learning_rate": 1.302611536723164e-07, "loss": 0.5952, "step": 2324 }, { "epoch": 0.95, "learning_rate": 1.2813903029918028e-07, "loss": 0.6696, "step": 2325 }, { "epoch": 0.95, "learning_rate": 1.260342237938994e-07, "loss": 0.6686, "step": 2326 }, { "epoch": 0.95, "learning_rate": 1.239467378486503e-07, "loss": 0.6727, "step": 2327 }, { "epoch": 0.95, "learning_rate": 1.2187657612522717e-07, "loss": 0.7231, "step": 2328 }, { "epoch": 0.95, "learning_rate": 1.1982374225503412e-07, "loss": 0.6203, "step": 2329 }, { "epoch": 0.95, "learning_rate": 1.177882398390795e-07, "loss": 0.6314, "step": 2330 }, { "epoch": 0.95, "learning_rate": 1.1577007244796823e-07, "loss": 0.6109, "step": 2331 }, { "epoch": 0.95, "learning_rate": 1.1376924362189956e-07, "loss": 0.6494, "step": 2332 }, { "epoch": 0.95, "learning_rate": 1.1178575687065485e-07, "loss": 0.7083, "step": 2333 }, { "epoch": 0.95, "learning_rate": 1.0981961567359756e-07, "loss": 0.5516, "step": 2334 }, { "epoch": 0.95, "learning_rate": 1.0787082347966104e-07, "loss": 0.5751, "step": 2335 }, { "epoch": 0.96, "learning_rate": 1.0593938370734969e-07, "loss": 0.5631, "step": 2336 }, { "epoch": 0.96, "learning_rate": 1.0402529974472442e-07, "loss": 0.5252, "step": 2337 }, { "epoch": 0.96, "learning_rate": 1.0212857494940276e-07, "loss": 0.5919, "step": 2338 }, { "epoch": 0.96, "learning_rate": 1.0024921264855436e-07, "loss": 0.59, "step": 2339 }, { "epoch": 0.96, "learning_rate": 9.83872161388888e-08, "loss": 0.6296, "step": 2340 }, { "epoch": 0.96, "learning_rate": 9.654258868665334e-08, "loss": 0.5512, "step": 2341 }, { "epoch": 0.96, "learning_rate": 9.471533352762962e-08, "loss": 0.5536, "step": 2342 }, { "epoch": 0.96, "learning_rate": 9.290545386712147e-08, "loss": 0.6088, "step": 2343 }, { "epoch": 0.96, "learning_rate": 9.111295287995814e-08, "loss": 0.5847, "step": 2344 }, { "epoch": 0.96, "learning_rate": 8.933783371047888e-08, "loss": 0.5719, "step": 2345 }, { "epoch": 0.96, "learning_rate": 8.758009947253621e-08, "loss": 0.5565, "step": 2346 }, { "epoch": 0.96, "learning_rate": 8.583975324948479e-08, "loss": 0.5576, "step": 2347 }, { "epoch": 0.96, "learning_rate": 8.411679809417706e-08, "loss": 0.644, "step": 2348 }, { "epoch": 0.96, "learning_rate": 8.241123702896093e-08, "loss": 0.5563, "step": 2349 }, { "epoch": 0.96, "learning_rate": 8.072307304567206e-08, "loss": 0.6191, "step": 2350 }, { "epoch": 0.96, "learning_rate": 7.9052309105625e-08, "loss": 0.6436, "step": 2351 }, { "epoch": 0.96, "learning_rate": 7.739894813961645e-08, "loss": 0.6221, "step": 2352 }, { "epoch": 0.96, "learning_rate": 7.576299304791312e-08, "loss": 0.6291, "step": 2353 }, { "epoch": 0.96, "learning_rate": 7.414444670024834e-08, "loss": 0.672, "step": 2354 }, { "epoch": 0.96, "learning_rate": 7.254331193581987e-08, "loss": 0.617, "step": 2355 }, { "epoch": 0.96, "learning_rate": 7.095959156328103e-08, "loss": 0.6161, "step": 2356 }, { "epoch": 0.96, "learning_rate": 6.939328836073733e-08, "loss": 0.6588, "step": 2357 }, { "epoch": 0.96, "learning_rate": 6.784440507574319e-08, "loss": 0.6391, "step": 2358 }, { "epoch": 0.96, "learning_rate": 6.631294442529412e-08, "loss": 0.6367, "step": 2359 }, { "epoch": 0.96, "learning_rate": 6.479890909582453e-08, "loss": 0.6447, "step": 2360 }, { "epoch": 0.97, "learning_rate": 6.330230174319996e-08, "loss": 0.5896, "step": 2361 }, { "epoch": 0.97, "learning_rate": 6.182312499271703e-08, "loss": 0.6164, "step": 2362 }, { "epoch": 0.97, "learning_rate": 6.036138143909464e-08, "loss": 0.5986, "step": 2363 }, { "epoch": 0.97, "learning_rate": 5.891707364647281e-08, "loss": 0.6594, "step": 2364 }, { "epoch": 0.97, "learning_rate": 5.749020414840267e-08, "loss": 0.6121, "step": 2365 }, { "epoch": 0.97, "learning_rate": 5.6080775447849846e-08, "loss": 0.6253, "step": 2366 }, { "epoch": 0.97, "learning_rate": 5.46887900171833e-08, "loss": 0.6113, "step": 2367 }, { "epoch": 0.97, "learning_rate": 5.3314250298173164e-08, "loss": 0.5733, "step": 2368 }, { "epoch": 0.97, "learning_rate": 5.195715870199181e-08, "loss": 0.617, "step": 2369 }, { "epoch": 0.97, "learning_rate": 5.06175176091972e-08, "loss": 0.6222, "step": 2370 }, { "epoch": 0.97, "learning_rate": 4.9295329369741794e-08, "loss": 0.6593, "step": 2371 }, { "epoch": 0.97, "learning_rate": 4.799059630296144e-08, "loss": 0.6411, "step": 2372 }, { "epoch": 0.97, "learning_rate": 4.6703320697572e-08, "loss": 0.5944, "step": 2373 }, { "epoch": 0.97, "learning_rate": 4.5433504811667193e-08, "loss": 0.5662, "step": 2374 }, { "epoch": 0.97, "learning_rate": 4.418115087271302e-08, "loss": 0.5919, "step": 2375 }, { "epoch": 0.97, "learning_rate": 4.2946261077544404e-08, "loss": 0.6008, "step": 2376 }, { "epoch": 0.97, "learning_rate": 4.172883759235968e-08, "loss": 0.6319, "step": 2377 }, { "epoch": 0.97, "learning_rate": 4.0528882552720584e-08, "loss": 0.632, "step": 2378 }, { "epoch": 0.97, "learning_rate": 3.9346398063546676e-08, "loss": 0.665, "step": 2379 }, { "epoch": 0.97, "learning_rate": 3.8181386199109825e-08, "loss": 0.6213, "step": 2380 }, { "epoch": 0.97, "learning_rate": 3.7033849003031975e-08, "loss": 0.6881, "step": 2381 }, { "epoch": 0.97, "learning_rate": 3.59037884882818e-08, "loss": 0.6056, "step": 2382 }, { "epoch": 0.97, "learning_rate": 3.4791206637172504e-08, "loss": 0.5901, "step": 2383 }, { "epoch": 0.97, "learning_rate": 3.369610540135404e-08, "loss": 0.6251, "step": 2384 }, { "epoch": 0.98, "learning_rate": 3.261848670181533e-08, "loss": 0.6456, "step": 2385 }, { "epoch": 0.98, "learning_rate": 3.15583524288765e-08, "loss": 0.6233, "step": 2386 }, { "epoch": 0.98, "learning_rate": 3.051570444218665e-08, "loss": 0.6181, "step": 2387 }, { "epoch": 0.98, "learning_rate": 2.9490544570723867e-08, "loss": 0.7135, "step": 2388 }, { "epoch": 0.98, "learning_rate": 2.8482874612785205e-08, "loss": 0.6401, "step": 2389 }, { "epoch": 0.98, "learning_rate": 2.7492696335991165e-08, "loss": 0.5945, "step": 2390 }, { "epoch": 0.98, "learning_rate": 2.6520011477274567e-08, "loss": 0.585, "step": 2391 }, { "epoch": 0.98, "learning_rate": 2.5564821742888324e-08, "loss": 0.6019, "step": 2392 }, { "epoch": 0.98, "learning_rate": 2.46271288083888e-08, "loss": 0.5982, "step": 2393 }, { "epoch": 0.98, "learning_rate": 2.37069343186469e-08, "loss": 0.6452, "step": 2394 }, { "epoch": 0.98, "learning_rate": 2.2804239887832536e-08, "loss": 0.5608, "step": 2395 }, { "epoch": 0.98, "learning_rate": 2.1919047099421274e-08, "loss": 0.6123, "step": 2396 }, { "epoch": 0.98, "learning_rate": 2.10513575061877e-08, "loss": 0.5814, "step": 2397 }, { "epoch": 0.98, "learning_rate": 2.0201172630199827e-08, "loss": 0.6203, "step": 2398 }, { "epoch": 0.98, "learning_rate": 1.9368493962823585e-08, "loss": 0.6164, "step": 2399 }, { "epoch": 0.98, "learning_rate": 1.8553322964713906e-08, "loss": 0.5999, "step": 2400 }, { "epoch": 0.98, "learning_rate": 1.7755661065814723e-08, "loss": 0.6118, "step": 2401 }, { "epoch": 0.98, "learning_rate": 1.697550966535566e-08, "loss": 0.5894, "step": 2402 }, { "epoch": 0.98, "learning_rate": 1.621287013185202e-08, "loss": 0.5942, "step": 2403 }, { "epoch": 0.98, "learning_rate": 1.546774380309701e-08, "loss": 0.5385, "step": 2404 }, { "epoch": 0.98, "learning_rate": 1.4740131986166196e-08, "loss": 0.6258, "step": 2405 }, { "epoch": 0.98, "learning_rate": 1.4030035957410815e-08, "loss": 0.576, "step": 2406 }, { "epoch": 0.98, "learning_rate": 1.3337456962454476e-08, "loss": 0.5796, "step": 2407 }, { "epoch": 0.98, "learning_rate": 1.2662396216196471e-08, "loss": 0.5841, "step": 2408 }, { "epoch": 0.98, "learning_rate": 1.2004854902801789e-08, "loss": 0.6282, "step": 2409 }, { "epoch": 0.99, "learning_rate": 1.1364834175708883e-08, "loss": 0.6947, "step": 2410 }, { "epoch": 0.99, "learning_rate": 1.0742335157618578e-08, "loss": 0.5592, "step": 2411 }, { "epoch": 0.99, "learning_rate": 1.0137358940495168e-08, "loss": 0.6063, "step": 2412 }, { "epoch": 0.99, "learning_rate": 9.549906585567536e-09, "loss": 0.623, "step": 2413 }, { "epoch": 0.99, "learning_rate": 8.979979123323601e-09, "loss": 0.5986, "step": 2414 }, { "epoch": 0.99, "learning_rate": 8.427577553509203e-09, "loss": 0.6024, "step": 2415 }, { "epoch": 0.99, "learning_rate": 7.892702845129218e-09, "loss": 0.6173, "step": 2416 }, { "epoch": 0.99, "learning_rate": 7.375355936440898e-09, "loss": 0.7068, "step": 2417 }, { "epoch": 0.99, "learning_rate": 6.875537734954973e-09, "loss": 0.6872, "step": 2418 }, { "epoch": 0.99, "learning_rate": 6.3932491174356626e-09, "loss": 0.6079, "step": 2419 }, { "epoch": 0.99, "learning_rate": 5.928490929897335e-09, "loss": 0.5665, "step": 2420 }, { "epoch": 0.99, "learning_rate": 5.4812639876000715e-09, "loss": 0.6371, "step": 2421 }, { "epoch": 0.99, "learning_rate": 5.051569075056328e-09, "loss": 0.5822, "step": 2422 }, { "epoch": 0.99, "learning_rate": 4.639406946018721e-09, "loss": 0.6264, "step": 2423 }, { "epoch": 0.99, "learning_rate": 4.2447783234900175e-09, "loss": 0.625, "step": 2424 }, { "epoch": 0.99, "learning_rate": 3.867683899712038e-09, "loss": 0.6325, "step": 2425 }, { "epoch": 0.99, "learning_rate": 3.5081243361700934e-09, "loss": 0.6229, "step": 2426 }, { "epoch": 0.99, "learning_rate": 3.166100263592986e-09, "loss": 0.6136, "step": 2427 }, { "epoch": 0.99, "learning_rate": 2.8416122819452384e-09, "loss": 0.6242, "step": 2428 }, { "epoch": 0.99, "learning_rate": 2.5346609604326446e-09, "loss": 0.6293, "step": 2429 }, { "epoch": 0.99, "learning_rate": 2.245246837498938e-09, "loss": 0.5637, "step": 2430 }, { "epoch": 0.99, "learning_rate": 1.973370420823573e-09, "loss": 0.5801, "step": 2431 }, { "epoch": 0.99, "learning_rate": 1.719032187321723e-09, "loss": 0.6315, "step": 2432 }, { "epoch": 0.99, "learning_rate": 1.4822325831453932e-09, "loss": 0.6365, "step": 2433 }, { "epoch": 1.0, "learning_rate": 1.2629720236800868e-09, "loss": 0.5855, "step": 2434 }, { "epoch": 1.0, "learning_rate": 1.0612508935459176e-09, "loss": 0.6802, "step": 2435 }, { "epoch": 1.0, "learning_rate": 8.770695465920576e-10, "loss": 0.5994, "step": 2436 }, { "epoch": 1.0, "learning_rate": 7.104283059056194e-10, "loss": 0.5722, "step": 2437 }, { "epoch": 1.0, "learning_rate": 5.613274638005539e-10, "loss": 0.4984, "step": 2438 }, { "epoch": 1.0, "learning_rate": 4.2976728182653193e-10, "loss": 0.6701, "step": 2439 }, { "epoch": 1.0, "learning_rate": 3.1574799076006247e-10, "loss": 0.5628, "step": 2440 }, { "epoch": 1.0, "learning_rate": 2.1926979061004382e-10, "loss": 0.6036, "step": 2441 }, { "epoch": 1.0, "learning_rate": 1.4033285061554325e-10, "loss": 0.6475, "step": 2442 }, { "epoch": 1.0, "learning_rate": 7.893730924468657e-11, "loss": 0.6088, "step": 2443 }, { "epoch": 1.0, "learning_rate": 3.508327419576851e-11, "loss": 0.6262, "step": 2444 }, { "epoch": 1.0, "learning_rate": 8.770822395032241e-12, "loss": 0.5745, "step": 2445 }, { "epoch": 1.0, "learning_rate": 0.0, "loss": 0.6366, "step": 2446 }, { "epoch": 1.0, "step": 2446, "total_flos": 191695072002048.0, "train_loss": 0.6746118150815941, "train_runtime": 25368.0481, "train_samples_per_second": 12.339, "train_steps_per_second": 0.096 } ], "logging_steps": 1.0, "max_steps": 2446, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 1000, "total_flos": 191695072002048.0, "train_batch_size": 16, "trial_name": null, "trial_params": null }