diff --git "a/trainer_state.json" "b/trainer_state.json" new file mode 100644--- /dev/null +++ "b/trainer_state.json" @@ -0,0 +1,14706 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 1.0, + "eval_steps": 500, + "global_step": 2446, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0, + "learning_rate": 2.702702702702703e-07, + "loss": 3.914, + "step": 1 + }, + { + "epoch": 0.0, + "learning_rate": 5.405405405405406e-07, + "loss": 3.7247, + "step": 2 + }, + { + "epoch": 0.0, + "learning_rate": 8.108108108108109e-07, + "loss": 3.8179, + "step": 3 + }, + { + "epoch": 0.0, + "learning_rate": 1.0810810810810812e-06, + "loss": 3.9695, + "step": 4 + }, + { + "epoch": 0.0, + "learning_rate": 1.3513513513513515e-06, + "loss": 4.035, + "step": 5 + }, + { + "epoch": 0.0, + "learning_rate": 1.6216216216216219e-06, + "loss": 3.7208, + "step": 6 + }, + { + "epoch": 0.0, + "learning_rate": 1.8918918918918922e-06, + "loss": 3.6515, + "step": 7 + }, + { + "epoch": 0.0, + "learning_rate": 2.1621621621621623e-06, + "loss": 3.4713, + "step": 8 + }, + { + "epoch": 0.0, + "learning_rate": 2.432432432432433e-06, + "loss": 3.5122, + "step": 9 + }, + { + "epoch": 0.0, + "learning_rate": 2.702702702702703e-06, + "loss": 2.5384, + "step": 10 + }, + { + "epoch": 0.0, + "learning_rate": 2.9729729729729736e-06, + "loss": 2.6081, + "step": 11 + }, + { + "epoch": 0.0, + "learning_rate": 3.2432432432432437e-06, + "loss": 2.3291, + "step": 12 + }, + { + "epoch": 0.01, + "learning_rate": 3.513513513513514e-06, + "loss": 1.8066, + "step": 13 + }, + { + "epoch": 0.01, + "learning_rate": 3.7837837837837844e-06, + "loss": 1.5921, + "step": 14 + }, + { + "epoch": 0.01, + "learning_rate": 4.0540540540540545e-06, + "loss": 1.4388, + "step": 15 + }, + { + "epoch": 0.01, + "learning_rate": 4.324324324324325e-06, + "loss": 1.4086, + "step": 16 + }, + { + "epoch": 0.01, + "learning_rate": 4.594594594594596e-06, + "loss": 1.21, + "step": 17 + }, + { + "epoch": 0.01, + "learning_rate": 4.864864864864866e-06, + "loss": 1.0597, + "step": 18 + }, + { + "epoch": 0.01, + "learning_rate": 5.135135135135135e-06, + "loss": 1.0664, + "step": 19 + }, + { + "epoch": 0.01, + "learning_rate": 5.405405405405406e-06, + "loss": 1.1034, + "step": 20 + }, + { + "epoch": 0.01, + "learning_rate": 5.675675675675676e-06, + "loss": 1.0144, + "step": 21 + }, + { + "epoch": 0.01, + "learning_rate": 5.945945945945947e-06, + "loss": 0.9312, + "step": 22 + }, + { + "epoch": 0.01, + "learning_rate": 6.2162162162162164e-06, + "loss": 0.966, + "step": 23 + }, + { + "epoch": 0.01, + "learning_rate": 6.486486486486487e-06, + "loss": 0.8463, + "step": 24 + }, + { + "epoch": 0.01, + "learning_rate": 6.7567567567567575e-06, + "loss": 0.9202, + "step": 25 + }, + { + "epoch": 0.01, + "learning_rate": 7.027027027027028e-06, + "loss": 0.9266, + "step": 26 + }, + { + "epoch": 0.01, + "learning_rate": 7.297297297297298e-06, + "loss": 0.8965, + "step": 27 + }, + { + "epoch": 0.01, + "learning_rate": 7.567567567567569e-06, + "loss": 0.8654, + "step": 28 + }, + { + "epoch": 0.01, + "learning_rate": 7.837837837837838e-06, + "loss": 0.8189, + "step": 29 + }, + { + "epoch": 0.01, + "learning_rate": 8.108108108108109e-06, + "loss": 0.8193, + "step": 30 + }, + { + "epoch": 0.01, + "learning_rate": 8.378378378378378e-06, + "loss": 0.8061, + "step": 31 + }, + { + "epoch": 0.01, + "learning_rate": 8.64864864864865e-06, + "loss": 0.8261, + "step": 32 + }, + { + "epoch": 0.01, + "learning_rate": 8.91891891891892e-06, + "loss": 0.8132, + "step": 33 + }, + { + "epoch": 0.01, + "learning_rate": 9.189189189189191e-06, + "loss": 0.778, + "step": 34 + }, + { + "epoch": 0.01, + "learning_rate": 9.45945945945946e-06, + "loss": 0.8129, + "step": 35 + }, + { + "epoch": 0.01, + "learning_rate": 9.729729729729732e-06, + "loss": 0.7442, + "step": 36 + }, + { + "epoch": 0.02, + "learning_rate": 1e-05, + "loss": 0.7654, + "step": 37 + }, + { + "epoch": 0.02, + "learning_rate": 1.027027027027027e-05, + "loss": 0.8042, + "step": 38 + }, + { + "epoch": 0.02, + "learning_rate": 1.0540540540540541e-05, + "loss": 0.7447, + "step": 39 + }, + { + "epoch": 0.02, + "learning_rate": 1.0810810810810812e-05, + "loss": 0.8085, + "step": 40 + }, + { + "epoch": 0.02, + "learning_rate": 1.1081081081081081e-05, + "loss": 0.6754, + "step": 41 + }, + { + "epoch": 0.02, + "learning_rate": 1.1351351351351352e-05, + "loss": 0.7993, + "step": 42 + }, + { + "epoch": 0.02, + "learning_rate": 1.1621621621621622e-05, + "loss": 0.7228, + "step": 43 + }, + { + "epoch": 0.02, + "learning_rate": 1.1891891891891894e-05, + "loss": 0.7996, + "step": 44 + }, + { + "epoch": 0.02, + "learning_rate": 1.2162162162162164e-05, + "loss": 0.7621, + "step": 45 + }, + { + "epoch": 0.02, + "learning_rate": 1.2432432432432433e-05, + "loss": 0.7324, + "step": 46 + }, + { + "epoch": 0.02, + "learning_rate": 1.2702702702702702e-05, + "loss": 0.7214, + "step": 47 + }, + { + "epoch": 0.02, + "learning_rate": 1.2972972972972975e-05, + "loss": 0.7916, + "step": 48 + }, + { + "epoch": 0.02, + "learning_rate": 1.3243243243243244e-05, + "loss": 0.7493, + "step": 49 + }, + { + "epoch": 0.02, + "learning_rate": 1.3513513513513515e-05, + "loss": 0.7094, + "step": 50 + }, + { + "epoch": 0.02, + "learning_rate": 1.3783783783783784e-05, + "loss": 0.6915, + "step": 51 + }, + { + "epoch": 0.02, + "learning_rate": 1.4054054054054055e-05, + "loss": 0.7377, + "step": 52 + }, + { + "epoch": 0.02, + "learning_rate": 1.4324324324324326e-05, + "loss": 0.7305, + "step": 53 + }, + { + "epoch": 0.02, + "learning_rate": 1.4594594594594596e-05, + "loss": 0.736, + "step": 54 + }, + { + "epoch": 0.02, + "learning_rate": 1.4864864864864865e-05, + "loss": 0.7118, + "step": 55 + }, + { + "epoch": 0.02, + "learning_rate": 1.5135135135135138e-05, + "loss": 0.7813, + "step": 56 + }, + { + "epoch": 0.02, + "learning_rate": 1.540540540540541e-05, + "loss": 0.7843, + "step": 57 + }, + { + "epoch": 0.02, + "learning_rate": 1.5675675675675676e-05, + "loss": 0.7459, + "step": 58 + }, + { + "epoch": 0.02, + "learning_rate": 1.5945945945945947e-05, + "loss": 0.7001, + "step": 59 + }, + { + "epoch": 0.02, + "learning_rate": 1.6216216216216218e-05, + "loss": 0.7714, + "step": 60 + }, + { + "epoch": 0.02, + "learning_rate": 1.648648648648649e-05, + "loss": 0.7398, + "step": 61 + }, + { + "epoch": 0.03, + "learning_rate": 1.6756756756756757e-05, + "loss": 0.7837, + "step": 62 + }, + { + "epoch": 0.03, + "learning_rate": 1.7027027027027028e-05, + "loss": 0.7814, + "step": 63 + }, + { + "epoch": 0.03, + "learning_rate": 1.72972972972973e-05, + "loss": 0.7091, + "step": 64 + }, + { + "epoch": 0.03, + "learning_rate": 1.756756756756757e-05, + "loss": 0.7132, + "step": 65 + }, + { + "epoch": 0.03, + "learning_rate": 1.783783783783784e-05, + "loss": 0.7662, + "step": 66 + }, + { + "epoch": 0.03, + "learning_rate": 1.8108108108108108e-05, + "loss": 0.7957, + "step": 67 + }, + { + "epoch": 0.03, + "learning_rate": 1.8378378378378383e-05, + "loss": 0.8157, + "step": 68 + }, + { + "epoch": 0.03, + "learning_rate": 1.864864864864865e-05, + "loss": 0.6874, + "step": 69 + }, + { + "epoch": 0.03, + "learning_rate": 1.891891891891892e-05, + "loss": 0.6407, + "step": 70 + }, + { + "epoch": 0.03, + "learning_rate": 1.918918918918919e-05, + "loss": 0.7226, + "step": 71 + }, + { + "epoch": 0.03, + "learning_rate": 1.9459459459459463e-05, + "loss": 0.7492, + "step": 72 + }, + { + "epoch": 0.03, + "learning_rate": 1.972972972972973e-05, + "loss": 0.6559, + "step": 73 + }, + { + "epoch": 0.03, + "learning_rate": 2e-05, + "loss": 0.7014, + "step": 74 + }, + { + "epoch": 0.03, + "learning_rate": 1.9999991229177605e-05, + "loss": 0.7113, + "step": 75 + }, + { + "epoch": 0.03, + "learning_rate": 1.9999964916725805e-05, + "loss": 0.7679, + "step": 76 + }, + { + "epoch": 0.03, + "learning_rate": 1.9999921062690757e-05, + "loss": 0.7724, + "step": 77 + }, + { + "epoch": 0.03, + "learning_rate": 1.9999859667149386e-05, + "loss": 0.6777, + "step": 78 + }, + { + "epoch": 0.03, + "learning_rate": 1.9999780730209394e-05, + "loss": 0.7554, + "step": 79 + }, + { + "epoch": 0.03, + "learning_rate": 1.9999684252009243e-05, + "loss": 0.6715, + "step": 80 + }, + { + "epoch": 0.03, + "learning_rate": 1.9999570232718174e-05, + "loss": 0.7803, + "step": 81 + }, + { + "epoch": 0.03, + "learning_rate": 1.9999438672536202e-05, + "loss": 0.7306, + "step": 82 + }, + { + "epoch": 0.03, + "learning_rate": 1.9999289571694097e-05, + "loss": 0.7824, + "step": 83 + }, + { + "epoch": 0.03, + "learning_rate": 1.999912293045341e-05, + "loss": 0.7659, + "step": 84 + }, + { + "epoch": 0.03, + "learning_rate": 1.9998938749106455e-05, + "loss": 0.8113, + "step": 85 + }, + { + "epoch": 0.04, + "learning_rate": 1.9998737027976323e-05, + "loss": 0.7753, + "step": 86 + }, + { + "epoch": 0.04, + "learning_rate": 1.9998517767416856e-05, + "loss": 0.7205, + "step": 87 + }, + { + "epoch": 0.04, + "learning_rate": 1.999828096781268e-05, + "loss": 0.7603, + "step": 88 + }, + { + "epoch": 0.04, + "learning_rate": 1.9998026629579178e-05, + "loss": 0.7418, + "step": 89 + }, + { + "epoch": 0.04, + "learning_rate": 1.99977547531625e-05, + "loss": 0.7534, + "step": 90 + }, + { + "epoch": 0.04, + "learning_rate": 1.9997465339039568e-05, + "loss": 0.7847, + "step": 91 + }, + { + "epoch": 0.04, + "learning_rate": 1.9997158387718057e-05, + "loss": 0.7377, + "step": 92 + }, + { + "epoch": 0.04, + "learning_rate": 1.9996833899736406e-05, + "loss": 0.7227, + "step": 93 + }, + { + "epoch": 0.04, + "learning_rate": 1.9996491875663833e-05, + "loss": 0.7066, + "step": 94 + }, + { + "epoch": 0.04, + "learning_rate": 1.999613231610029e-05, + "loss": 0.7835, + "step": 95 + }, + { + "epoch": 0.04, + "learning_rate": 1.999575522167651e-05, + "loss": 0.7709, + "step": 96 + }, + { + "epoch": 0.04, + "learning_rate": 1.9995360593053983e-05, + "loss": 0.6646, + "step": 97 + }, + { + "epoch": 0.04, + "learning_rate": 1.9994948430924944e-05, + "loss": 0.7428, + "step": 98 + }, + { + "epoch": 0.04, + "learning_rate": 1.99945187360124e-05, + "loss": 0.637, + "step": 99 + }, + { + "epoch": 0.04, + "learning_rate": 1.9994071509070104e-05, + "loss": 0.7413, + "step": 100 + }, + { + "epoch": 0.04, + "learning_rate": 1.9993606750882566e-05, + "loss": 0.7932, + "step": 101 + }, + { + "epoch": 0.04, + "learning_rate": 1.9993124462265045e-05, + "loss": 0.7157, + "step": 102 + }, + { + "epoch": 0.04, + "learning_rate": 1.999262464406356e-05, + "loss": 0.7333, + "step": 103 + }, + { + "epoch": 0.04, + "learning_rate": 1.9992107297154872e-05, + "loss": 0.7356, + "step": 104 + }, + { + "epoch": 0.04, + "learning_rate": 1.999157242244649e-05, + "loss": 0.7008, + "step": 105 + }, + { + "epoch": 0.04, + "learning_rate": 1.9991020020876676e-05, + "loss": 0.7209, + "step": 106 + }, + { + "epoch": 0.04, + "learning_rate": 1.9990450093414437e-05, + "loss": 0.736, + "step": 107 + }, + { + "epoch": 0.04, + "learning_rate": 1.9989862641059504e-05, + "loss": 0.7011, + "step": 108 + }, + { + "epoch": 0.04, + "learning_rate": 1.9989257664842382e-05, + "loss": 0.6757, + "step": 109 + }, + { + "epoch": 0.04, + "learning_rate": 1.9988635165824293e-05, + "loss": 0.6541, + "step": 110 + }, + { + "epoch": 0.05, + "learning_rate": 1.99879951450972e-05, + "loss": 0.6673, + "step": 111 + }, + { + "epoch": 0.05, + "learning_rate": 1.9987337603783806e-05, + "loss": 0.7241, + "step": 112 + }, + { + "epoch": 0.05, + "learning_rate": 1.9986662543037548e-05, + "loss": 0.7017, + "step": 113 + }, + { + "epoch": 0.05, + "learning_rate": 1.998596996404259e-05, + "loss": 0.6831, + "step": 114 + }, + { + "epoch": 0.05, + "learning_rate": 1.9985259868013836e-05, + "loss": 0.7272, + "step": 115 + }, + { + "epoch": 0.05, + "learning_rate": 1.9984532256196905e-05, + "loss": 0.7158, + "step": 116 + }, + { + "epoch": 0.05, + "learning_rate": 1.998378712986815e-05, + "loss": 0.7135, + "step": 117 + }, + { + "epoch": 0.05, + "learning_rate": 1.9983024490334645e-05, + "loss": 0.697, + "step": 118 + }, + { + "epoch": 0.05, + "learning_rate": 1.9982244338934186e-05, + "loss": 0.7524, + "step": 119 + }, + { + "epoch": 0.05, + "learning_rate": 1.998144667703529e-05, + "loss": 0.6382, + "step": 120 + }, + { + "epoch": 0.05, + "learning_rate": 1.998063150603718e-05, + "loss": 0.7254, + "step": 121 + }, + { + "epoch": 0.05, + "learning_rate": 1.99797988273698e-05, + "loss": 0.7586, + "step": 122 + }, + { + "epoch": 0.05, + "learning_rate": 1.9978948642493817e-05, + "loss": 0.7567, + "step": 123 + }, + { + "epoch": 0.05, + "learning_rate": 1.997808095290058e-05, + "loss": 0.761, + "step": 124 + }, + { + "epoch": 0.05, + "learning_rate": 1.997719576011217e-05, + "loss": 0.6922, + "step": 125 + }, + { + "epoch": 0.05, + "learning_rate": 1.9976293065681355e-05, + "loss": 0.8705, + "step": 126 + }, + { + "epoch": 0.05, + "learning_rate": 1.9975372871191613e-05, + "loss": 0.8465, + "step": 127 + }, + { + "epoch": 0.05, + "learning_rate": 1.9974435178257114e-05, + "loss": 0.6818, + "step": 128 + }, + { + "epoch": 0.05, + "learning_rate": 1.9973479988522727e-05, + "loss": 0.7871, + "step": 129 + }, + { + "epoch": 0.05, + "learning_rate": 1.997250730366401e-05, + "loss": 0.689, + "step": 130 + }, + { + "epoch": 0.05, + "learning_rate": 1.9971517125387215e-05, + "loss": 0.7687, + "step": 131 + }, + { + "epoch": 0.05, + "learning_rate": 1.997050945542928e-05, + "loss": 0.7739, + "step": 132 + }, + { + "epoch": 0.05, + "learning_rate": 1.9969484295557814e-05, + "loss": 0.6592, + "step": 133 + }, + { + "epoch": 0.05, + "learning_rate": 1.9968441647571124e-05, + "loss": 0.7345, + "step": 134 + }, + { + "epoch": 0.06, + "learning_rate": 1.9967381513298188e-05, + "loss": 0.699, + "step": 135 + }, + { + "epoch": 0.06, + "learning_rate": 1.9966303894598645e-05, + "loss": 0.6508, + "step": 136 + }, + { + "epoch": 0.06, + "learning_rate": 1.996520879336283e-05, + "loss": 0.7698, + "step": 137 + }, + { + "epoch": 0.06, + "learning_rate": 1.996409621151172e-05, + "loss": 0.6685, + "step": 138 + }, + { + "epoch": 0.06, + "learning_rate": 1.996296615099697e-05, + "loss": 0.7438, + "step": 139 + }, + { + "epoch": 0.06, + "learning_rate": 1.9961818613800892e-05, + "loss": 0.7239, + "step": 140 + }, + { + "epoch": 0.06, + "learning_rate": 1.9960653601936454e-05, + "loss": 0.7671, + "step": 141 + }, + { + "epoch": 0.06, + "learning_rate": 1.995947111744728e-05, + "loss": 0.6988, + "step": 142 + }, + { + "epoch": 0.06, + "learning_rate": 1.995827116240764e-05, + "loss": 0.6864, + "step": 143 + }, + { + "epoch": 0.06, + "learning_rate": 1.995705373892246e-05, + "loss": 0.7423, + "step": 144 + }, + { + "epoch": 0.06, + "learning_rate": 1.995581884912729e-05, + "loss": 0.7609, + "step": 145 + }, + { + "epoch": 0.06, + "learning_rate": 1.9954566495188333e-05, + "loss": 0.7873, + "step": 146 + }, + { + "epoch": 0.06, + "learning_rate": 1.995329667930243e-05, + "loss": 0.6349, + "step": 147 + }, + { + "epoch": 0.06, + "learning_rate": 1.995200940369704e-05, + "loss": 0.7028, + "step": 148 + }, + { + "epoch": 0.06, + "learning_rate": 1.9950704670630258e-05, + "loss": 0.661, + "step": 149 + }, + { + "epoch": 0.06, + "learning_rate": 1.9949382482390803e-05, + "loss": 0.7158, + "step": 150 + }, + { + "epoch": 0.06, + "learning_rate": 1.994804284129801e-05, + "loss": 0.6674, + "step": 151 + }, + { + "epoch": 0.06, + "learning_rate": 1.994668574970183e-05, + "loss": 0.7478, + "step": 152 + }, + { + "epoch": 0.06, + "learning_rate": 1.9945311209982822e-05, + "loss": 0.7583, + "step": 153 + }, + { + "epoch": 0.06, + "learning_rate": 1.9943919224552154e-05, + "loss": 0.7372, + "step": 154 + }, + { + "epoch": 0.06, + "learning_rate": 1.99425097958516e-05, + "loss": 0.7122, + "step": 155 + }, + { + "epoch": 0.06, + "learning_rate": 1.994108292635353e-05, + "loss": 0.7396, + "step": 156 + }, + { + "epoch": 0.06, + "learning_rate": 1.9939638618560906e-05, + "loss": 0.6849, + "step": 157 + }, + { + "epoch": 0.06, + "learning_rate": 1.9938176875007284e-05, + "loss": 0.643, + "step": 158 + }, + { + "epoch": 0.07, + "learning_rate": 1.99366976982568e-05, + "loss": 0.7633, + "step": 159 + }, + { + "epoch": 0.07, + "learning_rate": 1.9935201090904177e-05, + "loss": 0.6894, + "step": 160 + }, + { + "epoch": 0.07, + "learning_rate": 1.9933687055574705e-05, + "loss": 0.8241, + "step": 161 + }, + { + "epoch": 0.07, + "learning_rate": 1.993215559492426e-05, + "loss": 0.7478, + "step": 162 + }, + { + "epoch": 0.07, + "learning_rate": 1.9930606711639266e-05, + "loss": 0.7617, + "step": 163 + }, + { + "epoch": 0.07, + "learning_rate": 1.992904040843672e-05, + "loss": 0.6509, + "step": 164 + }, + { + "epoch": 0.07, + "learning_rate": 1.992745668806418e-05, + "loss": 0.7458, + "step": 165 + }, + { + "epoch": 0.07, + "learning_rate": 1.9925855553299755e-05, + "loss": 0.7561, + "step": 166 + }, + { + "epoch": 0.07, + "learning_rate": 1.992423700695209e-05, + "loss": 0.7045, + "step": 167 + }, + { + "epoch": 0.07, + "learning_rate": 1.9922601051860386e-05, + "loss": 0.7136, + "step": 168 + }, + { + "epoch": 0.07, + "learning_rate": 1.9920947690894376e-05, + "loss": 0.6977, + "step": 169 + }, + { + "epoch": 0.07, + "learning_rate": 1.991927692695433e-05, + "loss": 0.7501, + "step": 170 + }, + { + "epoch": 0.07, + "learning_rate": 1.9917588762971037e-05, + "loss": 0.7366, + "step": 171 + }, + { + "epoch": 0.07, + "learning_rate": 1.9915883201905824e-05, + "loss": 0.6958, + "step": 172 + }, + { + "epoch": 0.07, + "learning_rate": 1.9914160246750517e-05, + "loss": 0.6859, + "step": 173 + }, + { + "epoch": 0.07, + "learning_rate": 1.9912419900527467e-05, + "loss": 0.7127, + "step": 174 + }, + { + "epoch": 0.07, + "learning_rate": 1.9910662166289523e-05, + "loss": 0.7156, + "step": 175 + }, + { + "epoch": 0.07, + "learning_rate": 1.9908887047120046e-05, + "loss": 0.7304, + "step": 176 + }, + { + "epoch": 0.07, + "learning_rate": 1.990709454613288e-05, + "loss": 0.6756, + "step": 177 + }, + { + "epoch": 0.07, + "learning_rate": 1.9905284666472374e-05, + "loss": 0.6849, + "step": 178 + }, + { + "epoch": 0.07, + "learning_rate": 1.9903457411313347e-05, + "loss": 0.6512, + "step": 179 + }, + { + "epoch": 0.07, + "learning_rate": 1.9901612783861114e-05, + "loss": 0.669, + "step": 180 + }, + { + "epoch": 0.07, + "learning_rate": 1.9899750787351445e-05, + "loss": 0.6937, + "step": 181 + }, + { + "epoch": 0.07, + "learning_rate": 1.9897871425050598e-05, + "loss": 0.7683, + "step": 182 + }, + { + "epoch": 0.07, + "learning_rate": 1.989597470025528e-05, + "loss": 0.7368, + "step": 183 + }, + { + "epoch": 0.08, + "learning_rate": 1.989406061629265e-05, + "loss": 0.785, + "step": 184 + }, + { + "epoch": 0.08, + "learning_rate": 1.989212917652034e-05, + "loss": 0.6766, + "step": 185 + }, + { + "epoch": 0.08, + "learning_rate": 1.9890180384326404e-05, + "loss": 0.642, + "step": 186 + }, + { + "epoch": 0.08, + "learning_rate": 1.9888214243129348e-05, + "loss": 0.6693, + "step": 187 + }, + { + "epoch": 0.08, + "learning_rate": 1.9886230756378102e-05, + "loss": 0.7124, + "step": 188 + }, + { + "epoch": 0.08, + "learning_rate": 1.9884229927552036e-05, + "loss": 0.7384, + "step": 189 + }, + { + "epoch": 0.08, + "learning_rate": 1.9882211760160924e-05, + "loss": 0.7166, + "step": 190 + }, + { + "epoch": 0.08, + "learning_rate": 1.9880176257744968e-05, + "loss": 0.6555, + "step": 191 + }, + { + "epoch": 0.08, + "learning_rate": 1.9878123423874777e-05, + "loss": 0.7438, + "step": 192 + }, + { + "epoch": 0.08, + "learning_rate": 1.987605326215135e-05, + "loss": 0.666, + "step": 193 + }, + { + "epoch": 0.08, + "learning_rate": 1.9873965776206103e-05, + "loss": 0.7085, + "step": 194 + }, + { + "epoch": 0.08, + "learning_rate": 1.9871860969700824e-05, + "loss": 0.7651, + "step": 195 + }, + { + "epoch": 0.08, + "learning_rate": 1.9869738846327685e-05, + "loss": 0.7598, + "step": 196 + }, + { + "epoch": 0.08, + "learning_rate": 1.9867599409809248e-05, + "loss": 0.6693, + "step": 197 + }, + { + "epoch": 0.08, + "learning_rate": 1.986544266389843e-05, + "loss": 0.6429, + "step": 198 + }, + { + "epoch": 0.08, + "learning_rate": 1.9863268612378525e-05, + "loss": 0.6636, + "step": 199 + }, + { + "epoch": 0.08, + "learning_rate": 1.986107725906317e-05, + "loss": 0.6937, + "step": 200 + }, + { + "epoch": 0.08, + "learning_rate": 1.985886860779636e-05, + "loss": 0.6555, + "step": 201 + }, + { + "epoch": 0.08, + "learning_rate": 1.9856642662452437e-05, + "loss": 0.7356, + "step": 202 + }, + { + "epoch": 0.08, + "learning_rate": 1.9854399426936072e-05, + "loss": 0.7173, + "step": 203 + }, + { + "epoch": 0.08, + "learning_rate": 1.985213890518227e-05, + "loss": 0.6426, + "step": 204 + }, + { + "epoch": 0.08, + "learning_rate": 1.984986110115636e-05, + "loss": 0.7005, + "step": 205 + }, + { + "epoch": 0.08, + "learning_rate": 1.984756601885398e-05, + "loss": 0.6916, + "step": 206 + }, + { + "epoch": 0.08, + "learning_rate": 1.9845253662301085e-05, + "loss": 0.7192, + "step": 207 + }, + { + "epoch": 0.09, + "learning_rate": 1.984292403555393e-05, + "loss": 0.7189, + "step": 208 + }, + { + "epoch": 0.09, + "learning_rate": 1.984057714269906e-05, + "loss": 0.6656, + "step": 209 + }, + { + "epoch": 0.09, + "learning_rate": 1.9838212987853312e-05, + "loss": 0.729, + "step": 210 + }, + { + "epoch": 0.09, + "learning_rate": 1.9835831575163803e-05, + "loss": 0.7722, + "step": 211 + }, + { + "epoch": 0.09, + "learning_rate": 1.9833432908807927e-05, + "loss": 0.724, + "step": 212 + }, + { + "epoch": 0.09, + "learning_rate": 1.9831016992993334e-05, + "loss": 0.7056, + "step": 213 + }, + { + "epoch": 0.09, + "learning_rate": 1.9828583831957935e-05, + "loss": 0.6936, + "step": 214 + }, + { + "epoch": 0.09, + "learning_rate": 1.98261334299699e-05, + "loss": 0.7692, + "step": 215 + }, + { + "epoch": 0.09, + "learning_rate": 1.9823665791327635e-05, + "loss": 0.686, + "step": 216 + }, + { + "epoch": 0.09, + "learning_rate": 1.9821180920359788e-05, + "loss": 0.7909, + "step": 217 + }, + { + "epoch": 0.09, + "learning_rate": 1.9818678821425227e-05, + "loss": 0.7024, + "step": 218 + }, + { + "epoch": 0.09, + "learning_rate": 1.9816159498913044e-05, + "loss": 0.6972, + "step": 219 + }, + { + "epoch": 0.09, + "learning_rate": 1.981362295724255e-05, + "loss": 0.7299, + "step": 220 + }, + { + "epoch": 0.09, + "learning_rate": 1.981106920086325e-05, + "loss": 0.7137, + "step": 221 + }, + { + "epoch": 0.09, + "learning_rate": 1.980849823425486e-05, + "loss": 0.6248, + "step": 222 + }, + { + "epoch": 0.09, + "learning_rate": 1.9805910061927274e-05, + "loss": 0.7232, + "step": 223 + }, + { + "epoch": 0.09, + "learning_rate": 1.9803304688420568e-05, + "loss": 0.705, + "step": 224 + }, + { + "epoch": 0.09, + "learning_rate": 1.9800682118305006e-05, + "loss": 0.6893, + "step": 225 + }, + { + "epoch": 0.09, + "learning_rate": 1.9798042356181e-05, + "loss": 0.7827, + "step": 226 + }, + { + "epoch": 0.09, + "learning_rate": 1.9795385406679125e-05, + "loss": 0.6216, + "step": 227 + }, + { + "epoch": 0.09, + "learning_rate": 1.9792711274460116e-05, + "loss": 0.6915, + "step": 228 + }, + { + "epoch": 0.09, + "learning_rate": 1.979001996421483e-05, + "loss": 0.743, + "step": 229 + }, + { + "epoch": 0.09, + "learning_rate": 1.978731148066428e-05, + "loss": 0.7098, + "step": 230 + }, + { + "epoch": 0.09, + "learning_rate": 1.978458582855958e-05, + "loss": 0.6684, + "step": 231 + }, + { + "epoch": 0.09, + "learning_rate": 1.978184301268198e-05, + "loss": 0.6526, + "step": 232 + }, + { + "epoch": 0.1, + "learning_rate": 1.9779083037842825e-05, + "loss": 0.7063, + "step": 233 + }, + { + "epoch": 0.1, + "learning_rate": 1.977630590888357e-05, + "loss": 0.6826, + "step": 234 + }, + { + "epoch": 0.1, + "learning_rate": 1.977351163067575e-05, + "loss": 0.7599, + "step": 235 + }, + { + "epoch": 0.1, + "learning_rate": 1.9770700208120996e-05, + "loss": 0.7276, + "step": 236 + }, + { + "epoch": 0.1, + "learning_rate": 1.9767871646150998e-05, + "loss": 0.6454, + "step": 237 + }, + { + "epoch": 0.1, + "learning_rate": 1.9765025949727526e-05, + "loss": 0.7429, + "step": 238 + }, + { + "epoch": 0.1, + "learning_rate": 1.976216312384239e-05, + "loss": 0.6716, + "step": 239 + }, + { + "epoch": 0.1, + "learning_rate": 1.975928317351747e-05, + "loss": 0.7179, + "step": 240 + }, + { + "epoch": 0.1, + "learning_rate": 1.975638610380466e-05, + "loss": 0.7334, + "step": 241 + }, + { + "epoch": 0.1, + "learning_rate": 1.975347191978591e-05, + "loss": 0.7534, + "step": 242 + }, + { + "epoch": 0.1, + "learning_rate": 1.9750540626573164e-05, + "loss": 0.7452, + "step": 243 + }, + { + "epoch": 0.1, + "learning_rate": 1.97475922293084e-05, + "loss": 0.7468, + "step": 244 + }, + { + "epoch": 0.1, + "learning_rate": 1.9744626733163593e-05, + "loss": 0.6564, + "step": 245 + }, + { + "epoch": 0.1, + "learning_rate": 1.9741644143340707e-05, + "loss": 0.662, + "step": 246 + }, + { + "epoch": 0.1, + "learning_rate": 1.9738644465071698e-05, + "loss": 0.7549, + "step": 247 + }, + { + "epoch": 0.1, + "learning_rate": 1.9735627703618494e-05, + "loss": 0.6778, + "step": 248 + }, + { + "epoch": 0.1, + "learning_rate": 1.9732593864272994e-05, + "loss": 0.7245, + "step": 249 + }, + { + "epoch": 0.1, + "learning_rate": 1.9729542952357045e-05, + "loss": 0.7622, + "step": 250 + }, + { + "epoch": 0.1, + "learning_rate": 1.9726474973222453e-05, + "loss": 0.7587, + "step": 251 + }, + { + "epoch": 0.1, + "learning_rate": 1.9723389932250955e-05, + "loss": 0.7281, + "step": 252 + }, + { + "epoch": 0.1, + "learning_rate": 1.9720287834854222e-05, + "loss": 0.6908, + "step": 253 + }, + { + "epoch": 0.1, + "learning_rate": 1.9717168686473845e-05, + "loss": 0.6357, + "step": 254 + }, + { + "epoch": 0.1, + "learning_rate": 1.971403249258132e-05, + "loss": 0.6483, + "step": 255 + }, + { + "epoch": 0.1, + "learning_rate": 1.9710879258678045e-05, + "loss": 0.7361, + "step": 256 + }, + { + "epoch": 0.11, + "learning_rate": 1.970770899029532e-05, + "loss": 0.7461, + "step": 257 + }, + { + "epoch": 0.11, + "learning_rate": 1.9704521692994305e-05, + "loss": 0.6225, + "step": 258 + }, + { + "epoch": 0.11, + "learning_rate": 1.9701317372366055e-05, + "loss": 0.7378, + "step": 259 + }, + { + "epoch": 0.11, + "learning_rate": 1.969809603403147e-05, + "loss": 0.6854, + "step": 260 + }, + { + "epoch": 0.11, + "learning_rate": 1.9694857683641304e-05, + "loss": 0.7554, + "step": 261 + }, + { + "epoch": 0.11, + "learning_rate": 1.969160232687616e-05, + "loss": 0.6839, + "step": 262 + }, + { + "epoch": 0.11, + "learning_rate": 1.9688329969446473e-05, + "loss": 0.7053, + "step": 263 + }, + { + "epoch": 0.11, + "learning_rate": 1.968504061709249e-05, + "loss": 0.7743, + "step": 264 + }, + { + "epoch": 0.11, + "learning_rate": 1.9681734275584278e-05, + "loss": 0.7763, + "step": 265 + }, + { + "epoch": 0.11, + "learning_rate": 1.96784109507217e-05, + "loss": 0.7409, + "step": 266 + }, + { + "epoch": 0.11, + "learning_rate": 1.9675070648334426e-05, + "loss": 0.6643, + "step": 267 + }, + { + "epoch": 0.11, + "learning_rate": 1.9671713374281883e-05, + "loss": 0.698, + "step": 268 + }, + { + "epoch": 0.11, + "learning_rate": 1.966833913445329e-05, + "loss": 0.7453, + "step": 269 + }, + { + "epoch": 0.11, + "learning_rate": 1.9664947934767614e-05, + "loss": 0.6386, + "step": 270 + }, + { + "epoch": 0.11, + "learning_rate": 1.9661539781173582e-05, + "loss": 0.7071, + "step": 271 + }, + { + "epoch": 0.11, + "learning_rate": 1.965811467964965e-05, + "loss": 0.6803, + "step": 272 + }, + { + "epoch": 0.11, + "learning_rate": 1.9654672636204014e-05, + "loss": 0.6056, + "step": 273 + }, + { + "epoch": 0.11, + "learning_rate": 1.965121365687458e-05, + "loss": 0.6341, + "step": 274 + }, + { + "epoch": 0.11, + "learning_rate": 1.9647737747728972e-05, + "loss": 0.6479, + "step": 275 + }, + { + "epoch": 0.11, + "learning_rate": 1.9644244914864502e-05, + "loss": 0.7001, + "step": 276 + }, + { + "epoch": 0.11, + "learning_rate": 1.9640735164408176e-05, + "loss": 0.7345, + "step": 277 + }, + { + "epoch": 0.11, + "learning_rate": 1.9637208502516673e-05, + "loss": 0.7122, + "step": 278 + }, + { + "epoch": 0.11, + "learning_rate": 1.9633664935376335e-05, + "loss": 0.6863, + "step": 279 + }, + { + "epoch": 0.11, + "learning_rate": 1.9630104469203165e-05, + "loss": 0.6755, + "step": 280 + }, + { + "epoch": 0.11, + "learning_rate": 1.9626527110242808e-05, + "loss": 0.7279, + "step": 281 + }, + { + "epoch": 0.12, + "learning_rate": 1.9622932864770538e-05, + "loss": 0.7658, + "step": 282 + }, + { + "epoch": 0.12, + "learning_rate": 1.9619321739091247e-05, + "loss": 0.8178, + "step": 283 + }, + { + "epoch": 0.12, + "learning_rate": 1.9615693739539452e-05, + "loss": 0.7593, + "step": 284 + }, + { + "epoch": 0.12, + "learning_rate": 1.961204887247926e-05, + "loss": 0.7031, + "step": 285 + }, + { + "epoch": 0.12, + "learning_rate": 1.9608387144304363e-05, + "loss": 0.7344, + "step": 286 + }, + { + "epoch": 0.12, + "learning_rate": 1.9604708561438033e-05, + "loss": 0.7371, + "step": 287 + }, + { + "epoch": 0.12, + "learning_rate": 1.960101313033312e-05, + "loss": 0.6481, + "step": 288 + }, + { + "epoch": 0.12, + "learning_rate": 1.9597300857472e-05, + "loss": 0.7669, + "step": 289 + }, + { + "epoch": 0.12, + "learning_rate": 1.959357174936663e-05, + "loss": 0.7682, + "step": 290 + }, + { + "epoch": 0.12, + "learning_rate": 1.9589825812558468e-05, + "loss": 0.6921, + "step": 291 + }, + { + "epoch": 0.12, + "learning_rate": 1.95860630536185e-05, + "loss": 0.7272, + "step": 292 + }, + { + "epoch": 0.12, + "learning_rate": 1.9582283479147237e-05, + "loss": 0.6797, + "step": 293 + }, + { + "epoch": 0.12, + "learning_rate": 1.9578487095774666e-05, + "loss": 0.6798, + "step": 294 + }, + { + "epoch": 0.12, + "learning_rate": 1.9574673910160263e-05, + "loss": 0.6218, + "step": 295 + }, + { + "epoch": 0.12, + "learning_rate": 1.957084392899299e-05, + "loss": 0.6786, + "step": 296 + }, + { + "epoch": 0.12, + "learning_rate": 1.9566997158991265e-05, + "loss": 0.6568, + "step": 297 + }, + { + "epoch": 0.12, + "learning_rate": 1.956313360690295e-05, + "loss": 0.6483, + "step": 298 + }, + { + "epoch": 0.12, + "learning_rate": 1.9559253279505354e-05, + "loss": 0.6623, + "step": 299 + }, + { + "epoch": 0.12, + "learning_rate": 1.955535618360521e-05, + "loss": 0.6965, + "step": 300 + }, + { + "epoch": 0.12, + "learning_rate": 1.9551442326038664e-05, + "loss": 0.7012, + "step": 301 + }, + { + "epoch": 0.12, + "learning_rate": 1.9547511713671264e-05, + "loss": 0.6993, + "step": 302 + }, + { + "epoch": 0.12, + "learning_rate": 1.9543564353397953e-05, + "loss": 0.6872, + "step": 303 + }, + { + "epoch": 0.12, + "learning_rate": 1.953960025214305e-05, + "loss": 0.6712, + "step": 304 + }, + { + "epoch": 0.12, + "learning_rate": 1.953561941686024e-05, + "loss": 0.7013, + "step": 305 + }, + { + "epoch": 0.13, + "learning_rate": 1.9531621854532562e-05, + "loss": 0.7142, + "step": 306 + }, + { + "epoch": 0.13, + "learning_rate": 1.95276075721724e-05, + "loss": 0.6824, + "step": 307 + }, + { + "epoch": 0.13, + "learning_rate": 1.9523576576821463e-05, + "loss": 0.7435, + "step": 308 + }, + { + "epoch": 0.13, + "learning_rate": 1.9519528875550783e-05, + "loss": 0.731, + "step": 309 + }, + { + "epoch": 0.13, + "learning_rate": 1.9515464475460692e-05, + "loss": 0.6508, + "step": 310 + }, + { + "epoch": 0.13, + "learning_rate": 1.951138338368082e-05, + "loss": 0.77, + "step": 311 + }, + { + "epoch": 0.13, + "learning_rate": 1.9507285607370065e-05, + "loss": 0.666, + "step": 312 + }, + { + "epoch": 0.13, + "learning_rate": 1.9503171153716606e-05, + "loss": 0.6168, + "step": 313 + }, + { + "epoch": 0.13, + "learning_rate": 1.949904002993787e-05, + "loss": 0.6891, + "step": 314 + }, + { + "epoch": 0.13, + "learning_rate": 1.949489224328053e-05, + "loss": 0.658, + "step": 315 + }, + { + "epoch": 0.13, + "learning_rate": 1.9490727801020485e-05, + "loss": 0.698, + "step": 316 + }, + { + "epoch": 0.13, + "learning_rate": 1.9486546710462847e-05, + "loss": 0.7834, + "step": 317 + }, + { + "epoch": 0.13, + "learning_rate": 1.9482348978941947e-05, + "loss": 0.7289, + "step": 318 + }, + { + "epoch": 0.13, + "learning_rate": 1.9478134613821286e-05, + "loss": 0.6546, + "step": 319 + }, + { + "epoch": 0.13, + "learning_rate": 1.9473903622493554e-05, + "loss": 0.6534, + "step": 320 + }, + { + "epoch": 0.13, + "learning_rate": 1.9469656012380617e-05, + "loss": 0.7529, + "step": 321 + }, + { + "epoch": 0.13, + "learning_rate": 1.946539179093347e-05, + "loss": 0.7308, + "step": 322 + }, + { + "epoch": 0.13, + "learning_rate": 1.946111096563226e-05, + "loss": 0.7221, + "step": 323 + }, + { + "epoch": 0.13, + "learning_rate": 1.945681354398627e-05, + "loss": 0.6508, + "step": 324 + }, + { + "epoch": 0.13, + "learning_rate": 1.945249953353387e-05, + "loss": 0.6877, + "step": 325 + }, + { + "epoch": 0.13, + "learning_rate": 1.944816894184255e-05, + "loss": 0.7452, + "step": 326 + }, + { + "epoch": 0.13, + "learning_rate": 1.9443821776508885e-05, + "loss": 0.6759, + "step": 327 + }, + { + "epoch": 0.13, + "learning_rate": 1.943945804515851e-05, + "loss": 0.6791, + "step": 328 + }, + { + "epoch": 0.13, + "learning_rate": 1.9435077755446124e-05, + "loss": 0.7525, + "step": 329 + }, + { + "epoch": 0.13, + "learning_rate": 1.9430680915055492e-05, + "loss": 0.7387, + "step": 330 + }, + { + "epoch": 0.14, + "learning_rate": 1.942626753169938e-05, + "loss": 0.6926, + "step": 331 + }, + { + "epoch": 0.14, + "learning_rate": 1.9421837613119597e-05, + "loss": 0.7244, + "step": 332 + }, + { + "epoch": 0.14, + "learning_rate": 1.9417391167086946e-05, + "loss": 0.709, + "step": 333 + }, + { + "epoch": 0.14, + "learning_rate": 1.941292820140122e-05, + "loss": 0.674, + "step": 334 + }, + { + "epoch": 0.14, + "learning_rate": 1.9408448723891203e-05, + "loss": 0.6841, + "step": 335 + }, + { + "epoch": 0.14, + "learning_rate": 1.940395274241463e-05, + "loss": 0.7307, + "step": 336 + }, + { + "epoch": 0.14, + "learning_rate": 1.9399440264858192e-05, + "loss": 0.7572, + "step": 337 + }, + { + "epoch": 0.14, + "learning_rate": 1.9394911299137522e-05, + "loss": 0.7091, + "step": 338 + }, + { + "epoch": 0.14, + "learning_rate": 1.9390365853197163e-05, + "loss": 0.6941, + "step": 339 + }, + { + "epoch": 0.14, + "learning_rate": 1.938580393501058e-05, + "loss": 0.705, + "step": 340 + }, + { + "epoch": 0.14, + "learning_rate": 1.938122555258013e-05, + "loss": 0.6568, + "step": 341 + }, + { + "epoch": 0.14, + "learning_rate": 1.9376630713937043e-05, + "loss": 0.6852, + "step": 342 + }, + { + "epoch": 0.14, + "learning_rate": 1.9372019427141424e-05, + "loss": 0.7676, + "step": 343 + }, + { + "epoch": 0.14, + "learning_rate": 1.9367391700282228e-05, + "loss": 0.6645, + "step": 344 + }, + { + "epoch": 0.14, + "learning_rate": 1.9362747541477255e-05, + "loss": 0.6581, + "step": 345 + }, + { + "epoch": 0.14, + "learning_rate": 1.9358086958873116e-05, + "loss": 0.6912, + "step": 346 + }, + { + "epoch": 0.14, + "learning_rate": 1.935340996064524e-05, + "loss": 0.5606, + "step": 347 + }, + { + "epoch": 0.14, + "learning_rate": 1.9348716554997854e-05, + "loss": 0.6755, + "step": 348 + }, + { + "epoch": 0.14, + "learning_rate": 1.9344006750163962e-05, + "loss": 0.7071, + "step": 349 + }, + { + "epoch": 0.14, + "learning_rate": 1.9339280554405336e-05, + "loss": 0.6998, + "step": 350 + }, + { + "epoch": 0.14, + "learning_rate": 1.93345379760125e-05, + "loss": 0.6462, + "step": 351 + }, + { + "epoch": 0.14, + "learning_rate": 1.9329779023304724e-05, + "loss": 0.6674, + "step": 352 + }, + { + "epoch": 0.14, + "learning_rate": 1.9325003704629982e-05, + "loss": 0.756, + "step": 353 + }, + { + "epoch": 0.14, + "learning_rate": 1.9320212028364976e-05, + "loss": 0.7485, + "step": 354 + }, + { + "epoch": 0.15, + "learning_rate": 1.9315404002915093e-05, + "loss": 0.6598, + "step": 355 + }, + { + "epoch": 0.15, + "learning_rate": 1.9310579636714402e-05, + "loss": 0.7027, + "step": 356 + }, + { + "epoch": 0.15, + "learning_rate": 1.930573893822563e-05, + "loss": 0.6232, + "step": 357 + }, + { + "epoch": 0.15, + "learning_rate": 1.9300881915940163e-05, + "loss": 0.7015, + "step": 358 + }, + { + "epoch": 0.15, + "learning_rate": 1.9296008578378015e-05, + "loss": 0.8247, + "step": 359 + }, + { + "epoch": 0.15, + "learning_rate": 1.929111893408782e-05, + "loss": 0.7135, + "step": 360 + }, + { + "epoch": 0.15, + "learning_rate": 1.9286212991646823e-05, + "loss": 0.6709, + "step": 361 + }, + { + "epoch": 0.15, + "learning_rate": 1.928129075966085e-05, + "loss": 0.6455, + "step": 362 + }, + { + "epoch": 0.15, + "learning_rate": 1.9276352246764305e-05, + "loss": 0.6988, + "step": 363 + }, + { + "epoch": 0.15, + "learning_rate": 1.9271397461620154e-05, + "loss": 0.6794, + "step": 364 + }, + { + "epoch": 0.15, + "learning_rate": 1.9266426412919905e-05, + "loss": 0.7772, + "step": 365 + }, + { + "epoch": 0.15, + "learning_rate": 1.9261439109383594e-05, + "loss": 0.6561, + "step": 366 + }, + { + "epoch": 0.15, + "learning_rate": 1.925643555975977e-05, + "loss": 0.6804, + "step": 367 + }, + { + "epoch": 0.15, + "learning_rate": 1.925141577282549e-05, + "loss": 0.7263, + "step": 368 + }, + { + "epoch": 0.15, + "learning_rate": 1.924637975738628e-05, + "loss": 0.7349, + "step": 369 + }, + { + "epoch": 0.15, + "learning_rate": 1.9241327522276133e-05, + "loss": 0.6582, + "step": 370 + }, + { + "epoch": 0.15, + "learning_rate": 1.923625907635751e-05, + "loss": 0.625, + "step": 371 + }, + { + "epoch": 0.15, + "learning_rate": 1.92311744285213e-05, + "loss": 0.6996, + "step": 372 + }, + { + "epoch": 0.15, + "learning_rate": 1.9226073587686805e-05, + "loss": 0.7124, + "step": 373 + }, + { + "epoch": 0.15, + "learning_rate": 1.922095656280174e-05, + "loss": 0.7546, + "step": 374 + }, + { + "epoch": 0.15, + "learning_rate": 1.921582336284221e-05, + "loss": 0.7235, + "step": 375 + }, + { + "epoch": 0.15, + "learning_rate": 1.9210673996812694e-05, + "loss": 0.7486, + "step": 376 + }, + { + "epoch": 0.15, + "learning_rate": 1.920550847374602e-05, + "loss": 0.7058, + "step": 377 + }, + { + "epoch": 0.15, + "learning_rate": 1.9200326802703374e-05, + "loss": 0.7482, + "step": 378 + }, + { + "epoch": 0.15, + "learning_rate": 1.919512899277425e-05, + "loss": 0.6735, + "step": 379 + }, + { + "epoch": 0.16, + "learning_rate": 1.9189915053076472e-05, + "loss": 0.6708, + "step": 380 + }, + { + "epoch": 0.16, + "learning_rate": 1.9184684992756142e-05, + "loss": 0.675, + "step": 381 + }, + { + "epoch": 0.16, + "learning_rate": 1.9179438820987645e-05, + "loss": 0.6281, + "step": 382 + }, + { + "epoch": 0.16, + "learning_rate": 1.917417654697363e-05, + "loss": 0.6828, + "step": 383 + }, + { + "epoch": 0.16, + "learning_rate": 1.9168898179944994e-05, + "loss": 0.6829, + "step": 384 + }, + { + "epoch": 0.16, + "learning_rate": 1.9163603729160854e-05, + "loss": 0.7762, + "step": 385 + }, + { + "epoch": 0.16, + "learning_rate": 1.9158293203908552e-05, + "loss": 0.6461, + "step": 386 + }, + { + "epoch": 0.16, + "learning_rate": 1.9152966613503627e-05, + "loss": 0.6883, + "step": 387 + }, + { + "epoch": 0.16, + "learning_rate": 1.914762396728979e-05, + "loss": 0.7082, + "step": 388 + }, + { + "epoch": 0.16, + "learning_rate": 1.914226527463892e-05, + "loss": 0.7298, + "step": 389 + }, + { + "epoch": 0.16, + "learning_rate": 1.9136890544951046e-05, + "loss": 0.6838, + "step": 390 + }, + { + "epoch": 0.16, + "learning_rate": 1.9131499787654334e-05, + "loss": 0.6955, + "step": 391 + }, + { + "epoch": 0.16, + "learning_rate": 1.912609301220505e-05, + "loss": 0.6945, + "step": 392 + }, + { + "epoch": 0.16, + "learning_rate": 1.912067022808757e-05, + "loss": 0.6849, + "step": 393 + }, + { + "epoch": 0.16, + "learning_rate": 1.9115231444814356e-05, + "loss": 0.672, + "step": 394 + }, + { + "epoch": 0.16, + "learning_rate": 1.910977667192592e-05, + "loss": 0.7544, + "step": 395 + }, + { + "epoch": 0.16, + "learning_rate": 1.9104305918990832e-05, + "loss": 0.6818, + "step": 396 + }, + { + "epoch": 0.16, + "learning_rate": 1.9098819195605697e-05, + "loss": 0.6401, + "step": 397 + }, + { + "epoch": 0.16, + "learning_rate": 1.9093316511395128e-05, + "loss": 0.8019, + "step": 398 + }, + { + "epoch": 0.16, + "learning_rate": 1.908779787601174e-05, + "loss": 0.6541, + "step": 399 + }, + { + "epoch": 0.16, + "learning_rate": 1.908226329913612e-05, + "loss": 0.7466, + "step": 400 + }, + { + "epoch": 0.16, + "learning_rate": 1.907671279047683e-05, + "loss": 0.6918, + "step": 401 + }, + { + "epoch": 0.16, + "learning_rate": 1.9071146359770384e-05, + "loss": 0.6416, + "step": 402 + }, + { + "epoch": 0.16, + "learning_rate": 1.9065564016781204e-05, + "loss": 0.7059, + "step": 403 + }, + { + "epoch": 0.17, + "learning_rate": 1.9059965771301644e-05, + "loss": 0.6869, + "step": 404 + }, + { + "epoch": 0.17, + "learning_rate": 1.9054351633151945e-05, + "loss": 0.6925, + "step": 405 + }, + { + "epoch": 0.17, + "learning_rate": 1.9048721612180232e-05, + "loss": 0.6839, + "step": 406 + }, + { + "epoch": 0.17, + "learning_rate": 1.9043075718262485e-05, + "loss": 0.7487, + "step": 407 + }, + { + "epoch": 0.17, + "learning_rate": 1.9037413961302534e-05, + "loss": 0.6022, + "step": 408 + }, + { + "epoch": 0.17, + "learning_rate": 1.9031736351232025e-05, + "loss": 0.6931, + "step": 409 + }, + { + "epoch": 0.17, + "learning_rate": 1.9026042898010428e-05, + "loss": 0.6718, + "step": 410 + }, + { + "epoch": 0.17, + "learning_rate": 1.9020333611624993e-05, + "loss": 0.7025, + "step": 411 + }, + { + "epoch": 0.17, + "learning_rate": 1.9014608502090744e-05, + "loss": 0.6595, + "step": 412 + }, + { + "epoch": 0.17, + "learning_rate": 1.9008867579450472e-05, + "loss": 0.6941, + "step": 413 + }, + { + "epoch": 0.17, + "learning_rate": 1.9003110853774694e-05, + "loss": 0.7194, + "step": 414 + }, + { + "epoch": 0.17, + "learning_rate": 1.8997338335161656e-05, + "loss": 0.7182, + "step": 415 + }, + { + "epoch": 0.17, + "learning_rate": 1.89915500337373e-05, + "loss": 0.6861, + "step": 416 + }, + { + "epoch": 0.17, + "learning_rate": 1.8985745959655268e-05, + "loss": 0.7109, + "step": 417 + }, + { + "epoch": 0.17, + "learning_rate": 1.8979926123096858e-05, + "loss": 0.686, + "step": 418 + }, + { + "epoch": 0.17, + "learning_rate": 1.8974090534271013e-05, + "loss": 0.6813, + "step": 419 + }, + { + "epoch": 0.17, + "learning_rate": 1.896823920341432e-05, + "loss": 0.6497, + "step": 420 + }, + { + "epoch": 0.17, + "learning_rate": 1.8962372140790984e-05, + "loss": 0.6652, + "step": 421 + }, + { + "epoch": 0.17, + "learning_rate": 1.895648935669278e-05, + "loss": 0.6616, + "step": 422 + }, + { + "epoch": 0.17, + "learning_rate": 1.8950590861439098e-05, + "loss": 0.6319, + "step": 423 + }, + { + "epoch": 0.17, + "learning_rate": 1.8944676665376858e-05, + "loss": 0.6901, + "step": 424 + }, + { + "epoch": 0.17, + "learning_rate": 1.8938746778880535e-05, + "loss": 0.6906, + "step": 425 + }, + { + "epoch": 0.17, + "learning_rate": 1.8932801212352124e-05, + "loss": 0.6569, + "step": 426 + }, + { + "epoch": 0.17, + "learning_rate": 1.8926839976221128e-05, + "loss": 0.6184, + "step": 427 + }, + { + "epoch": 0.17, + "learning_rate": 1.8920863080944534e-05, + "loss": 0.6378, + "step": 428 + }, + { + "epoch": 0.18, + "learning_rate": 1.8914870537006805e-05, + "loss": 0.6651, + "step": 429 + }, + { + "epoch": 0.18, + "learning_rate": 1.8908862354919843e-05, + "loss": 0.7356, + "step": 430 + }, + { + "epoch": 0.18, + "learning_rate": 1.8902838545222987e-05, + "loss": 0.6437, + "step": 431 + }, + { + "epoch": 0.18, + "learning_rate": 1.8896799118482995e-05, + "loss": 0.7163, + "step": 432 + }, + { + "epoch": 0.18, + "learning_rate": 1.889074408529401e-05, + "loss": 0.6933, + "step": 433 + }, + { + "epoch": 0.18, + "learning_rate": 1.888467345627756e-05, + "loss": 0.7284, + "step": 434 + }, + { + "epoch": 0.18, + "learning_rate": 1.887858724208252e-05, + "loss": 0.6641, + "step": 435 + }, + { + "epoch": 0.18, + "learning_rate": 1.8872485453385124e-05, + "loss": 0.6717, + "step": 436 + }, + { + "epoch": 0.18, + "learning_rate": 1.88663681008889e-05, + "loss": 0.7557, + "step": 437 + }, + { + "epoch": 0.18, + "learning_rate": 1.8860235195324695e-05, + "loss": 0.707, + "step": 438 + }, + { + "epoch": 0.18, + "learning_rate": 1.8854086747450636e-05, + "loss": 0.6398, + "step": 439 + }, + { + "epoch": 0.18, + "learning_rate": 1.8847922768052105e-05, + "loss": 0.7226, + "step": 440 + }, + { + "epoch": 0.18, + "learning_rate": 1.8841743267941746e-05, + "loss": 0.6508, + "step": 441 + }, + { + "epoch": 0.18, + "learning_rate": 1.8835548257959413e-05, + "loss": 0.7045, + "step": 442 + }, + { + "epoch": 0.18, + "learning_rate": 1.882933774897217e-05, + "loss": 0.7008, + "step": 443 + }, + { + "epoch": 0.18, + "learning_rate": 1.8823111751874277e-05, + "loss": 0.7322, + "step": 444 + }, + { + "epoch": 0.18, + "learning_rate": 1.8816870277587155e-05, + "loss": 0.7538, + "step": 445 + }, + { + "epoch": 0.18, + "learning_rate": 1.881061333705937e-05, + "loss": 0.6776, + "step": 446 + }, + { + "epoch": 0.18, + "learning_rate": 1.8804340941266638e-05, + "loss": 0.6401, + "step": 447 + }, + { + "epoch": 0.18, + "learning_rate": 1.879805310121176e-05, + "loss": 0.6541, + "step": 448 + }, + { + "epoch": 0.18, + "learning_rate": 1.879174982792465e-05, + "loss": 0.8082, + "step": 449 + }, + { + "epoch": 0.18, + "learning_rate": 1.8785431132462278e-05, + "loss": 0.5973, + "step": 450 + }, + { + "epoch": 0.18, + "learning_rate": 1.8779097025908684e-05, + "loss": 0.6512, + "step": 451 + }, + { + "epoch": 0.18, + "learning_rate": 1.8772747519374927e-05, + "loss": 0.7673, + "step": 452 + }, + { + "epoch": 0.19, + "learning_rate": 1.8766382623999094e-05, + "loss": 0.8369, + "step": 453 + }, + { + "epoch": 0.19, + "learning_rate": 1.8760002350946244e-05, + "loss": 0.6954, + "step": 454 + }, + { + "epoch": 0.19, + "learning_rate": 1.875360671140844e-05, + "loss": 0.7501, + "step": 455 + }, + { + "epoch": 0.19, + "learning_rate": 1.8747195716604675e-05, + "loss": 0.6257, + "step": 456 + }, + { + "epoch": 0.19, + "learning_rate": 1.8740769377780893e-05, + "loss": 0.821, + "step": 457 + }, + { + "epoch": 0.19, + "learning_rate": 1.873432770620995e-05, + "loss": 0.6851, + "step": 458 + }, + { + "epoch": 0.19, + "learning_rate": 1.8727870713191593e-05, + "loss": 0.673, + "step": 459 + }, + { + "epoch": 0.19, + "learning_rate": 1.872139841005246e-05, + "loss": 0.74, + "step": 460 + }, + { + "epoch": 0.19, + "learning_rate": 1.8714910808146024e-05, + "loss": 0.807, + "step": 461 + }, + { + "epoch": 0.19, + "learning_rate": 1.8708407918852608e-05, + "loss": 0.6937, + "step": 462 + }, + { + "epoch": 0.19, + "learning_rate": 1.8701889753579356e-05, + "loss": 0.6517, + "step": 463 + }, + { + "epoch": 0.19, + "learning_rate": 1.8695356323760197e-05, + "loss": 0.7829, + "step": 464 + }, + { + "epoch": 0.19, + "learning_rate": 1.868880764085584e-05, + "loss": 0.7234, + "step": 465 + }, + { + "epoch": 0.19, + "learning_rate": 1.8682243716353754e-05, + "loss": 0.7447, + "step": 466 + }, + { + "epoch": 0.19, + "learning_rate": 1.8675664561768144e-05, + "loss": 0.6787, + "step": 467 + }, + { + "epoch": 0.19, + "learning_rate": 1.8669070188639924e-05, + "loss": 0.6403, + "step": 468 + }, + { + "epoch": 0.19, + "learning_rate": 1.866246060853672e-05, + "loss": 0.6881, + "step": 469 + }, + { + "epoch": 0.19, + "learning_rate": 1.8655835833052808e-05, + "loss": 0.7213, + "step": 470 + }, + { + "epoch": 0.19, + "learning_rate": 1.8649195873809143e-05, + "loss": 0.7371, + "step": 471 + }, + { + "epoch": 0.19, + "learning_rate": 1.8642540742453302e-05, + "loss": 0.6393, + "step": 472 + }, + { + "epoch": 0.19, + "learning_rate": 1.863587045065949e-05, + "loss": 0.5962, + "step": 473 + }, + { + "epoch": 0.19, + "learning_rate": 1.8629185010128478e-05, + "loss": 0.6924, + "step": 474 + }, + { + "epoch": 0.19, + "learning_rate": 1.862248443258764e-05, + "loss": 0.618, + "step": 475 + }, + { + "epoch": 0.19, + "learning_rate": 1.8615768729790893e-05, + "loss": 0.6643, + "step": 476 + }, + { + "epoch": 0.2, + "learning_rate": 1.8609037913518676e-05, + "loss": 0.7445, + "step": 477 + }, + { + "epoch": 0.2, + "learning_rate": 1.8602291995577957e-05, + "loss": 0.7095, + "step": 478 + }, + { + "epoch": 0.2, + "learning_rate": 1.8595530987802177e-05, + "loss": 0.6985, + "step": 479 + }, + { + "epoch": 0.2, + "learning_rate": 1.8588754902051262e-05, + "loss": 0.7014, + "step": 480 + }, + { + "epoch": 0.2, + "learning_rate": 1.8581963750211577e-05, + "loss": 0.7189, + "step": 481 + }, + { + "epoch": 0.2, + "learning_rate": 1.857515754419592e-05, + "loss": 0.719, + "step": 482 + }, + { + "epoch": 0.2, + "learning_rate": 1.8568336295943498e-05, + "loss": 0.6508, + "step": 483 + }, + { + "epoch": 0.2, + "learning_rate": 1.8561500017419902e-05, + "loss": 0.7039, + "step": 484 + }, + { + "epoch": 0.2, + "learning_rate": 1.8554648720617086e-05, + "loss": 0.7891, + "step": 485 + }, + { + "epoch": 0.2, + "learning_rate": 1.8547782417553355e-05, + "loss": 0.7368, + "step": 486 + }, + { + "epoch": 0.2, + "learning_rate": 1.8540901120273332e-05, + "loss": 0.6893, + "step": 487 + }, + { + "epoch": 0.2, + "learning_rate": 1.8534004840847943e-05, + "loss": 0.6616, + "step": 488 + }, + { + "epoch": 0.2, + "learning_rate": 1.8527093591374397e-05, + "loss": 0.7322, + "step": 489 + }, + { + "epoch": 0.2, + "learning_rate": 1.8520167383976168e-05, + "loss": 0.749, + "step": 490 + }, + { + "epoch": 0.2, + "learning_rate": 1.8513226230802958e-05, + "loss": 0.6647, + "step": 491 + }, + { + "epoch": 0.2, + "learning_rate": 1.850627014403069e-05, + "loss": 0.6962, + "step": 492 + }, + { + "epoch": 0.2, + "learning_rate": 1.8499299135861488e-05, + "loss": 0.6822, + "step": 493 + }, + { + "epoch": 0.2, + "learning_rate": 1.849231321852364e-05, + "loss": 0.6693, + "step": 494 + }, + { + "epoch": 0.2, + "learning_rate": 1.8485312404271604e-05, + "loss": 0.6608, + "step": 495 + }, + { + "epoch": 0.2, + "learning_rate": 1.8478296705385953e-05, + "loss": 0.6914, + "step": 496 + }, + { + "epoch": 0.2, + "learning_rate": 1.8471266134173377e-05, + "loss": 0.6816, + "step": 497 + }, + { + "epoch": 0.2, + "learning_rate": 1.8464220702966656e-05, + "loss": 0.7194, + "step": 498 + }, + { + "epoch": 0.2, + "learning_rate": 1.8457160424124637e-05, + "loss": 0.6852, + "step": 499 + }, + { + "epoch": 0.2, + "learning_rate": 1.8450085310032206e-05, + "loss": 0.6765, + "step": 500 + }, + { + "epoch": 0.2, + "learning_rate": 1.8442995373100282e-05, + "loss": 0.751, + "step": 501 + }, + { + "epoch": 0.21, + "learning_rate": 1.8435890625765776e-05, + "loss": 0.7049, + "step": 502 + }, + { + "epoch": 0.21, + "learning_rate": 1.8428771080491582e-05, + "loss": 0.6654, + "step": 503 + }, + { + "epoch": 0.21, + "learning_rate": 1.8421636749766563e-05, + "loss": 0.7253, + "step": 504 + }, + { + "epoch": 0.21, + "learning_rate": 1.8414487646105496e-05, + "loss": 0.7771, + "step": 505 + }, + { + "epoch": 0.21, + "learning_rate": 1.8407323782049093e-05, + "loss": 0.7697, + "step": 506 + }, + { + "epoch": 0.21, + "learning_rate": 1.840014517016395e-05, + "loss": 0.7785, + "step": 507 + }, + { + "epoch": 0.21, + "learning_rate": 1.8392951823042525e-05, + "loss": 0.6777, + "step": 508 + }, + { + "epoch": 0.21, + "learning_rate": 1.8385743753303144e-05, + "loss": 0.6817, + "step": 509 + }, + { + "epoch": 0.21, + "learning_rate": 1.8378520973589937e-05, + "loss": 0.6524, + "step": 510 + }, + { + "epoch": 0.21, + "learning_rate": 1.837128349657285e-05, + "loss": 0.663, + "step": 511 + }, + { + "epoch": 0.21, + "learning_rate": 1.8364031334947612e-05, + "loss": 0.5611, + "step": 512 + }, + { + "epoch": 0.21, + "learning_rate": 1.8356764501435704e-05, + "loss": 0.6298, + "step": 513 + }, + { + "epoch": 0.21, + "learning_rate": 1.8349483008784346e-05, + "loss": 0.6959, + "step": 514 + }, + { + "epoch": 0.21, + "learning_rate": 1.8342186869766475e-05, + "loss": 0.6743, + "step": 515 + }, + { + "epoch": 0.21, + "learning_rate": 1.833487609718072e-05, + "loss": 0.7166, + "step": 516 + }, + { + "epoch": 0.21, + "learning_rate": 1.832755070385138e-05, + "loss": 0.7456, + "step": 517 + }, + { + "epoch": 0.21, + "learning_rate": 1.8320210702628397e-05, + "loss": 0.6953, + "step": 518 + }, + { + "epoch": 0.21, + "learning_rate": 1.8312856106387343e-05, + "loss": 0.7597, + "step": 519 + }, + { + "epoch": 0.21, + "learning_rate": 1.8305486928029383e-05, + "loss": 0.7405, + "step": 520 + }, + { + "epoch": 0.21, + "learning_rate": 1.8298103180481276e-05, + "loss": 0.6394, + "step": 521 + }, + { + "epoch": 0.21, + "learning_rate": 1.8290704876695325e-05, + "loss": 0.6471, + "step": 522 + }, + { + "epoch": 0.21, + "learning_rate": 1.828329202964937e-05, + "loss": 0.7252, + "step": 523 + }, + { + "epoch": 0.21, + "learning_rate": 1.8275864652346772e-05, + "loss": 0.6842, + "step": 524 + }, + { + "epoch": 0.21, + "learning_rate": 1.8268422757816366e-05, + "loss": 0.673, + "step": 525 + }, + { + "epoch": 0.22, + "learning_rate": 1.826096635911246e-05, + "loss": 0.6223, + "step": 526 + }, + { + "epoch": 0.22, + "learning_rate": 1.8253495469314803e-05, + "loss": 0.7223, + "step": 527 + }, + { + "epoch": 0.22, + "learning_rate": 1.8246010101528566e-05, + "loss": 0.7398, + "step": 528 + }, + { + "epoch": 0.22, + "learning_rate": 1.8238510268884316e-05, + "loss": 0.7027, + "step": 529 + }, + { + "epoch": 0.22, + "learning_rate": 1.823099598453799e-05, + "loss": 0.7069, + "step": 530 + }, + { + "epoch": 0.22, + "learning_rate": 1.8223467261670885e-05, + "loss": 0.6365, + "step": 531 + }, + { + "epoch": 0.22, + "learning_rate": 1.8215924113489613e-05, + "loss": 0.6707, + "step": 532 + }, + { + "epoch": 0.22, + "learning_rate": 1.8208366553226095e-05, + "loss": 0.7026, + "step": 533 + }, + { + "epoch": 0.22, + "learning_rate": 1.820079459413754e-05, + "loss": 0.6887, + "step": 534 + }, + { + "epoch": 0.22, + "learning_rate": 1.8193208249506408e-05, + "loss": 0.638, + "step": 535 + }, + { + "epoch": 0.22, + "learning_rate": 1.8185607532640396e-05, + "loss": 0.638, + "step": 536 + }, + { + "epoch": 0.22, + "learning_rate": 1.817799245687241e-05, + "loss": 0.7042, + "step": 537 + }, + { + "epoch": 0.22, + "learning_rate": 1.8170363035560544e-05, + "loss": 0.6734, + "step": 538 + }, + { + "epoch": 0.22, + "learning_rate": 1.8162719282088064e-05, + "loss": 0.6363, + "step": 539 + }, + { + "epoch": 0.22, + "learning_rate": 1.8155061209863368e-05, + "loss": 0.6991, + "step": 540 + }, + { + "epoch": 0.22, + "learning_rate": 1.814738883231997e-05, + "loss": 0.7348, + "step": 541 + }, + { + "epoch": 0.22, + "learning_rate": 1.8139702162916485e-05, + "loss": 0.8031, + "step": 542 + }, + { + "epoch": 0.22, + "learning_rate": 1.8132001215136595e-05, + "loss": 0.6623, + "step": 543 + }, + { + "epoch": 0.22, + "learning_rate": 1.8124286002489034e-05, + "loss": 0.6902, + "step": 544 + }, + { + "epoch": 0.22, + "learning_rate": 1.8116556538507547e-05, + "loss": 0.6894, + "step": 545 + }, + { + "epoch": 0.22, + "learning_rate": 1.8108812836750887e-05, + "loss": 0.6904, + "step": 546 + }, + { + "epoch": 0.22, + "learning_rate": 1.810105491080278e-05, + "loss": 0.6872, + "step": 547 + }, + { + "epoch": 0.22, + "learning_rate": 1.8093282774271908e-05, + "loss": 0.6051, + "step": 548 + }, + { + "epoch": 0.22, + "learning_rate": 1.8085496440791874e-05, + "loss": 0.6974, + "step": 549 + }, + { + "epoch": 0.22, + "learning_rate": 1.807769592402119e-05, + "loss": 0.6279, + "step": 550 + }, + { + "epoch": 0.23, + "learning_rate": 1.806988123764324e-05, + "loss": 0.6083, + "step": 551 + }, + { + "epoch": 0.23, + "learning_rate": 1.8062052395366275e-05, + "loss": 0.7348, + "step": 552 + }, + { + "epoch": 0.23, + "learning_rate": 1.805420941092337e-05, + "loss": 0.6704, + "step": 553 + }, + { + "epoch": 0.23, + "learning_rate": 1.8046352298072408e-05, + "loss": 0.7317, + "step": 554 + }, + { + "epoch": 0.23, + "learning_rate": 1.8038481070596057e-05, + "loss": 0.6518, + "step": 555 + }, + { + "epoch": 0.23, + "learning_rate": 1.803059574230175e-05, + "loss": 0.6152, + "step": 556 + }, + { + "epoch": 0.23, + "learning_rate": 1.8022696327021645e-05, + "loss": 0.6646, + "step": 557 + }, + { + "epoch": 0.23, + "learning_rate": 1.8014782838612616e-05, + "loss": 0.6791, + "step": 558 + }, + { + "epoch": 0.23, + "learning_rate": 1.8006855290956226e-05, + "loss": 0.6175, + "step": 559 + }, + { + "epoch": 0.23, + "learning_rate": 1.7998913697958693e-05, + "loss": 0.6689, + "step": 560 + }, + { + "epoch": 0.23, + "learning_rate": 1.7990958073550882e-05, + "loss": 0.6883, + "step": 561 + }, + { + "epoch": 0.23, + "learning_rate": 1.7982988431688266e-05, + "loss": 0.726, + "step": 562 + }, + { + "epoch": 0.23, + "learning_rate": 1.79750047863509e-05, + "loss": 0.7097, + "step": 563 + }, + { + "epoch": 0.23, + "learning_rate": 1.7967007151543425e-05, + "loss": 0.6863, + "step": 564 + }, + { + "epoch": 0.23, + "learning_rate": 1.7958995541294997e-05, + "loss": 0.6354, + "step": 565 + }, + { + "epoch": 0.23, + "learning_rate": 1.7950969969659303e-05, + "loss": 0.5956, + "step": 566 + }, + { + "epoch": 0.23, + "learning_rate": 1.7942930450714515e-05, + "loss": 0.6322, + "step": 567 + }, + { + "epoch": 0.23, + "learning_rate": 1.7934876998563263e-05, + "loss": 0.6341, + "step": 568 + }, + { + "epoch": 0.23, + "learning_rate": 1.7926809627332642e-05, + "loss": 0.6763, + "step": 569 + }, + { + "epoch": 0.23, + "learning_rate": 1.7918728351174136e-05, + "loss": 0.6754, + "step": 570 + }, + { + "epoch": 0.23, + "learning_rate": 1.7910633184263643e-05, + "loss": 0.6639, + "step": 571 + }, + { + "epoch": 0.23, + "learning_rate": 1.790252414080141e-05, + "loss": 0.7094, + "step": 572 + }, + { + "epoch": 0.23, + "learning_rate": 1.7894401235012028e-05, + "loss": 0.7198, + "step": 573 + }, + { + "epoch": 0.23, + "learning_rate": 1.788626448114442e-05, + "loss": 0.6438, + "step": 574 + }, + { + "epoch": 0.24, + "learning_rate": 1.7878113893471786e-05, + "loss": 0.6869, + "step": 575 + }, + { + "epoch": 0.24, + "learning_rate": 1.7869949486291604e-05, + "loss": 0.668, + "step": 576 + }, + { + "epoch": 0.24, + "learning_rate": 1.7861771273925576e-05, + "loss": 0.7005, + "step": 577 + }, + { + "epoch": 0.24, + "learning_rate": 1.7853579270719635e-05, + "loss": 0.7021, + "step": 578 + }, + { + "epoch": 0.24, + "learning_rate": 1.7845373491043905e-05, + "loss": 0.6996, + "step": 579 + }, + { + "epoch": 0.24, + "learning_rate": 1.7837153949292674e-05, + "loss": 0.6826, + "step": 580 + }, + { + "epoch": 0.24, + "learning_rate": 1.7828920659884364e-05, + "loss": 0.7017, + "step": 581 + }, + { + "epoch": 0.24, + "learning_rate": 1.782067363726153e-05, + "loss": 0.7321, + "step": 582 + }, + { + "epoch": 0.24, + "learning_rate": 1.7812412895890792e-05, + "loss": 0.6876, + "step": 583 + }, + { + "epoch": 0.24, + "learning_rate": 1.7804138450262862e-05, + "loss": 0.6382, + "step": 584 + }, + { + "epoch": 0.24, + "learning_rate": 1.779585031489247e-05, + "loss": 0.6685, + "step": 585 + }, + { + "epoch": 0.24, + "learning_rate": 1.7787548504318372e-05, + "loss": 0.6965, + "step": 586 + }, + { + "epoch": 0.24, + "learning_rate": 1.7779233033103306e-05, + "loss": 0.6898, + "step": 587 + }, + { + "epoch": 0.24, + "learning_rate": 1.7770903915833986e-05, + "loss": 0.7186, + "step": 588 + }, + { + "epoch": 0.24, + "learning_rate": 1.7762561167121042e-05, + "loss": 0.5961, + "step": 589 + }, + { + "epoch": 0.24, + "learning_rate": 1.775420480159903e-05, + "loss": 0.706, + "step": 590 + }, + { + "epoch": 0.24, + "learning_rate": 1.7745834833926395e-05, + "loss": 0.6752, + "step": 591 + }, + { + "epoch": 0.24, + "learning_rate": 1.7737451278785435e-05, + "loss": 0.5933, + "step": 592 + }, + { + "epoch": 0.24, + "learning_rate": 1.772905415088228e-05, + "loss": 0.6891, + "step": 593 + }, + { + "epoch": 0.24, + "learning_rate": 1.772064346494688e-05, + "loss": 0.7117, + "step": 594 + }, + { + "epoch": 0.24, + "learning_rate": 1.7712219235732954e-05, + "loss": 0.6617, + "step": 595 + }, + { + "epoch": 0.24, + "learning_rate": 1.7703781478017995e-05, + "loss": 0.6343, + "step": 596 + }, + { + "epoch": 0.24, + "learning_rate": 1.769533020660321e-05, + "loss": 0.5853, + "step": 597 + }, + { + "epoch": 0.24, + "learning_rate": 1.768686543631352e-05, + "loss": 0.6899, + "step": 598 + }, + { + "epoch": 0.24, + "learning_rate": 1.767838718199753e-05, + "loss": 0.6233, + "step": 599 + }, + { + "epoch": 0.25, + "learning_rate": 1.7669895458527487e-05, + "loss": 0.7243, + "step": 600 + }, + { + "epoch": 0.25, + "learning_rate": 1.766139028079927e-05, + "loss": 0.6416, + "step": 601 + }, + { + "epoch": 0.25, + "learning_rate": 1.765287166373237e-05, + "loss": 0.6862, + "step": 602 + }, + { + "epoch": 0.25, + "learning_rate": 1.7644339622269827e-05, + "loss": 0.6542, + "step": 603 + }, + { + "epoch": 0.25, + "learning_rate": 1.7635794171378257e-05, + "loss": 0.7114, + "step": 604 + }, + { + "epoch": 0.25, + "learning_rate": 1.762723532604778e-05, + "loss": 0.8002, + "step": 605 + }, + { + "epoch": 0.25, + "learning_rate": 1.761866310129202e-05, + "loss": 0.6922, + "step": 606 + }, + { + "epoch": 0.25, + "learning_rate": 1.7610077512148073e-05, + "loss": 0.7517, + "step": 607 + }, + { + "epoch": 0.25, + "learning_rate": 1.760147857367647e-05, + "loss": 0.6715, + "step": 608 + }, + { + "epoch": 0.25, + "learning_rate": 1.7592866300961163e-05, + "loss": 0.6672, + "step": 609 + }, + { + "epoch": 0.25, + "learning_rate": 1.7584240709109498e-05, + "loss": 0.695, + "step": 610 + }, + { + "epoch": 0.25, + "learning_rate": 1.757560181325218e-05, + "loss": 0.6148, + "step": 611 + }, + { + "epoch": 0.25, + "learning_rate": 1.7566949628543252e-05, + "loss": 0.6443, + "step": 612 + }, + { + "epoch": 0.25, + "learning_rate": 1.7558284170160073e-05, + "loss": 0.7007, + "step": 613 + }, + { + "epoch": 0.25, + "learning_rate": 1.754960545330328e-05, + "loss": 0.6778, + "step": 614 + }, + { + "epoch": 0.25, + "learning_rate": 1.754091349319677e-05, + "loss": 0.6917, + "step": 615 + }, + { + "epoch": 0.25, + "learning_rate": 1.753220830508767e-05, + "loss": 0.656, + "step": 616 + }, + { + "epoch": 0.25, + "learning_rate": 1.7523489904246312e-05, + "loss": 0.71, + "step": 617 + }, + { + "epoch": 0.25, + "learning_rate": 1.7514758305966206e-05, + "loss": 0.668, + "step": 618 + }, + { + "epoch": 0.25, + "learning_rate": 1.7506013525564012e-05, + "loss": 0.6859, + "step": 619 + }, + { + "epoch": 0.25, + "learning_rate": 1.7497255578379514e-05, + "loss": 0.7139, + "step": 620 + }, + { + "epoch": 0.25, + "learning_rate": 1.7488484479775585e-05, + "loss": 0.6734, + "step": 621 + }, + { + "epoch": 0.25, + "learning_rate": 1.7479700245138184e-05, + "loss": 0.6442, + "step": 622 + }, + { + "epoch": 0.25, + "learning_rate": 1.7470902889876295e-05, + "loss": 0.6452, + "step": 623 + }, + { + "epoch": 0.26, + "learning_rate": 1.746209242942193e-05, + "loss": 0.6672, + "step": 624 + }, + { + "epoch": 0.26, + "learning_rate": 1.745326887923009e-05, + "loss": 0.6883, + "step": 625 + }, + { + "epoch": 0.26, + "learning_rate": 1.7444432254778725e-05, + "loss": 0.7069, + "step": 626 + }, + { + "epoch": 0.26, + "learning_rate": 1.7435582571568736e-05, + "loss": 0.7285, + "step": 627 + }, + { + "epoch": 0.26, + "learning_rate": 1.7426719845123914e-05, + "loss": 0.6627, + "step": 628 + }, + { + "epoch": 0.26, + "learning_rate": 1.7417844090990947e-05, + "loss": 0.667, + "step": 629 + }, + { + "epoch": 0.26, + "learning_rate": 1.7408955324739363e-05, + "loss": 0.6565, + "step": 630 + }, + { + "epoch": 0.26, + "learning_rate": 1.7400053561961523e-05, + "loss": 0.7884, + "step": 631 + }, + { + "epoch": 0.26, + "learning_rate": 1.7391138818272578e-05, + "loss": 0.6268, + "step": 632 + }, + { + "epoch": 0.26, + "learning_rate": 1.738221110931046e-05, + "loss": 0.7447, + "step": 633 + }, + { + "epoch": 0.26, + "learning_rate": 1.737327045073584e-05, + "loss": 0.7412, + "step": 634 + }, + { + "epoch": 0.26, + "learning_rate": 1.73643168582321e-05, + "loss": 0.6906, + "step": 635 + }, + { + "epoch": 0.26, + "learning_rate": 1.7355350347505312e-05, + "loss": 0.6412, + "step": 636 + }, + { + "epoch": 0.26, + "learning_rate": 1.7346370934284214e-05, + "loss": 0.715, + "step": 637 + }, + { + "epoch": 0.26, + "learning_rate": 1.7337378634320173e-05, + "loss": 0.7714, + "step": 638 + }, + { + "epoch": 0.26, + "learning_rate": 1.7328373463387166e-05, + "loss": 0.6954, + "step": 639 + }, + { + "epoch": 0.26, + "learning_rate": 1.7319355437281737e-05, + "loss": 0.6106, + "step": 640 + }, + { + "epoch": 0.26, + "learning_rate": 1.731032457182299e-05, + "loss": 0.6845, + "step": 641 + }, + { + "epoch": 0.26, + "learning_rate": 1.730128088285255e-05, + "loss": 0.7755, + "step": 642 + }, + { + "epoch": 0.26, + "learning_rate": 1.7292224386234534e-05, + "loss": 0.5845, + "step": 643 + }, + { + "epoch": 0.26, + "learning_rate": 1.7283155097855525e-05, + "loss": 0.674, + "step": 644 + }, + { + "epoch": 0.26, + "learning_rate": 1.727407303362455e-05, + "loss": 0.6503, + "step": 645 + }, + { + "epoch": 0.26, + "learning_rate": 1.7264978209473035e-05, + "loss": 0.6771, + "step": 646 + }, + { + "epoch": 0.26, + "learning_rate": 1.725587064135481e-05, + "loss": 0.6127, + "step": 647 + }, + { + "epoch": 0.26, + "learning_rate": 1.724675034524604e-05, + "loss": 0.7146, + "step": 648 + }, + { + "epoch": 0.27, + "learning_rate": 1.7237617337145224e-05, + "loss": 0.6379, + "step": 649 + }, + { + "epoch": 0.27, + "learning_rate": 1.7228471633073164e-05, + "loss": 0.6424, + "step": 650 + }, + { + "epoch": 0.27, + "learning_rate": 1.721931324907293e-05, + "loss": 0.6366, + "step": 651 + }, + { + "epoch": 0.27, + "learning_rate": 1.7210142201209825e-05, + "loss": 0.712, + "step": 652 + }, + { + "epoch": 0.27, + "learning_rate": 1.7200958505571386e-05, + "loss": 0.6458, + "step": 653 + }, + { + "epoch": 0.27, + "learning_rate": 1.719176217826732e-05, + "loss": 0.67, + "step": 654 + }, + { + "epoch": 0.27, + "learning_rate": 1.71825532354295e-05, + "loss": 0.6944, + "step": 655 + }, + { + "epoch": 0.27, + "learning_rate": 1.7173331693211922e-05, + "loss": 0.7089, + "step": 656 + }, + { + "epoch": 0.27, + "learning_rate": 1.7164097567790693e-05, + "loss": 0.6424, + "step": 657 + }, + { + "epoch": 0.27, + "learning_rate": 1.7154850875363987e-05, + "loss": 0.6755, + "step": 658 + }, + { + "epoch": 0.27, + "learning_rate": 1.7145591632152025e-05, + "loss": 0.6924, + "step": 659 + }, + { + "epoch": 0.27, + "learning_rate": 1.7136319854397037e-05, + "loss": 0.6625, + "step": 660 + }, + { + "epoch": 0.27, + "learning_rate": 1.712703555836325e-05, + "loss": 0.6687, + "step": 661 + }, + { + "epoch": 0.27, + "learning_rate": 1.7117738760336846e-05, + "loss": 0.6443, + "step": 662 + }, + { + "epoch": 0.27, + "learning_rate": 1.7108429476625937e-05, + "loss": 0.7328, + "step": 663 + }, + { + "epoch": 0.27, + "learning_rate": 1.7099107723560537e-05, + "loss": 0.5872, + "step": 664 + }, + { + "epoch": 0.27, + "learning_rate": 1.708977351749254e-05, + "loss": 0.6595, + "step": 665 + }, + { + "epoch": 0.27, + "learning_rate": 1.7080426874795666e-05, + "loss": 0.6828, + "step": 666 + }, + { + "epoch": 0.27, + "learning_rate": 1.7071067811865477e-05, + "loss": 0.6646, + "step": 667 + }, + { + "epoch": 0.27, + "learning_rate": 1.7061696345119304e-05, + "loss": 0.7373, + "step": 668 + }, + { + "epoch": 0.27, + "learning_rate": 1.7052312490996237e-05, + "loss": 0.7879, + "step": 669 + }, + { + "epoch": 0.27, + "learning_rate": 1.7042916265957107e-05, + "loss": 0.6031, + "step": 670 + }, + { + "epoch": 0.27, + "learning_rate": 1.703350768648443e-05, + "loss": 0.6827, + "step": 671 + }, + { + "epoch": 0.27, + "learning_rate": 1.702408676908241e-05, + "loss": 0.6262, + "step": 672 + }, + { + "epoch": 0.28, + "learning_rate": 1.701465353027688e-05, + "loss": 0.7102, + "step": 673 + }, + { + "epoch": 0.28, + "learning_rate": 1.7005207986615293e-05, + "loss": 0.6267, + "step": 674 + }, + { + "epoch": 0.28, + "learning_rate": 1.699575015466669e-05, + "loss": 0.7388, + "step": 675 + }, + { + "epoch": 0.28, + "learning_rate": 1.698628005102166e-05, + "loss": 0.7303, + "step": 676 + }, + { + "epoch": 0.28, + "learning_rate": 1.6976797692292325e-05, + "loss": 0.6721, + "step": 677 + }, + { + "epoch": 0.28, + "learning_rate": 1.6967303095112297e-05, + "loss": 0.6575, + "step": 678 + }, + { + "epoch": 0.28, + "learning_rate": 1.695779627613667e-05, + "loss": 0.7179, + "step": 679 + }, + { + "epoch": 0.28, + "learning_rate": 1.6948277252041957e-05, + "loss": 0.6756, + "step": 680 + }, + { + "epoch": 0.28, + "learning_rate": 1.69387460395261e-05, + "loss": 0.6721, + "step": 681 + }, + { + "epoch": 0.28, + "learning_rate": 1.6929202655308414e-05, + "loss": 0.6859, + "step": 682 + }, + { + "epoch": 0.28, + "learning_rate": 1.691964711612956e-05, + "loss": 0.6197, + "step": 683 + }, + { + "epoch": 0.28, + "learning_rate": 1.691007943875153e-05, + "loss": 0.6698, + "step": 684 + }, + { + "epoch": 0.28, + "learning_rate": 1.6900499639957596e-05, + "loss": 0.6647, + "step": 685 + }, + { + "epoch": 0.28, + "learning_rate": 1.689090773655231e-05, + "loss": 0.7312, + "step": 686 + }, + { + "epoch": 0.28, + "learning_rate": 1.688130374536144e-05, + "loss": 0.6452, + "step": 687 + }, + { + "epoch": 0.28, + "learning_rate": 1.6871687683231975e-05, + "loss": 0.6965, + "step": 688 + }, + { + "epoch": 0.28, + "learning_rate": 1.686205956703206e-05, + "loss": 0.6312, + "step": 689 + }, + { + "epoch": 0.28, + "learning_rate": 1.6852419413651003e-05, + "loss": 0.6391, + "step": 690 + }, + { + "epoch": 0.28, + "learning_rate": 1.6842767239999214e-05, + "loss": 0.7419, + "step": 691 + }, + { + "epoch": 0.28, + "learning_rate": 1.6833103063008194e-05, + "loss": 0.6088, + "step": 692 + }, + { + "epoch": 0.28, + "learning_rate": 1.6823426899630498e-05, + "loss": 0.625, + "step": 693 + }, + { + "epoch": 0.28, + "learning_rate": 1.681373876683971e-05, + "loss": 0.7497, + "step": 694 + }, + { + "epoch": 0.28, + "learning_rate": 1.680403868163041e-05, + "loss": 0.7308, + "step": 695 + }, + { + "epoch": 0.28, + "learning_rate": 1.6794326661018136e-05, + "loss": 0.698, + "step": 696 + }, + { + "epoch": 0.28, + "learning_rate": 1.6784602722039376e-05, + "loss": 0.6865, + "step": 697 + }, + { + "epoch": 0.29, + "learning_rate": 1.6774866881751518e-05, + "loss": 0.7627, + "step": 698 + }, + { + "epoch": 0.29, + "learning_rate": 1.6765119157232824e-05, + "loss": 0.6541, + "step": 699 + }, + { + "epoch": 0.29, + "learning_rate": 1.6755359565582408e-05, + "loss": 0.6987, + "step": 700 + }, + { + "epoch": 0.29, + "learning_rate": 1.6745588123920197e-05, + "loss": 0.6267, + "step": 701 + }, + { + "epoch": 0.29, + "learning_rate": 1.6735804849386914e-05, + "loss": 0.6864, + "step": 702 + }, + { + "epoch": 0.29, + "learning_rate": 1.6726009759144023e-05, + "loss": 0.6408, + "step": 703 + }, + { + "epoch": 0.29, + "learning_rate": 1.6716202870373726e-05, + "loss": 0.7263, + "step": 704 + }, + { + "epoch": 0.29, + "learning_rate": 1.670638420027892e-05, + "loss": 0.6588, + "step": 705 + }, + { + "epoch": 0.29, + "learning_rate": 1.6696553766083167e-05, + "loss": 0.7192, + "step": 706 + }, + { + "epoch": 0.29, + "learning_rate": 1.668671158503067e-05, + "loss": 0.7439, + "step": 707 + }, + { + "epoch": 0.29, + "learning_rate": 1.667685767438622e-05, + "loss": 0.6251, + "step": 708 + }, + { + "epoch": 0.29, + "learning_rate": 1.6666992051435215e-05, + "loss": 0.5684, + "step": 709 + }, + { + "epoch": 0.29, + "learning_rate": 1.6657114733483564e-05, + "loss": 0.7041, + "step": 710 + }, + { + "epoch": 0.29, + "learning_rate": 1.6647225737857716e-05, + "loss": 0.6618, + "step": 711 + }, + { + "epoch": 0.29, + "learning_rate": 1.6637325081904595e-05, + "loss": 0.6626, + "step": 712 + }, + { + "epoch": 0.29, + "learning_rate": 1.662741278299158e-05, + "loss": 0.7749, + "step": 713 + }, + { + "epoch": 0.29, + "learning_rate": 1.6617488858506478e-05, + "loss": 0.6481, + "step": 714 + }, + { + "epoch": 0.29, + "learning_rate": 1.6607553325857473e-05, + "loss": 0.6862, + "step": 715 + }, + { + "epoch": 0.29, + "learning_rate": 1.659760620247313e-05, + "loss": 0.5548, + "step": 716 + }, + { + "epoch": 0.29, + "learning_rate": 1.6587647505802342e-05, + "loss": 0.6451, + "step": 717 + }, + { + "epoch": 0.29, + "learning_rate": 1.65776772533143e-05, + "loss": 0.6363, + "step": 718 + }, + { + "epoch": 0.29, + "learning_rate": 1.6567695462498465e-05, + "loss": 0.631, + "step": 719 + }, + { + "epoch": 0.29, + "learning_rate": 1.6557702150864538e-05, + "loss": 0.6345, + "step": 720 + }, + { + "epoch": 0.29, + "learning_rate": 1.6547697335942438e-05, + "loss": 0.71, + "step": 721 + }, + { + "epoch": 0.3, + "learning_rate": 1.6537681035282247e-05, + "loss": 0.6811, + "step": 722 + }, + { + "epoch": 0.3, + "learning_rate": 1.652765326645421e-05, + "loss": 0.6662, + "step": 723 + }, + { + "epoch": 0.3, + "learning_rate": 1.6517614047048683e-05, + "loss": 0.651, + "step": 724 + }, + { + "epoch": 0.3, + "learning_rate": 1.6507563394676106e-05, + "loss": 0.6302, + "step": 725 + }, + { + "epoch": 0.3, + "learning_rate": 1.6497501326966974e-05, + "loss": 0.7978, + "step": 726 + }, + { + "epoch": 0.3, + "learning_rate": 1.6487427861571815e-05, + "loss": 0.6405, + "step": 727 + }, + { + "epoch": 0.3, + "learning_rate": 1.6477343016161138e-05, + "loss": 0.645, + "step": 728 + }, + { + "epoch": 0.3, + "learning_rate": 1.646724680842543e-05, + "loss": 0.6541, + "step": 729 + }, + { + "epoch": 0.3, + "learning_rate": 1.6457139256075084e-05, + "loss": 0.6866, + "step": 730 + }, + { + "epoch": 0.3, + "learning_rate": 1.6447020376840423e-05, + "loss": 0.6497, + "step": 731 + }, + { + "epoch": 0.3, + "learning_rate": 1.6436890188471622e-05, + "loss": 0.6977, + "step": 732 + }, + { + "epoch": 0.3, + "learning_rate": 1.6426748708738696e-05, + "loss": 0.6914, + "step": 733 + }, + { + "epoch": 0.3, + "learning_rate": 1.6416595955431468e-05, + "loss": 0.6886, + "step": 734 + }, + { + "epoch": 0.3, + "learning_rate": 1.640643194635954e-05, + "loss": 0.671, + "step": 735 + }, + { + "epoch": 0.3, + "learning_rate": 1.6396256699352252e-05, + "loss": 0.7131, + "step": 736 + }, + { + "epoch": 0.3, + "learning_rate": 1.6386070232258667e-05, + "loss": 0.6452, + "step": 737 + }, + { + "epoch": 0.3, + "learning_rate": 1.6375872562947516e-05, + "loss": 0.6659, + "step": 738 + }, + { + "epoch": 0.3, + "learning_rate": 1.6365663709307193e-05, + "loss": 0.6776, + "step": 739 + }, + { + "epoch": 0.3, + "learning_rate": 1.635544368924571e-05, + "loss": 0.6921, + "step": 740 + }, + { + "epoch": 0.3, + "learning_rate": 1.634521252069065e-05, + "loss": 0.7061, + "step": 741 + }, + { + "epoch": 0.3, + "learning_rate": 1.6334970221589182e-05, + "loss": 0.6026, + "step": 742 + }, + { + "epoch": 0.3, + "learning_rate": 1.632471680990797e-05, + "loss": 0.6983, + "step": 743 + }, + { + "epoch": 0.3, + "learning_rate": 1.6314452303633193e-05, + "loss": 0.6635, + "step": 744 + }, + { + "epoch": 0.3, + "learning_rate": 1.6304176720770482e-05, + "loss": 0.6981, + "step": 745 + }, + { + "epoch": 0.3, + "learning_rate": 1.6293890079344892e-05, + "loss": 0.662, + "step": 746 + }, + { + "epoch": 0.31, + "learning_rate": 1.6283592397400895e-05, + "loss": 0.6246, + "step": 747 + }, + { + "epoch": 0.31, + "learning_rate": 1.6273283693002312e-05, + "loss": 0.6972, + "step": 748 + }, + { + "epoch": 0.31, + "learning_rate": 1.6262963984232307e-05, + "loss": 0.6705, + "step": 749 + }, + { + "epoch": 0.31, + "learning_rate": 1.625263328919335e-05, + "loss": 0.6856, + "step": 750 + }, + { + "epoch": 0.31, + "learning_rate": 1.624229162600717e-05, + "loss": 0.6791, + "step": 751 + }, + { + "epoch": 0.31, + "learning_rate": 1.6231939012814758e-05, + "loss": 0.7502, + "step": 752 + }, + { + "epoch": 0.31, + "learning_rate": 1.6221575467776292e-05, + "loss": 0.7194, + "step": 753 + }, + { + "epoch": 0.31, + "learning_rate": 1.6211201009071134e-05, + "loss": 0.7124, + "step": 754 + }, + { + "epoch": 0.31, + "learning_rate": 1.6200815654897798e-05, + "loss": 0.6352, + "step": 755 + }, + { + "epoch": 0.31, + "learning_rate": 1.6190419423473897e-05, + "loss": 0.6864, + "step": 756 + }, + { + "epoch": 0.31, + "learning_rate": 1.6180012333036133e-05, + "loss": 0.687, + "step": 757 + }, + { + "epoch": 0.31, + "learning_rate": 1.6169594401840255e-05, + "loss": 0.6668, + "step": 758 + }, + { + "epoch": 0.31, + "learning_rate": 1.6159165648161026e-05, + "loss": 0.6435, + "step": 759 + }, + { + "epoch": 0.31, + "learning_rate": 1.6148726090292196e-05, + "loss": 0.5814, + "step": 760 + }, + { + "epoch": 0.31, + "learning_rate": 1.6138275746546467e-05, + "loss": 0.6166, + "step": 761 + }, + { + "epoch": 0.31, + "learning_rate": 1.6127814635255462e-05, + "loss": 0.6986, + "step": 762 + }, + { + "epoch": 0.31, + "learning_rate": 1.6117342774769687e-05, + "loss": 0.6934, + "step": 763 + }, + { + "epoch": 0.31, + "learning_rate": 1.6106860183458514e-05, + "loss": 0.7462, + "step": 764 + }, + { + "epoch": 0.31, + "learning_rate": 1.6096366879710127e-05, + "loss": 0.6056, + "step": 765 + }, + { + "epoch": 0.31, + "learning_rate": 1.608586288193151e-05, + "loss": 0.7394, + "step": 766 + }, + { + "epoch": 0.31, + "learning_rate": 1.6075348208548395e-05, + "loss": 0.683, + "step": 767 + }, + { + "epoch": 0.31, + "learning_rate": 1.6064822878005262e-05, + "loss": 0.7596, + "step": 768 + }, + { + "epoch": 0.31, + "learning_rate": 1.605428690876526e-05, + "loss": 0.6305, + "step": 769 + }, + { + "epoch": 0.31, + "learning_rate": 1.6043740319310218e-05, + "loss": 0.6892, + "step": 770 + }, + { + "epoch": 0.32, + "learning_rate": 1.6033183128140585e-05, + "loss": 0.6026, + "step": 771 + }, + { + "epoch": 0.32, + "learning_rate": 1.602261535377542e-05, + "loss": 0.7555, + "step": 772 + }, + { + "epoch": 0.32, + "learning_rate": 1.6012037014752322e-05, + "loss": 0.683, + "step": 773 + }, + { + "epoch": 0.32, + "learning_rate": 1.600144812962745e-05, + "loss": 0.6646, + "step": 774 + }, + { + "epoch": 0.32, + "learning_rate": 1.5990848716975447e-05, + "loss": 0.6627, + "step": 775 + }, + { + "epoch": 0.32, + "learning_rate": 1.5980238795389424e-05, + "loss": 0.6583, + "step": 776 + }, + { + "epoch": 0.32, + "learning_rate": 1.5969618383480926e-05, + "loss": 0.6748, + "step": 777 + }, + { + "epoch": 0.32, + "learning_rate": 1.595898749987991e-05, + "loss": 0.6598, + "step": 778 + }, + { + "epoch": 0.32, + "learning_rate": 1.5948346163234694e-05, + "loss": 0.7278, + "step": 779 + }, + { + "epoch": 0.32, + "learning_rate": 1.5937694392211923e-05, + "loss": 0.6913, + "step": 780 + }, + { + "epoch": 0.32, + "learning_rate": 1.5927032205496565e-05, + "loss": 0.6754, + "step": 781 + }, + { + "epoch": 0.32, + "learning_rate": 1.5916359621791847e-05, + "loss": 0.6236, + "step": 782 + }, + { + "epoch": 0.32, + "learning_rate": 1.5905676659819232e-05, + "loss": 0.6992, + "step": 783 + }, + { + "epoch": 0.32, + "learning_rate": 1.5894983338318396e-05, + "loss": 0.6583, + "step": 784 + }, + { + "epoch": 0.32, + "learning_rate": 1.5884279676047186e-05, + "loss": 0.6383, + "step": 785 + }, + { + "epoch": 0.32, + "learning_rate": 1.587356569178158e-05, + "loss": 0.7256, + "step": 786 + }, + { + "epoch": 0.32, + "learning_rate": 1.5862841404315675e-05, + "loss": 0.6713, + "step": 787 + }, + { + "epoch": 0.32, + "learning_rate": 1.585210683246163e-05, + "loss": 0.7189, + "step": 788 + }, + { + "epoch": 0.32, + "learning_rate": 1.5841361995049655e-05, + "loss": 0.6596, + "step": 789 + }, + { + "epoch": 0.32, + "learning_rate": 1.5830606910927956e-05, + "loss": 0.7346, + "step": 790 + }, + { + "epoch": 0.32, + "learning_rate": 1.5819841598962722e-05, + "loss": 0.8004, + "step": 791 + }, + { + "epoch": 0.32, + "learning_rate": 1.5809066078038082e-05, + "loss": 0.6389, + "step": 792 + }, + { + "epoch": 0.32, + "learning_rate": 1.5798280367056072e-05, + "loss": 0.6678, + "step": 793 + }, + { + "epoch": 0.32, + "learning_rate": 1.57874844849366e-05, + "loss": 0.6333, + "step": 794 + }, + { + "epoch": 0.33, + "learning_rate": 1.5776678450617426e-05, + "loss": 0.6448, + "step": 795 + }, + { + "epoch": 0.33, + "learning_rate": 1.5765862283054105e-05, + "loss": 0.6582, + "step": 796 + }, + { + "epoch": 0.33, + "learning_rate": 1.5755036001219974e-05, + "loss": 0.777, + "step": 797 + }, + { + "epoch": 0.33, + "learning_rate": 1.5744199624106115e-05, + "loss": 0.7066, + "step": 798 + }, + { + "epoch": 0.33, + "learning_rate": 1.5733353170721316e-05, + "loss": 0.7088, + "step": 799 + }, + { + "epoch": 0.33, + "learning_rate": 1.572249666009204e-05, + "loss": 0.7351, + "step": 800 + }, + { + "epoch": 0.33, + "learning_rate": 1.571163011126239e-05, + "loss": 0.7026, + "step": 801 + }, + { + "epoch": 0.33, + "learning_rate": 1.570075354329408e-05, + "loss": 0.6308, + "step": 802 + }, + { + "epoch": 0.33, + "learning_rate": 1.5689866975266404e-05, + "loss": 0.6274, + "step": 803 + }, + { + "epoch": 0.33, + "learning_rate": 1.5678970426276186e-05, + "loss": 0.623, + "step": 804 + }, + { + "epoch": 0.33, + "learning_rate": 1.5668063915437768e-05, + "loss": 0.6752, + "step": 805 + }, + { + "epoch": 0.33, + "learning_rate": 1.5657147461882965e-05, + "loss": 0.7502, + "step": 806 + }, + { + "epoch": 0.33, + "learning_rate": 1.5646221084761032e-05, + "loss": 0.6367, + "step": 807 + }, + { + "epoch": 0.33, + "learning_rate": 1.5635284803238632e-05, + "loss": 0.5682, + "step": 808 + }, + { + "epoch": 0.33, + "learning_rate": 1.56243386364998e-05, + "loss": 0.6257, + "step": 809 + }, + { + "epoch": 0.33, + "learning_rate": 1.5613382603745918e-05, + "loss": 0.6056, + "step": 810 + }, + { + "epoch": 0.33, + "learning_rate": 1.560241672419566e-05, + "loss": 0.6451, + "step": 811 + }, + { + "epoch": 0.33, + "learning_rate": 1.559144101708499e-05, + "loss": 0.6364, + "step": 812 + }, + { + "epoch": 0.33, + "learning_rate": 1.55804555016671e-05, + "loss": 0.6185, + "step": 813 + }, + { + "epoch": 0.33, + "learning_rate": 1.556946019721239e-05, + "loss": 0.6231, + "step": 814 + }, + { + "epoch": 0.33, + "learning_rate": 1.555845512300844e-05, + "loss": 0.6298, + "step": 815 + }, + { + "epoch": 0.33, + "learning_rate": 1.5547440298359948e-05, + "loss": 0.7487, + "step": 816 + }, + { + "epoch": 0.33, + "learning_rate": 1.5536415742588737e-05, + "loss": 0.6154, + "step": 817 + }, + { + "epoch": 0.33, + "learning_rate": 1.5525381475033692e-05, + "loss": 0.62, + "step": 818 + }, + { + "epoch": 0.33, + "learning_rate": 1.551433751505073e-05, + "loss": 0.7088, + "step": 819 + }, + { + "epoch": 0.34, + "learning_rate": 1.550328388201277e-05, + "loss": 0.7701, + "step": 820 + }, + { + "epoch": 0.34, + "learning_rate": 1.549222059530971e-05, + "loss": 0.7539, + "step": 821 + }, + { + "epoch": 0.34, + "learning_rate": 1.5481147674348366e-05, + "loss": 0.6378, + "step": 822 + }, + { + "epoch": 0.34, + "learning_rate": 1.547006513855247e-05, + "loss": 0.7523, + "step": 823 + }, + { + "epoch": 0.34, + "learning_rate": 1.545897300736261e-05, + "loss": 0.6113, + "step": 824 + }, + { + "epoch": 0.34, + "learning_rate": 1.5447871300236207e-05, + "loss": 0.6059, + "step": 825 + }, + { + "epoch": 0.34, + "learning_rate": 1.5436760036647485e-05, + "loss": 0.6869, + "step": 826 + }, + { + "epoch": 0.34, + "learning_rate": 1.5425639236087425e-05, + "loss": 0.6053, + "step": 827 + }, + { + "epoch": 0.34, + "learning_rate": 1.541450891806374e-05, + "loss": 0.6317, + "step": 828 + }, + { + "epoch": 0.34, + "learning_rate": 1.5403369102100837e-05, + "loss": 0.6418, + "step": 829 + }, + { + "epoch": 0.34, + "learning_rate": 1.539221980773979e-05, + "loss": 0.6198, + "step": 830 + }, + { + "epoch": 0.34, + "learning_rate": 1.5381061054538294e-05, + "loss": 0.7284, + "step": 831 + }, + { + "epoch": 0.34, + "learning_rate": 1.5369892862070636e-05, + "loss": 0.7141, + "step": 832 + }, + { + "epoch": 0.34, + "learning_rate": 1.5358715249927663e-05, + "loss": 0.6593, + "step": 833 + }, + { + "epoch": 0.34, + "learning_rate": 1.5347528237716742e-05, + "loss": 0.6567, + "step": 834 + }, + { + "epoch": 0.34, + "learning_rate": 1.533633184506174e-05, + "loss": 0.7172, + "step": 835 + }, + { + "epoch": 0.34, + "learning_rate": 1.5325126091602965e-05, + "loss": 0.6276, + "step": 836 + }, + { + "epoch": 0.34, + "learning_rate": 1.5313910996997156e-05, + "loss": 0.6423, + "step": 837 + }, + { + "epoch": 0.34, + "learning_rate": 1.5302686580917428e-05, + "loss": 0.7415, + "step": 838 + }, + { + "epoch": 0.34, + "learning_rate": 1.5291452863053257e-05, + "loss": 0.6518, + "step": 839 + }, + { + "epoch": 0.34, + "learning_rate": 1.528020986311043e-05, + "loss": 0.7427, + "step": 840 + }, + { + "epoch": 0.34, + "learning_rate": 1.526895760081102e-05, + "loss": 0.6607, + "step": 841 + }, + { + "epoch": 0.34, + "learning_rate": 1.525769609589335e-05, + "loss": 0.6617, + "step": 842 + }, + { + "epoch": 0.34, + "learning_rate": 1.5246425368111944e-05, + "loss": 0.7278, + "step": 843 + }, + { + "epoch": 0.35, + "learning_rate": 1.523514543723751e-05, + "loss": 0.6573, + "step": 844 + }, + { + "epoch": 0.35, + "learning_rate": 1.5223856323056909e-05, + "loss": 0.6599, + "step": 845 + }, + { + "epoch": 0.35, + "learning_rate": 1.5212558045373106e-05, + "loss": 0.6769, + "step": 846 + }, + { + "epoch": 0.35, + "learning_rate": 1.5201250624005133e-05, + "loss": 0.7313, + "step": 847 + }, + { + "epoch": 0.35, + "learning_rate": 1.5189934078788069e-05, + "loss": 0.5937, + "step": 848 + }, + { + "epoch": 0.35, + "learning_rate": 1.5178608429572996e-05, + "loss": 0.7483, + "step": 849 + }, + { + "epoch": 0.35, + "learning_rate": 1.5167273696226965e-05, + "loss": 0.6856, + "step": 850 + }, + { + "epoch": 0.35, + "learning_rate": 1.5155929898632959e-05, + "loss": 0.5891, + "step": 851 + }, + { + "epoch": 0.35, + "learning_rate": 1.5144577056689872e-05, + "loss": 0.659, + "step": 852 + }, + { + "epoch": 0.35, + "learning_rate": 1.513321519031245e-05, + "loss": 0.7016, + "step": 853 + }, + { + "epoch": 0.35, + "learning_rate": 1.512184431943128e-05, + "loss": 0.6616, + "step": 854 + }, + { + "epoch": 0.35, + "learning_rate": 1.5110464463992736e-05, + "loss": 0.71, + "step": 855 + }, + { + "epoch": 0.35, + "learning_rate": 1.5099075643958959e-05, + "loss": 0.705, + "step": 856 + }, + { + "epoch": 0.35, + "learning_rate": 1.5087677879307811e-05, + "loss": 0.6308, + "step": 857 + }, + { + "epoch": 0.35, + "learning_rate": 1.5076271190032845e-05, + "loss": 0.6952, + "step": 858 + }, + { + "epoch": 0.35, + "learning_rate": 1.5064855596143271e-05, + "loss": 0.6808, + "step": 859 + }, + { + "epoch": 0.35, + "learning_rate": 1.5053431117663922e-05, + "loss": 0.6553, + "step": 860 + }, + { + "epoch": 0.35, + "learning_rate": 1.5041997774635206e-05, + "loss": 0.6436, + "step": 861 + }, + { + "epoch": 0.35, + "learning_rate": 1.5030555587113091e-05, + "loss": 0.6478, + "step": 862 + }, + { + "epoch": 0.35, + "learning_rate": 1.5019104575169055e-05, + "loss": 0.6779, + "step": 863 + }, + { + "epoch": 0.35, + "learning_rate": 1.5007644758890059e-05, + "loss": 0.672, + "step": 864 + }, + { + "epoch": 0.35, + "learning_rate": 1.49961761583785e-05, + "loss": 0.6943, + "step": 865 + }, + { + "epoch": 0.35, + "learning_rate": 1.4984698793752193e-05, + "loss": 0.6371, + "step": 866 + }, + { + "epoch": 0.35, + "learning_rate": 1.4973212685144324e-05, + "loss": 0.6548, + "step": 867 + }, + { + "epoch": 0.35, + "learning_rate": 1.4961717852703417e-05, + "loss": 0.7141, + "step": 868 + }, + { + "epoch": 0.36, + "learning_rate": 1.4950214316593295e-05, + "loss": 0.6136, + "step": 869 + }, + { + "epoch": 0.36, + "learning_rate": 1.4938702096993057e-05, + "loss": 0.7227, + "step": 870 + }, + { + "epoch": 0.36, + "learning_rate": 1.4927181214097028e-05, + "loss": 0.6827, + "step": 871 + }, + { + "epoch": 0.36, + "learning_rate": 1.4915651688114733e-05, + "loss": 0.7099, + "step": 872 + }, + { + "epoch": 0.36, + "learning_rate": 1.4904113539270852e-05, + "loss": 0.6496, + "step": 873 + }, + { + "epoch": 0.36, + "learning_rate": 1.48925667878052e-05, + "loss": 0.6346, + "step": 874 + }, + { + "epoch": 0.36, + "learning_rate": 1.4881011453972675e-05, + "loss": 0.6683, + "step": 875 + }, + { + "epoch": 0.36, + "learning_rate": 1.486944755804324e-05, + "loss": 0.6045, + "step": 876 + }, + { + "epoch": 0.36, + "learning_rate": 1.4857875120301866e-05, + "loss": 0.6831, + "step": 877 + }, + { + "epoch": 0.36, + "learning_rate": 1.484629416104851e-05, + "loss": 0.561, + "step": 878 + }, + { + "epoch": 0.36, + "learning_rate": 1.4834704700598084e-05, + "loss": 0.6503, + "step": 879 + }, + { + "epoch": 0.36, + "learning_rate": 1.4823106759280404e-05, + "loss": 0.6729, + "step": 880 + }, + { + "epoch": 0.36, + "learning_rate": 1.4811500357440166e-05, + "loss": 0.6476, + "step": 881 + }, + { + "epoch": 0.36, + "learning_rate": 1.4799885515436912e-05, + "loss": 0.6282, + "step": 882 + }, + { + "epoch": 0.36, + "learning_rate": 1.4788262253644983e-05, + "loss": 0.6954, + "step": 883 + }, + { + "epoch": 0.36, + "learning_rate": 1.4776630592453492e-05, + "loss": 0.6332, + "step": 884 + }, + { + "epoch": 0.36, + "learning_rate": 1.4764990552266287e-05, + "loss": 0.6766, + "step": 885 + }, + { + "epoch": 0.36, + "learning_rate": 1.4753342153501913e-05, + "loss": 0.692, + "step": 886 + }, + { + "epoch": 0.36, + "learning_rate": 1.4741685416593574e-05, + "loss": 0.6567, + "step": 887 + }, + { + "epoch": 0.36, + "learning_rate": 1.4730020361989108e-05, + "loss": 0.631, + "step": 888 + }, + { + "epoch": 0.36, + "learning_rate": 1.4718347010150936e-05, + "loss": 0.6719, + "step": 889 + }, + { + "epoch": 0.36, + "learning_rate": 1.470666538155604e-05, + "loss": 0.6715, + "step": 890 + }, + { + "epoch": 0.36, + "learning_rate": 1.4694975496695918e-05, + "loss": 0.6915, + "step": 891 + }, + { + "epoch": 0.36, + "learning_rate": 1.4683277376076548e-05, + "loss": 0.7192, + "step": 892 + }, + { + "epoch": 0.37, + "learning_rate": 1.467157104021836e-05, + "loss": 0.6995, + "step": 893 + }, + { + "epoch": 0.37, + "learning_rate": 1.4659856509656194e-05, + "loss": 0.6105, + "step": 894 + }, + { + "epoch": 0.37, + "learning_rate": 1.4648133804939257e-05, + "loss": 0.6545, + "step": 895 + }, + { + "epoch": 0.37, + "learning_rate": 1.4636402946631108e-05, + "loss": 0.6753, + "step": 896 + }, + { + "epoch": 0.37, + "learning_rate": 1.4624663955309597e-05, + "loss": 0.6135, + "step": 897 + }, + { + "epoch": 0.37, + "learning_rate": 1.4612916851566851e-05, + "loss": 0.6606, + "step": 898 + }, + { + "epoch": 0.37, + "learning_rate": 1.4601161656009218e-05, + "loss": 0.569, + "step": 899 + }, + { + "epoch": 0.37, + "learning_rate": 1.4589398389257246e-05, + "loss": 0.7102, + "step": 900 + }, + { + "epoch": 0.37, + "learning_rate": 1.4577627071945642e-05, + "loss": 0.6408, + "step": 901 + }, + { + "epoch": 0.37, + "learning_rate": 1.4565847724723225e-05, + "loss": 0.6975, + "step": 902 + }, + { + "epoch": 0.37, + "learning_rate": 1.4554060368252916e-05, + "loss": 0.6743, + "step": 903 + }, + { + "epoch": 0.37, + "learning_rate": 1.454226502321167e-05, + "loss": 0.6644, + "step": 904 + }, + { + "epoch": 0.37, + "learning_rate": 1.4530461710290467e-05, + "loss": 0.6601, + "step": 905 + }, + { + "epoch": 0.37, + "learning_rate": 1.4518650450194261e-05, + "loss": 0.6432, + "step": 906 + }, + { + "epoch": 0.37, + "learning_rate": 1.4506831263641939e-05, + "loss": 0.6417, + "step": 907 + }, + { + "epoch": 0.37, + "learning_rate": 1.4495004171366302e-05, + "loss": 0.6346, + "step": 908 + }, + { + "epoch": 0.37, + "learning_rate": 1.4483169194114014e-05, + "loss": 0.6178, + "step": 909 + }, + { + "epoch": 0.37, + "learning_rate": 1.4471326352645573e-05, + "loss": 0.7311, + "step": 910 + }, + { + "epoch": 0.37, + "learning_rate": 1.445947566773527e-05, + "loss": 0.7122, + "step": 911 + }, + { + "epoch": 0.37, + "learning_rate": 1.4447617160171154e-05, + "loss": 0.6745, + "step": 912 + }, + { + "epoch": 0.37, + "learning_rate": 1.4435750850755001e-05, + "loss": 0.7504, + "step": 913 + }, + { + "epoch": 0.37, + "learning_rate": 1.4423876760302266e-05, + "loss": 0.7315, + "step": 914 + }, + { + "epoch": 0.37, + "learning_rate": 1.4411994909642059e-05, + "loss": 0.7143, + "step": 915 + }, + { + "epoch": 0.37, + "learning_rate": 1.4400105319617102e-05, + "loss": 0.7352, + "step": 916 + }, + { + "epoch": 0.37, + "learning_rate": 1.4388208011083688e-05, + "loss": 0.6552, + "step": 917 + }, + { + "epoch": 0.38, + "learning_rate": 1.4376303004911654e-05, + "loss": 0.6561, + "step": 918 + }, + { + "epoch": 0.38, + "learning_rate": 1.4364390321984334e-05, + "loss": 0.6391, + "step": 919 + }, + { + "epoch": 0.38, + "learning_rate": 1.4352469983198542e-05, + "loss": 0.5917, + "step": 920 + }, + { + "epoch": 0.38, + "learning_rate": 1.4340542009464513e-05, + "loss": 0.7887, + "step": 921 + }, + { + "epoch": 0.38, + "learning_rate": 1.4328606421705868e-05, + "loss": 0.7109, + "step": 922 + }, + { + "epoch": 0.38, + "learning_rate": 1.4316663240859595e-05, + "loss": 0.6414, + "step": 923 + }, + { + "epoch": 0.38, + "learning_rate": 1.4304712487875999e-05, + "loss": 0.6761, + "step": 924 + }, + { + "epoch": 0.38, + "learning_rate": 1.429275418371866e-05, + "loss": 0.6634, + "step": 925 + }, + { + "epoch": 0.38, + "learning_rate": 1.4280788349364414e-05, + "loss": 0.6568, + "step": 926 + }, + { + "epoch": 0.38, + "learning_rate": 1.4268815005803305e-05, + "loss": 0.7302, + "step": 927 + }, + { + "epoch": 0.38, + "learning_rate": 1.4256834174038545e-05, + "loss": 0.6097, + "step": 928 + }, + { + "epoch": 0.38, + "learning_rate": 1.4244845875086486e-05, + "loss": 0.7543, + "step": 929 + }, + { + "epoch": 0.38, + "learning_rate": 1.4232850129976573e-05, + "loss": 0.7901, + "step": 930 + }, + { + "epoch": 0.38, + "learning_rate": 1.4220846959751315e-05, + "loss": 0.6433, + "step": 931 + }, + { + "epoch": 0.38, + "learning_rate": 1.420883638546625e-05, + "loss": 0.6497, + "step": 932 + }, + { + "epoch": 0.38, + "learning_rate": 1.41968184281899e-05, + "loss": 0.7194, + "step": 933 + }, + { + "epoch": 0.38, + "learning_rate": 1.4184793109003734e-05, + "loss": 0.5924, + "step": 934 + }, + { + "epoch": 0.38, + "learning_rate": 1.4172760449002148e-05, + "loss": 0.6818, + "step": 935 + }, + { + "epoch": 0.38, + "learning_rate": 1.4160720469292402e-05, + "loss": 0.6699, + "step": 936 + }, + { + "epoch": 0.38, + "learning_rate": 1.41486731909946e-05, + "loss": 0.6364, + "step": 937 + }, + { + "epoch": 0.38, + "learning_rate": 1.4136618635241655e-05, + "loss": 0.6482, + "step": 938 + }, + { + "epoch": 0.38, + "learning_rate": 1.4124556823179234e-05, + "loss": 0.6275, + "step": 939 + }, + { + "epoch": 0.38, + "learning_rate": 1.4112487775965741e-05, + "loss": 0.7372, + "step": 940 + }, + { + "epoch": 0.38, + "learning_rate": 1.410041151477227e-05, + "loss": 0.6012, + "step": 941 + }, + { + "epoch": 0.39, + "learning_rate": 1.4088328060782573e-05, + "loss": 0.7349, + "step": 942 + }, + { + "epoch": 0.39, + "learning_rate": 1.4076237435193011e-05, + "loss": 0.6688, + "step": 943 + }, + { + "epoch": 0.39, + "learning_rate": 1.4064139659212534e-05, + "loss": 0.7441, + "step": 944 + }, + { + "epoch": 0.39, + "learning_rate": 1.4052034754062626e-05, + "loss": 0.7159, + "step": 945 + }, + { + "epoch": 0.39, + "learning_rate": 1.403992274097729e-05, + "loss": 0.6244, + "step": 946 + }, + { + "epoch": 0.39, + "learning_rate": 1.402780364120298e-05, + "loss": 0.6445, + "step": 947 + }, + { + "epoch": 0.39, + "learning_rate": 1.4015677475998595e-05, + "loss": 0.6686, + "step": 948 + }, + { + "epoch": 0.39, + "learning_rate": 1.4003544266635419e-05, + "loss": 0.6924, + "step": 949 + }, + { + "epoch": 0.39, + "learning_rate": 1.3991404034397102e-05, + "loss": 0.6661, + "step": 950 + }, + { + "epoch": 0.39, + "learning_rate": 1.3979256800579605e-05, + "loss": 0.6329, + "step": 951 + }, + { + "epoch": 0.39, + "learning_rate": 1.3967102586491179e-05, + "loss": 0.716, + "step": 952 + }, + { + "epoch": 0.39, + "learning_rate": 1.3954941413452309e-05, + "loss": 0.7043, + "step": 953 + }, + { + "epoch": 0.39, + "learning_rate": 1.3942773302795697e-05, + "loss": 0.6532, + "step": 954 + }, + { + "epoch": 0.39, + "learning_rate": 1.3930598275866205e-05, + "loss": 0.6309, + "step": 955 + }, + { + "epoch": 0.39, + "learning_rate": 1.3918416354020836e-05, + "loss": 0.5948, + "step": 956 + }, + { + "epoch": 0.39, + "learning_rate": 1.3906227558628684e-05, + "loss": 0.6993, + "step": 957 + }, + { + "epoch": 0.39, + "learning_rate": 1.3894031911070904e-05, + "loss": 0.6112, + "step": 958 + }, + { + "epoch": 0.39, + "learning_rate": 1.3881829432740665e-05, + "loss": 0.6158, + "step": 959 + }, + { + "epoch": 0.39, + "learning_rate": 1.3869620145043123e-05, + "loss": 0.6842, + "step": 960 + }, + { + "epoch": 0.39, + "learning_rate": 1.3857404069395373e-05, + "loss": 0.6148, + "step": 961 + }, + { + "epoch": 0.39, + "learning_rate": 1.3845181227226423e-05, + "loss": 0.6307, + "step": 962 + }, + { + "epoch": 0.39, + "learning_rate": 1.383295163997715e-05, + "loss": 0.729, + "step": 963 + }, + { + "epoch": 0.39, + "learning_rate": 1.3820715329100256e-05, + "loss": 0.6533, + "step": 964 + }, + { + "epoch": 0.39, + "learning_rate": 1.3808472316060251e-05, + "loss": 0.6902, + "step": 965 + }, + { + "epoch": 0.39, + "learning_rate": 1.3796222622333389e-05, + "loss": 0.6106, + "step": 966 + }, + { + "epoch": 0.4, + "learning_rate": 1.3783966269407647e-05, + "loss": 0.6843, + "step": 967 + }, + { + "epoch": 0.4, + "learning_rate": 1.3771703278782689e-05, + "loss": 0.6475, + "step": 968 + }, + { + "epoch": 0.4, + "learning_rate": 1.375943367196981e-05, + "loss": 0.6113, + "step": 969 + }, + { + "epoch": 0.4, + "learning_rate": 1.3747157470491923e-05, + "loss": 0.617, + "step": 970 + }, + { + "epoch": 0.4, + "learning_rate": 1.3734874695883504e-05, + "loss": 0.6739, + "step": 971 + }, + { + "epoch": 0.4, + "learning_rate": 1.372258536969056e-05, + "loss": 0.5792, + "step": 972 + }, + { + "epoch": 0.4, + "learning_rate": 1.371028951347059e-05, + "loss": 0.6993, + "step": 973 + }, + { + "epoch": 0.4, + "learning_rate": 1.3697987148792546e-05, + "loss": 0.689, + "step": 974 + }, + { + "epoch": 0.4, + "learning_rate": 1.3685678297236805e-05, + "loss": 0.6088, + "step": 975 + }, + { + "epoch": 0.4, + "learning_rate": 1.3673362980395115e-05, + "loss": 0.6996, + "step": 976 + }, + { + "epoch": 0.4, + "learning_rate": 1.3661041219870563e-05, + "loss": 0.6402, + "step": 977 + }, + { + "epoch": 0.4, + "learning_rate": 1.364871303727755e-05, + "loss": 0.6131, + "step": 978 + }, + { + "epoch": 0.4, + "learning_rate": 1.3636378454241728e-05, + "loss": 0.6024, + "step": 979 + }, + { + "epoch": 0.4, + "learning_rate": 1.3624037492399991e-05, + "loss": 0.6856, + "step": 980 + }, + { + "epoch": 0.4, + "learning_rate": 1.3611690173400414e-05, + "loss": 0.6063, + "step": 981 + }, + { + "epoch": 0.4, + "learning_rate": 1.3599336518902228e-05, + "loss": 0.6971, + "step": 982 + }, + { + "epoch": 0.4, + "learning_rate": 1.358697655057577e-05, + "loss": 0.6286, + "step": 983 + }, + { + "epoch": 0.4, + "learning_rate": 1.3574610290102462e-05, + "loss": 0.6562, + "step": 984 + }, + { + "epoch": 0.4, + "learning_rate": 1.3562237759174755e-05, + "loss": 0.6645, + "step": 985 + }, + { + "epoch": 0.4, + "learning_rate": 1.3549858979496104e-05, + "loss": 0.656, + "step": 986 + }, + { + "epoch": 0.4, + "learning_rate": 1.3537473972780924e-05, + "loss": 0.6358, + "step": 987 + }, + { + "epoch": 0.4, + "learning_rate": 1.3525082760754557e-05, + "loss": 0.612, + "step": 988 + }, + { + "epoch": 0.4, + "learning_rate": 1.3512685365153226e-05, + "loss": 0.6456, + "step": 989 + }, + { + "epoch": 0.4, + "learning_rate": 1.3500281807724003e-05, + "loss": 0.6466, + "step": 990 + }, + { + "epoch": 0.41, + "learning_rate": 1.3487872110224765e-05, + "loss": 0.6977, + "step": 991 + }, + { + "epoch": 0.41, + "learning_rate": 1.3475456294424163e-05, + "loss": 0.6613, + "step": 992 + }, + { + "epoch": 0.41, + "learning_rate": 1.3463034382101581e-05, + "loss": 0.6815, + "step": 993 + }, + { + "epoch": 0.41, + "learning_rate": 1.3450606395047094e-05, + "loss": 0.6862, + "step": 994 + }, + { + "epoch": 0.41, + "learning_rate": 1.343817235506144e-05, + "loss": 0.5749, + "step": 995 + }, + { + "epoch": 0.41, + "learning_rate": 1.3425732283955968e-05, + "loss": 0.6317, + "step": 996 + }, + { + "epoch": 0.41, + "learning_rate": 1.341328620355261e-05, + "loss": 0.6255, + "step": 997 + }, + { + "epoch": 0.41, + "learning_rate": 1.3400834135683836e-05, + "loss": 0.6544, + "step": 998 + }, + { + "epoch": 0.41, + "learning_rate": 1.3388376102192622e-05, + "loss": 0.6195, + "step": 999 + }, + { + "epoch": 0.41, + "learning_rate": 1.3375912124932406e-05, + "loss": 0.7076, + "step": 1000 + }, + { + "epoch": 0.41, + "learning_rate": 1.3363442225767055e-05, + "loss": 0.6262, + "step": 1001 + }, + { + "epoch": 0.41, + "learning_rate": 1.3350966426570825e-05, + "loss": 0.5793, + "step": 1002 + }, + { + "epoch": 0.41, + "learning_rate": 1.333848474922832e-05, + "loss": 0.7112, + "step": 1003 + }, + { + "epoch": 0.41, + "learning_rate": 1.3325997215634457e-05, + "loss": 0.6114, + "step": 1004 + }, + { + "epoch": 0.41, + "learning_rate": 1.3313503847694418e-05, + "loss": 0.633, + "step": 1005 + }, + { + "epoch": 0.41, + "learning_rate": 1.330100466732363e-05, + "loss": 0.7571, + "step": 1006 + }, + { + "epoch": 0.41, + "learning_rate": 1.3288499696447708e-05, + "loss": 0.7234, + "step": 1007 + }, + { + "epoch": 0.41, + "learning_rate": 1.3275988957002429e-05, + "loss": 0.648, + "step": 1008 + }, + { + "epoch": 0.41, + "learning_rate": 1.3263472470933682e-05, + "loss": 0.7128, + "step": 1009 + }, + { + "epoch": 0.41, + "learning_rate": 1.3250950260197455e-05, + "loss": 0.6688, + "step": 1010 + }, + { + "epoch": 0.41, + "learning_rate": 1.3238422346759758e-05, + "loss": 0.6612, + "step": 1011 + }, + { + "epoch": 0.41, + "learning_rate": 1.3225888752596614e-05, + "loss": 0.654, + "step": 1012 + }, + { + "epoch": 0.41, + "learning_rate": 1.3213349499694004e-05, + "loss": 0.6246, + "step": 1013 + }, + { + "epoch": 0.41, + "learning_rate": 1.3200804610047842e-05, + "loss": 0.6233, + "step": 1014 + }, + { + "epoch": 0.41, + "learning_rate": 1.318825410566393e-05, + "loss": 0.6381, + "step": 1015 + }, + { + "epoch": 0.42, + "learning_rate": 1.3175698008557914e-05, + "loss": 0.626, + "step": 1016 + }, + { + "epoch": 0.42, + "learning_rate": 1.3163136340755254e-05, + "loss": 0.6816, + "step": 1017 + }, + { + "epoch": 0.42, + "learning_rate": 1.3150569124291186e-05, + "loss": 0.5663, + "step": 1018 + }, + { + "epoch": 0.42, + "learning_rate": 1.3137996381210672e-05, + "loss": 0.7454, + "step": 1019 + }, + { + "epoch": 0.42, + "learning_rate": 1.3125418133568369e-05, + "loss": 0.6833, + "step": 1020 + }, + { + "epoch": 0.42, + "learning_rate": 1.3112834403428593e-05, + "loss": 0.6682, + "step": 1021 + }, + { + "epoch": 0.42, + "learning_rate": 1.3100245212865279e-05, + "loss": 0.5654, + "step": 1022 + }, + { + "epoch": 0.42, + "learning_rate": 1.3087650583961936e-05, + "loss": 0.6166, + "step": 1023 + }, + { + "epoch": 0.42, + "learning_rate": 1.3075050538811611e-05, + "loss": 0.7112, + "step": 1024 + }, + { + "epoch": 0.42, + "learning_rate": 1.3062445099516863e-05, + "loss": 0.7039, + "step": 1025 + }, + { + "epoch": 0.42, + "learning_rate": 1.3049834288189702e-05, + "loss": 0.7618, + "step": 1026 + }, + { + "epoch": 0.42, + "learning_rate": 1.3037218126951567e-05, + "loss": 0.6539, + "step": 1027 + }, + { + "epoch": 0.42, + "learning_rate": 1.3024596637933277e-05, + "loss": 0.6936, + "step": 1028 + }, + { + "epoch": 0.42, + "learning_rate": 1.3011969843275003e-05, + "loss": 0.6558, + "step": 1029 + }, + { + "epoch": 0.42, + "learning_rate": 1.2999337765126217e-05, + "loss": 0.6517, + "step": 1030 + }, + { + "epoch": 0.42, + "learning_rate": 1.2986700425645663e-05, + "loss": 0.6512, + "step": 1031 + }, + { + "epoch": 0.42, + "learning_rate": 1.2974057847001306e-05, + "loss": 0.6663, + "step": 1032 + }, + { + "epoch": 0.42, + "learning_rate": 1.2961410051370322e-05, + "loss": 0.7137, + "step": 1033 + }, + { + "epoch": 0.42, + "learning_rate": 1.2948757060939019e-05, + "loss": 0.5942, + "step": 1034 + }, + { + "epoch": 0.42, + "learning_rate": 1.293609889790282e-05, + "loss": 0.6702, + "step": 1035 + }, + { + "epoch": 0.42, + "learning_rate": 1.2923435584466228e-05, + "loss": 0.7645, + "step": 1036 + }, + { + "epoch": 0.42, + "learning_rate": 1.2910767142842777e-05, + "loss": 0.6491, + "step": 1037 + }, + { + "epoch": 0.42, + "learning_rate": 1.2898093595254998e-05, + "loss": 0.5949, + "step": 1038 + }, + { + "epoch": 0.42, + "learning_rate": 1.2885414963934374e-05, + "loss": 0.5756, + "step": 1039 + }, + { + "epoch": 0.43, + "learning_rate": 1.2872731271121315e-05, + "loss": 0.6602, + "step": 1040 + }, + { + "epoch": 0.43, + "learning_rate": 1.28600425390651e-05, + "loss": 0.6815, + "step": 1041 + }, + { + "epoch": 0.43, + "learning_rate": 1.2847348790023858e-05, + "loss": 0.6687, + "step": 1042 + }, + { + "epoch": 0.43, + "learning_rate": 1.283465004626451e-05, + "loss": 0.5653, + "step": 1043 + }, + { + "epoch": 0.43, + "learning_rate": 1.2821946330062738e-05, + "loss": 0.7003, + "step": 1044 + }, + { + "epoch": 0.43, + "learning_rate": 1.2809237663702951e-05, + "loss": 0.5824, + "step": 1045 + }, + { + "epoch": 0.43, + "learning_rate": 1.2796524069478243e-05, + "loss": 0.7416, + "step": 1046 + }, + { + "epoch": 0.43, + "learning_rate": 1.2783805569690343e-05, + "loss": 0.6764, + "step": 1047 + }, + { + "epoch": 0.43, + "learning_rate": 1.27710821866496e-05, + "loss": 0.6823, + "step": 1048 + }, + { + "epoch": 0.43, + "learning_rate": 1.275835394267492e-05, + "loss": 0.6791, + "step": 1049 + }, + { + "epoch": 0.43, + "learning_rate": 1.2745620860093726e-05, + "loss": 0.6758, + "step": 1050 + }, + { + "epoch": 0.43, + "learning_rate": 1.273288296124195e-05, + "loss": 0.7278, + "step": 1051 + }, + { + "epoch": 0.43, + "learning_rate": 1.2720140268463958e-05, + "loss": 0.568, + "step": 1052 + }, + { + "epoch": 0.43, + "learning_rate": 1.2707392804112529e-05, + "loss": 0.6155, + "step": 1053 + }, + { + "epoch": 0.43, + "learning_rate": 1.2694640590548808e-05, + "loss": 0.6709, + "step": 1054 + }, + { + "epoch": 0.43, + "learning_rate": 1.2681883650142283e-05, + "loss": 0.6508, + "step": 1055 + }, + { + "epoch": 0.43, + "learning_rate": 1.2669122005270724e-05, + "loss": 0.757, + "step": 1056 + }, + { + "epoch": 0.43, + "learning_rate": 1.265635567832015e-05, + "loss": 0.6936, + "step": 1057 + }, + { + "epoch": 0.43, + "learning_rate": 1.2643584691684802e-05, + "loss": 0.6403, + "step": 1058 + }, + { + "epoch": 0.43, + "learning_rate": 1.2630809067767094e-05, + "loss": 0.64, + "step": 1059 + }, + { + "epoch": 0.43, + "learning_rate": 1.2618028828977563e-05, + "loss": 0.6297, + "step": 1060 + }, + { + "epoch": 0.43, + "learning_rate": 1.2605243997734857e-05, + "loss": 0.7047, + "step": 1061 + }, + { + "epoch": 0.43, + "learning_rate": 1.259245459646567e-05, + "loss": 0.6494, + "step": 1062 + }, + { + "epoch": 0.43, + "learning_rate": 1.2579660647604715e-05, + "loss": 0.6567, + "step": 1063 + }, + { + "epoch": 0.43, + "learning_rate": 1.2566862173594689e-05, + "loss": 0.6357, + "step": 1064 + }, + { + "epoch": 0.44, + "learning_rate": 1.2554059196886212e-05, + "loss": 0.6891, + "step": 1065 + }, + { + "epoch": 0.44, + "learning_rate": 1.2541251739937814e-05, + "loss": 0.6514, + "step": 1066 + }, + { + "epoch": 0.44, + "learning_rate": 1.2528439825215884e-05, + "loss": 0.7016, + "step": 1067 + }, + { + "epoch": 0.44, + "learning_rate": 1.2515623475194623e-05, + "loss": 0.5765, + "step": 1068 + }, + { + "epoch": 0.44, + "learning_rate": 1.2502802712356017e-05, + "loss": 0.6344, + "step": 1069 + }, + { + "epoch": 0.44, + "learning_rate": 1.2489977559189796e-05, + "loss": 0.6569, + "step": 1070 + }, + { + "epoch": 0.44, + "learning_rate": 1.2477148038193392e-05, + "loss": 0.7095, + "step": 1071 + }, + { + "epoch": 0.44, + "learning_rate": 1.2464314171871888e-05, + "loss": 0.6326, + "step": 1072 + }, + { + "epoch": 0.44, + "learning_rate": 1.2451475982737996e-05, + "loss": 0.6426, + "step": 1073 + }, + { + "epoch": 0.44, + "learning_rate": 1.2438633493312016e-05, + "loss": 0.666, + "step": 1074 + }, + { + "epoch": 0.44, + "learning_rate": 1.2425786726121783e-05, + "loss": 0.7223, + "step": 1075 + }, + { + "epoch": 0.44, + "learning_rate": 1.241293570370264e-05, + "loss": 0.7513, + "step": 1076 + }, + { + "epoch": 0.44, + "learning_rate": 1.2400080448597396e-05, + "loss": 0.7125, + "step": 1077 + }, + { + "epoch": 0.44, + "learning_rate": 1.2387220983356283e-05, + "loss": 0.6276, + "step": 1078 + }, + { + "epoch": 0.44, + "learning_rate": 1.2374357330536919e-05, + "loss": 0.5754, + "step": 1079 + }, + { + "epoch": 0.44, + "learning_rate": 1.2361489512704264e-05, + "loss": 0.6219, + "step": 1080 + }, + { + "epoch": 0.44, + "learning_rate": 1.234861755243059e-05, + "loss": 0.7364, + "step": 1081 + }, + { + "epoch": 0.44, + "learning_rate": 1.2335741472295426e-05, + "loss": 0.6935, + "step": 1082 + }, + { + "epoch": 0.44, + "learning_rate": 1.2322861294885542e-05, + "loss": 0.6673, + "step": 1083 + }, + { + "epoch": 0.44, + "learning_rate": 1.230997704279488e-05, + "loss": 0.667, + "step": 1084 + }, + { + "epoch": 0.44, + "learning_rate": 1.2297088738624548e-05, + "loss": 0.6083, + "step": 1085 + }, + { + "epoch": 0.44, + "learning_rate": 1.2284196404982746e-05, + "loss": 0.6952, + "step": 1086 + }, + { + "epoch": 0.44, + "learning_rate": 1.2271300064484746e-05, + "loss": 0.5991, + "step": 1087 + }, + { + "epoch": 0.44, + "learning_rate": 1.2258399739752848e-05, + "loss": 0.6693, + "step": 1088 + }, + { + "epoch": 0.45, + "learning_rate": 1.2245495453416352e-05, + "loss": 0.6277, + "step": 1089 + }, + { + "epoch": 0.45, + "learning_rate": 1.223258722811149e-05, + "loss": 0.6063, + "step": 1090 + }, + { + "epoch": 0.45, + "learning_rate": 1.2219675086481418e-05, + "loss": 0.6917, + "step": 1091 + }, + { + "epoch": 0.45, + "learning_rate": 1.2206759051176151e-05, + "loss": 0.6114, + "step": 1092 + }, + { + "epoch": 0.45, + "learning_rate": 1.2193839144852546e-05, + "loss": 0.6676, + "step": 1093 + }, + { + "epoch": 0.45, + "learning_rate": 1.218091539017424e-05, + "loss": 0.6606, + "step": 1094 + }, + { + "epoch": 0.45, + "learning_rate": 1.2167987809811625e-05, + "loss": 0.7116, + "step": 1095 + }, + { + "epoch": 0.45, + "learning_rate": 1.2155056426441803e-05, + "loss": 0.624, + "step": 1096 + }, + { + "epoch": 0.45, + "learning_rate": 1.2142121262748545e-05, + "loss": 0.6998, + "step": 1097 + }, + { + "epoch": 0.45, + "learning_rate": 1.212918234142226e-05, + "loss": 0.6688, + "step": 1098 + }, + { + "epoch": 0.45, + "learning_rate": 1.2116239685159941e-05, + "loss": 0.6496, + "step": 1099 + }, + { + "epoch": 0.45, + "learning_rate": 1.2103293316665137e-05, + "loss": 0.6768, + "step": 1100 + }, + { + "epoch": 0.45, + "learning_rate": 1.2090343258647912e-05, + "loss": 0.6663, + "step": 1101 + }, + { + "epoch": 0.45, + "learning_rate": 1.2077389533824789e-05, + "loss": 0.642, + "step": 1102 + }, + { + "epoch": 0.45, + "learning_rate": 1.2064432164918738e-05, + "loss": 0.6956, + "step": 1103 + }, + { + "epoch": 0.45, + "learning_rate": 1.2051471174659116e-05, + "loss": 0.627, + "step": 1104 + }, + { + "epoch": 0.45, + "learning_rate": 1.2038506585781626e-05, + "loss": 0.7477, + "step": 1105 + }, + { + "epoch": 0.45, + "learning_rate": 1.2025538421028293e-05, + "loss": 0.6547, + "step": 1106 + }, + { + "epoch": 0.45, + "learning_rate": 1.201256670314741e-05, + "loss": 0.6947, + "step": 1107 + }, + { + "epoch": 0.45, + "learning_rate": 1.1999591454893511e-05, + "loss": 0.7223, + "step": 1108 + }, + { + "epoch": 0.45, + "learning_rate": 1.1986612699027305e-05, + "loss": 0.5807, + "step": 1109 + }, + { + "epoch": 0.45, + "learning_rate": 1.1973630458315667e-05, + "loss": 0.6823, + "step": 1110 + }, + { + "epoch": 0.45, + "learning_rate": 1.1960644755531587e-05, + "loss": 0.6151, + "step": 1111 + }, + { + "epoch": 0.45, + "learning_rate": 1.194765561345412e-05, + "loss": 0.6339, + "step": 1112 + }, + { + "epoch": 0.46, + "learning_rate": 1.1934663054868358e-05, + "loss": 0.6318, + "step": 1113 + }, + { + "epoch": 0.46, + "learning_rate": 1.1921667102565384e-05, + "loss": 0.6354, + "step": 1114 + }, + { + "epoch": 0.46, + "learning_rate": 1.1908667779342243e-05, + "loss": 0.6517, + "step": 1115 + }, + { + "epoch": 0.46, + "learning_rate": 1.1895665108001879e-05, + "loss": 0.6742, + "step": 1116 + }, + { + "epoch": 0.46, + "learning_rate": 1.1882659111353118e-05, + "loss": 0.6612, + "step": 1117 + }, + { + "epoch": 0.46, + "learning_rate": 1.1869649812210618e-05, + "loss": 0.7326, + "step": 1118 + }, + { + "epoch": 0.46, + "learning_rate": 1.185663723339483e-05, + "loss": 0.6571, + "step": 1119 + }, + { + "epoch": 0.46, + "learning_rate": 1.1843621397731954e-05, + "loss": 0.6196, + "step": 1120 + }, + { + "epoch": 0.46, + "learning_rate": 1.1830602328053911e-05, + "loss": 0.7858, + "step": 1121 + }, + { + "epoch": 0.46, + "learning_rate": 1.1817580047198287e-05, + "loss": 0.7536, + "step": 1122 + }, + { + "epoch": 0.46, + "learning_rate": 1.180455457800831e-05, + "loss": 0.7371, + "step": 1123 + }, + { + "epoch": 0.46, + "learning_rate": 1.179152594333279e-05, + "loss": 0.6093, + "step": 1124 + }, + { + "epoch": 0.46, + "learning_rate": 1.1778494166026096e-05, + "loss": 0.6953, + "step": 1125 + }, + { + "epoch": 0.46, + "learning_rate": 1.1765459268948111e-05, + "loss": 0.5785, + "step": 1126 + }, + { + "epoch": 0.46, + "learning_rate": 1.1752421274964188e-05, + "loss": 0.6453, + "step": 1127 + }, + { + "epoch": 0.46, + "learning_rate": 1.1739380206945108e-05, + "loss": 0.5911, + "step": 1128 + }, + { + "epoch": 0.46, + "learning_rate": 1.1726336087767054e-05, + "loss": 0.6226, + "step": 1129 + }, + { + "epoch": 0.46, + "learning_rate": 1.1713288940311562e-05, + "loss": 0.6772, + "step": 1130 + }, + { + "epoch": 0.46, + "learning_rate": 1.1700238787465463e-05, + "loss": 0.6431, + "step": 1131 + }, + { + "epoch": 0.46, + "learning_rate": 1.168718565212088e-05, + "loss": 0.6819, + "step": 1132 + }, + { + "epoch": 0.46, + "learning_rate": 1.1674129557175156e-05, + "loss": 0.6575, + "step": 1133 + }, + { + "epoch": 0.46, + "learning_rate": 1.1661070525530827e-05, + "loss": 0.6557, + "step": 1134 + }, + { + "epoch": 0.46, + "learning_rate": 1.1648008580095587e-05, + "loss": 0.671, + "step": 1135 + }, + { + "epoch": 0.46, + "learning_rate": 1.1634943743782235e-05, + "loss": 0.6451, + "step": 1136 + }, + { + "epoch": 0.46, + "learning_rate": 1.1621876039508638e-05, + "loss": 0.6994, + "step": 1137 + }, + { + "epoch": 0.47, + "learning_rate": 1.160880549019771e-05, + "loss": 0.6015, + "step": 1138 + }, + { + "epoch": 0.47, + "learning_rate": 1.1595732118777332e-05, + "loss": 0.5878, + "step": 1139 + }, + { + "epoch": 0.47, + "learning_rate": 1.1582655948180357e-05, + "loss": 0.6532, + "step": 1140 + }, + { + "epoch": 0.47, + "learning_rate": 1.1569577001344532e-05, + "loss": 0.7016, + "step": 1141 + }, + { + "epoch": 0.47, + "learning_rate": 1.1556495301212485e-05, + "loss": 0.6226, + "step": 1142 + }, + { + "epoch": 0.47, + "learning_rate": 1.154341087073167e-05, + "loss": 0.7173, + "step": 1143 + }, + { + "epoch": 0.47, + "learning_rate": 1.1530323732854326e-05, + "loss": 0.6313, + "step": 1144 + }, + { + "epoch": 0.47, + "learning_rate": 1.1517233910537453e-05, + "loss": 0.6334, + "step": 1145 + }, + { + "epoch": 0.47, + "learning_rate": 1.1504141426742744e-05, + "loss": 0.6283, + "step": 1146 + }, + { + "epoch": 0.47, + "learning_rate": 1.1491046304436575e-05, + "loss": 0.7231, + "step": 1147 + }, + { + "epoch": 0.47, + "learning_rate": 1.147794856658994e-05, + "loss": 0.5591, + "step": 1148 + }, + { + "epoch": 0.47, + "learning_rate": 1.146484823617843e-05, + "loss": 0.597, + "step": 1149 + }, + { + "epoch": 0.47, + "learning_rate": 1.1451745336182173e-05, + "loss": 0.6484, + "step": 1150 + }, + { + "epoch": 0.47, + "learning_rate": 1.1438639889585818e-05, + "loss": 0.6414, + "step": 1151 + }, + { + "epoch": 0.47, + "learning_rate": 1.1425531919378469e-05, + "loss": 0.6337, + "step": 1152 + }, + { + "epoch": 0.47, + "learning_rate": 1.1412421448553665e-05, + "loss": 0.5903, + "step": 1153 + }, + { + "epoch": 0.47, + "learning_rate": 1.1399308500109326e-05, + "loss": 0.6858, + "step": 1154 + }, + { + "epoch": 0.47, + "learning_rate": 1.138619309704772e-05, + "loss": 0.6119, + "step": 1155 + }, + { + "epoch": 0.47, + "learning_rate": 1.1373075262375421e-05, + "loss": 0.657, + "step": 1156 + }, + { + "epoch": 0.47, + "learning_rate": 1.1359955019103273e-05, + "loss": 0.6326, + "step": 1157 + }, + { + "epoch": 0.47, + "learning_rate": 1.1346832390246334e-05, + "loss": 0.6183, + "step": 1158 + }, + { + "epoch": 0.47, + "learning_rate": 1.1333707398823856e-05, + "loss": 0.68, + "step": 1159 + }, + { + "epoch": 0.47, + "learning_rate": 1.132058006785924e-05, + "loss": 0.6826, + "step": 1160 + }, + { + "epoch": 0.47, + "learning_rate": 1.1307450420379971e-05, + "loss": 0.6321, + "step": 1161 + }, + { + "epoch": 0.48, + "learning_rate": 1.1294318479417618e-05, + "loss": 0.7396, + "step": 1162 + }, + { + "epoch": 0.48, + "learning_rate": 1.1281184268007766e-05, + "loss": 0.6435, + "step": 1163 + }, + { + "epoch": 0.48, + "learning_rate": 1.1268047809189976e-05, + "loss": 0.7927, + "step": 1164 + }, + { + "epoch": 0.48, + "learning_rate": 1.1254909126007765e-05, + "loss": 0.7204, + "step": 1165 + }, + { + "epoch": 0.48, + "learning_rate": 1.1241768241508537e-05, + "loss": 0.7077, + "step": 1166 + }, + { + "epoch": 0.48, + "learning_rate": 1.1228625178743572e-05, + "loss": 0.6306, + "step": 1167 + }, + { + "epoch": 0.48, + "learning_rate": 1.1215479960767958e-05, + "loss": 0.767, + "step": 1168 + }, + { + "epoch": 0.48, + "learning_rate": 1.1202332610640574e-05, + "loss": 0.5404, + "step": 1169 + }, + { + "epoch": 0.48, + "learning_rate": 1.118918315142403e-05, + "loss": 0.6272, + "step": 1170 + }, + { + "epoch": 0.48, + "learning_rate": 1.1176031606184645e-05, + "loss": 0.6591, + "step": 1171 + }, + { + "epoch": 0.48, + "learning_rate": 1.1162877997992389e-05, + "loss": 0.6481, + "step": 1172 + }, + { + "epoch": 0.48, + "learning_rate": 1.1149722349920855e-05, + "loss": 0.6648, + "step": 1173 + }, + { + "epoch": 0.48, + "learning_rate": 1.1136564685047213e-05, + "loss": 0.6006, + "step": 1174 + }, + { + "epoch": 0.48, + "learning_rate": 1.1123405026452176e-05, + "loss": 0.6927, + "step": 1175 + }, + { + "epoch": 0.48, + "learning_rate": 1.1110243397219945e-05, + "loss": 0.6562, + "step": 1176 + }, + { + "epoch": 0.48, + "learning_rate": 1.1097079820438181e-05, + "loss": 0.6103, + "step": 1177 + }, + { + "epoch": 0.48, + "learning_rate": 1.1083914319197967e-05, + "loss": 0.6468, + "step": 1178 + }, + { + "epoch": 0.48, + "learning_rate": 1.1070746916593756e-05, + "loss": 0.6356, + "step": 1179 + }, + { + "epoch": 0.48, + "learning_rate": 1.1057577635723337e-05, + "loss": 0.5703, + "step": 1180 + }, + { + "epoch": 0.48, + "learning_rate": 1.1044406499687797e-05, + "loss": 0.6586, + "step": 1181 + }, + { + "epoch": 0.48, + "learning_rate": 1.1031233531591471e-05, + "loss": 0.7869, + "step": 1182 + }, + { + "epoch": 0.48, + "learning_rate": 1.1018058754541915e-05, + "loss": 0.6808, + "step": 1183 + }, + { + "epoch": 0.48, + "learning_rate": 1.1004882191649857e-05, + "loss": 0.6611, + "step": 1184 + }, + { + "epoch": 0.48, + "learning_rate": 1.099170386602915e-05, + "loss": 0.7283, + "step": 1185 + }, + { + "epoch": 0.48, + "learning_rate": 1.0978523800796747e-05, + "loss": 0.6204, + "step": 1186 + }, + { + "epoch": 0.49, + "learning_rate": 1.0965342019072654e-05, + "loss": 0.664, + "step": 1187 + }, + { + "epoch": 0.49, + "learning_rate": 1.0952158543979878e-05, + "loss": 0.7295, + "step": 1188 + }, + { + "epoch": 0.49, + "learning_rate": 1.0938973398644407e-05, + "loss": 0.7179, + "step": 1189 + }, + { + "epoch": 0.49, + "learning_rate": 1.0925786606195153e-05, + "loss": 0.546, + "step": 1190 + }, + { + "epoch": 0.49, + "learning_rate": 1.0912598189763919e-05, + "loss": 0.6948, + "step": 1191 + }, + { + "epoch": 0.49, + "learning_rate": 1.0899408172485357e-05, + "loss": 0.6008, + "step": 1192 + }, + { + "epoch": 0.49, + "learning_rate": 1.0886216577496926e-05, + "loss": 0.6768, + "step": 1193 + }, + { + "epoch": 0.49, + "learning_rate": 1.0873023427938855e-05, + "loss": 0.6157, + "step": 1194 + }, + { + "epoch": 0.49, + "learning_rate": 1.0859828746954098e-05, + "loss": 0.6702, + "step": 1195 + }, + { + "epoch": 0.49, + "learning_rate": 1.0846632557688295e-05, + "loss": 0.6785, + "step": 1196 + }, + { + "epoch": 0.49, + "learning_rate": 1.083343488328973e-05, + "loss": 0.6163, + "step": 1197 + }, + { + "epoch": 0.49, + "learning_rate": 1.08202357469093e-05, + "loss": 0.6327, + "step": 1198 + }, + { + "epoch": 0.49, + "learning_rate": 1.0807035171700455e-05, + "loss": 0.6351, + "step": 1199 + }, + { + "epoch": 0.49, + "learning_rate": 1.0793833180819183e-05, + "loss": 0.5806, + "step": 1200 + }, + { + "epoch": 0.49, + "learning_rate": 1.0780629797423938e-05, + "loss": 0.714, + "step": 1201 + }, + { + "epoch": 0.49, + "learning_rate": 1.0767425044675634e-05, + "loss": 0.7203, + "step": 1202 + }, + { + "epoch": 0.49, + "learning_rate": 1.0754218945737575e-05, + "loss": 0.701, + "step": 1203 + }, + { + "epoch": 0.49, + "learning_rate": 1.0741011523775433e-05, + "loss": 0.6727, + "step": 1204 + }, + { + "epoch": 0.49, + "learning_rate": 1.0727802801957198e-05, + "loss": 0.5973, + "step": 1205 + }, + { + "epoch": 0.49, + "learning_rate": 1.0714592803453138e-05, + "loss": 0.7249, + "step": 1206 + }, + { + "epoch": 0.49, + "learning_rate": 1.0701381551435767e-05, + "loss": 0.5716, + "step": 1207 + }, + { + "epoch": 0.49, + "learning_rate": 1.0688169069079793e-05, + "loss": 0.6326, + "step": 1208 + }, + { + "epoch": 0.49, + "learning_rate": 1.067495537956208e-05, + "loss": 0.6047, + "step": 1209 + }, + { + "epoch": 0.49, + "learning_rate": 1.0661740506061616e-05, + "loss": 0.617, + "step": 1210 + }, + { + "epoch": 0.5, + "learning_rate": 1.0648524471759463e-05, + "loss": 0.6481, + "step": 1211 + }, + { + "epoch": 0.5, + "learning_rate": 1.0635307299838715e-05, + "loss": 0.5981, + "step": 1212 + }, + { + "epoch": 0.5, + "learning_rate": 1.062208901348447e-05, + "loss": 0.6404, + "step": 1213 + }, + { + "epoch": 0.5, + "learning_rate": 1.0608869635883776e-05, + "loss": 0.7058, + "step": 1214 + }, + { + "epoch": 0.5, + "learning_rate": 1.0595649190225593e-05, + "loss": 0.6684, + "step": 1215 + }, + { + "epoch": 0.5, + "learning_rate": 1.0582427699700759e-05, + "loss": 0.6638, + "step": 1216 + }, + { + "epoch": 0.5, + "learning_rate": 1.0569205187501943e-05, + "loss": 0.7179, + "step": 1217 + }, + { + "epoch": 0.5, + "learning_rate": 1.0555981676823606e-05, + "loss": 0.6787, + "step": 1218 + }, + { + "epoch": 0.5, + "learning_rate": 1.0542757190861959e-05, + "loss": 0.6338, + "step": 1219 + }, + { + "epoch": 0.5, + "learning_rate": 1.0529531752814928e-05, + "loss": 0.7203, + "step": 1220 + }, + { + "epoch": 0.5, + "learning_rate": 1.0516305385882103e-05, + "loss": 0.6431, + "step": 1221 + }, + { + "epoch": 0.5, + "learning_rate": 1.0503078113264715e-05, + "loss": 0.6501, + "step": 1222 + }, + { + "epoch": 0.5, + "learning_rate": 1.0489849958165567e-05, + "loss": 0.678, + "step": 1223 + }, + { + "epoch": 0.5, + "learning_rate": 1.0476620943789021e-05, + "loss": 0.6206, + "step": 1224 + }, + { + "epoch": 0.5, + "learning_rate": 1.046339109334095e-05, + "loss": 0.6784, + "step": 1225 + }, + { + "epoch": 0.5, + "learning_rate": 1.0450160430028679e-05, + "loss": 0.7266, + "step": 1226 + }, + { + "epoch": 0.5, + "learning_rate": 1.0436928977060973e-05, + "loss": 0.6878, + "step": 1227 + }, + { + "epoch": 0.5, + "learning_rate": 1.0423696757647977e-05, + "loss": 0.6823, + "step": 1228 + }, + { + "epoch": 0.5, + "learning_rate": 1.0410463795001177e-05, + "loss": 0.6077, + "step": 1229 + }, + { + "epoch": 0.5, + "learning_rate": 1.039723011233337e-05, + "loss": 0.5919, + "step": 1230 + }, + { + "epoch": 0.5, + "learning_rate": 1.0383995732858609e-05, + "loss": 0.6481, + "step": 1231 + }, + { + "epoch": 0.5, + "learning_rate": 1.0370760679792173e-05, + "loss": 0.653, + "step": 1232 + }, + { + "epoch": 0.5, + "learning_rate": 1.0357524976350522e-05, + "loss": 0.6937, + "step": 1233 + }, + { + "epoch": 0.5, + "learning_rate": 1.0344288645751257e-05, + "loss": 0.6202, + "step": 1234 + }, + { + "epoch": 0.5, + "learning_rate": 1.0331051711213079e-05, + "loss": 0.6429, + "step": 1235 + }, + { + "epoch": 0.51, + "learning_rate": 1.031781419595575e-05, + "loss": 0.5797, + "step": 1236 + }, + { + "epoch": 0.51, + "learning_rate": 1.0304576123200045e-05, + "loss": 0.6533, + "step": 1237 + }, + { + "epoch": 0.51, + "learning_rate": 1.0291337516167725e-05, + "loss": 0.6404, + "step": 1238 + }, + { + "epoch": 0.51, + "learning_rate": 1.0278098398081482e-05, + "loss": 0.6956, + "step": 1239 + }, + { + "epoch": 0.51, + "learning_rate": 1.0264858792164908e-05, + "loss": 0.6668, + "step": 1240 + }, + { + "epoch": 0.51, + "learning_rate": 1.025161872164245e-05, + "loss": 0.6413, + "step": 1241 + }, + { + "epoch": 0.51, + "learning_rate": 1.0238378209739366e-05, + "loss": 0.6431, + "step": 1242 + }, + { + "epoch": 0.51, + "learning_rate": 1.0225137279681696e-05, + "loss": 0.6379, + "step": 1243 + }, + { + "epoch": 0.51, + "learning_rate": 1.0211895954696204e-05, + "loss": 0.652, + "step": 1244 + }, + { + "epoch": 0.51, + "learning_rate": 1.019865425801036e-05, + "loss": 0.6102, + "step": 1245 + }, + { + "epoch": 0.51, + "learning_rate": 1.0185412212852268e-05, + "loss": 0.5827, + "step": 1246 + }, + { + "epoch": 0.51, + "learning_rate": 1.017216984245066e-05, + "loss": 0.6555, + "step": 1247 + }, + { + "epoch": 0.51, + "learning_rate": 1.0158927170034831e-05, + "loss": 0.6318, + "step": 1248 + }, + { + "epoch": 0.51, + "learning_rate": 1.0145684218834604e-05, + "loss": 0.7373, + "step": 1249 + }, + { + "epoch": 0.51, + "learning_rate": 1.0132441012080296e-05, + "loss": 0.6725, + "step": 1250 + }, + { + "epoch": 0.51, + "learning_rate": 1.011919757300267e-05, + "loss": 0.6071, + "step": 1251 + }, + { + "epoch": 0.51, + "learning_rate": 1.0105953924832894e-05, + "loss": 0.65, + "step": 1252 + }, + { + "epoch": 0.51, + "learning_rate": 1.0092710090802506e-05, + "loss": 0.6226, + "step": 1253 + }, + { + "epoch": 0.51, + "learning_rate": 1.0079466094143373e-05, + "loss": 0.5319, + "step": 1254 + }, + { + "epoch": 0.51, + "learning_rate": 1.0066221958087636e-05, + "loss": 0.6565, + "step": 1255 + }, + { + "epoch": 0.51, + "learning_rate": 1.0052977705867697e-05, + "loss": 0.6192, + "step": 1256 + }, + { + "epoch": 0.51, + "learning_rate": 1.0039733360716145e-05, + "loss": 0.6654, + "step": 1257 + }, + { + "epoch": 0.51, + "learning_rate": 1.0026488945865744e-05, + "loss": 0.6046, + "step": 1258 + }, + { + "epoch": 0.51, + "learning_rate": 1.0013244484549376e-05, + "loss": 0.6024, + "step": 1259 + }, + { + "epoch": 0.52, + "learning_rate": 1e-05, + "loss": 0.6731, + "step": 1260 + }, + { + "epoch": 0.52, + "learning_rate": 9.986755515450625e-06, + "loss": 0.7332, + "step": 1261 + }, + { + "epoch": 0.52, + "learning_rate": 9.973511054134259e-06, + "loss": 0.6355, + "step": 1262 + }, + { + "epoch": 0.52, + "learning_rate": 9.960266639283857e-06, + "loss": 0.6642, + "step": 1263 + }, + { + "epoch": 0.52, + "learning_rate": 9.947022294132306e-06, + "loss": 0.6271, + "step": 1264 + }, + { + "epoch": 0.52, + "learning_rate": 9.933778041912365e-06, + "loss": 0.6326, + "step": 1265 + }, + { + "epoch": 0.52, + "learning_rate": 9.920533905856634e-06, + "loss": 0.6505, + "step": 1266 + }, + { + "epoch": 0.52, + "learning_rate": 9.907289909197496e-06, + "loss": 0.7731, + "step": 1267 + }, + { + "epoch": 0.52, + "learning_rate": 9.894046075167106e-06, + "loss": 0.6125, + "step": 1268 + }, + { + "epoch": 0.52, + "learning_rate": 9.880802426997334e-06, + "loss": 0.7185, + "step": 1269 + }, + { + "epoch": 0.52, + "learning_rate": 9.867558987919704e-06, + "loss": 0.7106, + "step": 1270 + }, + { + "epoch": 0.52, + "learning_rate": 9.854315781165398e-06, + "loss": 0.7425, + "step": 1271 + }, + { + "epoch": 0.52, + "learning_rate": 9.841072829965172e-06, + "loss": 0.6497, + "step": 1272 + }, + { + "epoch": 0.52, + "learning_rate": 9.827830157549344e-06, + "loss": 0.5556, + "step": 1273 + }, + { + "epoch": 0.52, + "learning_rate": 9.814587787147735e-06, + "loss": 0.6899, + "step": 1274 + }, + { + "epoch": 0.52, + "learning_rate": 9.801345741989646e-06, + "loss": 0.5726, + "step": 1275 + }, + { + "epoch": 0.52, + "learning_rate": 9.788104045303797e-06, + "loss": 0.6331, + "step": 1276 + }, + { + "epoch": 0.52, + "learning_rate": 9.774862720318305e-06, + "loss": 0.7118, + "step": 1277 + }, + { + "epoch": 0.52, + "learning_rate": 9.761621790260636e-06, + "loss": 0.6878, + "step": 1278 + }, + { + "epoch": 0.52, + "learning_rate": 9.748381278357554e-06, + "loss": 0.6541, + "step": 1279 + }, + { + "epoch": 0.52, + "learning_rate": 9.735141207835095e-06, + "loss": 0.6494, + "step": 1280 + }, + { + "epoch": 0.52, + "learning_rate": 9.72190160191852e-06, + "loss": 0.7121, + "step": 1281 + }, + { + "epoch": 0.52, + "learning_rate": 9.708662483832279e-06, + "loss": 0.6267, + "step": 1282 + }, + { + "epoch": 0.52, + "learning_rate": 9.695423876799957e-06, + "loss": 0.5758, + "step": 1283 + }, + { + "epoch": 0.52, + "learning_rate": 9.682185804044252e-06, + "loss": 0.6203, + "step": 1284 + }, + { + "epoch": 0.53, + "learning_rate": 9.668948288786923e-06, + "loss": 0.6874, + "step": 1285 + }, + { + "epoch": 0.53, + "learning_rate": 9.655711354248747e-06, + "loss": 0.6808, + "step": 1286 + }, + { + "epoch": 0.53, + "learning_rate": 9.642475023649483e-06, + "loss": 0.6903, + "step": 1287 + }, + { + "epoch": 0.53, + "learning_rate": 9.62923932020783e-06, + "loss": 0.6611, + "step": 1288 + }, + { + "epoch": 0.53, + "learning_rate": 9.616004267141396e-06, + "loss": 0.6374, + "step": 1289 + }, + { + "epoch": 0.53, + "learning_rate": 9.602769887666633e-06, + "loss": 0.6496, + "step": 1290 + }, + { + "epoch": 0.53, + "learning_rate": 9.589536204998823e-06, + "loss": 0.7096, + "step": 1291 + }, + { + "epoch": 0.53, + "learning_rate": 9.576303242352025e-06, + "loss": 0.6436, + "step": 1292 + }, + { + "epoch": 0.53, + "learning_rate": 9.563071022939028e-06, + "loss": 0.6056, + "step": 1293 + }, + { + "epoch": 0.53, + "learning_rate": 9.549839569971323e-06, + "loss": 0.5985, + "step": 1294 + }, + { + "epoch": 0.53, + "learning_rate": 9.536608906659052e-06, + "loss": 0.6914, + "step": 1295 + }, + { + "epoch": 0.53, + "learning_rate": 9.523379056210982e-06, + "loss": 0.6994, + "step": 1296 + }, + { + "epoch": 0.53, + "learning_rate": 9.510150041834436e-06, + "loss": 0.6008, + "step": 1297 + }, + { + "epoch": 0.53, + "learning_rate": 9.496921886735287e-06, + "loss": 0.6901, + "step": 1298 + }, + { + "epoch": 0.53, + "learning_rate": 9.483694614117898e-06, + "loss": 0.6402, + "step": 1299 + }, + { + "epoch": 0.53, + "learning_rate": 9.470468247185076e-06, + "loss": 0.6977, + "step": 1300 + }, + { + "epoch": 0.53, + "learning_rate": 9.457242809138045e-06, + "loss": 0.6134, + "step": 1301 + }, + { + "epoch": 0.53, + "learning_rate": 9.444018323176399e-06, + "loss": 0.7395, + "step": 1302 + }, + { + "epoch": 0.53, + "learning_rate": 9.430794812498062e-06, + "loss": 0.6213, + "step": 1303 + }, + { + "epoch": 0.53, + "learning_rate": 9.417572300299244e-06, + "loss": 0.6868, + "step": 1304 + }, + { + "epoch": 0.53, + "learning_rate": 9.404350809774412e-06, + "loss": 0.6937, + "step": 1305 + }, + { + "epoch": 0.53, + "learning_rate": 9.391130364116226e-06, + "loss": 0.6271, + "step": 1306 + }, + { + "epoch": 0.53, + "learning_rate": 9.377910986515529e-06, + "loss": 0.7128, + "step": 1307 + }, + { + "epoch": 0.53, + "learning_rate": 9.364692700161287e-06, + "loss": 0.6465, + "step": 1308 + }, + { + "epoch": 0.54, + "learning_rate": 9.35147552824054e-06, + "loss": 0.635, + "step": 1309 + }, + { + "epoch": 0.54, + "learning_rate": 9.33825949393839e-06, + "loss": 0.699, + "step": 1310 + }, + { + "epoch": 0.54, + "learning_rate": 9.325044620437924e-06, + "loss": 0.7016, + "step": 1311 + }, + { + "epoch": 0.54, + "learning_rate": 9.311830930920214e-06, + "loss": 0.6355, + "step": 1312 + }, + { + "epoch": 0.54, + "learning_rate": 9.298618448564236e-06, + "loss": 0.6209, + "step": 1313 + }, + { + "epoch": 0.54, + "learning_rate": 9.285407196546862e-06, + "loss": 0.6426, + "step": 1314 + }, + { + "epoch": 0.54, + "learning_rate": 9.272197198042804e-06, + "loss": 0.6308, + "step": 1315 + }, + { + "epoch": 0.54, + "learning_rate": 9.25898847622457e-06, + "loss": 0.723, + "step": 1316 + }, + { + "epoch": 0.54, + "learning_rate": 9.24578105426243e-06, + "loss": 0.5931, + "step": 1317 + }, + { + "epoch": 0.54, + "learning_rate": 9.232574955324369e-06, + "loss": 0.7124, + "step": 1318 + }, + { + "epoch": 0.54, + "learning_rate": 9.219370202576067e-06, + "loss": 0.7423, + "step": 1319 + }, + { + "epoch": 0.54, + "learning_rate": 9.206166819180822e-06, + "loss": 0.6254, + "step": 1320 + }, + { + "epoch": 0.54, + "learning_rate": 9.192964828299545e-06, + "loss": 0.5903, + "step": 1321 + }, + { + "epoch": 0.54, + "learning_rate": 9.179764253090703e-06, + "loss": 0.6314, + "step": 1322 + }, + { + "epoch": 0.54, + "learning_rate": 9.166565116710273e-06, + "loss": 0.6325, + "step": 1323 + }, + { + "epoch": 0.54, + "learning_rate": 9.153367442311712e-06, + "loss": 0.6226, + "step": 1324 + }, + { + "epoch": 0.54, + "learning_rate": 9.140171253045906e-06, + "loss": 0.6444, + "step": 1325 + }, + { + "epoch": 0.54, + "learning_rate": 9.12697657206115e-06, + "loss": 0.5342, + "step": 1326 + }, + { + "epoch": 0.54, + "learning_rate": 9.113783422503076e-06, + "loss": 0.6056, + "step": 1327 + }, + { + "epoch": 0.54, + "learning_rate": 9.100591827514643e-06, + "loss": 0.6532, + "step": 1328 + }, + { + "epoch": 0.54, + "learning_rate": 9.087401810236084e-06, + "loss": 0.6605, + "step": 1329 + }, + { + "epoch": 0.54, + "learning_rate": 9.07421339380485e-06, + "loss": 0.5691, + "step": 1330 + }, + { + "epoch": 0.54, + "learning_rate": 9.061026601355596e-06, + "loss": 0.6582, + "step": 1331 + }, + { + "epoch": 0.54, + "learning_rate": 9.047841456020125e-06, + "loss": 0.6211, + "step": 1332 + }, + { + "epoch": 0.54, + "learning_rate": 9.034657980927351e-06, + "loss": 0.5902, + "step": 1333 + }, + { + "epoch": 0.55, + "learning_rate": 9.021476199203255e-06, + "loss": 0.757, + "step": 1334 + }, + { + "epoch": 0.55, + "learning_rate": 9.008296133970855e-06, + "loss": 0.7054, + "step": 1335 + }, + { + "epoch": 0.55, + "learning_rate": 8.995117808350146e-06, + "loss": 0.6219, + "step": 1336 + }, + { + "epoch": 0.55, + "learning_rate": 8.981941245458086e-06, + "loss": 0.6548, + "step": 1337 + }, + { + "epoch": 0.55, + "learning_rate": 8.968766468408532e-06, + "loss": 0.6361, + "step": 1338 + }, + { + "epoch": 0.55, + "learning_rate": 8.955593500312208e-06, + "loss": 0.6198, + "step": 1339 + }, + { + "epoch": 0.55, + "learning_rate": 8.942422364276668e-06, + "loss": 0.682, + "step": 1340 + }, + { + "epoch": 0.55, + "learning_rate": 8.929253083406248e-06, + "loss": 0.6167, + "step": 1341 + }, + { + "epoch": 0.55, + "learning_rate": 8.916085680802038e-06, + "loss": 0.6933, + "step": 1342 + }, + { + "epoch": 0.55, + "learning_rate": 8.902920179561822e-06, + "loss": 0.6746, + "step": 1343 + }, + { + "epoch": 0.55, + "learning_rate": 8.889756602780059e-06, + "loss": 0.6791, + "step": 1344 + }, + { + "epoch": 0.55, + "learning_rate": 8.876594973547825e-06, + "loss": 0.6868, + "step": 1345 + }, + { + "epoch": 0.55, + "learning_rate": 8.863435314952787e-06, + "loss": 0.592, + "step": 1346 + }, + { + "epoch": 0.55, + "learning_rate": 8.85027765007915e-06, + "loss": 0.65, + "step": 1347 + }, + { + "epoch": 0.55, + "learning_rate": 8.837122002007614e-06, + "loss": 0.6359, + "step": 1348 + }, + { + "epoch": 0.55, + "learning_rate": 8.82396839381536e-06, + "loss": 0.6293, + "step": 1349 + }, + { + "epoch": 0.55, + "learning_rate": 8.810816848575971e-06, + "loss": 0.6673, + "step": 1350 + }, + { + "epoch": 0.55, + "learning_rate": 8.797667389359426e-06, + "loss": 0.6763, + "step": 1351 + }, + { + "epoch": 0.55, + "learning_rate": 8.784520039232044e-06, + "loss": 0.5985, + "step": 1352 + }, + { + "epoch": 0.55, + "learning_rate": 8.771374821256431e-06, + "loss": 0.629, + "step": 1353 + }, + { + "epoch": 0.55, + "learning_rate": 8.758231758491467e-06, + "loss": 0.6728, + "step": 1354 + }, + { + "epoch": 0.55, + "learning_rate": 8.745090873992239e-06, + "loss": 0.7148, + "step": 1355 + }, + { + "epoch": 0.55, + "learning_rate": 8.731952190810029e-06, + "loss": 0.6358, + "step": 1356 + }, + { + "epoch": 0.55, + "learning_rate": 8.718815731992239e-06, + "loss": 0.7123, + "step": 1357 + }, + { + "epoch": 0.56, + "learning_rate": 8.705681520582382e-06, + "loss": 0.7215, + "step": 1358 + }, + { + "epoch": 0.56, + "learning_rate": 8.69254957962003e-06, + "loss": 0.5794, + "step": 1359 + }, + { + "epoch": 0.56, + "learning_rate": 8.679419932140765e-06, + "loss": 0.6664, + "step": 1360 + }, + { + "epoch": 0.56, + "learning_rate": 8.666292601176146e-06, + "loss": 0.6564, + "step": 1361 + }, + { + "epoch": 0.56, + "learning_rate": 8.653167609753667e-06, + "loss": 0.7458, + "step": 1362 + }, + { + "epoch": 0.56, + "learning_rate": 8.640044980896734e-06, + "loss": 0.6763, + "step": 1363 + }, + { + "epoch": 0.56, + "learning_rate": 8.62692473762458e-06, + "loss": 0.6906, + "step": 1364 + }, + { + "epoch": 0.56, + "learning_rate": 8.613806902952286e-06, + "loss": 0.6191, + "step": 1365 + }, + { + "epoch": 0.56, + "learning_rate": 8.600691499890677e-06, + "loss": 0.6593, + "step": 1366 + }, + { + "epoch": 0.56, + "learning_rate": 8.587578551446338e-06, + "loss": 0.6647, + "step": 1367 + }, + { + "epoch": 0.56, + "learning_rate": 8.574468080621533e-06, + "loss": 0.6891, + "step": 1368 + }, + { + "epoch": 0.56, + "learning_rate": 8.561360110414185e-06, + "loss": 0.7016, + "step": 1369 + }, + { + "epoch": 0.56, + "learning_rate": 8.54825466381783e-06, + "loss": 0.623, + "step": 1370 + }, + { + "epoch": 0.56, + "learning_rate": 8.535151763821574e-06, + "loss": 0.5997, + "step": 1371 + }, + { + "epoch": 0.56, + "learning_rate": 8.522051433410064e-06, + "loss": 0.6429, + "step": 1372 + }, + { + "epoch": 0.56, + "learning_rate": 8.508953695563428e-06, + "loss": 0.6178, + "step": 1373 + }, + { + "epoch": 0.56, + "learning_rate": 8.495858573257258e-06, + "loss": 0.5654, + "step": 1374 + }, + { + "epoch": 0.56, + "learning_rate": 8.48276608946255e-06, + "loss": 0.6463, + "step": 1375 + }, + { + "epoch": 0.56, + "learning_rate": 8.469676267145674e-06, + "loss": 0.6155, + "step": 1376 + }, + { + "epoch": 0.56, + "learning_rate": 8.456589129268335e-06, + "loss": 0.6585, + "step": 1377 + }, + { + "epoch": 0.56, + "learning_rate": 8.443504698787517e-06, + "loss": 0.6637, + "step": 1378 + }, + { + "epoch": 0.56, + "learning_rate": 8.430422998655473e-06, + "loss": 0.6316, + "step": 1379 + }, + { + "epoch": 0.56, + "learning_rate": 8.417344051819646e-06, + "loss": 0.5966, + "step": 1380 + }, + { + "epoch": 0.56, + "learning_rate": 8.40426788122267e-06, + "loss": 0.6883, + "step": 1381 + }, + { + "epoch": 0.57, + "learning_rate": 8.391194509802294e-06, + "loss": 0.5809, + "step": 1382 + }, + { + "epoch": 0.57, + "learning_rate": 8.378123960491362e-06, + "loss": 0.6205, + "step": 1383 + }, + { + "epoch": 0.57, + "learning_rate": 8.365056256217772e-06, + "loss": 0.6574, + "step": 1384 + }, + { + "epoch": 0.57, + "learning_rate": 8.351991419904416e-06, + "loss": 0.6135, + "step": 1385 + }, + { + "epoch": 0.57, + "learning_rate": 8.338929474469177e-06, + "loss": 0.6732, + "step": 1386 + }, + { + "epoch": 0.57, + "learning_rate": 8.325870442824848e-06, + "loss": 0.571, + "step": 1387 + }, + { + "epoch": 0.57, + "learning_rate": 8.312814347879121e-06, + "loss": 0.6187, + "step": 1388 + }, + { + "epoch": 0.57, + "learning_rate": 8.299761212534539e-06, + "loss": 0.7331, + "step": 1389 + }, + { + "epoch": 0.57, + "learning_rate": 8.286711059688441e-06, + "loss": 0.5907, + "step": 1390 + }, + { + "epoch": 0.57, + "learning_rate": 8.273663912232948e-06, + "loss": 0.6352, + "step": 1391 + }, + { + "epoch": 0.57, + "learning_rate": 8.260619793054894e-06, + "loss": 0.6186, + "step": 1392 + }, + { + "epoch": 0.57, + "learning_rate": 8.247578725035819e-06, + "loss": 0.6895, + "step": 1393 + }, + { + "epoch": 0.57, + "learning_rate": 8.234540731051892e-06, + "loss": 0.5519, + "step": 1394 + }, + { + "epoch": 0.57, + "learning_rate": 8.221505833973908e-06, + "loss": 0.6142, + "step": 1395 + }, + { + "epoch": 0.57, + "learning_rate": 8.208474056667212e-06, + "loss": 0.591, + "step": 1396 + }, + { + "epoch": 0.57, + "learning_rate": 8.195445421991692e-06, + "loss": 0.634, + "step": 1397 + }, + { + "epoch": 0.57, + "learning_rate": 8.182419952801716e-06, + "loss": 0.7016, + "step": 1398 + }, + { + "epoch": 0.57, + "learning_rate": 8.169397671946092e-06, + "loss": 0.6793, + "step": 1399 + }, + { + "epoch": 0.57, + "learning_rate": 8.15637860226805e-06, + "loss": 0.6162, + "step": 1400 + }, + { + "epoch": 0.57, + "learning_rate": 8.143362766605173e-06, + "loss": 0.5626, + "step": 1401 + }, + { + "epoch": 0.57, + "learning_rate": 8.130350187789387e-06, + "loss": 0.6736, + "step": 1402 + }, + { + "epoch": 0.57, + "learning_rate": 8.117340888646885e-06, + "loss": 0.6167, + "step": 1403 + }, + { + "epoch": 0.57, + "learning_rate": 8.104334891998124e-06, + "loss": 0.6183, + "step": 1404 + }, + { + "epoch": 0.57, + "learning_rate": 8.091332220657759e-06, + "loss": 0.6364, + "step": 1405 + }, + { + "epoch": 0.57, + "learning_rate": 8.078332897434617e-06, + "loss": 0.7039, + "step": 1406 + }, + { + "epoch": 0.58, + "learning_rate": 8.065336945131647e-06, + "loss": 0.6088, + "step": 1407 + }, + { + "epoch": 0.58, + "learning_rate": 8.052344386545882e-06, + "loss": 0.5764, + "step": 1408 + }, + { + "epoch": 0.58, + "learning_rate": 8.039355244468418e-06, + "loss": 0.6885, + "step": 1409 + }, + { + "epoch": 0.58, + "learning_rate": 8.026369541684334e-06, + "loss": 0.6421, + "step": 1410 + }, + { + "epoch": 0.58, + "learning_rate": 8.013387300972698e-06, + "loss": 0.7, + "step": 1411 + }, + { + "epoch": 0.58, + "learning_rate": 8.000408545106492e-06, + "loss": 0.6638, + "step": 1412 + }, + { + "epoch": 0.58, + "learning_rate": 7.987433296852589e-06, + "loss": 0.6438, + "step": 1413 + }, + { + "epoch": 0.58, + "learning_rate": 7.97446157897171e-06, + "loss": 0.6526, + "step": 1414 + }, + { + "epoch": 0.58, + "learning_rate": 7.961493414218377e-06, + "loss": 0.6316, + "step": 1415 + }, + { + "epoch": 0.58, + "learning_rate": 7.948528825340891e-06, + "loss": 0.6365, + "step": 1416 + }, + { + "epoch": 0.58, + "learning_rate": 7.935567835081265e-06, + "loss": 0.5808, + "step": 1417 + }, + { + "epoch": 0.58, + "learning_rate": 7.922610466175213e-06, + "loss": 0.6729, + "step": 1418 + }, + { + "epoch": 0.58, + "learning_rate": 7.909656741352092e-06, + "loss": 0.6027, + "step": 1419 + }, + { + "epoch": 0.58, + "learning_rate": 7.896706683334863e-06, + "loss": 0.5762, + "step": 1420 + }, + { + "epoch": 0.58, + "learning_rate": 7.883760314840064e-06, + "loss": 0.6939, + "step": 1421 + }, + { + "epoch": 0.58, + "learning_rate": 7.870817658577743e-06, + "loss": 0.6654, + "step": 1422 + }, + { + "epoch": 0.58, + "learning_rate": 7.85787873725146e-06, + "loss": 0.6737, + "step": 1423 + }, + { + "epoch": 0.58, + "learning_rate": 7.844943573558202e-06, + "loss": 0.5907, + "step": 1424 + }, + { + "epoch": 0.58, + "learning_rate": 7.83201219018838e-06, + "loss": 0.6303, + "step": 1425 + }, + { + "epoch": 0.58, + "learning_rate": 7.819084609825762e-06, + "loss": 0.6821, + "step": 1426 + }, + { + "epoch": 0.58, + "learning_rate": 7.806160855147456e-06, + "loss": 0.6315, + "step": 1427 + }, + { + "epoch": 0.58, + "learning_rate": 7.793240948823852e-06, + "loss": 0.6252, + "step": 1428 + }, + { + "epoch": 0.58, + "learning_rate": 7.780324913518586e-06, + "loss": 0.6126, + "step": 1429 + }, + { + "epoch": 0.58, + "learning_rate": 7.767412771888515e-06, + "loss": 0.5741, + "step": 1430 + }, + { + "epoch": 0.59, + "learning_rate": 7.754504546583652e-06, + "loss": 0.6822, + "step": 1431 + }, + { + "epoch": 0.59, + "learning_rate": 7.741600260247155e-06, + "loss": 0.6312, + "step": 1432 + }, + { + "epoch": 0.59, + "learning_rate": 7.728699935515257e-06, + "loss": 0.6389, + "step": 1433 + }, + { + "epoch": 0.59, + "learning_rate": 7.715803595017257e-06, + "loss": 0.6923, + "step": 1434 + }, + { + "epoch": 0.59, + "learning_rate": 7.702911261375454e-06, + "loss": 0.622, + "step": 1435 + }, + { + "epoch": 0.59, + "learning_rate": 7.69002295720512e-06, + "loss": 0.6477, + "step": 1436 + }, + { + "epoch": 0.59, + "learning_rate": 7.677138705114463e-06, + "loss": 0.6774, + "step": 1437 + }, + { + "epoch": 0.59, + "learning_rate": 7.664258527704576e-06, + "loss": 0.655, + "step": 1438 + }, + { + "epoch": 0.59, + "learning_rate": 7.651382447569418e-06, + "loss": 0.6349, + "step": 1439 + }, + { + "epoch": 0.59, + "learning_rate": 7.638510487295738e-06, + "loss": 0.5454, + "step": 1440 + }, + { + "epoch": 0.59, + "learning_rate": 7.625642669463084e-06, + "loss": 0.6604, + "step": 1441 + }, + { + "epoch": 0.59, + "learning_rate": 7.61277901664372e-06, + "loss": 0.5886, + "step": 1442 + }, + { + "epoch": 0.59, + "learning_rate": 7.599919551402606e-06, + "loss": 0.6668, + "step": 1443 + }, + { + "epoch": 0.59, + "learning_rate": 7.587064296297364e-06, + "loss": 0.6225, + "step": 1444 + }, + { + "epoch": 0.59, + "learning_rate": 7.574213273878221e-06, + "loss": 0.6207, + "step": 1445 + }, + { + "epoch": 0.59, + "learning_rate": 7.56136650668799e-06, + "loss": 0.6111, + "step": 1446 + }, + { + "epoch": 0.59, + "learning_rate": 7.548524017262007e-06, + "loss": 0.7042, + "step": 1447 + }, + { + "epoch": 0.59, + "learning_rate": 7.535685828128117e-06, + "loss": 0.685, + "step": 1448 + }, + { + "epoch": 0.59, + "learning_rate": 7.52285196180661e-06, + "loss": 0.697, + "step": 1449 + }, + { + "epoch": 0.59, + "learning_rate": 7.510022440810203e-06, + "loss": 0.6194, + "step": 1450 + }, + { + "epoch": 0.59, + "learning_rate": 7.497197287643986e-06, + "loss": 0.5635, + "step": 1451 + }, + { + "epoch": 0.59, + "learning_rate": 7.48437652480538e-06, + "loss": 0.6443, + "step": 1452 + }, + { + "epoch": 0.59, + "learning_rate": 7.471560174784122e-06, + "loss": 0.6482, + "step": 1453 + }, + { + "epoch": 0.59, + "learning_rate": 7.458748260062187e-06, + "loss": 0.6442, + "step": 1454 + }, + { + "epoch": 0.59, + "learning_rate": 7.445940803113792e-06, + "loss": 0.6554, + "step": 1455 + }, + { + "epoch": 0.6, + "learning_rate": 7.433137826405314e-06, + "loss": 0.6285, + "step": 1456 + }, + { + "epoch": 0.6, + "learning_rate": 7.420339352395285e-06, + "loss": 0.5656, + "step": 1457 + }, + { + "epoch": 0.6, + "learning_rate": 7.407545403534334e-06, + "loss": 0.7026, + "step": 1458 + }, + { + "epoch": 0.6, + "learning_rate": 7.394756002265147e-06, + "loss": 0.6109, + "step": 1459 + }, + { + "epoch": 0.6, + "learning_rate": 7.381971171022442e-06, + "loss": 0.631, + "step": 1460 + }, + { + "epoch": 0.6, + "learning_rate": 7.369190932232911e-06, + "loss": 0.6284, + "step": 1461 + }, + { + "epoch": 0.6, + "learning_rate": 7.356415308315201e-06, + "loss": 0.6287, + "step": 1462 + }, + { + "epoch": 0.6, + "learning_rate": 7.343644321679851e-06, + "loss": 0.6145, + "step": 1463 + }, + { + "epoch": 0.6, + "learning_rate": 7.3308779947292776e-06, + "loss": 0.5997, + "step": 1464 + }, + { + "epoch": 0.6, + "learning_rate": 7.318116349857719e-06, + "loss": 0.7314, + "step": 1465 + }, + { + "epoch": 0.6, + "learning_rate": 7.305359409451192e-06, + "loss": 0.663, + "step": 1466 + }, + { + "epoch": 0.6, + "learning_rate": 7.2926071958874765e-06, + "loss": 0.6189, + "step": 1467 + }, + { + "epoch": 0.6, + "learning_rate": 7.279859731536045e-06, + "loss": 0.7363, + "step": 1468 + }, + { + "epoch": 0.6, + "learning_rate": 7.2671170387580534e-06, + "loss": 0.6659, + "step": 1469 + }, + { + "epoch": 0.6, + "learning_rate": 7.2543791399062755e-06, + "loss": 0.552, + "step": 1470 + }, + { + "epoch": 0.6, + "learning_rate": 7.241646057325084e-06, + "loss": 0.6937, + "step": 1471 + }, + { + "epoch": 0.6, + "learning_rate": 7.228917813350404e-06, + "loss": 0.6792, + "step": 1472 + }, + { + "epoch": 0.6, + "learning_rate": 7.216194430309657e-06, + "loss": 0.6138, + "step": 1473 + }, + { + "epoch": 0.6, + "learning_rate": 7.203475930521764e-06, + "loss": 0.662, + "step": 1474 + }, + { + "epoch": 0.6, + "learning_rate": 7.190762336297052e-06, + "loss": 0.6375, + "step": 1475 + }, + { + "epoch": 0.6, + "learning_rate": 7.1780536699372685e-06, + "loss": 0.6624, + "step": 1476 + }, + { + "epoch": 0.6, + "learning_rate": 7.165349953735494e-06, + "loss": 0.6535, + "step": 1477 + }, + { + "epoch": 0.6, + "learning_rate": 7.1526512099761424e-06, + "loss": 0.6433, + "step": 1478 + }, + { + "epoch": 0.6, + "learning_rate": 7.139957460934902e-06, + "loss": 0.6151, + "step": 1479 + }, + { + "epoch": 0.61, + "learning_rate": 7.127268728878687e-06, + "loss": 0.6065, + "step": 1480 + }, + { + "epoch": 0.61, + "learning_rate": 7.11458503606563e-06, + "loss": 0.6915, + "step": 1481 + }, + { + "epoch": 0.61, + "learning_rate": 7.101906404745006e-06, + "loss": 0.6012, + "step": 1482 + }, + { + "epoch": 0.61, + "learning_rate": 7.089232857157228e-06, + "loss": 0.6355, + "step": 1483 + }, + { + "epoch": 0.61, + "learning_rate": 7.076564415533774e-06, + "loss": 0.5928, + "step": 1484 + }, + { + "epoch": 0.61, + "learning_rate": 7.063901102097184e-06, + "loss": 0.6511, + "step": 1485 + }, + { + "epoch": 0.61, + "learning_rate": 7.0512429390609825e-06, + "loss": 0.6225, + "step": 1486 + }, + { + "epoch": 0.61, + "learning_rate": 7.038589948629677e-06, + "loss": 0.6154, + "step": 1487 + }, + { + "epoch": 0.61, + "learning_rate": 7.0259421529986946e-06, + "loss": 0.5748, + "step": 1488 + }, + { + "epoch": 0.61, + "learning_rate": 7.013299574354342e-06, + "loss": 0.6038, + "step": 1489 + }, + { + "epoch": 0.61, + "learning_rate": 7.00066223487379e-06, + "loss": 0.6378, + "step": 1490 + }, + { + "epoch": 0.61, + "learning_rate": 6.9880301567250005e-06, + "loss": 0.6833, + "step": 1491 + }, + { + "epoch": 0.61, + "learning_rate": 6.975403362066727e-06, + "loss": 0.6165, + "step": 1492 + }, + { + "epoch": 0.61, + "learning_rate": 6.962781873048435e-06, + "loss": 0.6691, + "step": 1493 + }, + { + "epoch": 0.61, + "learning_rate": 6.9501657118102994e-06, + "loss": 0.5977, + "step": 1494 + }, + { + "epoch": 0.61, + "learning_rate": 6.93755490048314e-06, + "loss": 0.6229, + "step": 1495 + }, + { + "epoch": 0.61, + "learning_rate": 6.92494946118839e-06, + "loss": 0.6642, + "step": 1496 + }, + { + "epoch": 0.61, + "learning_rate": 6.91234941603807e-06, + "loss": 0.6479, + "step": 1497 + }, + { + "epoch": 0.61, + "learning_rate": 6.899754787134725e-06, + "loss": 0.6376, + "step": 1498 + }, + { + "epoch": 0.61, + "learning_rate": 6.887165596571411e-06, + "loss": 0.6789, + "step": 1499 + }, + { + "epoch": 0.61, + "learning_rate": 6.874581866431633e-06, + "loss": 0.7342, + "step": 1500 + }, + { + "epoch": 0.61, + "learning_rate": 6.86200361878933e-06, + "loss": 0.6027, + "step": 1501 + }, + { + "epoch": 0.61, + "learning_rate": 6.849430875708818e-06, + "loss": 0.6349, + "step": 1502 + }, + { + "epoch": 0.61, + "learning_rate": 6.836863659244746e-06, + "loss": 0.6059, + "step": 1503 + }, + { + "epoch": 0.61, + "learning_rate": 6.82430199144209e-06, + "loss": 0.6761, + "step": 1504 + }, + { + "epoch": 0.62, + "learning_rate": 6.811745894336074e-06, + "loss": 0.646, + "step": 1505 + }, + { + "epoch": 0.62, + "learning_rate": 6.799195389952163e-06, + "loss": 0.6818, + "step": 1506 + }, + { + "epoch": 0.62, + "learning_rate": 6.786650500306e-06, + "loss": 0.6167, + "step": 1507 + }, + { + "epoch": 0.62, + "learning_rate": 6.77411124740339e-06, + "loss": 0.7354, + "step": 1508 + }, + { + "epoch": 0.62, + "learning_rate": 6.7615776532402456e-06, + "loss": 0.5956, + "step": 1509 + }, + { + "epoch": 0.62, + "learning_rate": 6.7490497398025444e-06, + "loss": 0.6544, + "step": 1510 + }, + { + "epoch": 0.62, + "learning_rate": 6.736527529066319e-06, + "loss": 0.6957, + "step": 1511 + }, + { + "epoch": 0.62, + "learning_rate": 6.724011042997576e-06, + "loss": 0.6441, + "step": 1512 + }, + { + "epoch": 0.62, + "learning_rate": 6.7115003035522985e-06, + "loss": 0.6593, + "step": 1513 + }, + { + "epoch": 0.62, + "learning_rate": 6.698995332676375e-06, + "loss": 0.642, + "step": 1514 + }, + { + "epoch": 0.62, + "learning_rate": 6.686496152305586e-06, + "loss": 0.6411, + "step": 1515 + }, + { + "epoch": 0.62, + "learning_rate": 6.674002784365547e-06, + "loss": 0.6947, + "step": 1516 + }, + { + "epoch": 0.62, + "learning_rate": 6.66151525077168e-06, + "loss": 0.6317, + "step": 1517 + }, + { + "epoch": 0.62, + "learning_rate": 6.649033573429178e-06, + "loss": 0.5698, + "step": 1518 + }, + { + "epoch": 0.62, + "learning_rate": 6.6365577742329455e-06, + "loss": 0.622, + "step": 1519 + }, + { + "epoch": 0.62, + "learning_rate": 6.6240878750676e-06, + "loss": 0.6135, + "step": 1520 + }, + { + "epoch": 0.62, + "learning_rate": 6.611623897807382e-06, + "loss": 0.7081, + "step": 1521 + }, + { + "epoch": 0.62, + "learning_rate": 6.5991658643161696e-06, + "loss": 0.6611, + "step": 1522 + }, + { + "epoch": 0.62, + "learning_rate": 6.586713796447392e-06, + "loss": 0.5996, + "step": 1523 + }, + { + "epoch": 0.62, + "learning_rate": 6.574267716044033e-06, + "loss": 0.6354, + "step": 1524 + }, + { + "epoch": 0.62, + "learning_rate": 6.561827644938563e-06, + "loss": 0.5704, + "step": 1525 + }, + { + "epoch": 0.62, + "learning_rate": 6.549393604952906e-06, + "loss": 0.6245, + "step": 1526 + }, + { + "epoch": 0.62, + "learning_rate": 6.536965617898423e-06, + "loss": 0.69, + "step": 1527 + }, + { + "epoch": 0.62, + "learning_rate": 6.524543705575839e-06, + "loss": 0.649, + "step": 1528 + }, + { + "epoch": 0.63, + "learning_rate": 6.512127889775239e-06, + "loss": 0.6557, + "step": 1529 + }, + { + "epoch": 0.63, + "learning_rate": 6.499718192275999e-06, + "loss": 0.6709, + "step": 1530 + }, + { + "epoch": 0.63, + "learning_rate": 6.487314634846774e-06, + "loss": 0.6524, + "step": 1531 + }, + { + "epoch": 0.63, + "learning_rate": 6.474917239245445e-06, + "loss": 0.5817, + "step": 1532 + }, + { + "epoch": 0.63, + "learning_rate": 6.4625260272190775e-06, + "loss": 0.7199, + "step": 1533 + }, + { + "epoch": 0.63, + "learning_rate": 6.450141020503902e-06, + "loss": 0.6143, + "step": 1534 + }, + { + "epoch": 0.63, + "learning_rate": 6.43776224082525e-06, + "loss": 0.5836, + "step": 1535 + }, + { + "epoch": 0.63, + "learning_rate": 6.425389709897543e-06, + "loss": 0.5933, + "step": 1536 + }, + { + "epoch": 0.63, + "learning_rate": 6.4130234494242315e-06, + "loss": 0.7129, + "step": 1537 + }, + { + "epoch": 0.63, + "learning_rate": 6.400663481097774e-06, + "loss": 0.6216, + "step": 1538 + }, + { + "epoch": 0.63, + "learning_rate": 6.388309826599588e-06, + "loss": 0.7252, + "step": 1539 + }, + { + "epoch": 0.63, + "learning_rate": 6.375962507600009e-06, + "loss": 0.6145, + "step": 1540 + }, + { + "epoch": 0.63, + "learning_rate": 6.363621545758276e-06, + "loss": 0.6862, + "step": 1541 + }, + { + "epoch": 0.63, + "learning_rate": 6.3512869627224535e-06, + "loss": 0.6897, + "step": 1542 + }, + { + "epoch": 0.63, + "learning_rate": 6.338958780129441e-06, + "loss": 0.6848, + "step": 1543 + }, + { + "epoch": 0.63, + "learning_rate": 6.326637019604888e-06, + "loss": 0.659, + "step": 1544 + }, + { + "epoch": 0.63, + "learning_rate": 6.314321702763198e-06, + "loss": 0.5883, + "step": 1545 + }, + { + "epoch": 0.63, + "learning_rate": 6.302012851207455e-06, + "loss": 0.6645, + "step": 1546 + }, + { + "epoch": 0.63, + "learning_rate": 6.289710486529412e-06, + "loss": 0.6635, + "step": 1547 + }, + { + "epoch": 0.63, + "learning_rate": 6.277414630309444e-06, + "loss": 0.5986, + "step": 1548 + }, + { + "epoch": 0.63, + "learning_rate": 6.265125304116498e-06, + "loss": 0.7029, + "step": 1549 + }, + { + "epoch": 0.63, + "learning_rate": 6.252842529508081e-06, + "loss": 0.6566, + "step": 1550 + }, + { + "epoch": 0.63, + "learning_rate": 6.240566328030193e-06, + "loss": 0.6431, + "step": 1551 + }, + { + "epoch": 0.63, + "learning_rate": 6.228296721217317e-06, + "loss": 0.6174, + "step": 1552 + }, + { + "epoch": 0.63, + "learning_rate": 6.216033730592357e-06, + "loss": 0.6436, + "step": 1553 + }, + { + "epoch": 0.64, + "learning_rate": 6.2037773776666134e-06, + "loss": 0.6042, + "step": 1554 + }, + { + "epoch": 0.64, + "learning_rate": 6.191527683939753e-06, + "loss": 0.648, + "step": 1555 + }, + { + "epoch": 0.64, + "learning_rate": 6.179284670899745e-06, + "loss": 0.6231, + "step": 1556 + }, + { + "epoch": 0.64, + "learning_rate": 6.167048360022856e-06, + "loss": 0.6698, + "step": 1557 + }, + { + "epoch": 0.64, + "learning_rate": 6.154818772773579e-06, + "loss": 0.6573, + "step": 1558 + }, + { + "epoch": 0.64, + "learning_rate": 6.142595930604631e-06, + "loss": 0.7049, + "step": 1559 + }, + { + "epoch": 0.64, + "learning_rate": 6.130379854956879e-06, + "loss": 0.6447, + "step": 1560 + }, + { + "epoch": 0.64, + "learning_rate": 6.118170567259336e-06, + "loss": 0.7026, + "step": 1561 + }, + { + "epoch": 0.64, + "learning_rate": 6.105968088929098e-06, + "loss": 0.6007, + "step": 1562 + }, + { + "epoch": 0.64, + "learning_rate": 6.093772441371315e-06, + "loss": 0.6078, + "step": 1563 + }, + { + "epoch": 0.64, + "learning_rate": 6.081583645979168e-06, + "loss": 0.6222, + "step": 1564 + }, + { + "epoch": 0.64, + "learning_rate": 6.069401724133796e-06, + "loss": 0.6396, + "step": 1565 + }, + { + "epoch": 0.64, + "learning_rate": 6.057226697204308e-06, + "loss": 0.6941, + "step": 1566 + }, + { + "epoch": 0.64, + "learning_rate": 6.045058586547692e-06, + "loss": 0.6709, + "step": 1567 + }, + { + "epoch": 0.64, + "learning_rate": 6.032897413508822e-06, + "loss": 0.6249, + "step": 1568 + }, + { + "epoch": 0.64, + "learning_rate": 6.0207431994203955e-06, + "loss": 0.6003, + "step": 1569 + }, + { + "epoch": 0.64, + "learning_rate": 6.0085959656028994e-06, + "loss": 0.661, + "step": 1570 + }, + { + "epoch": 0.64, + "learning_rate": 5.9964557333645845e-06, + "loss": 0.6845, + "step": 1571 + }, + { + "epoch": 0.64, + "learning_rate": 5.984322524001409e-06, + "loss": 0.6322, + "step": 1572 + }, + { + "epoch": 0.64, + "learning_rate": 5.972196358797024e-06, + "loss": 0.6658, + "step": 1573 + }, + { + "epoch": 0.64, + "learning_rate": 5.960077259022713e-06, + "loss": 0.632, + "step": 1574 + }, + { + "epoch": 0.64, + "learning_rate": 5.947965245937375e-06, + "loss": 0.6772, + "step": 1575 + }, + { + "epoch": 0.64, + "learning_rate": 5.9358603407874695e-06, + "loss": 0.6451, + "step": 1576 + }, + { + "epoch": 0.64, + "learning_rate": 5.9237625648069895e-06, + "loss": 0.6318, + "step": 1577 + }, + { + "epoch": 0.65, + "learning_rate": 5.9116719392174304e-06, + "loss": 0.6824, + "step": 1578 + }, + { + "epoch": 0.65, + "learning_rate": 5.89958848522773e-06, + "loss": 0.6331, + "step": 1579 + }, + { + "epoch": 0.65, + "learning_rate": 5.887512224034263e-06, + "loss": 0.6536, + "step": 1580 + }, + { + "epoch": 0.65, + "learning_rate": 5.8754431768207694e-06, + "loss": 0.6422, + "step": 1581 + }, + { + "epoch": 0.65, + "learning_rate": 5.8633813647583505e-06, + "loss": 0.6291, + "step": 1582 + }, + { + "epoch": 0.65, + "learning_rate": 5.851326809005402e-06, + "loss": 0.6412, + "step": 1583 + }, + { + "epoch": 0.65, + "learning_rate": 5.8392795307076e-06, + "loss": 0.6001, + "step": 1584 + }, + { + "epoch": 0.65, + "learning_rate": 5.827239550997856e-06, + "loss": 0.6229, + "step": 1585 + }, + { + "epoch": 0.65, + "learning_rate": 5.815206890996267e-06, + "loss": 0.6071, + "step": 1586 + }, + { + "epoch": 0.65, + "learning_rate": 5.803181571810106e-06, + "loss": 0.691, + "step": 1587 + }, + { + "epoch": 0.65, + "learning_rate": 5.791163614533753e-06, + "loss": 0.5727, + "step": 1588 + }, + { + "epoch": 0.65, + "learning_rate": 5.7791530402486884e-06, + "loss": 0.6147, + "step": 1589 + }, + { + "epoch": 0.65, + "learning_rate": 5.76714987002343e-06, + "loss": 0.6413, + "step": 1590 + }, + { + "epoch": 0.65, + "learning_rate": 5.755154124913514e-06, + "loss": 0.7356, + "step": 1591 + }, + { + "epoch": 0.65, + "learning_rate": 5.743165825961454e-06, + "loss": 0.6126, + "step": 1592 + }, + { + "epoch": 0.65, + "learning_rate": 5.731184994196697e-06, + "loss": 0.7526, + "step": 1593 + }, + { + "epoch": 0.65, + "learning_rate": 5.719211650635586e-06, + "loss": 0.6431, + "step": 1594 + }, + { + "epoch": 0.65, + "learning_rate": 5.707245816281345e-06, + "loss": 0.5429, + "step": 1595 + }, + { + "epoch": 0.65, + "learning_rate": 5.695287512124011e-06, + "loss": 0.6359, + "step": 1596 + }, + { + "epoch": 0.65, + "learning_rate": 5.683336759140409e-06, + "loss": 0.5654, + "step": 1597 + }, + { + "epoch": 0.65, + "learning_rate": 5.671393578294133e-06, + "loss": 0.6572, + "step": 1598 + }, + { + "epoch": 0.65, + "learning_rate": 5.659457990535491e-06, + "loss": 0.6754, + "step": 1599 + }, + { + "epoch": 0.65, + "learning_rate": 5.647530016801457e-06, + "loss": 0.6868, + "step": 1600 + }, + { + "epoch": 0.65, + "learning_rate": 5.635609678015668e-06, + "loss": 0.647, + "step": 1601 + }, + { + "epoch": 0.65, + "learning_rate": 5.62369699508835e-06, + "loss": 0.5354, + "step": 1602 + }, + { + "epoch": 0.66, + "learning_rate": 5.611791988916317e-06, + "loss": 0.6123, + "step": 1603 + }, + { + "epoch": 0.66, + "learning_rate": 5.5998946803829e-06, + "loss": 0.578, + "step": 1604 + }, + { + "epoch": 0.66, + "learning_rate": 5.588005090357943e-06, + "loss": 0.652, + "step": 1605 + }, + { + "epoch": 0.66, + "learning_rate": 5.576123239697735e-06, + "loss": 0.6212, + "step": 1606 + }, + { + "epoch": 0.66, + "learning_rate": 5.564249149244998e-06, + "loss": 0.7071, + "step": 1607 + }, + { + "epoch": 0.66, + "learning_rate": 5.552382839828847e-06, + "loss": 0.6945, + "step": 1608 + }, + { + "epoch": 0.66, + "learning_rate": 5.540524332264734e-06, + "loss": 0.6102, + "step": 1609 + }, + { + "epoch": 0.66, + "learning_rate": 5.528673647354432e-06, + "loss": 0.6677, + "step": 1610 + }, + { + "epoch": 0.66, + "learning_rate": 5.516830805885989e-06, + "loss": 0.5845, + "step": 1611 + }, + { + "epoch": 0.66, + "learning_rate": 5.504995828633704e-06, + "loss": 0.6303, + "step": 1612 + }, + { + "epoch": 0.66, + "learning_rate": 5.493168736358063e-06, + "loss": 0.6699, + "step": 1613 + }, + { + "epoch": 0.66, + "learning_rate": 5.481349549805741e-06, + "loss": 0.6219, + "step": 1614 + }, + { + "epoch": 0.66, + "learning_rate": 5.469538289709535e-06, + "loss": 0.5898, + "step": 1615 + }, + { + "epoch": 0.66, + "learning_rate": 5.457734976788331e-06, + "loss": 0.6263, + "step": 1616 + }, + { + "epoch": 0.66, + "learning_rate": 5.445939631747089e-06, + "loss": 0.6121, + "step": 1617 + }, + { + "epoch": 0.66, + "learning_rate": 5.434152275276776e-06, + "loss": 0.6207, + "step": 1618 + }, + { + "epoch": 0.66, + "learning_rate": 5.4223729280543634e-06, + "loss": 0.607, + "step": 1619 + }, + { + "epoch": 0.66, + "learning_rate": 5.410601610742754e-06, + "loss": 0.6156, + "step": 1620 + }, + { + "epoch": 0.66, + "learning_rate": 5.39883834399078e-06, + "loss": 0.696, + "step": 1621 + }, + { + "epoch": 0.66, + "learning_rate": 5.38708314843315e-06, + "loss": 0.6141, + "step": 1622 + }, + { + "epoch": 0.66, + "learning_rate": 5.375336044690405e-06, + "loss": 0.6294, + "step": 1623 + }, + { + "epoch": 0.66, + "learning_rate": 5.363597053368897e-06, + "loss": 0.6457, + "step": 1624 + }, + { + "epoch": 0.66, + "learning_rate": 5.3518661950607465e-06, + "loss": 0.5826, + "step": 1625 + }, + { + "epoch": 0.66, + "learning_rate": 5.340143490343813e-06, + "loss": 0.5124, + "step": 1626 + }, + { + "epoch": 0.67, + "learning_rate": 5.328428959781643e-06, + "loss": 0.6587, + "step": 1627 + }, + { + "epoch": 0.67, + "learning_rate": 5.316722623923454e-06, + "loss": 0.5949, + "step": 1628 + }, + { + "epoch": 0.67, + "learning_rate": 5.305024503304086e-06, + "loss": 0.6249, + "step": 1629 + }, + { + "epoch": 0.67, + "learning_rate": 5.293334618443962e-06, + "loss": 0.5911, + "step": 1630 + }, + { + "epoch": 0.67, + "learning_rate": 5.281652989849067e-06, + "loss": 0.6717, + "step": 1631 + }, + { + "epoch": 0.67, + "learning_rate": 5.269979638010893e-06, + "loss": 0.6239, + "step": 1632 + }, + { + "epoch": 0.67, + "learning_rate": 5.2583145834064295e-06, + "loss": 0.669, + "step": 1633 + }, + { + "epoch": 0.67, + "learning_rate": 5.24665784649809e-06, + "loss": 0.6046, + "step": 1634 + }, + { + "epoch": 0.67, + "learning_rate": 5.235009447733717e-06, + "loss": 0.637, + "step": 1635 + }, + { + "epoch": 0.67, + "learning_rate": 5.223369407546509e-06, + "loss": 0.6707, + "step": 1636 + }, + { + "epoch": 0.67, + "learning_rate": 5.211737746355021e-06, + "loss": 0.6737, + "step": 1637 + }, + { + "epoch": 0.67, + "learning_rate": 5.2001144845630906e-06, + "loss": 0.6084, + "step": 1638 + }, + { + "epoch": 0.67, + "learning_rate": 5.188499642559838e-06, + "loss": 0.6288, + "step": 1639 + }, + { + "epoch": 0.67, + "learning_rate": 5.176893240719602e-06, + "loss": 0.6209, + "step": 1640 + }, + { + "epoch": 0.67, + "learning_rate": 5.165295299401921e-06, + "loss": 0.7151, + "step": 1641 + }, + { + "epoch": 0.67, + "learning_rate": 5.153705838951495e-06, + "loss": 0.6241, + "step": 1642 + }, + { + "epoch": 0.67, + "learning_rate": 5.1421248796981385e-06, + "loss": 0.6221, + "step": 1643 + }, + { + "epoch": 0.67, + "learning_rate": 5.1305524419567595e-06, + "loss": 0.6406, + "step": 1644 + }, + { + "epoch": 0.67, + "learning_rate": 5.1189885460273255e-06, + "loss": 0.6736, + "step": 1645 + }, + { + "epoch": 0.67, + "learning_rate": 5.107433212194801e-06, + "loss": 0.6042, + "step": 1646 + }, + { + "epoch": 0.67, + "learning_rate": 5.095886460729152e-06, + "loss": 0.6682, + "step": 1647 + }, + { + "epoch": 0.67, + "learning_rate": 5.08434831188527e-06, + "loss": 0.7017, + "step": 1648 + }, + { + "epoch": 0.67, + "learning_rate": 5.072818785902975e-06, + "loss": 0.6445, + "step": 1649 + }, + { + "epoch": 0.67, + "learning_rate": 5.061297903006943e-06, + "loss": 0.5683, + "step": 1650 + }, + { + "epoch": 0.67, + "learning_rate": 5.049785683406704e-06, + "loss": 0.5783, + "step": 1651 + }, + { + "epoch": 0.68, + "learning_rate": 5.038282147296585e-06, + "loss": 0.613, + "step": 1652 + }, + { + "epoch": 0.68, + "learning_rate": 5.026787314855679e-06, + "loss": 0.539, + "step": 1653 + }, + { + "epoch": 0.68, + "learning_rate": 5.015301206247813e-06, + "loss": 0.6494, + "step": 1654 + }, + { + "epoch": 0.68, + "learning_rate": 5.003823841621504e-06, + "loss": 0.6016, + "step": 1655 + }, + { + "epoch": 0.68, + "learning_rate": 4.992355241109949e-06, + "loss": 0.604, + "step": 1656 + }, + { + "epoch": 0.68, + "learning_rate": 4.980895424830948e-06, + "loss": 0.5705, + "step": 1657 + }, + { + "epoch": 0.68, + "learning_rate": 4.96944441288691e-06, + "loss": 0.6403, + "step": 1658 + }, + { + "epoch": 0.68, + "learning_rate": 4.958002225364797e-06, + "loss": 0.578, + "step": 1659 + }, + { + "epoch": 0.68, + "learning_rate": 4.94656888233608e-06, + "loss": 0.6708, + "step": 1660 + }, + { + "epoch": 0.68, + "learning_rate": 4.935144403856731e-06, + "loss": 0.6255, + "step": 1661 + }, + { + "epoch": 0.68, + "learning_rate": 4.923728809967156e-06, + "loss": 0.629, + "step": 1662 + }, + { + "epoch": 0.68, + "learning_rate": 4.912322120692194e-06, + "loss": 0.6734, + "step": 1663 + }, + { + "epoch": 0.68, + "learning_rate": 4.900924356041044e-06, + "loss": 0.6246, + "step": 1664 + }, + { + "epoch": 0.68, + "learning_rate": 4.889535536007267e-06, + "loss": 0.5585, + "step": 1665 + }, + { + "epoch": 0.68, + "learning_rate": 4.878155680568721e-06, + "loss": 0.5961, + "step": 1666 + }, + { + "epoch": 0.68, + "learning_rate": 4.866784809687553e-06, + "loss": 0.5819, + "step": 1667 + }, + { + "epoch": 0.68, + "learning_rate": 4.855422943310129e-06, + "loss": 0.5051, + "step": 1668 + }, + { + "epoch": 0.68, + "learning_rate": 4.844070101367043e-06, + "loss": 0.6588, + "step": 1669 + }, + { + "epoch": 0.68, + "learning_rate": 4.832726303773042e-06, + "loss": 0.6368, + "step": 1670 + }, + { + "epoch": 0.68, + "learning_rate": 4.821391570427008e-06, + "loss": 0.5931, + "step": 1671 + }, + { + "epoch": 0.68, + "learning_rate": 4.810065921211936e-06, + "loss": 0.7271, + "step": 1672 + }, + { + "epoch": 0.68, + "learning_rate": 4.79874937599487e-06, + "loss": 0.6914, + "step": 1673 + }, + { + "epoch": 0.68, + "learning_rate": 4.787441954626895e-06, + "loss": 0.5705, + "step": 1674 + }, + { + "epoch": 0.68, + "learning_rate": 4.776143676943093e-06, + "loss": 0.6522, + "step": 1675 + }, + { + "epoch": 0.69, + "learning_rate": 4.764854562762491e-06, + "loss": 0.647, + "step": 1676 + }, + { + "epoch": 0.69, + "learning_rate": 4.753574631888063e-06, + "loss": 0.6377, + "step": 1677 + }, + { + "epoch": 0.69, + "learning_rate": 4.742303904106653e-06, + "loss": 0.6902, + "step": 1678 + }, + { + "epoch": 0.69, + "learning_rate": 4.731042399188981e-06, + "loss": 0.7178, + "step": 1679 + }, + { + "epoch": 0.69, + "learning_rate": 4.719790136889569e-06, + "loss": 0.6393, + "step": 1680 + }, + { + "epoch": 0.69, + "learning_rate": 4.708547136946742e-06, + "loss": 0.5724, + "step": 1681 + }, + { + "epoch": 0.69, + "learning_rate": 4.697313419082573e-06, + "loss": 0.6538, + "step": 1682 + }, + { + "epoch": 0.69, + "learning_rate": 4.6860890030028485e-06, + "loss": 0.7321, + "step": 1683 + }, + { + "epoch": 0.69, + "learning_rate": 4.674873908397039e-06, + "loss": 0.6671, + "step": 1684 + }, + { + "epoch": 0.69, + "learning_rate": 4.663668154938262e-06, + "loss": 0.6276, + "step": 1685 + }, + { + "epoch": 0.69, + "learning_rate": 4.65247176228326e-06, + "loss": 0.6324, + "step": 1686 + }, + { + "epoch": 0.69, + "learning_rate": 4.64128475007234e-06, + "loss": 0.6553, + "step": 1687 + }, + { + "epoch": 0.69, + "learning_rate": 4.630107137929365e-06, + "loss": 0.5936, + "step": 1688 + }, + { + "epoch": 0.69, + "learning_rate": 4.618938945461708e-06, + "loss": 0.6397, + "step": 1689 + }, + { + "epoch": 0.69, + "learning_rate": 4.6077801922602105e-06, + "loss": 0.5604, + "step": 1690 + }, + { + "epoch": 0.69, + "learning_rate": 4.596630897899164e-06, + "loss": 0.7081, + "step": 1691 + }, + { + "epoch": 0.69, + "learning_rate": 4.585491081936263e-06, + "loss": 0.6613, + "step": 1692 + }, + { + "epoch": 0.69, + "learning_rate": 4.57436076391258e-06, + "loss": 0.687, + "step": 1693 + }, + { + "epoch": 0.69, + "learning_rate": 4.563239963352517e-06, + "loss": 0.5905, + "step": 1694 + }, + { + "epoch": 0.69, + "learning_rate": 4.552128699763795e-06, + "loss": 0.6323, + "step": 1695 + }, + { + "epoch": 0.69, + "learning_rate": 4.5410269926373905e-06, + "loss": 0.6664, + "step": 1696 + }, + { + "epoch": 0.69, + "learning_rate": 4.529934861447532e-06, + "loss": 0.6958, + "step": 1697 + }, + { + "epoch": 0.69, + "learning_rate": 4.518852325651638e-06, + "loss": 0.6338, + "step": 1698 + }, + { + "epoch": 0.69, + "learning_rate": 4.507779404690294e-06, + "loss": 0.6107, + "step": 1699 + }, + { + "epoch": 0.7, + "learning_rate": 4.496716117987234e-06, + "loss": 0.633, + "step": 1700 + }, + { + "epoch": 0.7, + "learning_rate": 4.485662484949275e-06, + "loss": 0.6524, + "step": 1701 + }, + { + "epoch": 0.7, + "learning_rate": 4.474618524966313e-06, + "loss": 0.6004, + "step": 1702 + }, + { + "epoch": 0.7, + "learning_rate": 4.463584257411264e-06, + "loss": 0.6358, + "step": 1703 + }, + { + "epoch": 0.7, + "learning_rate": 4.452559701640053e-06, + "loss": 0.652, + "step": 1704 + }, + { + "epoch": 0.7, + "learning_rate": 4.441544876991566e-06, + "loss": 0.533, + "step": 1705 + }, + { + "epoch": 0.7, + "learning_rate": 4.43053980278761e-06, + "loss": 0.6784, + "step": 1706 + }, + { + "epoch": 0.7, + "learning_rate": 4.4195444983329035e-06, + "loss": 0.6295, + "step": 1707 + }, + { + "epoch": 0.7, + "learning_rate": 4.4085589829150125e-06, + "loss": 0.5547, + "step": 1708 + }, + { + "epoch": 0.7, + "learning_rate": 4.397583275804344e-06, + "loss": 0.5615, + "step": 1709 + }, + { + "epoch": 0.7, + "learning_rate": 4.386617396254085e-06, + "loss": 0.6918, + "step": 1710 + }, + { + "epoch": 0.7, + "learning_rate": 4.375661363500201e-06, + "loss": 0.5957, + "step": 1711 + }, + { + "epoch": 0.7, + "learning_rate": 4.364715196761368e-06, + "loss": 0.6164, + "step": 1712 + }, + { + "epoch": 0.7, + "learning_rate": 4.353778915238969e-06, + "loss": 0.6334, + "step": 1713 + }, + { + "epoch": 0.7, + "learning_rate": 4.342852538117039e-06, + "loss": 0.6347, + "step": 1714 + }, + { + "epoch": 0.7, + "learning_rate": 4.331936084562235e-06, + "loss": 0.6551, + "step": 1715 + }, + { + "epoch": 0.7, + "learning_rate": 4.32102957372382e-06, + "loss": 0.6247, + "step": 1716 + }, + { + "epoch": 0.7, + "learning_rate": 4.310133024733602e-06, + "loss": 0.6626, + "step": 1717 + }, + { + "epoch": 0.7, + "learning_rate": 4.299246456705921e-06, + "loss": 0.6122, + "step": 1718 + }, + { + "epoch": 0.7, + "learning_rate": 4.288369888737614e-06, + "loss": 0.6568, + "step": 1719 + }, + { + "epoch": 0.7, + "learning_rate": 4.277503339907961e-06, + "loss": 0.7027, + "step": 1720 + }, + { + "epoch": 0.7, + "learning_rate": 4.266646829278685e-06, + "loss": 0.5958, + "step": 1721 + }, + { + "epoch": 0.7, + "learning_rate": 4.255800375893885e-06, + "loss": 0.6179, + "step": 1722 + }, + { + "epoch": 0.7, + "learning_rate": 4.244963998780028e-06, + "loss": 0.6805, + "step": 1723 + }, + { + "epoch": 0.7, + "learning_rate": 4.234137716945897e-06, + "loss": 0.6868, + "step": 1724 + }, + { + "epoch": 0.71, + "learning_rate": 4.223321549382578e-06, + "loss": 0.5573, + "step": 1725 + }, + { + "epoch": 0.71, + "learning_rate": 4.212515515063399e-06, + "loss": 0.597, + "step": 1726 + }, + { + "epoch": 0.71, + "learning_rate": 4.201719632943931e-06, + "loss": 0.5847, + "step": 1727 + }, + { + "epoch": 0.71, + "learning_rate": 4.1909339219619225e-06, + "loss": 0.647, + "step": 1728 + }, + { + "epoch": 0.71, + "learning_rate": 4.180158401037282e-06, + "loss": 0.6489, + "step": 1729 + }, + { + "epoch": 0.71, + "learning_rate": 4.16939308907205e-06, + "loss": 0.651, + "step": 1730 + }, + { + "epoch": 0.71, + "learning_rate": 4.15863800495035e-06, + "loss": 0.6043, + "step": 1731 + }, + { + "epoch": 0.71, + "learning_rate": 4.147893167538375e-06, + "loss": 0.5854, + "step": 1732 + }, + { + "epoch": 0.71, + "learning_rate": 4.137158595684329e-06, + "loss": 0.6114, + "step": 1733 + }, + { + "epoch": 0.71, + "learning_rate": 4.126434308218421e-06, + "loss": 0.6202, + "step": 1734 + }, + { + "epoch": 0.71, + "learning_rate": 4.115720323952818e-06, + "loss": 0.6272, + "step": 1735 + }, + { + "epoch": 0.71, + "learning_rate": 4.105016661681605e-06, + "loss": 0.6163, + "step": 1736 + }, + { + "epoch": 0.71, + "learning_rate": 4.0943233401807715e-06, + "loss": 0.6743, + "step": 1737 + }, + { + "epoch": 0.71, + "learning_rate": 4.083640378208156e-06, + "loss": 0.7581, + "step": 1738 + }, + { + "epoch": 0.71, + "learning_rate": 4.072967794503437e-06, + "loss": 0.6449, + "step": 1739 + }, + { + "epoch": 0.71, + "learning_rate": 4.0623056077880775e-06, + "loss": 0.6043, + "step": 1740 + }, + { + "epoch": 0.71, + "learning_rate": 4.05165383676531e-06, + "loss": 0.6249, + "step": 1741 + }, + { + "epoch": 0.71, + "learning_rate": 4.04101250012009e-06, + "loss": 0.6138, + "step": 1742 + }, + { + "epoch": 0.71, + "learning_rate": 4.030381616519074e-06, + "loss": 0.6771, + "step": 1743 + }, + { + "epoch": 0.71, + "learning_rate": 4.0197612046105815e-06, + "loss": 0.6382, + "step": 1744 + }, + { + "epoch": 0.71, + "learning_rate": 4.009151283024557e-06, + "loss": 0.658, + "step": 1745 + }, + { + "epoch": 0.71, + "learning_rate": 3.998551870372554e-06, + "loss": 0.6129, + "step": 1746 + }, + { + "epoch": 0.71, + "learning_rate": 3.98796298524768e-06, + "loss": 0.6675, + "step": 1747 + }, + { + "epoch": 0.71, + "learning_rate": 3.977384646224584e-06, + "loss": 0.5419, + "step": 1748 + }, + { + "epoch": 0.72, + "learning_rate": 3.9668168718594155e-06, + "loss": 0.6792, + "step": 1749 + }, + { + "epoch": 0.72, + "learning_rate": 3.956259680689784e-06, + "loss": 0.5761, + "step": 1750 + }, + { + "epoch": 0.72, + "learning_rate": 3.945713091234743e-06, + "loss": 0.5577, + "step": 1751 + }, + { + "epoch": 0.72, + "learning_rate": 3.935177121994741e-06, + "loss": 0.5979, + "step": 1752 + }, + { + "epoch": 0.72, + "learning_rate": 3.924651791451606e-06, + "loss": 0.649, + "step": 1753 + }, + { + "epoch": 0.72, + "learning_rate": 3.9141371180684925e-06, + "loss": 0.6042, + "step": 1754 + }, + { + "epoch": 0.72, + "learning_rate": 3.903633120289876e-06, + "loss": 0.5785, + "step": 1755 + }, + { + "epoch": 0.72, + "learning_rate": 3.893139816541487e-06, + "loss": 0.5849, + "step": 1756 + }, + { + "epoch": 0.72, + "learning_rate": 3.8826572252303145e-06, + "loss": 0.6188, + "step": 1757 + }, + { + "epoch": 0.72, + "learning_rate": 3.872185364744543e-06, + "loss": 0.5875, + "step": 1758 + }, + { + "epoch": 0.72, + "learning_rate": 3.861724253453535e-06, + "loss": 0.5596, + "step": 1759 + }, + { + "epoch": 0.72, + "learning_rate": 3.851273909707809e-06, + "loss": 0.6497, + "step": 1760 + }, + { + "epoch": 0.72, + "learning_rate": 3.840834351838977e-06, + "loss": 0.5923, + "step": 1761 + }, + { + "epoch": 0.72, + "learning_rate": 3.8304055981597495e-06, + "loss": 0.668, + "step": 1762 + }, + { + "epoch": 0.72, + "learning_rate": 3.819987666963869e-06, + "loss": 0.5548, + "step": 1763 + }, + { + "epoch": 0.72, + "learning_rate": 3.809580576526104e-06, + "loss": 0.5588, + "step": 1764 + }, + { + "epoch": 0.72, + "learning_rate": 3.799184345102205e-06, + "loss": 0.6508, + "step": 1765 + }, + { + "epoch": 0.72, + "learning_rate": 3.7887989909288648e-06, + "loss": 0.5973, + "step": 1766 + }, + { + "epoch": 0.72, + "learning_rate": 3.7784245322237113e-06, + "loss": 0.5946, + "step": 1767 + }, + { + "epoch": 0.72, + "learning_rate": 3.7680609871852436e-06, + "loss": 0.5996, + "step": 1768 + }, + { + "epoch": 0.72, + "learning_rate": 3.7577083739928313e-06, + "loss": 0.601, + "step": 1769 + }, + { + "epoch": 0.72, + "learning_rate": 3.7473667108066524e-06, + "loss": 0.6741, + "step": 1770 + }, + { + "epoch": 0.72, + "learning_rate": 3.7370360157676955e-06, + "loss": 0.6801, + "step": 1771 + }, + { + "epoch": 0.72, + "learning_rate": 3.726716306997692e-06, + "loss": 0.6208, + "step": 1772 + }, + { + "epoch": 0.72, + "learning_rate": 3.7164076025991068e-06, + "loss": 0.6188, + "step": 1773 + }, + { + "epoch": 0.73, + "learning_rate": 3.70610992065511e-06, + "loss": 0.573, + "step": 1774 + }, + { + "epoch": 0.73, + "learning_rate": 3.695823279229521e-06, + "loss": 0.6968, + "step": 1775 + }, + { + "epoch": 0.73, + "learning_rate": 3.68554769636681e-06, + "loss": 0.6047, + "step": 1776 + }, + { + "epoch": 0.73, + "learning_rate": 3.6752831900920306e-06, + "loss": 0.5545, + "step": 1777 + }, + { + "epoch": 0.73, + "learning_rate": 3.665029778410819e-06, + "loss": 0.6083, + "step": 1778 + }, + { + "epoch": 0.73, + "learning_rate": 3.6547874793093497e-06, + "loss": 0.6485, + "step": 1779 + }, + { + "epoch": 0.73, + "learning_rate": 3.6445563107542925e-06, + "loss": 0.6835, + "step": 1780 + }, + { + "epoch": 0.73, + "learning_rate": 3.634336290692808e-06, + "loss": 0.6244, + "step": 1781 + }, + { + "epoch": 0.73, + "learning_rate": 3.624127437052484e-06, + "loss": 0.6111, + "step": 1782 + }, + { + "epoch": 0.73, + "learning_rate": 3.6139297677413367e-06, + "loss": 0.5409, + "step": 1783 + }, + { + "epoch": 0.73, + "learning_rate": 3.6037433006477475e-06, + "loss": 0.5767, + "step": 1784 + }, + { + "epoch": 0.73, + "learning_rate": 3.5935680536404626e-06, + "loss": 0.6752, + "step": 1785 + }, + { + "epoch": 0.73, + "learning_rate": 3.5834040445685325e-06, + "loss": 0.5394, + "step": 1786 + }, + { + "epoch": 0.73, + "learning_rate": 3.5732512912613073e-06, + "loss": 0.571, + "step": 1787 + }, + { + "epoch": 0.73, + "learning_rate": 3.5631098115283833e-06, + "loss": 0.6408, + "step": 1788 + }, + { + "epoch": 0.73, + "learning_rate": 3.5529796231595793e-06, + "loss": 0.6153, + "step": 1789 + }, + { + "epoch": 0.73, + "learning_rate": 3.5428607439249197e-06, + "loss": 0.6044, + "step": 1790 + }, + { + "epoch": 0.73, + "learning_rate": 3.532753191574576e-06, + "loss": 0.6072, + "step": 1791 + }, + { + "epoch": 0.73, + "learning_rate": 3.5226569838388647e-06, + "loss": 0.6745, + "step": 1792 + }, + { + "epoch": 0.73, + "learning_rate": 3.5125721384281874e-06, + "loss": 0.5946, + "step": 1793 + }, + { + "epoch": 0.73, + "learning_rate": 3.502498673033026e-06, + "loss": 0.5848, + "step": 1794 + }, + { + "epoch": 0.73, + "learning_rate": 3.4924366053238977e-06, + "loss": 0.6502, + "step": 1795 + }, + { + "epoch": 0.73, + "learning_rate": 3.482385952951318e-06, + "loss": 0.6443, + "step": 1796 + }, + { + "epoch": 0.73, + "learning_rate": 3.472346733545792e-06, + "loss": 0.6579, + "step": 1797 + }, + { + "epoch": 0.74, + "learning_rate": 3.4623189647177533e-06, + "loss": 0.5671, + "step": 1798 + }, + { + "epoch": 0.74, + "learning_rate": 3.4523026640575664e-06, + "loss": 0.6066, + "step": 1799 + }, + { + "epoch": 0.74, + "learning_rate": 3.442297849135462e-06, + "loss": 0.6075, + "step": 1800 + }, + { + "epoch": 0.74, + "learning_rate": 3.4323045375015384e-06, + "loss": 0.6898, + "step": 1801 + }, + { + "epoch": 0.74, + "learning_rate": 3.4223227466857045e-06, + "loss": 0.597, + "step": 1802 + }, + { + "epoch": 0.74, + "learning_rate": 3.4123524941976593e-06, + "loss": 0.6686, + "step": 1803 + }, + { + "epoch": 0.74, + "learning_rate": 3.4023937975268728e-06, + "loss": 0.6153, + "step": 1804 + }, + { + "epoch": 0.74, + "learning_rate": 3.39244667414253e-06, + "loss": 0.6611, + "step": 1805 + }, + { + "epoch": 0.74, + "learning_rate": 3.3825111414935287e-06, + "loss": 0.5729, + "step": 1806 + }, + { + "epoch": 0.74, + "learning_rate": 3.3725872170084193e-06, + "loss": 0.6562, + "step": 1807 + }, + { + "epoch": 0.74, + "learning_rate": 3.3626749180954033e-06, + "loss": 0.6539, + "step": 1808 + }, + { + "epoch": 0.74, + "learning_rate": 3.352774262142284e-06, + "loss": 0.6655, + "step": 1809 + }, + { + "epoch": 0.74, + "learning_rate": 3.342885266516436e-06, + "loss": 0.6419, + "step": 1810 + }, + { + "epoch": 0.74, + "learning_rate": 3.3330079485647894e-06, + "loss": 0.6072, + "step": 1811 + }, + { + "epoch": 0.74, + "learning_rate": 3.3231423256137784e-06, + "loss": 0.5955, + "step": 1812 + }, + { + "epoch": 0.74, + "learning_rate": 3.3132884149693346e-06, + "loss": 0.6535, + "step": 1813 + }, + { + "epoch": 0.74, + "learning_rate": 3.3034462339168317e-06, + "loss": 0.6097, + "step": 1814 + }, + { + "epoch": 0.74, + "learning_rate": 3.2936157997210816e-06, + "loss": 0.6541, + "step": 1815 + }, + { + "epoch": 0.74, + "learning_rate": 3.283797129626274e-06, + "loss": 0.5382, + "step": 1816 + }, + { + "epoch": 0.74, + "learning_rate": 3.27399024085598e-06, + "loss": 0.6066, + "step": 1817 + }, + { + "epoch": 0.74, + "learning_rate": 3.264195150613091e-06, + "loss": 0.5851, + "step": 1818 + }, + { + "epoch": 0.74, + "learning_rate": 3.254411876079803e-06, + "loss": 0.6842, + "step": 1819 + }, + { + "epoch": 0.74, + "learning_rate": 3.244640434417595e-06, + "loss": 0.5912, + "step": 1820 + }, + { + "epoch": 0.74, + "learning_rate": 3.2348808427671784e-06, + "loss": 0.6499, + "step": 1821 + }, + { + "epoch": 0.74, + "learning_rate": 3.2251331182484868e-06, + "loss": 0.6182, + "step": 1822 + }, + { + "epoch": 0.75, + "learning_rate": 3.215397277960626e-06, + "loss": 0.6126, + "step": 1823 + }, + { + "epoch": 0.75, + "learning_rate": 3.205673338981865e-06, + "loss": 0.6458, + "step": 1824 + }, + { + "epoch": 0.75, + "learning_rate": 3.195961318369595e-06, + "loss": 0.6305, + "step": 1825 + }, + { + "epoch": 0.75, + "learning_rate": 3.1862612331602906e-06, + "loss": 0.5899, + "step": 1826 + }, + { + "epoch": 0.75, + "learning_rate": 3.176573100369504e-06, + "loss": 0.6442, + "step": 1827 + }, + { + "epoch": 0.75, + "learning_rate": 3.166896936991808e-06, + "loss": 0.7171, + "step": 1828 + }, + { + "epoch": 0.75, + "learning_rate": 3.157232760000789e-06, + "loss": 0.6163, + "step": 1829 + }, + { + "epoch": 0.75, + "learning_rate": 3.147580586348998e-06, + "loss": 0.585, + "step": 1830 + }, + { + "epoch": 0.75, + "learning_rate": 3.137940432967942e-06, + "loss": 0.5771, + "step": 1831 + }, + { + "epoch": 0.75, + "learning_rate": 3.1283123167680306e-06, + "loss": 0.6859, + "step": 1832 + }, + { + "epoch": 0.75, + "learning_rate": 3.1186962546385613e-06, + "loss": 0.5714, + "step": 1833 + }, + { + "epoch": 0.75, + "learning_rate": 3.1090922634476963e-06, + "loss": 0.6438, + "step": 1834 + }, + { + "epoch": 0.75, + "learning_rate": 3.099500360042407e-06, + "loss": 0.6202, + "step": 1835 + }, + { + "epoch": 0.75, + "learning_rate": 3.089920561248476e-06, + "loss": 0.6056, + "step": 1836 + }, + { + "epoch": 0.75, + "learning_rate": 3.080352883870442e-06, + "loss": 0.6483, + "step": 1837 + }, + { + "epoch": 0.75, + "learning_rate": 3.0707973446915863e-06, + "loss": 0.5747, + "step": 1838 + }, + { + "epoch": 0.75, + "learning_rate": 3.0612539604739e-06, + "loss": 0.6485, + "step": 1839 + }, + { + "epoch": 0.75, + "learning_rate": 3.0517227479580425e-06, + "loss": 0.6238, + "step": 1840 + }, + { + "epoch": 0.75, + "learning_rate": 3.042203723863334e-06, + "loss": 0.6091, + "step": 1841 + }, + { + "epoch": 0.75, + "learning_rate": 3.0326969048877032e-06, + "loss": 0.5766, + "step": 1842 + }, + { + "epoch": 0.75, + "learning_rate": 3.023202307707679e-06, + "loss": 0.5843, + "step": 1843 + }, + { + "epoch": 0.75, + "learning_rate": 3.013719948978342e-06, + "loss": 0.6129, + "step": 1844 + }, + { + "epoch": 0.75, + "learning_rate": 3.0042498453333137e-06, + "loss": 0.6563, + "step": 1845 + }, + { + "epoch": 0.75, + "learning_rate": 2.9947920133847108e-06, + "loss": 0.6044, + "step": 1846 + }, + { + "epoch": 0.76, + "learning_rate": 2.985346469723124e-06, + "loss": 0.6976, + "step": 1847 + }, + { + "epoch": 0.76, + "learning_rate": 2.975913230917595e-06, + "loss": 0.6492, + "step": 1848 + }, + { + "epoch": 0.76, + "learning_rate": 2.9664923135155723e-06, + "loss": 0.6004, + "step": 1849 + }, + { + "epoch": 0.76, + "learning_rate": 2.9570837340428994e-06, + "loss": 0.6734, + "step": 1850 + }, + { + "epoch": 0.76, + "learning_rate": 2.947687509003766e-06, + "loss": 0.6337, + "step": 1851 + }, + { + "epoch": 0.76, + "learning_rate": 2.938303654880702e-06, + "loss": 0.5893, + "step": 1852 + }, + { + "epoch": 0.76, + "learning_rate": 2.9289321881345257e-06, + "loss": 0.6212, + "step": 1853 + }, + { + "epoch": 0.76, + "learning_rate": 2.9195731252043333e-06, + "loss": 0.6412, + "step": 1854 + }, + { + "epoch": 0.76, + "learning_rate": 2.9102264825074657e-06, + "loss": 0.6447, + "step": 1855 + }, + { + "epoch": 0.76, + "learning_rate": 2.900892276439463e-06, + "loss": 0.6326, + "step": 1856 + }, + { + "epoch": 0.76, + "learning_rate": 2.8915705233740653e-06, + "loss": 0.639, + "step": 1857 + }, + { + "epoch": 0.76, + "learning_rate": 2.8822612396631557e-06, + "loss": 0.6734, + "step": 1858 + }, + { + "epoch": 0.76, + "learning_rate": 2.872964441636752e-06, + "loss": 0.6496, + "step": 1859 + }, + { + "epoch": 0.76, + "learning_rate": 2.863680145602963e-06, + "loss": 0.6014, + "step": 1860 + }, + { + "epoch": 0.76, + "learning_rate": 2.854408367847977e-06, + "loss": 0.5486, + "step": 1861 + }, + { + "epoch": 0.76, + "learning_rate": 2.845149124636014e-06, + "loss": 0.6532, + "step": 1862 + }, + { + "epoch": 0.76, + "learning_rate": 2.8359024322093067e-06, + "loss": 0.6207, + "step": 1863 + }, + { + "epoch": 0.76, + "learning_rate": 2.8266683067880807e-06, + "loss": 0.5728, + "step": 1864 + }, + { + "epoch": 0.76, + "learning_rate": 2.817446764570504e-06, + "loss": 0.5612, + "step": 1865 + }, + { + "epoch": 0.76, + "learning_rate": 2.8082378217326843e-06, + "loss": 0.6476, + "step": 1866 + }, + { + "epoch": 0.76, + "learning_rate": 2.799041494428617e-06, + "loss": 0.5984, + "step": 1867 + }, + { + "epoch": 0.76, + "learning_rate": 2.7898577987901786e-06, + "loss": 0.7022, + "step": 1868 + }, + { + "epoch": 0.76, + "learning_rate": 2.7806867509270754e-06, + "loss": 0.5719, + "step": 1869 + }, + { + "epoch": 0.76, + "learning_rate": 2.771528366926837e-06, + "loss": 0.6535, + "step": 1870 + }, + { + "epoch": 0.76, + "learning_rate": 2.762382662854778e-06, + "loss": 0.6677, + "step": 1871 + }, + { + "epoch": 0.77, + "learning_rate": 2.7532496547539623e-06, + "loss": 0.6694, + "step": 1872 + }, + { + "epoch": 0.77, + "learning_rate": 2.7441293586451936e-06, + "loss": 0.6706, + "step": 1873 + }, + { + "epoch": 0.77, + "learning_rate": 2.7350217905269647e-06, + "loss": 0.5942, + "step": 1874 + }, + { + "epoch": 0.77, + "learning_rate": 2.725926966375456e-06, + "loss": 0.6493, + "step": 1875 + }, + { + "epoch": 0.77, + "learning_rate": 2.716844902144481e-06, + "loss": 0.6309, + "step": 1876 + }, + { + "epoch": 0.77, + "learning_rate": 2.707775613765471e-06, + "loss": 0.6233, + "step": 1877 + }, + { + "epoch": 0.77, + "learning_rate": 2.6987191171474548e-06, + "loss": 0.6256, + "step": 1878 + }, + { + "epoch": 0.77, + "learning_rate": 2.689675428177013e-06, + "loss": 0.6972, + "step": 1879 + }, + { + "epoch": 0.77, + "learning_rate": 2.6806445627182686e-06, + "loss": 0.6433, + "step": 1880 + }, + { + "epoch": 0.77, + "learning_rate": 2.671626536612838e-06, + "loss": 0.5779, + "step": 1881 + }, + { + "epoch": 0.77, + "learning_rate": 2.6626213656798295e-06, + "loss": 0.6134, + "step": 1882 + }, + { + "epoch": 0.77, + "learning_rate": 2.6536290657157883e-06, + "loss": 0.6828, + "step": 1883 + }, + { + "epoch": 0.77, + "learning_rate": 2.6446496524946894e-06, + "loss": 0.674, + "step": 1884 + }, + { + "epoch": 0.77, + "learning_rate": 2.635683141767904e-06, + "loss": 0.7182, + "step": 1885 + }, + { + "epoch": 0.77, + "learning_rate": 2.626729549264161e-06, + "loss": 0.637, + "step": 1886 + }, + { + "epoch": 0.77, + "learning_rate": 2.6177888906895398e-06, + "loss": 0.6608, + "step": 1887 + }, + { + "epoch": 0.77, + "learning_rate": 2.608861181727421e-06, + "loss": 0.5861, + "step": 1888 + }, + { + "epoch": 0.77, + "learning_rate": 2.599946438038481e-06, + "loss": 0.6304, + "step": 1889 + }, + { + "epoch": 0.77, + "learning_rate": 2.591044675260641e-06, + "loss": 0.5967, + "step": 1890 + }, + { + "epoch": 0.77, + "learning_rate": 2.5821559090090565e-06, + "loss": 0.6824, + "step": 1891 + }, + { + "epoch": 0.77, + "learning_rate": 2.5732801548760898e-06, + "loss": 0.6641, + "step": 1892 + }, + { + "epoch": 0.77, + "learning_rate": 2.5644174284312686e-06, + "loss": 0.5963, + "step": 1893 + }, + { + "epoch": 0.77, + "learning_rate": 2.5555677452212792e-06, + "loss": 0.6095, + "step": 1894 + }, + { + "epoch": 0.77, + "learning_rate": 2.5467311207699143e-06, + "loss": 0.5949, + "step": 1895 + }, + { + "epoch": 0.78, + "learning_rate": 2.5379075705780733e-06, + "loss": 0.6551, + "step": 1896 + }, + { + "epoch": 0.78, + "learning_rate": 2.5290971101237083e-06, + "loss": 0.6472, + "step": 1897 + }, + { + "epoch": 0.78, + "learning_rate": 2.5202997548618226e-06, + "loss": 0.7092, + "step": 1898 + }, + { + "epoch": 0.78, + "learning_rate": 2.511515520224418e-06, + "loss": 0.6603, + "step": 1899 + }, + { + "epoch": 0.78, + "learning_rate": 2.5027444216204888e-06, + "loss": 0.6436, + "step": 1900 + }, + { + "epoch": 0.78, + "learning_rate": 2.49398647443599e-06, + "loss": 0.6595, + "step": 1901 + }, + { + "epoch": 0.78, + "learning_rate": 2.485241694033793e-06, + "loss": 0.6608, + "step": 1902 + }, + { + "epoch": 0.78, + "learning_rate": 2.476510095753688e-06, + "loss": 0.6017, + "step": 1903 + }, + { + "epoch": 0.78, + "learning_rate": 2.467791694912329e-06, + "loss": 0.6715, + "step": 1904 + }, + { + "epoch": 0.78, + "learning_rate": 2.459086506803231e-06, + "loss": 0.614, + "step": 1905 + }, + { + "epoch": 0.78, + "learning_rate": 2.450394546696723e-06, + "loss": 0.6029, + "step": 1906 + }, + { + "epoch": 0.78, + "learning_rate": 2.441715829839928e-06, + "loss": 0.5832, + "step": 1907 + }, + { + "epoch": 0.78, + "learning_rate": 2.43305037145675e-06, + "loss": 0.6692, + "step": 1908 + }, + { + "epoch": 0.78, + "learning_rate": 2.424398186747823e-06, + "loss": 0.6473, + "step": 1909 + }, + { + "epoch": 0.78, + "learning_rate": 2.415759290890506e-06, + "loss": 0.6604, + "step": 1910 + }, + { + "epoch": 0.78, + "learning_rate": 2.4071336990388396e-06, + "loss": 0.6651, + "step": 1911 + }, + { + "epoch": 0.78, + "learning_rate": 2.3985214263235344e-06, + "loss": 0.6874, + "step": 1912 + }, + { + "epoch": 0.78, + "learning_rate": 2.38992248785193e-06, + "loss": 0.567, + "step": 1913 + }, + { + "epoch": 0.78, + "learning_rate": 2.38133689870798e-06, + "loss": 0.6496, + "step": 1914 + }, + { + "epoch": 0.78, + "learning_rate": 2.3727646739522226e-06, + "loss": 0.6692, + "step": 1915 + }, + { + "epoch": 0.78, + "learning_rate": 2.364205828621745e-06, + "loss": 0.5966, + "step": 1916 + }, + { + "epoch": 0.78, + "learning_rate": 2.3556603777301745e-06, + "loss": 0.625, + "step": 1917 + }, + { + "epoch": 0.78, + "learning_rate": 2.3471283362676334e-06, + "loss": 0.6688, + "step": 1918 + }, + { + "epoch": 0.78, + "learning_rate": 2.3386097192007296e-06, + "loss": 0.5881, + "step": 1919 + }, + { + "epoch": 0.78, + "learning_rate": 2.3301045414725167e-06, + "loss": 0.6437, + "step": 1920 + }, + { + "epoch": 0.79, + "learning_rate": 2.321612818002472e-06, + "loss": 0.6213, + "step": 1921 + }, + { + "epoch": 0.79, + "learning_rate": 2.313134563686482e-06, + "loss": 0.6376, + "step": 1922 + }, + { + "epoch": 0.79, + "learning_rate": 2.304669793396793e-06, + "loss": 0.6311, + "step": 1923 + }, + { + "epoch": 0.79, + "learning_rate": 2.29621852198201e-06, + "loss": 0.7239, + "step": 1924 + }, + { + "epoch": 0.79, + "learning_rate": 2.287780764267047e-06, + "loss": 0.577, + "step": 1925 + }, + { + "epoch": 0.79, + "learning_rate": 2.2793565350531243e-06, + "loss": 0.6204, + "step": 1926 + }, + { + "epoch": 0.79, + "learning_rate": 2.270945849117722e-06, + "loss": 0.5755, + "step": 1927 + }, + { + "epoch": 0.79, + "learning_rate": 2.262548721214569e-06, + "loss": 0.6284, + "step": 1928 + }, + { + "epoch": 0.79, + "learning_rate": 2.254165166073605e-06, + "loss": 0.6777, + "step": 1929 + }, + { + "epoch": 0.79, + "learning_rate": 2.2457951984009684e-06, + "loss": 0.5707, + "step": 1930 + }, + { + "epoch": 0.79, + "learning_rate": 2.237438832878961e-06, + "loss": 0.5719, + "step": 1931 + }, + { + "epoch": 0.79, + "learning_rate": 2.2290960841660157e-06, + "loss": 0.675, + "step": 1932 + }, + { + "epoch": 0.79, + "learning_rate": 2.2207669668966934e-06, + "loss": 0.6788, + "step": 1933 + }, + { + "epoch": 0.79, + "learning_rate": 2.212451495681629e-06, + "loss": 0.6523, + "step": 1934 + }, + { + "epoch": 0.79, + "learning_rate": 2.2041496851075316e-06, + "loss": 0.545, + "step": 1935 + }, + { + "epoch": 0.79, + "learning_rate": 2.1958615497371416e-06, + "loss": 0.6307, + "step": 1936 + }, + { + "epoch": 0.79, + "learning_rate": 2.187587104109208e-06, + "loss": 0.608, + "step": 1937 + }, + { + "epoch": 0.79, + "learning_rate": 2.1793263627384753e-06, + "loss": 0.6708, + "step": 1938 + }, + { + "epoch": 0.79, + "learning_rate": 2.171079340115636e-06, + "loss": 0.5998, + "step": 1939 + }, + { + "epoch": 0.79, + "learning_rate": 2.16284605070733e-06, + "loss": 0.673, + "step": 1940 + }, + { + "epoch": 0.79, + "learning_rate": 2.154626508956097e-06, + "loss": 0.57, + "step": 1941 + }, + { + "epoch": 0.79, + "learning_rate": 2.1464207292803696e-06, + "loss": 0.6615, + "step": 1942 + }, + { + "epoch": 0.79, + "learning_rate": 2.1382287260744283e-06, + "loss": 0.6227, + "step": 1943 + }, + { + "epoch": 0.79, + "learning_rate": 2.130050513708399e-06, + "loss": 0.6064, + "step": 1944 + }, + { + "epoch": 0.8, + "learning_rate": 2.1218861065282137e-06, + "loss": 0.672, + "step": 1945 + }, + { + "epoch": 0.8, + "learning_rate": 2.1137355188555796e-06, + "loss": 0.6932, + "step": 1946 + }, + { + "epoch": 0.8, + "learning_rate": 2.105598764987973e-06, + "loss": 0.6351, + "step": 1947 + }, + { + "epoch": 0.8, + "learning_rate": 2.0974758591985945e-06, + "loss": 0.6846, + "step": 1948 + }, + { + "epoch": 0.8, + "learning_rate": 2.08936681573636e-06, + "loss": 0.7049, + "step": 1949 + }, + { + "epoch": 0.8, + "learning_rate": 2.0812716488258655e-06, + "loss": 0.6386, + "step": 1950 + }, + { + "epoch": 0.8, + "learning_rate": 2.0731903726673596e-06, + "loss": 0.6514, + "step": 1951 + }, + { + "epoch": 0.8, + "learning_rate": 2.0651230014367385e-06, + "loss": 0.7024, + "step": 1952 + }, + { + "epoch": 0.8, + "learning_rate": 2.057069549285491e-06, + "loss": 0.6957, + "step": 1953 + }, + { + "epoch": 0.8, + "learning_rate": 2.0490300303407017e-06, + "loss": 0.5995, + "step": 1954 + }, + { + "epoch": 0.8, + "learning_rate": 2.041004458705006e-06, + "loss": 0.6713, + "step": 1955 + }, + { + "epoch": 0.8, + "learning_rate": 2.0329928484565784e-06, + "loss": 0.5666, + "step": 1956 + }, + { + "epoch": 0.8, + "learning_rate": 2.024995213649099e-06, + "loss": 0.5551, + "step": 1957 + }, + { + "epoch": 0.8, + "learning_rate": 2.01701156831174e-06, + "loss": 0.5885, + "step": 1958 + }, + { + "epoch": 0.8, + "learning_rate": 2.00904192644912e-06, + "loss": 0.6711, + "step": 1959 + }, + { + "epoch": 0.8, + "learning_rate": 2.0010863020413075e-06, + "loss": 0.5906, + "step": 1960 + }, + { + "epoch": 0.8, + "learning_rate": 1.993144709043777e-06, + "loss": 0.638, + "step": 1961 + }, + { + "epoch": 0.8, + "learning_rate": 1.9852171613873837e-06, + "loss": 0.5898, + "step": 1962 + }, + { + "epoch": 0.8, + "learning_rate": 1.977303672978357e-06, + "loss": 0.6459, + "step": 1963 + }, + { + "epoch": 0.8, + "learning_rate": 1.969404257698253e-06, + "loss": 0.6811, + "step": 1964 + }, + { + "epoch": 0.8, + "learning_rate": 1.961518929403944e-06, + "loss": 0.6946, + "step": 1965 + }, + { + "epoch": 0.8, + "learning_rate": 1.9536477019275955e-06, + "loss": 0.651, + "step": 1966 + }, + { + "epoch": 0.8, + "learning_rate": 1.9457905890766325e-06, + "loss": 0.5738, + "step": 1967 + }, + { + "epoch": 0.8, + "learning_rate": 1.9379476046337285e-06, + "loss": 0.5757, + "step": 1968 + }, + { + "epoch": 0.8, + "learning_rate": 1.9301187623567606e-06, + "loss": 0.6743, + "step": 1969 + }, + { + "epoch": 0.81, + "learning_rate": 1.9223040759788138e-06, + "loss": 0.6349, + "step": 1970 + }, + { + "epoch": 0.81, + "learning_rate": 1.9145035592081274e-06, + "loss": 0.6481, + "step": 1971 + }, + { + "epoch": 0.81, + "learning_rate": 1.906717225728094e-06, + "loss": 0.5878, + "step": 1972 + }, + { + "epoch": 0.81, + "learning_rate": 1.8989450891972205e-06, + "loss": 0.634, + "step": 1973 + }, + { + "epoch": 0.81, + "learning_rate": 1.8911871632491153e-06, + "loss": 0.6869, + "step": 1974 + }, + { + "epoch": 0.81, + "learning_rate": 1.8834434614924567e-06, + "loss": 0.6659, + "step": 1975 + }, + { + "epoch": 0.81, + "learning_rate": 1.8757139975109683e-06, + "loss": 0.5889, + "step": 1976 + }, + { + "epoch": 0.81, + "learning_rate": 1.8679987848634063e-06, + "loss": 0.5565, + "step": 1977 + }, + { + "epoch": 0.81, + "learning_rate": 1.8602978370835156e-06, + "loss": 0.628, + "step": 1978 + }, + { + "epoch": 0.81, + "learning_rate": 1.852611167680033e-06, + "loss": 0.5637, + "step": 1979 + }, + { + "epoch": 0.81, + "learning_rate": 1.8449387901366366e-06, + "loss": 0.6013, + "step": 1980 + }, + { + "epoch": 0.81, + "learning_rate": 1.8372807179119366e-06, + "loss": 0.6237, + "step": 1981 + }, + { + "epoch": 0.81, + "learning_rate": 1.8296369644394562e-06, + "loss": 0.5399, + "step": 1982 + }, + { + "epoch": 0.81, + "learning_rate": 1.8220075431275918e-06, + "loss": 0.5737, + "step": 1983 + }, + { + "epoch": 0.81, + "learning_rate": 1.814392467359607e-06, + "loss": 0.6142, + "step": 1984 + }, + { + "epoch": 0.81, + "learning_rate": 1.806791750493594e-06, + "loss": 0.6594, + "step": 1985 + }, + { + "epoch": 0.81, + "learning_rate": 1.799205405862463e-06, + "loss": 0.6424, + "step": 1986 + }, + { + "epoch": 0.81, + "learning_rate": 1.7916334467739083e-06, + "loss": 0.6634, + "step": 1987 + }, + { + "epoch": 0.81, + "learning_rate": 1.7840758865103934e-06, + "loss": 0.6226, + "step": 1988 + }, + { + "epoch": 0.81, + "learning_rate": 1.7765327383291187e-06, + "loss": 0.6087, + "step": 1989 + }, + { + "epoch": 0.81, + "learning_rate": 1.7690040154620092e-06, + "loss": 0.5728, + "step": 1990 + }, + { + "epoch": 0.81, + "learning_rate": 1.7614897311156864e-06, + "loss": 0.6584, + "step": 1991 + }, + { + "epoch": 0.81, + "learning_rate": 1.7539898984714342e-06, + "loss": 0.5712, + "step": 1992 + }, + { + "epoch": 0.81, + "learning_rate": 1.746504530685199e-06, + "loss": 0.67, + "step": 1993 + }, + { + "epoch": 0.82, + "learning_rate": 1.739033640887544e-06, + "loss": 0.6612, + "step": 1994 + }, + { + "epoch": 0.82, + "learning_rate": 1.7315772421836364e-06, + "loss": 0.5817, + "step": 1995 + }, + { + "epoch": 0.82, + "learning_rate": 1.7241353476532307e-06, + "loss": 0.641, + "step": 1996 + }, + { + "epoch": 0.82, + "learning_rate": 1.7167079703506296e-06, + "loss": 0.6266, + "step": 1997 + }, + { + "epoch": 0.82, + "learning_rate": 1.7092951233046795e-06, + "loss": 0.6762, + "step": 1998 + }, + { + "epoch": 0.82, + "learning_rate": 1.701896819518727e-06, + "loss": 0.5922, + "step": 1999 + }, + { + "epoch": 0.82, + "learning_rate": 1.6945130719706205e-06, + "loss": 0.6613, + "step": 2000 + }, + { + "epoch": 0.82, + "learning_rate": 1.6871438936126604e-06, + "loss": 0.6853, + "step": 2001 + }, + { + "epoch": 0.82, + "learning_rate": 1.6797892973716057e-06, + "loss": 0.6097, + "step": 2002 + }, + { + "epoch": 0.82, + "learning_rate": 1.6724492961486206e-06, + "loss": 0.6052, + "step": 2003 + }, + { + "epoch": 0.82, + "learning_rate": 1.665123902819279e-06, + "loss": 0.6344, + "step": 2004 + }, + { + "epoch": 0.82, + "learning_rate": 1.6578131302335255e-06, + "loss": 0.6501, + "step": 2005 + }, + { + "epoch": 0.82, + "learning_rate": 1.6505169912156548e-06, + "loss": 0.635, + "step": 2006 + }, + { + "epoch": 0.82, + "learning_rate": 1.6432354985642984e-06, + "loss": 0.6982, + "step": 2007 + }, + { + "epoch": 0.82, + "learning_rate": 1.6359686650523888e-06, + "loss": 0.6389, + "step": 2008 + }, + { + "epoch": 0.82, + "learning_rate": 1.6287165034271503e-06, + "loss": 0.6041, + "step": 2009 + }, + { + "epoch": 0.82, + "learning_rate": 1.6214790264100666e-06, + "loss": 0.6301, + "step": 2010 + }, + { + "epoch": 0.82, + "learning_rate": 1.614256246696858e-06, + "loss": 0.6172, + "step": 2011 + }, + { + "epoch": 0.82, + "learning_rate": 1.6070481769574753e-06, + "loss": 0.5996, + "step": 2012 + }, + { + "epoch": 0.82, + "learning_rate": 1.5998548298360527e-06, + "loss": 0.6409, + "step": 2013 + }, + { + "epoch": 0.82, + "learning_rate": 1.5926762179509093e-06, + "loss": 0.6465, + "step": 2014 + }, + { + "epoch": 0.82, + "learning_rate": 1.585512353894505e-06, + "loss": 0.546, + "step": 2015 + }, + { + "epoch": 0.82, + "learning_rate": 1.5783632502334411e-06, + "loss": 0.577, + "step": 2016 + }, + { + "epoch": 0.82, + "learning_rate": 1.5712289195084185e-06, + "loss": 0.6104, + "step": 2017 + }, + { + "epoch": 0.83, + "learning_rate": 1.5641093742342284e-06, + "loss": 0.5518, + "step": 2018 + }, + { + "epoch": 0.83, + "learning_rate": 1.5570046268997209e-06, + "loss": 0.6396, + "step": 2019 + }, + { + "epoch": 0.83, + "learning_rate": 1.5499146899677942e-06, + "loss": 0.5909, + "step": 2020 + }, + { + "epoch": 0.83, + "learning_rate": 1.5428395758753655e-06, + "loss": 0.7107, + "step": 2021 + }, + { + "epoch": 0.83, + "learning_rate": 1.535779297033344e-06, + "loss": 0.5169, + "step": 2022 + }, + { + "epoch": 0.83, + "learning_rate": 1.528733865826625e-06, + "loss": 0.5648, + "step": 2023 + }, + { + "epoch": 0.83, + "learning_rate": 1.521703294614052e-06, + "loss": 0.6138, + "step": 2024 + }, + { + "epoch": 0.83, + "learning_rate": 1.5146875957284012e-06, + "loss": 0.5918, + "step": 2025 + }, + { + "epoch": 0.83, + "learning_rate": 1.5076867814763629e-06, + "loss": 0.5838, + "step": 2026 + }, + { + "epoch": 0.83, + "learning_rate": 1.5007008641385168e-06, + "loss": 0.6687, + "step": 2027 + }, + { + "epoch": 0.83, + "learning_rate": 1.4937298559693136e-06, + "loss": 0.6345, + "step": 2028 + }, + { + "epoch": 0.83, + "learning_rate": 1.4867737691970441e-06, + "loss": 0.5792, + "step": 2029 + }, + { + "epoch": 0.83, + "learning_rate": 1.4798326160238342e-06, + "loss": 0.6309, + "step": 2030 + }, + { + "epoch": 0.83, + "learning_rate": 1.4729064086256017e-06, + "loss": 0.5896, + "step": 2031 + }, + { + "epoch": 0.83, + "learning_rate": 1.4659951591520593e-06, + "loss": 0.6591, + "step": 2032 + }, + { + "epoch": 0.83, + "learning_rate": 1.4590988797266704e-06, + "loss": 0.5904, + "step": 2033 + }, + { + "epoch": 0.83, + "learning_rate": 1.4522175824466456e-06, + "loss": 0.6664, + "step": 2034 + }, + { + "epoch": 0.83, + "learning_rate": 1.445351279382915e-06, + "loss": 0.614, + "step": 2035 + }, + { + "epoch": 0.83, + "learning_rate": 1.4384999825800984e-06, + "loss": 0.6194, + "step": 2036 + }, + { + "epoch": 0.83, + "learning_rate": 1.4316637040565029e-06, + "loss": 0.6533, + "step": 2037 + }, + { + "epoch": 0.83, + "learning_rate": 1.4248424558040819e-06, + "loss": 0.7002, + "step": 2038 + }, + { + "epoch": 0.83, + "learning_rate": 1.4180362497884247e-06, + "loss": 0.5797, + "step": 2039 + }, + { + "epoch": 0.83, + "learning_rate": 1.4112450979487412e-06, + "loss": 0.653, + "step": 2040 + }, + { + "epoch": 0.83, + "learning_rate": 1.4044690121978244e-06, + "loss": 0.6399, + "step": 2041 + }, + { + "epoch": 0.83, + "learning_rate": 1.397708004422047e-06, + "loss": 0.6636, + "step": 2042 + }, + { + "epoch": 0.84, + "learning_rate": 1.3909620864813246e-06, + "loss": 0.7017, + "step": 2043 + }, + { + "epoch": 0.84, + "learning_rate": 1.384231270209111e-06, + "loss": 0.5476, + "step": 2044 + }, + { + "epoch": 0.84, + "learning_rate": 1.3775155674123598e-06, + "loss": 0.617, + "step": 2045 + }, + { + "epoch": 0.84, + "learning_rate": 1.370814989871525e-06, + "loss": 0.6658, + "step": 2046 + }, + { + "epoch": 0.84, + "learning_rate": 1.364129549340516e-06, + "loss": 0.6601, + "step": 2047 + }, + { + "epoch": 0.84, + "learning_rate": 1.3574592575466995e-06, + "loss": 0.586, + "step": 2048 + }, + { + "epoch": 0.84, + "learning_rate": 1.350804126190859e-06, + "loss": 0.5854, + "step": 2049 + }, + { + "epoch": 0.84, + "learning_rate": 1.344164166947194e-06, + "loss": 0.6498, + "step": 2050 + }, + { + "epoch": 0.84, + "learning_rate": 1.337539391463285e-06, + "loss": 0.6421, + "step": 2051 + }, + { + "epoch": 0.84, + "learning_rate": 1.3309298113600755e-06, + "loss": 0.6372, + "step": 2052 + }, + { + "epoch": 0.84, + "learning_rate": 1.3243354382318585e-06, + "loss": 0.6794, + "step": 2053 + }, + { + "epoch": 0.84, + "learning_rate": 1.3177562836462487e-06, + "loss": 0.5993, + "step": 2054 + }, + { + "epoch": 0.84, + "learning_rate": 1.3111923591441643e-06, + "loss": 0.5806, + "step": 2055 + }, + { + "epoch": 0.84, + "learning_rate": 1.3046436762398073e-06, + "loss": 0.6225, + "step": 2056 + }, + { + "epoch": 0.84, + "learning_rate": 1.2981102464206463e-06, + "loss": 0.6666, + "step": 2057 + }, + { + "epoch": 0.84, + "learning_rate": 1.2915920811473937e-06, + "loss": 0.684, + "step": 2058 + }, + { + "epoch": 0.84, + "learning_rate": 1.2850891918539787e-06, + "loss": 0.6658, + "step": 2059 + }, + { + "epoch": 0.84, + "learning_rate": 1.2786015899475445e-06, + "loss": 0.6397, + "step": 2060 + }, + { + "epoch": 0.84, + "learning_rate": 1.2721292868084068e-06, + "loss": 0.6532, + "step": 2061 + }, + { + "epoch": 0.84, + "learning_rate": 1.2656722937900534e-06, + "loss": 0.5878, + "step": 2062 + }, + { + "epoch": 0.84, + "learning_rate": 1.2592306222191086e-06, + "loss": 0.6735, + "step": 2063 + }, + { + "epoch": 0.84, + "learning_rate": 1.2528042833953269e-06, + "loss": 0.6792, + "step": 2064 + }, + { + "epoch": 0.84, + "learning_rate": 1.2463932885915643e-06, + "loss": 0.6204, + "step": 2065 + }, + { + "epoch": 0.84, + "learning_rate": 1.2399976490537557e-06, + "loss": 0.611, + "step": 2066 + }, + { + "epoch": 0.85, + "learning_rate": 1.2336173760009096e-06, + "loss": 0.6381, + "step": 2067 + }, + { + "epoch": 0.85, + "learning_rate": 1.227252480625074e-06, + "loss": 0.6973, + "step": 2068 + }, + { + "epoch": 0.85, + "learning_rate": 1.220902974091317e-06, + "loss": 0.5756, + "step": 2069 + }, + { + "epoch": 0.85, + "learning_rate": 1.2145688675377243e-06, + "loss": 0.6774, + "step": 2070 + }, + { + "epoch": 0.85, + "learning_rate": 1.2082501720753538e-06, + "loss": 0.6484, + "step": 2071 + }, + { + "epoch": 0.85, + "learning_rate": 1.2019468987882433e-06, + "loss": 0.6147, + "step": 2072 + }, + { + "epoch": 0.85, + "learning_rate": 1.195659058733366e-06, + "loss": 0.6161, + "step": 2073 + }, + { + "epoch": 0.85, + "learning_rate": 1.1893866629406315e-06, + "loss": 0.6277, + "step": 2074 + }, + { + "epoch": 0.85, + "learning_rate": 1.1831297224128491e-06, + "loss": 0.593, + "step": 2075 + }, + { + "epoch": 0.85, + "learning_rate": 1.176888248125726e-06, + "loss": 0.5932, + "step": 2076 + }, + { + "epoch": 0.85, + "learning_rate": 1.170662251027831e-06, + "loss": 0.5988, + "step": 2077 + }, + { + "epoch": 0.85, + "learning_rate": 1.164451742040591e-06, + "loss": 0.5786, + "step": 2078 + }, + { + "epoch": 0.85, + "learning_rate": 1.1582567320582561e-06, + "loss": 0.6803, + "step": 2079 + }, + { + "epoch": 0.85, + "learning_rate": 1.1520772319478945e-06, + "loss": 0.6807, + "step": 2080 + }, + { + "epoch": 0.85, + "learning_rate": 1.1459132525493677e-06, + "loss": 0.5992, + "step": 2081 + }, + { + "epoch": 0.85, + "learning_rate": 1.1397648046753062e-06, + "loss": 0.6366, + "step": 2082 + }, + { + "epoch": 0.85, + "learning_rate": 1.1336318991111028e-06, + "loss": 0.6556, + "step": 2083 + }, + { + "epoch": 0.85, + "learning_rate": 1.1275145466148807e-06, + "loss": 0.6221, + "step": 2084 + }, + { + "epoch": 0.85, + "learning_rate": 1.1214127579174804e-06, + "loss": 0.5919, + "step": 2085 + }, + { + "epoch": 0.85, + "learning_rate": 1.1153265437224437e-06, + "loss": 0.6734, + "step": 2086 + }, + { + "epoch": 0.85, + "learning_rate": 1.1092559147059912e-06, + "loss": 0.5559, + "step": 2087 + }, + { + "epoch": 0.85, + "learning_rate": 1.1032008815170082e-06, + "loss": 0.6266, + "step": 2088 + }, + { + "epoch": 0.85, + "learning_rate": 1.0971614547770138e-06, + "loss": 0.6294, + "step": 2089 + }, + { + "epoch": 0.85, + "learning_rate": 1.0911376450801603e-06, + "loss": 0.578, + "step": 2090 + }, + { + "epoch": 0.85, + "learning_rate": 1.0851294629931962e-06, + "loss": 0.565, + "step": 2091 + }, + { + "epoch": 0.86, + "learning_rate": 1.0791369190554658e-06, + "loss": 0.6215, + "step": 2092 + }, + { + "epoch": 0.86, + "learning_rate": 1.0731600237788731e-06, + "loss": 0.5673, + "step": 2093 + }, + { + "epoch": 0.86, + "learning_rate": 1.0671987876478763e-06, + "loss": 0.6209, + "step": 2094 + }, + { + "epoch": 0.86, + "learning_rate": 1.061253221119467e-06, + "loss": 0.6993, + "step": 2095 + }, + { + "epoch": 0.86, + "learning_rate": 1.055323334623143e-06, + "loss": 0.6755, + "step": 2096 + }, + { + "epoch": 0.86, + "learning_rate": 1.0494091385609029e-06, + "loss": 0.6167, + "step": 2097 + }, + { + "epoch": 0.86, + "learning_rate": 1.0435106433072195e-06, + "loss": 0.6113, + "step": 2098 + }, + { + "epoch": 0.86, + "learning_rate": 1.0376278592090217e-06, + "loss": 0.624, + "step": 2099 + }, + { + "epoch": 0.86, + "learning_rate": 1.0317607965856802e-06, + "loss": 0.6265, + "step": 2100 + }, + { + "epoch": 0.86, + "learning_rate": 1.0259094657289893e-06, + "loss": 0.6008, + "step": 2101 + }, + { + "epoch": 0.86, + "learning_rate": 1.020073876903147e-06, + "loss": 0.6696, + "step": 2102 + }, + { + "epoch": 0.86, + "learning_rate": 1.0142540403447321e-06, + "loss": 0.6099, + "step": 2103 + }, + { + "epoch": 0.86, + "learning_rate": 1.0084499662627e-06, + "loss": 0.6025, + "step": 2104 + }, + { + "epoch": 0.86, + "learning_rate": 1.0026616648383468e-06, + "loss": 0.6235, + "step": 2105 + }, + { + "epoch": 0.86, + "learning_rate": 9.968891462253084e-07, + "loss": 0.6619, + "step": 2106 + }, + { + "epoch": 0.86, + "learning_rate": 9.911324205495298e-07, + "loss": 0.5741, + "step": 2107 + }, + { + "epoch": 0.86, + "learning_rate": 9.853914979092571e-07, + "loss": 0.6666, + "step": 2108 + }, + { + "epoch": 0.86, + "learning_rate": 9.79666388375009e-07, + "loss": 0.6399, + "step": 2109 + }, + { + "epoch": 0.86, + "learning_rate": 9.739571019895721e-07, + "loss": 0.6142, + "step": 2110 + }, + { + "epoch": 0.86, + "learning_rate": 9.682636487679753e-07, + "loss": 0.5909, + "step": 2111 + }, + { + "epoch": 0.86, + "learning_rate": 9.625860386974705e-07, + "loss": 0.6401, + "step": 2112 + }, + { + "epoch": 0.86, + "learning_rate": 9.569242817375169e-07, + "loss": 0.6571, + "step": 2113 + }, + { + "epoch": 0.86, + "learning_rate": 9.512783878197706e-07, + "loss": 0.6035, + "step": 2114 + }, + { + "epoch": 0.86, + "learning_rate": 9.456483668480587e-07, + "loss": 0.6043, + "step": 2115 + }, + { + "epoch": 0.87, + "learning_rate": 9.400342286983599e-07, + "loss": 0.6343, + "step": 2116 + }, + { + "epoch": 0.87, + "learning_rate": 9.344359832187999e-07, + "loss": 0.5704, + "step": 2117 + }, + { + "epoch": 0.87, + "learning_rate": 9.288536402296211e-07, + "loss": 0.669, + "step": 2118 + }, + { + "epoch": 0.87, + "learning_rate": 9.232872095231693e-07, + "loss": 0.6577, + "step": 2119 + }, + { + "epoch": 0.87, + "learning_rate": 9.177367008638838e-07, + "loss": 0.6304, + "step": 2120 + }, + { + "epoch": 0.87, + "learning_rate": 9.12202123988265e-07, + "loss": 0.5941, + "step": 2121 + }, + { + "epoch": 0.87, + "learning_rate": 9.066834886048748e-07, + "loss": 0.5584, + "step": 2122 + }, + { + "epoch": 0.87, + "learning_rate": 9.011808043943038e-07, + "loss": 0.5959, + "step": 2123 + }, + { + "epoch": 0.87, + "learning_rate": 8.956940810091674e-07, + "loss": 0.6213, + "step": 2124 + }, + { + "epoch": 0.87, + "learning_rate": 8.902233280740824e-07, + "loss": 0.606, + "step": 2125 + }, + { + "epoch": 0.87, + "learning_rate": 8.847685551856455e-07, + "loss": 0.6536, + "step": 2126 + }, + { + "epoch": 0.87, + "learning_rate": 8.793297719124294e-07, + "loss": 0.5667, + "step": 2127 + }, + { + "epoch": 0.87, + "learning_rate": 8.739069877949524e-07, + "loss": 0.6364, + "step": 2128 + }, + { + "epoch": 0.87, + "learning_rate": 8.685002123456699e-07, + "loss": 0.5998, + "step": 2129 + }, + { + "epoch": 0.87, + "learning_rate": 8.631094550489538e-07, + "loss": 0.6742, + "step": 2130 + }, + { + "epoch": 0.87, + "learning_rate": 8.577347253610813e-07, + "loss": 0.5554, + "step": 2131 + }, + { + "epoch": 0.87, + "learning_rate": 8.523760327102126e-07, + "loss": 0.5789, + "step": 2132 + }, + { + "epoch": 0.87, + "learning_rate": 8.470333864963742e-07, + "loss": 0.6365, + "step": 2133 + }, + { + "epoch": 0.87, + "learning_rate": 8.417067960914482e-07, + "loss": 0.6097, + "step": 2134 + }, + { + "epoch": 0.87, + "learning_rate": 8.363962708391482e-07, + "loss": 0.6142, + "step": 2135 + }, + { + "epoch": 0.87, + "learning_rate": 8.311018200550114e-07, + "loss": 0.6367, + "step": 2136 + }, + { + "epoch": 0.87, + "learning_rate": 8.25823453026372e-07, + "loss": 0.6714, + "step": 2137 + }, + { + "epoch": 0.87, + "learning_rate": 8.20561179012358e-07, + "loss": 0.5737, + "step": 2138 + }, + { + "epoch": 0.87, + "learning_rate": 8.153150072438598e-07, + "loss": 0.6305, + "step": 2139 + }, + { + "epoch": 0.87, + "learning_rate": 8.100849469235272e-07, + "loss": 0.6203, + "step": 2140 + }, + { + "epoch": 0.88, + "learning_rate": 8.048710072257481e-07, + "loss": 0.68, + "step": 2141 + }, + { + "epoch": 0.88, + "learning_rate": 7.996731972966287e-07, + "loss": 0.6501, + "step": 2142 + }, + { + "epoch": 0.88, + "learning_rate": 7.944915262539798e-07, + "loss": 0.671, + "step": 2143 + }, + { + "epoch": 0.88, + "learning_rate": 7.893260031873096e-07, + "loss": 0.6335, + "step": 2144 + }, + { + "epoch": 0.88, + "learning_rate": 7.841766371577919e-07, + "loss": 0.5948, + "step": 2145 + }, + { + "epoch": 0.88, + "learning_rate": 7.790434371982625e-07, + "loss": 0.6684, + "step": 2146 + }, + { + "epoch": 0.88, + "learning_rate": 7.739264123131973e-07, + "loss": 0.5923, + "step": 2147 + }, + { + "epoch": 0.88, + "learning_rate": 7.68825571478703e-07, + "loss": 0.6126, + "step": 2148 + }, + { + "epoch": 0.88, + "learning_rate": 7.637409236424887e-07, + "loss": 0.602, + "step": 2149 + }, + { + "epoch": 0.88, + "learning_rate": 7.586724777238686e-07, + "loss": 0.5567, + "step": 2150 + }, + { + "epoch": 0.88, + "learning_rate": 7.53620242613724e-07, + "loss": 0.6304, + "step": 2151 + }, + { + "epoch": 0.88, + "learning_rate": 7.485842271745125e-07, + "loss": 0.6843, + "step": 2152 + }, + { + "epoch": 0.88, + "learning_rate": 7.435644402402298e-07, + "loss": 0.6658, + "step": 2153 + }, + { + "epoch": 0.88, + "learning_rate": 7.385608906164077e-07, + "loss": 0.6084, + "step": 2154 + }, + { + "epoch": 0.88, + "learning_rate": 7.335735870800975e-07, + "loss": 0.5483, + "step": 2155 + }, + { + "epoch": 0.88, + "learning_rate": 7.28602538379849e-07, + "loss": 0.6574, + "step": 2156 + }, + { + "epoch": 0.88, + "learning_rate": 7.236477532356978e-07, + "loss": 0.6799, + "step": 2157 + }, + { + "epoch": 0.88, + "learning_rate": 7.187092403391549e-07, + "loss": 0.5712, + "step": 2158 + }, + { + "epoch": 0.88, + "learning_rate": 7.137870083531818e-07, + "loss": 0.6346, + "step": 2159 + }, + { + "epoch": 0.88, + "learning_rate": 7.088810659121815e-07, + "loss": 0.5653, + "step": 2160 + }, + { + "epoch": 0.88, + "learning_rate": 7.039914216219867e-07, + "loss": 0.6856, + "step": 2161 + }, + { + "epoch": 0.88, + "learning_rate": 6.991180840598388e-07, + "loss": 0.6299, + "step": 2162 + }, + { + "epoch": 0.88, + "learning_rate": 6.942610617743706e-07, + "loss": 0.7086, + "step": 2163 + }, + { + "epoch": 0.88, + "learning_rate": 6.894203632856e-07, + "loss": 0.6392, + "step": 2164 + }, + { + "epoch": 0.89, + "learning_rate": 6.845959970849059e-07, + "loss": 0.6112, + "step": 2165 + }, + { + "epoch": 0.89, + "learning_rate": 6.797879716350242e-07, + "loss": 0.6383, + "step": 2166 + }, + { + "epoch": 0.89, + "learning_rate": 6.749962953700184e-07, + "loss": 0.5758, + "step": 2167 + }, + { + "epoch": 0.89, + "learning_rate": 6.702209766952794e-07, + "loss": 0.5865, + "step": 2168 + }, + { + "epoch": 0.89, + "learning_rate": 6.654620239874987e-07, + "loss": 0.5631, + "step": 2169 + }, + { + "epoch": 0.89, + "learning_rate": 6.607194455946641e-07, + "loss": 0.6528, + "step": 2170 + }, + { + "epoch": 0.89, + "learning_rate": 6.5599324983604e-07, + "loss": 0.6998, + "step": 2171 + }, + { + "epoch": 0.89, + "learning_rate": 6.512834450021488e-07, + "loss": 0.5728, + "step": 2172 + }, + { + "epoch": 0.89, + "learning_rate": 6.465900393547641e-07, + "loss": 0.612, + "step": 2173 + }, + { + "epoch": 0.89, + "learning_rate": 6.419130411268881e-07, + "loss": 0.6587, + "step": 2174 + }, + { + "epoch": 0.89, + "learning_rate": 6.37252458522748e-07, + "loss": 0.6191, + "step": 2175 + }, + { + "epoch": 0.89, + "learning_rate": 6.326082997177718e-07, + "loss": 0.6054, + "step": 2176 + }, + { + "epoch": 0.89, + "learning_rate": 6.279805728585764e-07, + "loss": 0.6761, + "step": 2177 + }, + { + "epoch": 0.89, + "learning_rate": 6.233692860629592e-07, + "loss": 0.6088, + "step": 2178 + }, + { + "epoch": 0.89, + "learning_rate": 6.187744474198721e-07, + "loss": 0.628, + "step": 2179 + }, + { + "epoch": 0.89, + "learning_rate": 6.141960649894208e-07, + "loss": 0.5644, + "step": 2180 + }, + { + "epoch": 0.89, + "learning_rate": 6.096341468028377e-07, + "loss": 0.5896, + "step": 2181 + }, + { + "epoch": 0.89, + "learning_rate": 6.050887008624817e-07, + "loss": 0.6135, + "step": 2182 + }, + { + "epoch": 0.89, + "learning_rate": 6.005597351418091e-07, + "loss": 0.5854, + "step": 2183 + }, + { + "epoch": 0.89, + "learning_rate": 5.960472575853715e-07, + "loss": 0.5639, + "step": 2184 + }, + { + "epoch": 0.89, + "learning_rate": 5.915512761087993e-07, + "loss": 0.6133, + "step": 2185 + }, + { + "epoch": 0.89, + "learning_rate": 5.870717985987817e-07, + "loss": 0.6259, + "step": 2186 + }, + { + "epoch": 0.89, + "learning_rate": 5.826088329130575e-07, + "loss": 0.6688, + "step": 2187 + }, + { + "epoch": 0.89, + "learning_rate": 5.781623868804054e-07, + "loss": 0.5743, + "step": 2188 + }, + { + "epoch": 0.89, + "learning_rate": 5.737324683006218e-07, + "loss": 0.6643, + "step": 2189 + }, + { + "epoch": 0.9, + "learning_rate": 5.693190849445107e-07, + "loss": 0.6536, + "step": 2190 + }, + { + "epoch": 0.9, + "learning_rate": 5.649222445538749e-07, + "loss": 0.6991, + "step": 2191 + }, + { + "epoch": 0.9, + "learning_rate": 5.605419548414947e-07, + "loss": 0.5986, + "step": 2192 + }, + { + "epoch": 0.9, + "learning_rate": 5.561782234911195e-07, + "loss": 0.6193, + "step": 2193 + }, + { + "epoch": 0.9, + "learning_rate": 5.518310581574515e-07, + "loss": 0.6578, + "step": 2194 + }, + { + "epoch": 0.9, + "learning_rate": 5.475004664661321e-07, + "loss": 0.6087, + "step": 2195 + }, + { + "epoch": 0.9, + "learning_rate": 5.431864560137334e-07, + "loss": 0.6861, + "step": 2196 + }, + { + "epoch": 0.9, + "learning_rate": 5.388890343677389e-07, + "loss": 0.5871, + "step": 2197 + }, + { + "epoch": 0.9, + "learning_rate": 5.346082090665328e-07, + "loss": 0.5883, + "step": 2198 + }, + { + "epoch": 0.9, + "learning_rate": 5.303439876193861e-07, + "loss": 0.6302, + "step": 2199 + }, + { + "epoch": 0.9, + "learning_rate": 5.260963775064443e-07, + "loss": 0.5928, + "step": 2200 + }, + { + "epoch": 0.9, + "learning_rate": 5.218653861787171e-07, + "loss": 0.6021, + "step": 2201 + }, + { + "epoch": 0.9, + "learning_rate": 5.176510210580577e-07, + "loss": 0.6564, + "step": 2202 + }, + { + "epoch": 0.9, + "learning_rate": 5.134532895371546e-07, + "loss": 0.6093, + "step": 2203 + }, + { + "epoch": 0.9, + "learning_rate": 5.09272198979519e-07, + "loss": 0.5523, + "step": 2204 + }, + { + "epoch": 0.9, + "learning_rate": 5.051077567194729e-07, + "loss": 0.6599, + "step": 2205 + }, + { + "epoch": 0.9, + "learning_rate": 5.009599700621315e-07, + "loss": 0.6436, + "step": 2206 + }, + { + "epoch": 0.9, + "learning_rate": 4.968288462833959e-07, + "loss": 0.6187, + "step": 2207 + }, + { + "epoch": 0.9, + "learning_rate": 4.927143926299383e-07, + "loss": 0.6571, + "step": 2208 + }, + { + "epoch": 0.9, + "learning_rate": 4.88616616319183e-07, + "loss": 0.5974, + "step": 2209 + }, + { + "epoch": 0.9, + "learning_rate": 4.84535524539309e-07, + "loss": 0.6324, + "step": 2210 + }, + { + "epoch": 0.9, + "learning_rate": 4.804711244492166e-07, + "loss": 0.5792, + "step": 2211 + }, + { + "epoch": 0.9, + "learning_rate": 4.764234231785381e-07, + "loss": 0.5577, + "step": 2212 + }, + { + "epoch": 0.9, + "learning_rate": 4.723924278276015e-07, + "loss": 0.6164, + "step": 2213 + }, + { + "epoch": 0.91, + "learning_rate": 4.6837814546743853e-07, + "loss": 0.6528, + "step": 2214 + }, + { + "epoch": 0.91, + "learning_rate": 4.6438058313976295e-07, + "loss": 0.699, + "step": 2215 + }, + { + "epoch": 0.91, + "learning_rate": 4.603997478569533e-07, + "loss": 0.6786, + "step": 2216 + }, + { + "epoch": 0.91, + "learning_rate": 4.5643564660205033e-07, + "loss": 0.6093, + "step": 2217 + }, + { + "epoch": 0.91, + "learning_rate": 4.5248828632873944e-07, + "loss": 0.7176, + "step": 2218 + }, + { + "epoch": 0.91, + "learning_rate": 4.4855767396134064e-07, + "loss": 0.5165, + "step": 2219 + }, + { + "epoch": 0.91, + "learning_rate": 4.44643816394793e-07, + "loss": 0.5428, + "step": 2220 + }, + { + "epoch": 0.91, + "learning_rate": 4.4074672049464695e-07, + "loss": 0.5496, + "step": 2221 + }, + { + "epoch": 0.91, + "learning_rate": 4.3686639309705183e-07, + "loss": 0.6485, + "step": 2222 + }, + { + "epoch": 0.91, + "learning_rate": 4.3300284100873615e-07, + "loss": 0.5744, + "step": 2223 + }, + { + "epoch": 0.91, + "learning_rate": 4.2915607100701084e-07, + "loss": 0.7151, + "step": 2224 + }, + { + "epoch": 0.91, + "learning_rate": 4.253260898397382e-07, + "loss": 0.5757, + "step": 2225 + }, + { + "epoch": 0.91, + "learning_rate": 4.215129042253385e-07, + "loss": 0.6434, + "step": 2226 + }, + { + "epoch": 0.91, + "learning_rate": 4.177165208527645e-07, + "loss": 0.5767, + "step": 2227 + }, + { + "epoch": 0.91, + "learning_rate": 4.139369463814991e-07, + "loss": 0.622, + "step": 2228 + }, + { + "epoch": 0.91, + "learning_rate": 4.1017418744153437e-07, + "loss": 0.7016, + "step": 2229 + }, + { + "epoch": 0.91, + "learning_rate": 4.064282506333728e-07, + "loss": 0.6253, + "step": 2230 + }, + { + "epoch": 0.91, + "learning_rate": 4.0269914252799914e-07, + "loss": 0.6492, + "step": 2231 + }, + { + "epoch": 0.91, + "learning_rate": 3.989868696668864e-07, + "loss": 0.5724, + "step": 2232 + }, + { + "epoch": 0.91, + "learning_rate": 3.9529143856196995e-07, + "loss": 0.6454, + "step": 2233 + }, + { + "epoch": 0.91, + "learning_rate": 3.9161285569564114e-07, + "loss": 0.7064, + "step": 2234 + }, + { + "epoch": 0.91, + "learning_rate": 3.8795112752074483e-07, + "loss": 0.5768, + "step": 2235 + }, + { + "epoch": 0.91, + "learning_rate": 3.843062604605496e-07, + "loss": 0.5388, + "step": 2236 + }, + { + "epoch": 0.91, + "learning_rate": 3.8067826090875336e-07, + "loss": 0.5691, + "step": 2237 + }, + { + "epoch": 0.91, + "learning_rate": 3.7706713522946637e-07, + "loss": 0.6014, + "step": 2238 + }, + { + "epoch": 0.92, + "learning_rate": 3.7347288975719374e-07, + "loss": 0.6078, + "step": 2239 + }, + { + "epoch": 0.92, + "learning_rate": 3.698955307968355e-07, + "loss": 0.5974, + "step": 2240 + }, + { + "epoch": 0.92, + "learning_rate": 3.663350646236663e-07, + "loss": 0.5968, + "step": 2241 + }, + { + "epoch": 0.92, + "learning_rate": 3.627914974833302e-07, + "loss": 0.5903, + "step": 2242 + }, + { + "epoch": 0.92, + "learning_rate": 3.59264835591826e-07, + "loss": 0.6221, + "step": 2243 + }, + { + "epoch": 0.92, + "learning_rate": 3.557550851354985e-07, + "loss": 0.6264, + "step": 2244 + }, + { + "epoch": 0.92, + "learning_rate": 3.522622522710306e-07, + "loss": 0.6753, + "step": 2245 + }, + { + "epoch": 0.92, + "learning_rate": 3.4878634312542124e-07, + "loss": 0.6668, + "step": 2246 + }, + { + "epoch": 0.92, + "learning_rate": 3.4532736379598974e-07, + "loss": 0.625, + "step": 2247 + }, + { + "epoch": 0.92, + "learning_rate": 3.418853203503525e-07, + "loss": 0.5118, + "step": 2248 + }, + { + "epoch": 0.92, + "learning_rate": 3.384602188264219e-07, + "loss": 0.5872, + "step": 2249 + }, + { + "epoch": 0.92, + "learning_rate": 3.350520652323863e-07, + "loss": 0.617, + "step": 2250 + }, + { + "epoch": 0.92, + "learning_rate": 3.316608655467113e-07, + "loss": 0.62, + "step": 2251 + }, + { + "epoch": 0.92, + "learning_rate": 3.282866257181183e-07, + "loss": 0.7399, + "step": 2252 + }, + { + "epoch": 0.92, + "learning_rate": 3.2492935166557606e-07, + "loss": 0.6344, + "step": 2253 + }, + { + "epoch": 0.92, + "learning_rate": 3.2158904927829913e-07, + "loss": 0.6004, + "step": 2254 + }, + { + "epoch": 0.92, + "learning_rate": 3.18265724415725e-07, + "loss": 0.6416, + "step": 2255 + }, + { + "epoch": 0.92, + "learning_rate": 3.149593829075137e-07, + "loss": 0.5987, + "step": 2256 + }, + { + "epoch": 0.92, + "learning_rate": 3.116700305535303e-07, + "loss": 0.6007, + "step": 2257 + }, + { + "epoch": 0.92, + "learning_rate": 3.083976731238414e-07, + "loss": 0.6365, + "step": 2258 + }, + { + "epoch": 0.92, + "learning_rate": 3.0514231635869863e-07, + "loss": 0.6236, + "step": 2259 + }, + { + "epoch": 0.92, + "learning_rate": 3.019039659685341e-07, + "loss": 0.5542, + "step": 2260 + }, + { + "epoch": 0.92, + "learning_rate": 2.9868262763394717e-07, + "loss": 0.5988, + "step": 2261 + }, + { + "epoch": 0.92, + "learning_rate": 2.9547830700569545e-07, + "loss": 0.5921, + "step": 2262 + }, + { + "epoch": 0.93, + "learning_rate": 2.922910097046838e-07, + "loss": 0.628, + "step": 2263 + }, + { + "epoch": 0.93, + "learning_rate": 2.8912074132195545e-07, + "loss": 0.6705, + "step": 2264 + }, + { + "epoch": 0.93, + "learning_rate": 2.8596750741868295e-07, + "loss": 0.6937, + "step": 2265 + }, + { + "epoch": 0.93, + "learning_rate": 2.8283131352615733e-07, + "loss": 0.6698, + "step": 2266 + }, + { + "epoch": 0.93, + "learning_rate": 2.7971216514577906e-07, + "loss": 0.599, + "step": 2267 + }, + { + "epoch": 0.93, + "learning_rate": 2.7661006774904687e-07, + "loss": 0.5504, + "step": 2268 + }, + { + "epoch": 0.93, + "learning_rate": 2.735250267775491e-07, + "loss": 0.6754, + "step": 2269 + }, + { + "epoch": 0.93, + "learning_rate": 2.7045704764295686e-07, + "loss": 0.6503, + "step": 2270 + }, + { + "epoch": 0.93, + "learning_rate": 2.674061357270086e-07, + "loss": 0.6587, + "step": 2271 + }, + { + "epoch": 0.93, + "learning_rate": 2.6437229638150655e-07, + "loss": 0.6321, + "step": 2272 + }, + { + "epoch": 0.93, + "learning_rate": 2.613555349283026e-07, + "loss": 0.6236, + "step": 2273 + }, + { + "epoch": 0.93, + "learning_rate": 2.5835585665929365e-07, + "loss": 0.6365, + "step": 2274 + }, + { + "epoch": 0.93, + "learning_rate": 2.553732668364095e-07, + "loss": 0.6323, + "step": 2275 + }, + { + "epoch": 0.93, + "learning_rate": 2.524077706916028e-07, + "loss": 0.6625, + "step": 2276 + }, + { + "epoch": 0.93, + "learning_rate": 2.4945937342684024e-07, + "loss": 0.5559, + "step": 2277 + }, + { + "epoch": 0.93, + "learning_rate": 2.4652808021409567e-07, + "loss": 0.704, + "step": 2278 + }, + { + "epoch": 0.93, + "learning_rate": 2.4361389619534157e-07, + "loss": 0.6453, + "step": 2279 + }, + { + "epoch": 0.93, + "learning_rate": 2.4071682648253437e-07, + "loss": 0.6299, + "step": 2280 + }, + { + "epoch": 0.93, + "learning_rate": 2.3783687615760998e-07, + "loss": 0.604, + "step": 2281 + }, + { + "epoch": 0.93, + "learning_rate": 2.3497405027247845e-07, + "loss": 0.5761, + "step": 2282 + }, + { + "epoch": 0.93, + "learning_rate": 2.3212835384900268e-07, + "loss": 0.6042, + "step": 2283 + }, + { + "epoch": 0.93, + "learning_rate": 2.292997918790063e-07, + "loss": 0.5963, + "step": 2284 + }, + { + "epoch": 0.93, + "learning_rate": 2.2648836932425032e-07, + "loss": 0.6466, + "step": 2285 + }, + { + "epoch": 0.93, + "learning_rate": 2.2369409111643204e-07, + "loss": 0.6451, + "step": 2286 + }, + { + "epoch": 0.93, + "learning_rate": 2.2091696215717606e-07, + "loss": 0.6075, + "step": 2287 + }, + { + "epoch": 0.94, + "learning_rate": 2.1815698731802227e-07, + "loss": 0.6647, + "step": 2288 + }, + { + "epoch": 0.94, + "learning_rate": 2.154141714404212e-07, + "loss": 0.6471, + "step": 2289 + }, + { + "epoch": 0.94, + "learning_rate": 2.126885193357231e-07, + "loss": 0.5999, + "step": 2290 + }, + { + "epoch": 0.94, + "learning_rate": 2.0998003578517e-07, + "loss": 0.7041, + "step": 2291 + }, + { + "epoch": 0.94, + "learning_rate": 2.07288725539887e-07, + "loss": 0.5889, + "step": 2292 + }, + { + "epoch": 0.94, + "learning_rate": 2.0461459332087653e-07, + "loss": 0.6521, + "step": 2293 + }, + { + "epoch": 0.94, + "learning_rate": 2.0195764381900295e-07, + "loss": 0.6011, + "step": 2294 + }, + { + "epoch": 0.94, + "learning_rate": 1.99317881694997e-07, + "loss": 0.7214, + "step": 2295 + }, + { + "epoch": 0.94, + "learning_rate": 1.9669531157943233e-07, + "loss": 0.6214, + "step": 2296 + }, + { + "epoch": 0.94, + "learning_rate": 1.9408993807272903e-07, + "loss": 0.7617, + "step": 2297 + }, + { + "epoch": 0.94, + "learning_rate": 1.9150176574514233e-07, + "loss": 0.6108, + "step": 2298 + }, + { + "epoch": 0.94, + "learning_rate": 1.889307991367506e-07, + "loss": 0.6425, + "step": 2299 + }, + { + "epoch": 0.94, + "learning_rate": 1.8637704275745294e-07, + "loss": 0.657, + "step": 2300 + }, + { + "epoch": 0.94, + "learning_rate": 1.8384050108695707e-07, + "loss": 0.5848, + "step": 2301 + }, + { + "epoch": 0.94, + "learning_rate": 1.8132117857477593e-07, + "loss": 0.5859, + "step": 2302 + }, + { + "epoch": 0.94, + "learning_rate": 1.788190796402134e-07, + "loss": 0.6068, + "step": 2303 + }, + { + "epoch": 0.94, + "learning_rate": 1.7633420867236518e-07, + "loss": 0.5742, + "step": 2304 + }, + { + "epoch": 0.94, + "learning_rate": 1.738665700301012e-07, + "loss": 0.5427, + "step": 2305 + }, + { + "epoch": 0.94, + "learning_rate": 1.7141616804206784e-07, + "loss": 0.6511, + "step": 2306 + }, + { + "epoch": 0.94, + "learning_rate": 1.6898300700667115e-07, + "loss": 0.5694, + "step": 2307 + }, + { + "epoch": 0.94, + "learning_rate": 1.665670911920758e-07, + "loss": 0.6221, + "step": 2308 + }, + { + "epoch": 0.94, + "learning_rate": 1.6416842483619743e-07, + "loss": 0.6609, + "step": 2309 + }, + { + "epoch": 0.94, + "learning_rate": 1.6178701214669023e-07, + "loss": 0.5805, + "step": 2310 + }, + { + "epoch": 0.94, + "learning_rate": 1.5942285730094265e-07, + "loss": 0.6571, + "step": 2311 + }, + { + "epoch": 0.95, + "learning_rate": 1.5707596444607288e-07, + "loss": 0.5605, + "step": 2312 + }, + { + "epoch": 0.95, + "learning_rate": 1.5474633769891667e-07, + "loss": 0.6185, + "step": 2313 + }, + { + "epoch": 0.95, + "learning_rate": 1.5243398114602182e-07, + "loss": 0.6843, + "step": 2314 + }, + { + "epoch": 0.95, + "learning_rate": 1.5013889884364253e-07, + "loss": 0.7244, + "step": 2315 + }, + { + "epoch": 0.95, + "learning_rate": 1.4786109481773058e-07, + "loss": 0.5748, + "step": 2316 + }, + { + "epoch": 0.95, + "learning_rate": 1.4560057306392872e-07, + "loss": 0.5641, + "step": 2317 + }, + { + "epoch": 0.95, + "learning_rate": 1.4335733754756388e-07, + "loss": 0.6369, + "step": 2318 + }, + { + "epoch": 0.95, + "learning_rate": 1.4113139220364058e-07, + "loss": 0.644, + "step": 2319 + }, + { + "epoch": 0.95, + "learning_rate": 1.3892274093683323e-07, + "loss": 0.6212, + "step": 2320 + }, + { + "epoch": 0.95, + "learning_rate": 1.367313876214782e-07, + "loss": 0.6529, + "step": 2321 + }, + { + "epoch": 0.95, + "learning_rate": 1.345573361015706e-07, + "loss": 0.6405, + "step": 2322 + }, + { + "epoch": 0.95, + "learning_rate": 1.3240059019075546e-07, + "loss": 0.5578, + "step": 2323 + }, + { + "epoch": 0.95, + "learning_rate": 1.302611536723164e-07, + "loss": 0.5952, + "step": 2324 + }, + { + "epoch": 0.95, + "learning_rate": 1.2813903029918028e-07, + "loss": 0.6696, + "step": 2325 + }, + { + "epoch": 0.95, + "learning_rate": 1.260342237938994e-07, + "loss": 0.6686, + "step": 2326 + }, + { + "epoch": 0.95, + "learning_rate": 1.239467378486503e-07, + "loss": 0.6727, + "step": 2327 + }, + { + "epoch": 0.95, + "learning_rate": 1.2187657612522717e-07, + "loss": 0.7231, + "step": 2328 + }, + { + "epoch": 0.95, + "learning_rate": 1.1982374225503412e-07, + "loss": 0.6203, + "step": 2329 + }, + { + "epoch": 0.95, + "learning_rate": 1.177882398390795e-07, + "loss": 0.6314, + "step": 2330 + }, + { + "epoch": 0.95, + "learning_rate": 1.1577007244796823e-07, + "loss": 0.6109, + "step": 2331 + }, + { + "epoch": 0.95, + "learning_rate": 1.1376924362189956e-07, + "loss": 0.6494, + "step": 2332 + }, + { + "epoch": 0.95, + "learning_rate": 1.1178575687065485e-07, + "loss": 0.7083, + "step": 2333 + }, + { + "epoch": 0.95, + "learning_rate": 1.0981961567359756e-07, + "loss": 0.5516, + "step": 2334 + }, + { + "epoch": 0.95, + "learning_rate": 1.0787082347966104e-07, + "loss": 0.5751, + "step": 2335 + }, + { + "epoch": 0.96, + "learning_rate": 1.0593938370734969e-07, + "loss": 0.5631, + "step": 2336 + }, + { + "epoch": 0.96, + "learning_rate": 1.0402529974472442e-07, + "loss": 0.5252, + "step": 2337 + }, + { + "epoch": 0.96, + "learning_rate": 1.0212857494940276e-07, + "loss": 0.5919, + "step": 2338 + }, + { + "epoch": 0.96, + "learning_rate": 1.0024921264855436e-07, + "loss": 0.59, + "step": 2339 + }, + { + "epoch": 0.96, + "learning_rate": 9.83872161388888e-08, + "loss": 0.6296, + "step": 2340 + }, + { + "epoch": 0.96, + "learning_rate": 9.654258868665334e-08, + "loss": 0.5512, + "step": 2341 + }, + { + "epoch": 0.96, + "learning_rate": 9.471533352762962e-08, + "loss": 0.5536, + "step": 2342 + }, + { + "epoch": 0.96, + "learning_rate": 9.290545386712147e-08, + "loss": 0.6088, + "step": 2343 + }, + { + "epoch": 0.96, + "learning_rate": 9.111295287995814e-08, + "loss": 0.5847, + "step": 2344 + }, + { + "epoch": 0.96, + "learning_rate": 8.933783371047888e-08, + "loss": 0.5719, + "step": 2345 + }, + { + "epoch": 0.96, + "learning_rate": 8.758009947253621e-08, + "loss": 0.5565, + "step": 2346 + }, + { + "epoch": 0.96, + "learning_rate": 8.583975324948479e-08, + "loss": 0.5576, + "step": 2347 + }, + { + "epoch": 0.96, + "learning_rate": 8.411679809417706e-08, + "loss": 0.644, + "step": 2348 + }, + { + "epoch": 0.96, + "learning_rate": 8.241123702896093e-08, + "loss": 0.5563, + "step": 2349 + }, + { + "epoch": 0.96, + "learning_rate": 8.072307304567206e-08, + "loss": 0.6191, + "step": 2350 + }, + { + "epoch": 0.96, + "learning_rate": 7.9052309105625e-08, + "loss": 0.6436, + "step": 2351 + }, + { + "epoch": 0.96, + "learning_rate": 7.739894813961645e-08, + "loss": 0.6221, + "step": 2352 + }, + { + "epoch": 0.96, + "learning_rate": 7.576299304791312e-08, + "loss": 0.6291, + "step": 2353 + }, + { + "epoch": 0.96, + "learning_rate": 7.414444670024834e-08, + "loss": 0.672, + "step": 2354 + }, + { + "epoch": 0.96, + "learning_rate": 7.254331193581987e-08, + "loss": 0.617, + "step": 2355 + }, + { + "epoch": 0.96, + "learning_rate": 7.095959156328103e-08, + "loss": 0.6161, + "step": 2356 + }, + { + "epoch": 0.96, + "learning_rate": 6.939328836073733e-08, + "loss": 0.6588, + "step": 2357 + }, + { + "epoch": 0.96, + "learning_rate": 6.784440507574319e-08, + "loss": 0.6391, + "step": 2358 + }, + { + "epoch": 0.96, + "learning_rate": 6.631294442529412e-08, + "loss": 0.6367, + "step": 2359 + }, + { + "epoch": 0.96, + "learning_rate": 6.479890909582453e-08, + "loss": 0.6447, + "step": 2360 + }, + { + "epoch": 0.97, + "learning_rate": 6.330230174319996e-08, + "loss": 0.5896, + "step": 2361 + }, + { + "epoch": 0.97, + "learning_rate": 6.182312499271703e-08, + "loss": 0.6164, + "step": 2362 + }, + { + "epoch": 0.97, + "learning_rate": 6.036138143909464e-08, + "loss": 0.5986, + "step": 2363 + }, + { + "epoch": 0.97, + "learning_rate": 5.891707364647281e-08, + "loss": 0.6594, + "step": 2364 + }, + { + "epoch": 0.97, + "learning_rate": 5.749020414840267e-08, + "loss": 0.6121, + "step": 2365 + }, + { + "epoch": 0.97, + "learning_rate": 5.6080775447849846e-08, + "loss": 0.6253, + "step": 2366 + }, + { + "epoch": 0.97, + "learning_rate": 5.46887900171833e-08, + "loss": 0.6113, + "step": 2367 + }, + { + "epoch": 0.97, + "learning_rate": 5.3314250298173164e-08, + "loss": 0.5733, + "step": 2368 + }, + { + "epoch": 0.97, + "learning_rate": 5.195715870199181e-08, + "loss": 0.617, + "step": 2369 + }, + { + "epoch": 0.97, + "learning_rate": 5.06175176091972e-08, + "loss": 0.6222, + "step": 2370 + }, + { + "epoch": 0.97, + "learning_rate": 4.9295329369741794e-08, + "loss": 0.6593, + "step": 2371 + }, + { + "epoch": 0.97, + "learning_rate": 4.799059630296144e-08, + "loss": 0.6411, + "step": 2372 + }, + { + "epoch": 0.97, + "learning_rate": 4.6703320697572e-08, + "loss": 0.5944, + "step": 2373 + }, + { + "epoch": 0.97, + "learning_rate": 4.5433504811667193e-08, + "loss": 0.5662, + "step": 2374 + }, + { + "epoch": 0.97, + "learning_rate": 4.418115087271302e-08, + "loss": 0.5919, + "step": 2375 + }, + { + "epoch": 0.97, + "learning_rate": 4.2946261077544404e-08, + "loss": 0.6008, + "step": 2376 + }, + { + "epoch": 0.97, + "learning_rate": 4.172883759235968e-08, + "loss": 0.6319, + "step": 2377 + }, + { + "epoch": 0.97, + "learning_rate": 4.0528882552720584e-08, + "loss": 0.632, + "step": 2378 + }, + { + "epoch": 0.97, + "learning_rate": 3.9346398063546676e-08, + "loss": 0.665, + "step": 2379 + }, + { + "epoch": 0.97, + "learning_rate": 3.8181386199109825e-08, + "loss": 0.6213, + "step": 2380 + }, + { + "epoch": 0.97, + "learning_rate": 3.7033849003031975e-08, + "loss": 0.6881, + "step": 2381 + }, + { + "epoch": 0.97, + "learning_rate": 3.59037884882818e-08, + "loss": 0.6056, + "step": 2382 + }, + { + "epoch": 0.97, + "learning_rate": 3.4791206637172504e-08, + "loss": 0.5901, + "step": 2383 + }, + { + "epoch": 0.97, + "learning_rate": 3.369610540135404e-08, + "loss": 0.6251, + "step": 2384 + }, + { + "epoch": 0.98, + "learning_rate": 3.261848670181533e-08, + "loss": 0.6456, + "step": 2385 + }, + { + "epoch": 0.98, + "learning_rate": 3.15583524288765e-08, + "loss": 0.6233, + "step": 2386 + }, + { + "epoch": 0.98, + "learning_rate": 3.051570444218665e-08, + "loss": 0.6181, + "step": 2387 + }, + { + "epoch": 0.98, + "learning_rate": 2.9490544570723867e-08, + "loss": 0.7135, + "step": 2388 + }, + { + "epoch": 0.98, + "learning_rate": 2.8482874612785205e-08, + "loss": 0.6401, + "step": 2389 + }, + { + "epoch": 0.98, + "learning_rate": 2.7492696335991165e-08, + "loss": 0.5945, + "step": 2390 + }, + { + "epoch": 0.98, + "learning_rate": 2.6520011477274567e-08, + "loss": 0.585, + "step": 2391 + }, + { + "epoch": 0.98, + "learning_rate": 2.5564821742888324e-08, + "loss": 0.6019, + "step": 2392 + }, + { + "epoch": 0.98, + "learning_rate": 2.46271288083888e-08, + "loss": 0.5982, + "step": 2393 + }, + { + "epoch": 0.98, + "learning_rate": 2.37069343186469e-08, + "loss": 0.6452, + "step": 2394 + }, + { + "epoch": 0.98, + "learning_rate": 2.2804239887832536e-08, + "loss": 0.5608, + "step": 2395 + }, + { + "epoch": 0.98, + "learning_rate": 2.1919047099421274e-08, + "loss": 0.6123, + "step": 2396 + }, + { + "epoch": 0.98, + "learning_rate": 2.10513575061877e-08, + "loss": 0.5814, + "step": 2397 + }, + { + "epoch": 0.98, + "learning_rate": 2.0201172630199827e-08, + "loss": 0.6203, + "step": 2398 + }, + { + "epoch": 0.98, + "learning_rate": 1.9368493962823585e-08, + "loss": 0.6164, + "step": 2399 + }, + { + "epoch": 0.98, + "learning_rate": 1.8553322964713906e-08, + "loss": 0.5999, + "step": 2400 + }, + { + "epoch": 0.98, + "learning_rate": 1.7755661065814723e-08, + "loss": 0.6118, + "step": 2401 + }, + { + "epoch": 0.98, + "learning_rate": 1.697550966535566e-08, + "loss": 0.5894, + "step": 2402 + }, + { + "epoch": 0.98, + "learning_rate": 1.621287013185202e-08, + "loss": 0.5942, + "step": 2403 + }, + { + "epoch": 0.98, + "learning_rate": 1.546774380309701e-08, + "loss": 0.5385, + "step": 2404 + }, + { + "epoch": 0.98, + "learning_rate": 1.4740131986166196e-08, + "loss": 0.6258, + "step": 2405 + }, + { + "epoch": 0.98, + "learning_rate": 1.4030035957410815e-08, + "loss": 0.576, + "step": 2406 + }, + { + "epoch": 0.98, + "learning_rate": 1.3337456962454476e-08, + "loss": 0.5796, + "step": 2407 + }, + { + "epoch": 0.98, + "learning_rate": 1.2662396216196471e-08, + "loss": 0.5841, + "step": 2408 + }, + { + "epoch": 0.98, + "learning_rate": 1.2004854902801789e-08, + "loss": 0.6282, + "step": 2409 + }, + { + "epoch": 0.99, + "learning_rate": 1.1364834175708883e-08, + "loss": 0.6947, + "step": 2410 + }, + { + "epoch": 0.99, + "learning_rate": 1.0742335157618578e-08, + "loss": 0.5592, + "step": 2411 + }, + { + "epoch": 0.99, + "learning_rate": 1.0137358940495168e-08, + "loss": 0.6063, + "step": 2412 + }, + { + "epoch": 0.99, + "learning_rate": 9.549906585567536e-09, + "loss": 0.623, + "step": 2413 + }, + { + "epoch": 0.99, + "learning_rate": 8.979979123323601e-09, + "loss": 0.5986, + "step": 2414 + }, + { + "epoch": 0.99, + "learning_rate": 8.427577553509203e-09, + "loss": 0.6024, + "step": 2415 + }, + { + "epoch": 0.99, + "learning_rate": 7.892702845129218e-09, + "loss": 0.6173, + "step": 2416 + }, + { + "epoch": 0.99, + "learning_rate": 7.375355936440898e-09, + "loss": 0.7068, + "step": 2417 + }, + { + "epoch": 0.99, + "learning_rate": 6.875537734954973e-09, + "loss": 0.6872, + "step": 2418 + }, + { + "epoch": 0.99, + "learning_rate": 6.3932491174356626e-09, + "loss": 0.6079, + "step": 2419 + }, + { + "epoch": 0.99, + "learning_rate": 5.928490929897335e-09, + "loss": 0.5665, + "step": 2420 + }, + { + "epoch": 0.99, + "learning_rate": 5.4812639876000715e-09, + "loss": 0.6371, + "step": 2421 + }, + { + "epoch": 0.99, + "learning_rate": 5.051569075056328e-09, + "loss": 0.5822, + "step": 2422 + }, + { + "epoch": 0.99, + "learning_rate": 4.639406946018721e-09, + "loss": 0.6264, + "step": 2423 + }, + { + "epoch": 0.99, + "learning_rate": 4.2447783234900175e-09, + "loss": 0.625, + "step": 2424 + }, + { + "epoch": 0.99, + "learning_rate": 3.867683899712038e-09, + "loss": 0.6325, + "step": 2425 + }, + { + "epoch": 0.99, + "learning_rate": 3.5081243361700934e-09, + "loss": 0.6229, + "step": 2426 + }, + { + "epoch": 0.99, + "learning_rate": 3.166100263592986e-09, + "loss": 0.6136, + "step": 2427 + }, + { + "epoch": 0.99, + "learning_rate": 2.8416122819452384e-09, + "loss": 0.6242, + "step": 2428 + }, + { + "epoch": 0.99, + "learning_rate": 2.5346609604326446e-09, + "loss": 0.6293, + "step": 2429 + }, + { + "epoch": 0.99, + "learning_rate": 2.245246837498938e-09, + "loss": 0.5637, + "step": 2430 + }, + { + "epoch": 0.99, + "learning_rate": 1.973370420823573e-09, + "loss": 0.5801, + "step": 2431 + }, + { + "epoch": 0.99, + "learning_rate": 1.719032187321723e-09, + "loss": 0.6315, + "step": 2432 + }, + { + "epoch": 0.99, + "learning_rate": 1.4822325831453932e-09, + "loss": 0.6365, + "step": 2433 + }, + { + "epoch": 1.0, + "learning_rate": 1.2629720236800868e-09, + "loss": 0.5855, + "step": 2434 + }, + { + "epoch": 1.0, + "learning_rate": 1.0612508935459176e-09, + "loss": 0.6802, + "step": 2435 + }, + { + "epoch": 1.0, + "learning_rate": 8.770695465920576e-10, + "loss": 0.5994, + "step": 2436 + }, + { + "epoch": 1.0, + "learning_rate": 7.104283059056194e-10, + "loss": 0.5722, + "step": 2437 + }, + { + "epoch": 1.0, + "learning_rate": 5.613274638005539e-10, + "loss": 0.4984, + "step": 2438 + }, + { + "epoch": 1.0, + "learning_rate": 4.2976728182653193e-10, + "loss": 0.6701, + "step": 2439 + }, + { + "epoch": 1.0, + "learning_rate": 3.1574799076006247e-10, + "loss": 0.5628, + "step": 2440 + }, + { + "epoch": 1.0, + "learning_rate": 2.1926979061004382e-10, + "loss": 0.6036, + "step": 2441 + }, + { + "epoch": 1.0, + "learning_rate": 1.4033285061554325e-10, + "loss": 0.6475, + "step": 2442 + }, + { + "epoch": 1.0, + "learning_rate": 7.893730924468657e-11, + "loss": 0.6088, + "step": 2443 + }, + { + "epoch": 1.0, + "learning_rate": 3.508327419576851e-11, + "loss": 0.6262, + "step": 2444 + }, + { + "epoch": 1.0, + "learning_rate": 8.770822395032241e-12, + "loss": 0.5745, + "step": 2445 + }, + { + "epoch": 1.0, + "learning_rate": 0.0, + "loss": 0.6366, + "step": 2446 + }, + { + "epoch": 1.0, + "step": 2446, + "total_flos": 191695072002048.0, + "train_loss": 0.6746118150815941, + "train_runtime": 25368.0481, + "train_samples_per_second": 12.339, + "train_steps_per_second": 0.096 + } + ], + "logging_steps": 1.0, + "max_steps": 2446, + "num_input_tokens_seen": 0, + "num_train_epochs": 1, + "save_steps": 1000, + "total_flos": 191695072002048.0, + "train_batch_size": 16, + "trial_name": null, + "trial_params": null +}