{ "best_metric": null, "best_model_checkpoint": null, "epoch": 3.0, "eval_steps": 500, "global_step": 6144, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0, "learning_rate": 2.702702702702703e-08, "loss": 0.6772, "step": 1 }, { "epoch": 0.0, "learning_rate": 5.405405405405406e-08, "loss": 0.8209, "step": 2 }, { "epoch": 0.0, "learning_rate": 8.108108108108109e-08, "loss": 0.9717, "step": 3 }, { "epoch": 0.0, "learning_rate": 1.0810810810810812e-07, "loss": 0.7738, "step": 4 }, { "epoch": 0.0, "learning_rate": 1.3513513513513515e-07, "loss": 0.8899, "step": 5 }, { "epoch": 0.0, "learning_rate": 1.6216216216216218e-07, "loss": 0.9095, "step": 6 }, { "epoch": 0.0, "learning_rate": 1.8918918918918921e-07, "loss": 0.7483, "step": 7 }, { "epoch": 0.0, "learning_rate": 2.1621621621621625e-07, "loss": 0.7777, "step": 8 }, { "epoch": 0.0, "learning_rate": 2.4324324324324326e-07, "loss": 0.6867, "step": 9 }, { "epoch": 0.0, "learning_rate": 2.702702702702703e-07, "loss": 0.8413, "step": 10 }, { "epoch": 0.01, "learning_rate": 2.972972972972973e-07, "loss": 0.8644, "step": 11 }, { "epoch": 0.01, "learning_rate": 3.2432432432432436e-07, "loss": 0.8515, "step": 12 }, { "epoch": 0.01, "learning_rate": 3.513513513513514e-07, "loss": 0.7805, "step": 13 }, { "epoch": 0.01, "learning_rate": 3.7837837837837843e-07, "loss": 0.7631, "step": 14 }, { "epoch": 0.01, "learning_rate": 4.0540540540540546e-07, "loss": 0.7179, "step": 15 }, { "epoch": 0.01, "learning_rate": 4.324324324324325e-07, "loss": 0.7484, "step": 16 }, { "epoch": 0.01, "learning_rate": 4.5945945945945953e-07, "loss": 0.7369, "step": 17 }, { "epoch": 0.01, "learning_rate": 4.864864864864865e-07, "loss": 0.7269, "step": 18 }, { "epoch": 0.01, "learning_rate": 5.135135135135135e-07, "loss": 0.737, "step": 19 }, { "epoch": 0.01, "learning_rate": 5.405405405405406e-07, "loss": 0.693, "step": 20 }, { "epoch": 0.01, "learning_rate": 5.675675675675676e-07, "loss": 0.7765, "step": 21 }, { "epoch": 0.01, "learning_rate": 5.945945945945947e-07, "loss": 0.7595, "step": 22 }, { "epoch": 0.01, "learning_rate": 6.216216216216217e-07, "loss": 0.7593, "step": 23 }, { "epoch": 0.01, "learning_rate": 6.486486486486487e-07, "loss": 0.6278, "step": 24 }, { "epoch": 0.01, "learning_rate": 6.756756756756758e-07, "loss": 0.7604, "step": 25 }, { "epoch": 0.01, "learning_rate": 7.027027027027028e-07, "loss": 0.6883, "step": 26 }, { "epoch": 0.01, "learning_rate": 7.297297297297298e-07, "loss": 0.7243, "step": 27 }, { "epoch": 0.01, "learning_rate": 7.567567567567569e-07, "loss": 0.6713, "step": 28 }, { "epoch": 0.01, "learning_rate": 7.837837837837839e-07, "loss": 0.6818, "step": 29 }, { "epoch": 0.01, "learning_rate": 8.108108108108109e-07, "loss": 0.6511, "step": 30 }, { "epoch": 0.02, "learning_rate": 8.37837837837838e-07, "loss": 0.6556, "step": 31 }, { "epoch": 0.02, "learning_rate": 8.64864864864865e-07, "loss": 0.6321, "step": 32 }, { "epoch": 0.02, "learning_rate": 8.91891891891892e-07, "loss": 0.6944, "step": 33 }, { "epoch": 0.02, "learning_rate": 9.189189189189191e-07, "loss": 0.5816, "step": 34 }, { "epoch": 0.02, "learning_rate": 9.459459459459461e-07, "loss": 0.6181, "step": 35 }, { "epoch": 0.02, "learning_rate": 9.72972972972973e-07, "loss": 0.6287, "step": 36 }, { "epoch": 0.02, "learning_rate": 1.0000000000000002e-06, "loss": 0.6203, "step": 37 }, { "epoch": 0.02, "learning_rate": 1.027027027027027e-06, "loss": 0.5437, "step": 38 }, { "epoch": 0.02, "learning_rate": 1.0540540540540542e-06, "loss": 0.6185, "step": 39 }, { "epoch": 0.02, "learning_rate": 1.0810810810810812e-06, "loss": 0.6347, "step": 40 }, { "epoch": 0.02, "learning_rate": 1.1081081081081083e-06, "loss": 0.6477, "step": 41 }, { "epoch": 0.02, "learning_rate": 1.1351351351351352e-06, "loss": 0.6265, "step": 42 }, { "epoch": 0.02, "learning_rate": 1.1621621621621624e-06, "loss": 0.5698, "step": 43 }, { "epoch": 0.02, "learning_rate": 1.1891891891891893e-06, "loss": 0.6277, "step": 44 }, { "epoch": 0.02, "learning_rate": 1.2162162162162164e-06, "loss": 0.6599, "step": 45 }, { "epoch": 0.02, "learning_rate": 1.2432432432432434e-06, "loss": 0.6207, "step": 46 }, { "epoch": 0.02, "learning_rate": 1.2702702702702705e-06, "loss": 0.6083, "step": 47 }, { "epoch": 0.02, "learning_rate": 1.2972972972972974e-06, "loss": 0.5726, "step": 48 }, { "epoch": 0.02, "learning_rate": 1.3243243243243246e-06, "loss": 0.5767, "step": 49 }, { "epoch": 0.02, "learning_rate": 1.3513513513513515e-06, "loss": 0.5604, "step": 50 }, { "epoch": 0.02, "learning_rate": 1.3783783783783786e-06, "loss": 0.625, "step": 51 }, { "epoch": 0.03, "learning_rate": 1.4054054054054056e-06, "loss": 0.5806, "step": 52 }, { "epoch": 0.03, "learning_rate": 1.4324324324324327e-06, "loss": 0.5469, "step": 53 }, { "epoch": 0.03, "learning_rate": 1.4594594594594596e-06, "loss": 0.5713, "step": 54 }, { "epoch": 0.03, "learning_rate": 1.4864864864864868e-06, "loss": 0.585, "step": 55 }, { "epoch": 0.03, "learning_rate": 1.5135135135135137e-06, "loss": 0.518, "step": 56 }, { "epoch": 0.03, "learning_rate": 1.5405405405405409e-06, "loss": 0.6037, "step": 57 }, { "epoch": 0.03, "learning_rate": 1.5675675675675678e-06, "loss": 0.5505, "step": 58 }, { "epoch": 0.03, "learning_rate": 1.5945945945945947e-06, "loss": 0.5713, "step": 59 }, { "epoch": 0.03, "learning_rate": 1.6216216216216219e-06, "loss": 0.5317, "step": 60 }, { "epoch": 0.03, "learning_rate": 1.6486486486486488e-06, "loss": 0.5262, "step": 61 }, { "epoch": 0.03, "learning_rate": 1.675675675675676e-06, "loss": 0.5661, "step": 62 }, { "epoch": 0.03, "learning_rate": 1.7027027027027028e-06, "loss": 0.5282, "step": 63 }, { "epoch": 0.03, "learning_rate": 1.72972972972973e-06, "loss": 0.5534, "step": 64 }, { "epoch": 0.03, "learning_rate": 1.756756756756757e-06, "loss": 0.5193, "step": 65 }, { "epoch": 0.03, "learning_rate": 1.783783783783784e-06, "loss": 0.4635, "step": 66 }, { "epoch": 0.03, "learning_rate": 1.810810810810811e-06, "loss": 0.5992, "step": 67 }, { "epoch": 0.03, "learning_rate": 1.8378378378378381e-06, "loss": 0.461, "step": 68 }, { "epoch": 0.03, "learning_rate": 1.864864864864865e-06, "loss": 0.5546, "step": 69 }, { "epoch": 0.03, "learning_rate": 1.8918918918918922e-06, "loss": 0.5176, "step": 70 }, { "epoch": 0.03, "learning_rate": 1.918918918918919e-06, "loss": 0.5721, "step": 71 }, { "epoch": 0.04, "learning_rate": 1.945945945945946e-06, "loss": 0.5629, "step": 72 }, { "epoch": 0.04, "learning_rate": 1.9729729729729734e-06, "loss": 0.516, "step": 73 }, { "epoch": 0.04, "learning_rate": 2.0000000000000003e-06, "loss": 0.5646, "step": 74 }, { "epoch": 0.04, "learning_rate": 2.0270270270270273e-06, "loss": 0.5641, "step": 75 }, { "epoch": 0.04, "learning_rate": 2.054054054054054e-06, "loss": 0.4996, "step": 76 }, { "epoch": 0.04, "learning_rate": 2.0810810810810815e-06, "loss": 0.5027, "step": 77 }, { "epoch": 0.04, "learning_rate": 2.1081081081081085e-06, "loss": 0.5254, "step": 78 }, { "epoch": 0.04, "learning_rate": 2.1351351351351354e-06, "loss": 0.5372, "step": 79 }, { "epoch": 0.04, "learning_rate": 2.1621621621621623e-06, "loss": 0.5637, "step": 80 }, { "epoch": 0.04, "learning_rate": 2.1891891891891897e-06, "loss": 0.5163, "step": 81 }, { "epoch": 0.04, "learning_rate": 2.2162162162162166e-06, "loss": 0.5419, "step": 82 }, { "epoch": 0.04, "learning_rate": 2.2432432432432435e-06, "loss": 0.471, "step": 83 }, { "epoch": 0.04, "learning_rate": 2.2702702702702705e-06, "loss": 0.5521, "step": 84 }, { "epoch": 0.04, "learning_rate": 2.297297297297298e-06, "loss": 0.5063, "step": 85 }, { "epoch": 0.04, "learning_rate": 2.3243243243243247e-06, "loss": 0.4853, "step": 86 }, { "epoch": 0.04, "learning_rate": 2.3513513513513517e-06, "loss": 0.5161, "step": 87 }, { "epoch": 0.04, "learning_rate": 2.3783783783783786e-06, "loss": 0.4665, "step": 88 }, { "epoch": 0.04, "learning_rate": 2.4054054054054055e-06, "loss": 0.5067, "step": 89 }, { "epoch": 0.04, "learning_rate": 2.432432432432433e-06, "loss": 0.5281, "step": 90 }, { "epoch": 0.04, "learning_rate": 2.45945945945946e-06, "loss": 0.4961, "step": 91 }, { "epoch": 0.04, "learning_rate": 2.4864864864864867e-06, "loss": 0.5026, "step": 92 }, { "epoch": 0.05, "learning_rate": 2.5135135135135137e-06, "loss": 0.5505, "step": 93 }, { "epoch": 0.05, "learning_rate": 2.540540540540541e-06, "loss": 0.5435, "step": 94 }, { "epoch": 0.05, "learning_rate": 2.5675675675675675e-06, "loss": 0.4516, "step": 95 }, { "epoch": 0.05, "learning_rate": 2.594594594594595e-06, "loss": 0.5196, "step": 96 }, { "epoch": 0.05, "learning_rate": 2.621621621621622e-06, "loss": 0.5346, "step": 97 }, { "epoch": 0.05, "learning_rate": 2.648648648648649e-06, "loss": 0.5437, "step": 98 }, { "epoch": 0.05, "learning_rate": 2.6756756756756757e-06, "loss": 0.5417, "step": 99 }, { "epoch": 0.05, "learning_rate": 2.702702702702703e-06, "loss": 0.4941, "step": 100 }, { "epoch": 0.05, "learning_rate": 2.72972972972973e-06, "loss": 0.5066, "step": 101 }, { "epoch": 0.05, "learning_rate": 2.7567567567567573e-06, "loss": 0.4782, "step": 102 }, { "epoch": 0.05, "learning_rate": 2.783783783783784e-06, "loss": 0.4722, "step": 103 }, { "epoch": 0.05, "learning_rate": 2.810810810810811e-06, "loss": 0.5476, "step": 104 }, { "epoch": 0.05, "learning_rate": 2.837837837837838e-06, "loss": 0.5055, "step": 105 }, { "epoch": 0.05, "learning_rate": 2.8648648648648654e-06, "loss": 0.4961, "step": 106 }, { "epoch": 0.05, "learning_rate": 2.891891891891892e-06, "loss": 0.4944, "step": 107 }, { "epoch": 0.05, "learning_rate": 2.9189189189189193e-06, "loss": 0.5053, "step": 108 }, { "epoch": 0.05, "learning_rate": 2.9459459459459462e-06, "loss": 0.555, "step": 109 }, { "epoch": 0.05, "learning_rate": 2.9729729729729736e-06, "loss": 0.5329, "step": 110 }, { "epoch": 0.05, "learning_rate": 3e-06, "loss": 0.5454, "step": 111 }, { "epoch": 0.05, "learning_rate": 3.0270270270270274e-06, "loss": 0.4693, "step": 112 }, { "epoch": 0.06, "learning_rate": 3.0540540540540544e-06, "loss": 0.4768, "step": 113 }, { "epoch": 0.06, "learning_rate": 3.0810810810810817e-06, "loss": 0.4914, "step": 114 }, { "epoch": 0.06, "learning_rate": 3.1081081081081082e-06, "loss": 0.5135, "step": 115 }, { "epoch": 0.06, "learning_rate": 3.1351351351351356e-06, "loss": 0.5271, "step": 116 }, { "epoch": 0.06, "learning_rate": 3.1621621621621625e-06, "loss": 0.5499, "step": 117 }, { "epoch": 0.06, "learning_rate": 3.1891891891891894e-06, "loss": 0.4544, "step": 118 }, { "epoch": 0.06, "learning_rate": 3.2162162162162164e-06, "loss": 0.5425, "step": 119 }, { "epoch": 0.06, "learning_rate": 3.2432432432432437e-06, "loss": 0.4958, "step": 120 }, { "epoch": 0.06, "learning_rate": 3.2702702702702706e-06, "loss": 0.5457, "step": 121 }, { "epoch": 0.06, "learning_rate": 3.2972972972972976e-06, "loss": 0.514, "step": 122 }, { "epoch": 0.06, "learning_rate": 3.3243243243243245e-06, "loss": 0.4386, "step": 123 }, { "epoch": 0.06, "learning_rate": 3.351351351351352e-06, "loss": 0.5193, "step": 124 }, { "epoch": 0.06, "learning_rate": 3.3783783783783788e-06, "loss": 0.5287, "step": 125 }, { "epoch": 0.06, "learning_rate": 3.4054054054054057e-06, "loss": 0.5466, "step": 126 }, { "epoch": 0.06, "learning_rate": 3.4324324324324326e-06, "loss": 0.4472, "step": 127 }, { "epoch": 0.06, "learning_rate": 3.45945945945946e-06, "loss": 0.5058, "step": 128 }, { "epoch": 0.06, "learning_rate": 3.4864864864864865e-06, "loss": 0.484, "step": 129 }, { "epoch": 0.06, "learning_rate": 3.513513513513514e-06, "loss": 0.5528, "step": 130 }, { "epoch": 0.06, "learning_rate": 3.5405405405405408e-06, "loss": 0.5025, "step": 131 }, { "epoch": 0.06, "learning_rate": 3.567567567567568e-06, "loss": 0.4906, "step": 132 }, { "epoch": 0.06, "learning_rate": 3.5945945945945946e-06, "loss": 0.548, "step": 133 }, { "epoch": 0.07, "learning_rate": 3.621621621621622e-06, "loss": 0.5329, "step": 134 }, { "epoch": 0.07, "learning_rate": 3.648648648648649e-06, "loss": 0.5003, "step": 135 }, { "epoch": 0.07, "learning_rate": 3.6756756756756763e-06, "loss": 0.4876, "step": 136 }, { "epoch": 0.07, "learning_rate": 3.7027027027027028e-06, "loss": 0.4997, "step": 137 }, { "epoch": 0.07, "learning_rate": 3.72972972972973e-06, "loss": 0.5052, "step": 138 }, { "epoch": 0.07, "learning_rate": 3.756756756756757e-06, "loss": 0.5192, "step": 139 }, { "epoch": 0.07, "learning_rate": 3.7837837837837844e-06, "loss": 0.4603, "step": 140 }, { "epoch": 0.07, "learning_rate": 3.810810810810811e-06, "loss": 0.5329, "step": 141 }, { "epoch": 0.07, "learning_rate": 3.837837837837838e-06, "loss": 0.4423, "step": 142 }, { "epoch": 0.07, "learning_rate": 3.864864864864865e-06, "loss": 0.5105, "step": 143 }, { "epoch": 0.07, "learning_rate": 3.891891891891892e-06, "loss": 0.5396, "step": 144 }, { "epoch": 0.07, "learning_rate": 3.918918918918919e-06, "loss": 0.4465, "step": 145 }, { "epoch": 0.07, "learning_rate": 3.945945945945947e-06, "loss": 0.4316, "step": 146 }, { "epoch": 0.07, "learning_rate": 3.972972972972973e-06, "loss": 0.5004, "step": 147 }, { "epoch": 0.07, "learning_rate": 4.000000000000001e-06, "loss": 0.461, "step": 148 }, { "epoch": 0.07, "learning_rate": 4.027027027027028e-06, "loss": 0.4489, "step": 149 }, { "epoch": 0.07, "learning_rate": 4.0540540540540545e-06, "loss": 0.4814, "step": 150 }, { "epoch": 0.07, "learning_rate": 4.0810810810810815e-06, "loss": 0.4524, "step": 151 }, { "epoch": 0.07, "learning_rate": 4.108108108108108e-06, "loss": 0.3676, "step": 152 }, { "epoch": 0.07, "learning_rate": 4.135135135135135e-06, "loss": 0.5046, "step": 153 }, { "epoch": 0.08, "learning_rate": 4.162162162162163e-06, "loss": 0.4667, "step": 154 }, { "epoch": 0.08, "learning_rate": 4.189189189189189e-06, "loss": 0.5217, "step": 155 }, { "epoch": 0.08, "learning_rate": 4.216216216216217e-06, "loss": 0.4803, "step": 156 }, { "epoch": 0.08, "learning_rate": 4.243243243243244e-06, "loss": 0.4722, "step": 157 }, { "epoch": 0.08, "learning_rate": 4.270270270270271e-06, "loss": 0.4459, "step": 158 }, { "epoch": 0.08, "learning_rate": 4.297297297297298e-06, "loss": 0.5272, "step": 159 }, { "epoch": 0.08, "learning_rate": 4.324324324324325e-06, "loss": 0.4776, "step": 160 }, { "epoch": 0.08, "learning_rate": 4.351351351351352e-06, "loss": 0.5163, "step": 161 }, { "epoch": 0.08, "learning_rate": 4.378378378378379e-06, "loss": 0.4176, "step": 162 }, { "epoch": 0.08, "learning_rate": 4.4054054054054054e-06, "loss": 0.4332, "step": 163 }, { "epoch": 0.08, "learning_rate": 4.432432432432433e-06, "loss": 0.428, "step": 164 }, { "epoch": 0.08, "learning_rate": 4.45945945945946e-06, "loss": 0.4689, "step": 165 }, { "epoch": 0.08, "learning_rate": 4.486486486486487e-06, "loss": 0.4824, "step": 166 }, { "epoch": 0.08, "learning_rate": 4.513513513513514e-06, "loss": 0.4903, "step": 167 }, { "epoch": 0.08, "learning_rate": 4.540540540540541e-06, "loss": 0.4758, "step": 168 }, { "epoch": 0.08, "learning_rate": 4.567567567567568e-06, "loss": 0.5342, "step": 169 }, { "epoch": 0.08, "learning_rate": 4.594594594594596e-06, "loss": 0.4604, "step": 170 }, { "epoch": 0.08, "learning_rate": 4.621621621621622e-06, "loss": 0.4587, "step": 171 }, { "epoch": 0.08, "learning_rate": 4.6486486486486495e-06, "loss": 0.4259, "step": 172 }, { "epoch": 0.08, "learning_rate": 4.675675675675676e-06, "loss": 0.4434, "step": 173 }, { "epoch": 0.08, "learning_rate": 4.702702702702703e-06, "loss": 0.4788, "step": 174 }, { "epoch": 0.09, "learning_rate": 4.72972972972973e-06, "loss": 0.4625, "step": 175 }, { "epoch": 0.09, "learning_rate": 4.756756756756757e-06, "loss": 0.4112, "step": 176 }, { "epoch": 0.09, "learning_rate": 4.783783783783784e-06, "loss": 0.4588, "step": 177 }, { "epoch": 0.09, "learning_rate": 4.810810810810811e-06, "loss": 0.4372, "step": 178 }, { "epoch": 0.09, "learning_rate": 4.837837837837838e-06, "loss": 0.419, "step": 179 }, { "epoch": 0.09, "learning_rate": 4.864864864864866e-06, "loss": 0.5581, "step": 180 }, { "epoch": 0.09, "learning_rate": 4.891891891891893e-06, "loss": 0.4411, "step": 181 }, { "epoch": 0.09, "learning_rate": 4.91891891891892e-06, "loss": 0.4894, "step": 182 }, { "epoch": 0.09, "learning_rate": 4.9459459459459466e-06, "loss": 0.4794, "step": 183 }, { "epoch": 0.09, "learning_rate": 4.9729729729729735e-06, "loss": 0.476, "step": 184 }, { "epoch": 0.09, "learning_rate": 5e-06, "loss": 0.496, "step": 185 }, { "epoch": 0.09, "learning_rate": 4.999999652573471e-06, "loss": 0.5199, "step": 186 }, { "epoch": 0.09, "learning_rate": 4.999998610293982e-06, "loss": 0.4892, "step": 187 }, { "epoch": 0.09, "learning_rate": 4.999996873161818e-06, "loss": 0.4544, "step": 188 }, { "epoch": 0.09, "learning_rate": 4.999994441177468e-06, "loss": 0.5106, "step": 189 }, { "epoch": 0.09, "learning_rate": 4.999991314341603e-06, "loss": 0.4811, "step": 190 }, { "epoch": 0.09, "learning_rate": 4.999987492655096e-06, "loss": 0.4, "step": 191 }, { "epoch": 0.09, "learning_rate": 4.999982976119006e-06, "loss": 0.4812, "step": 192 }, { "epoch": 0.09, "learning_rate": 4.999977764734591e-06, "loss": 0.4534, "step": 193 }, { "epoch": 0.09, "learning_rate": 4.9999718585032966e-06, "loss": 0.4312, "step": 194 }, { "epoch": 0.1, "learning_rate": 4.999965257426766e-06, "loss": 0.4312, "step": 195 }, { "epoch": 0.1, "learning_rate": 4.999957961506834e-06, "loss": 0.4052, "step": 196 }, { "epoch": 0.1, "learning_rate": 4.999949970745529e-06, "loss": 0.4587, "step": 197 }, { "epoch": 0.1, "learning_rate": 4.99994128514507e-06, "loss": 0.4259, "step": 198 }, { "epoch": 0.1, "learning_rate": 4.999931904707873e-06, "loss": 0.4438, "step": 199 }, { "epoch": 0.1, "learning_rate": 4.999921829436545e-06, "loss": 0.4362, "step": 200 }, { "epoch": 0.1, "learning_rate": 4.999911059333885e-06, "loss": 0.4555, "step": 201 }, { "epoch": 0.1, "learning_rate": 4.999899594402887e-06, "loss": 0.4667, "step": 202 }, { "epoch": 0.1, "learning_rate": 4.999887434646739e-06, "loss": 0.4831, "step": 203 }, { "epoch": 0.1, "learning_rate": 4.999874580068819e-06, "loss": 0.4524, "step": 204 }, { "epoch": 0.1, "learning_rate": 4.9998610306727005e-06, "loss": 0.4586, "step": 205 }, { "epoch": 0.1, "learning_rate": 4.999846786462149e-06, "loss": 0.4523, "step": 206 }, { "epoch": 0.1, "learning_rate": 4.999831847441124e-06, "loss": 0.43, "step": 207 }, { "epoch": 0.1, "learning_rate": 4.999816213613777e-06, "loss": 0.4814, "step": 208 }, { "epoch": 0.1, "learning_rate": 4.999799884984455e-06, "loss": 0.4902, "step": 209 }, { "epoch": 0.1, "learning_rate": 4.9997828615576945e-06, "loss": 0.51, "step": 210 }, { "epoch": 0.1, "learning_rate": 4.999765143338227e-06, "loss": 0.503, "step": 211 }, { "epoch": 0.1, "learning_rate": 4.9997467303309795e-06, "loss": 0.4642, "step": 212 }, { "epoch": 0.1, "learning_rate": 4.999727622541067e-06, "loss": 0.4634, "step": 213 }, { "epoch": 0.1, "learning_rate": 4.999707819973801e-06, "loss": 0.5253, "step": 214 }, { "epoch": 0.1, "learning_rate": 4.999687322634688e-06, "loss": 0.4746, "step": 215 }, { "epoch": 0.11, "learning_rate": 4.9996661305294205e-06, "loss": 0.4707, "step": 216 }, { "epoch": 0.11, "learning_rate": 4.999644243663893e-06, "loss": 0.538, "step": 217 }, { "epoch": 0.11, "learning_rate": 4.999621662044185e-06, "loss": 0.5104, "step": 218 }, { "epoch": 0.11, "learning_rate": 4.999598385676576e-06, "loss": 0.4807, "step": 219 }, { "epoch": 0.11, "learning_rate": 4.999574414567534e-06, "loss": 0.4982, "step": 220 }, { "epoch": 0.11, "learning_rate": 4.999549748723722e-06, "loss": 0.4605, "step": 221 }, { "epoch": 0.11, "learning_rate": 4.999524388151994e-06, "loss": 0.4651, "step": 222 }, { "epoch": 0.11, "learning_rate": 4.999498332859402e-06, "loss": 0.5034, "step": 223 }, { "epoch": 0.11, "learning_rate": 4.999471582853185e-06, "loss": 0.4622, "step": 224 }, { "epoch": 0.11, "learning_rate": 4.999444138140779e-06, "loss": 0.4304, "step": 225 }, { "epoch": 0.11, "learning_rate": 4.999415998729812e-06, "loss": 0.4404, "step": 226 }, { "epoch": 0.11, "learning_rate": 4.999387164628104e-06, "loss": 0.4653, "step": 227 }, { "epoch": 0.11, "learning_rate": 4.9993576358436724e-06, "loss": 0.392, "step": 228 }, { "epoch": 0.11, "learning_rate": 4.999327412384721e-06, "loss": 0.4474, "step": 229 }, { "epoch": 0.11, "learning_rate": 4.999296494259651e-06, "loss": 0.4949, "step": 230 }, { "epoch": 0.11, "learning_rate": 4.999264881477057e-06, "loss": 0.4573, "step": 231 }, { "epoch": 0.11, "learning_rate": 4.999232574045725e-06, "loss": 0.4646, "step": 232 }, { "epoch": 0.11, "learning_rate": 4.999199571974633e-06, "loss": 0.4224, "step": 233 }, { "epoch": 0.11, "learning_rate": 4.999165875272957e-06, "loss": 0.4706, "step": 234 }, { "epoch": 0.11, "learning_rate": 4.9991314839500585e-06, "loss": 0.4023, "step": 235 }, { "epoch": 0.12, "learning_rate": 4.999096398015499e-06, "loss": 0.4827, "step": 236 }, { "epoch": 0.12, "learning_rate": 4.999060617479031e-06, "loss": 0.4166, "step": 237 }, { "epoch": 0.12, "learning_rate": 4.999024142350597e-06, "loss": 0.4911, "step": 238 }, { "epoch": 0.12, "learning_rate": 4.998986972640336e-06, "loss": 0.4178, "step": 239 }, { "epoch": 0.12, "learning_rate": 4.99894910835858e-06, "loss": 0.4911, "step": 240 }, { "epoch": 0.12, "learning_rate": 4.998910549515851e-06, "loss": 0.4547, "step": 241 }, { "epoch": 0.12, "learning_rate": 4.998871296122868e-06, "loss": 0.4401, "step": 242 }, { "epoch": 0.12, "learning_rate": 4.99883134819054e-06, "loss": 0.477, "step": 243 }, { "epoch": 0.12, "learning_rate": 4.99879070572997e-06, "loss": 0.4669, "step": 244 }, { "epoch": 0.12, "learning_rate": 4.998749368752455e-06, "loss": 0.4296, "step": 245 }, { "epoch": 0.12, "learning_rate": 4.998707337269485e-06, "loss": 0.4026, "step": 246 }, { "epoch": 0.12, "learning_rate": 4.99866461129274e-06, "loss": 0.5161, "step": 247 }, { "epoch": 0.12, "learning_rate": 4.998621190834098e-06, "loss": 0.4559, "step": 248 }, { "epoch": 0.12, "learning_rate": 4.9985770759056254e-06, "loss": 0.4665, "step": 249 }, { "epoch": 0.12, "learning_rate": 4.998532266519583e-06, "loss": 0.4642, "step": 250 }, { "epoch": 0.12, "learning_rate": 4.9984867626884274e-06, "loss": 0.4577, "step": 251 }, { "epoch": 0.12, "learning_rate": 4.9984405644248044e-06, "loss": 0.4741, "step": 252 }, { "epoch": 0.12, "learning_rate": 4.998393671741555e-06, "loss": 0.4426, "step": 253 }, { "epoch": 0.12, "learning_rate": 4.998346084651712e-06, "loss": 0.5035, "step": 254 }, { "epoch": 0.12, "learning_rate": 4.998297803168503e-06, "loss": 0.4255, "step": 255 }, { "epoch": 0.12, "learning_rate": 4.998248827305346e-06, "loss": 0.4459, "step": 256 }, { "epoch": 0.13, "learning_rate": 4.998199157075854e-06, "loss": 0.4522, "step": 257 }, { "epoch": 0.13, "learning_rate": 4.998148792493833e-06, "loss": 0.5106, "step": 258 }, { "epoch": 0.13, "learning_rate": 4.99809773357328e-06, "loss": 0.4208, "step": 259 }, { "epoch": 0.13, "learning_rate": 4.9980459803283885e-06, "loss": 0.4717, "step": 260 }, { "epoch": 0.13, "learning_rate": 4.997993532773541e-06, "loss": 0.4104, "step": 261 }, { "epoch": 0.13, "learning_rate": 4.997940390923316e-06, "loss": 0.4947, "step": 262 }, { "epoch": 0.13, "learning_rate": 4.9978865547924825e-06, "loss": 0.4326, "step": 263 }, { "epoch": 0.13, "learning_rate": 4.997832024396005e-06, "loss": 0.4889, "step": 264 }, { "epoch": 0.13, "learning_rate": 4.997776799749039e-06, "loss": 0.4019, "step": 265 }, { "epoch": 0.13, "learning_rate": 4.997720880866934e-06, "loss": 0.538, "step": 266 }, { "epoch": 0.13, "learning_rate": 4.997664267765233e-06, "loss": 0.4367, "step": 267 }, { "epoch": 0.13, "learning_rate": 4.99760696045967e-06, "loss": 0.4544, "step": 268 }, { "epoch": 0.13, "learning_rate": 4.9975489589661715e-06, "loss": 0.4278, "step": 269 }, { "epoch": 0.13, "learning_rate": 4.997490263300863e-06, "loss": 0.4411, "step": 270 }, { "epoch": 0.13, "learning_rate": 4.9974308734800545e-06, "loss": 0.4571, "step": 271 }, { "epoch": 0.13, "learning_rate": 4.997370789520254e-06, "loss": 0.3938, "step": 272 }, { "epoch": 0.13, "learning_rate": 4.997310011438162e-06, "loss": 0.4575, "step": 273 }, { "epoch": 0.13, "learning_rate": 4.99724853925067e-06, "loss": 0.4329, "step": 274 }, { "epoch": 0.13, "learning_rate": 4.997186372974865e-06, "loss": 0.4655, "step": 275 }, { "epoch": 0.13, "learning_rate": 4.997123512628025e-06, "loss": 0.428, "step": 276 }, { "epoch": 0.14, "learning_rate": 4.997059958227621e-06, "loss": 0.4607, "step": 277 }, { "epoch": 0.14, "learning_rate": 4.996995709791318e-06, "loss": 0.4802, "step": 278 }, { "epoch": 0.14, "learning_rate": 4.996930767336973e-06, "loss": 0.5357, "step": 279 }, { "epoch": 0.14, "learning_rate": 4.996865130882636e-06, "loss": 0.4704, "step": 280 }, { "epoch": 0.14, "learning_rate": 4.996798800446552e-06, "loss": 0.5086, "step": 281 }, { "epoch": 0.14, "learning_rate": 4.9967317760471535e-06, "loss": 0.4704, "step": 282 }, { "epoch": 0.14, "learning_rate": 4.996664057703072e-06, "loss": 0.4073, "step": 283 }, { "epoch": 0.14, "learning_rate": 4.996595645433128e-06, "loss": 0.4209, "step": 284 }, { "epoch": 0.14, "learning_rate": 4.996526539256337e-06, "loss": 0.4598, "step": 285 }, { "epoch": 0.14, "learning_rate": 4.996456739191905e-06, "loss": 0.4011, "step": 286 }, { "epoch": 0.14, "learning_rate": 4.996386245259234e-06, "loss": 0.4485, "step": 287 }, { "epoch": 0.14, "learning_rate": 4.996315057477916e-06, "loss": 0.4787, "step": 288 }, { "epoch": 0.14, "learning_rate": 4.996243175867738e-06, "loss": 0.4397, "step": 289 }, { "epoch": 0.14, "learning_rate": 4.996170600448678e-06, "loss": 0.4693, "step": 290 }, { "epoch": 0.14, "learning_rate": 4.996097331240908e-06, "loss": 0.4129, "step": 291 }, { "epoch": 0.14, "learning_rate": 4.996023368264793e-06, "loss": 0.4592, "step": 292 }, { "epoch": 0.14, "learning_rate": 4.995948711540889e-06, "loss": 0.4031, "step": 293 }, { "epoch": 0.14, "learning_rate": 4.995873361089948e-06, "loss": 0.4498, "step": 294 }, { "epoch": 0.14, "learning_rate": 4.995797316932911e-06, "loss": 0.3976, "step": 295 }, { "epoch": 0.14, "learning_rate": 4.995720579090916e-06, "loss": 0.417, "step": 296 }, { "epoch": 0.15, "learning_rate": 4.99564314758529e-06, "loss": 0.4784, "step": 297 }, { "epoch": 0.15, "learning_rate": 4.995565022437554e-06, "loss": 0.4665, "step": 298 }, { "epoch": 0.15, "learning_rate": 4.9954862036694244e-06, "loss": 0.4509, "step": 299 }, { "epoch": 0.15, "learning_rate": 4.995406691302807e-06, "loss": 0.4558, "step": 300 }, { "epoch": 0.15, "learning_rate": 4.995326485359801e-06, "loss": 0.451, "step": 301 }, { "epoch": 0.15, "learning_rate": 4.995245585862698e-06, "loss": 0.4419, "step": 302 }, { "epoch": 0.15, "learning_rate": 4.995163992833986e-06, "loss": 0.449, "step": 303 }, { "epoch": 0.15, "learning_rate": 4.995081706296342e-06, "loss": 0.5213, "step": 304 }, { "epoch": 0.15, "learning_rate": 4.9949987262726365e-06, "loss": 0.4224, "step": 305 }, { "epoch": 0.15, "learning_rate": 4.9949150527859324e-06, "loss": 0.4435, "step": 306 }, { "epoch": 0.15, "learning_rate": 4.994830685859487e-06, "loss": 0.494, "step": 307 }, { "epoch": 0.15, "learning_rate": 4.99474562551675e-06, "loss": 0.4204, "step": 308 }, { "epoch": 0.15, "learning_rate": 4.994659871781361e-06, "loss": 0.399, "step": 309 }, { "epoch": 0.15, "learning_rate": 4.994573424677157e-06, "loss": 0.4366, "step": 310 }, { "epoch": 0.15, "learning_rate": 4.9944862842281636e-06, "loss": 0.4478, "step": 311 }, { "epoch": 0.15, "learning_rate": 4.994398450458601e-06, "loss": 0.4372, "step": 312 }, { "epoch": 0.15, "learning_rate": 4.994309923392881e-06, "loss": 0.4466, "step": 313 }, { "epoch": 0.15, "learning_rate": 4.994220703055611e-06, "loss": 0.4579, "step": 314 }, { "epoch": 0.15, "learning_rate": 4.994130789471587e-06, "loss": 0.4061, "step": 315 }, { "epoch": 0.15, "learning_rate": 4.994040182665802e-06, "loss": 0.4229, "step": 316 }, { "epoch": 0.15, "learning_rate": 4.993948882663437e-06, "loss": 0.4505, "step": 317 }, { "epoch": 0.16, "learning_rate": 4.993856889489868e-06, "loss": 0.4661, "step": 318 }, { "epoch": 0.16, "learning_rate": 4.993764203170667e-06, "loss": 0.4376, "step": 319 }, { "epoch": 0.16, "learning_rate": 4.9936708237315915e-06, "loss": 0.4643, "step": 320 }, { "epoch": 0.16, "learning_rate": 4.993576751198597e-06, "loss": 0.4898, "step": 321 }, { "epoch": 0.16, "learning_rate": 4.99348198559783e-06, "loss": 0.4655, "step": 322 }, { "epoch": 0.16, "learning_rate": 4.99338652695563e-06, "loss": 0.4205, "step": 323 }, { "epoch": 0.16, "learning_rate": 4.99329037529853e-06, "loss": 0.4155, "step": 324 }, { "epoch": 0.16, "learning_rate": 4.993193530653252e-06, "loss": 0.4513, "step": 325 }, { "epoch": 0.16, "learning_rate": 4.9930959930467146e-06, "loss": 0.4155, "step": 326 }, { "epoch": 0.16, "learning_rate": 4.992997762506028e-06, "loss": 0.442, "step": 327 }, { "epoch": 0.16, "learning_rate": 4.9928988390584925e-06, "loss": 0.414, "step": 328 }, { "epoch": 0.16, "learning_rate": 4.992799222731606e-06, "loss": 0.453, "step": 329 }, { "epoch": 0.16, "learning_rate": 4.992698913553053e-06, "loss": 0.4515, "step": 330 }, { "epoch": 0.16, "learning_rate": 4.992597911550715e-06, "loss": 0.4348, "step": 331 }, { "epoch": 0.16, "learning_rate": 4.992496216752666e-06, "loss": 0.4269, "step": 332 }, { "epoch": 0.16, "learning_rate": 4.992393829187168e-06, "loss": 0.4559, "step": 333 }, { "epoch": 0.16, "learning_rate": 4.992290748882681e-06, "loss": 0.419, "step": 334 }, { "epoch": 0.16, "learning_rate": 4.992186975867856e-06, "loss": 0.4397, "step": 335 }, { "epoch": 0.16, "learning_rate": 4.992082510171532e-06, "loss": 0.3948, "step": 336 }, { "epoch": 0.16, "learning_rate": 4.991977351822749e-06, "loss": 0.4641, "step": 337 }, { "epoch": 0.17, "learning_rate": 4.991871500850731e-06, "loss": 0.5009, "step": 338 }, { "epoch": 0.17, "learning_rate": 4.9917649572849004e-06, "loss": 0.4547, "step": 339 }, { "epoch": 0.17, "learning_rate": 4.991657721154871e-06, "loss": 0.4932, "step": 340 }, { "epoch": 0.17, "learning_rate": 4.991549792490445e-06, "loss": 0.4236, "step": 341 }, { "epoch": 0.17, "learning_rate": 4.991441171321623e-06, "loss": 0.4182, "step": 342 }, { "epoch": 0.17, "learning_rate": 4.991331857678594e-06, "loss": 0.3703, "step": 343 }, { "epoch": 0.17, "learning_rate": 4.991221851591742e-06, "loss": 0.4431, "step": 344 }, { "epoch": 0.17, "learning_rate": 4.99111115309164e-06, "loss": 0.4431, "step": 345 }, { "epoch": 0.17, "learning_rate": 4.990999762209058e-06, "loss": 0.3919, "step": 346 }, { "epoch": 0.17, "learning_rate": 4.990887678974954e-06, "loss": 0.4223, "step": 347 }, { "epoch": 0.17, "learning_rate": 4.990774903420482e-06, "loss": 0.4267, "step": 348 }, { "epoch": 0.17, "learning_rate": 4.990661435576988e-06, "loss": 0.4703, "step": 349 }, { "epoch": 0.17, "learning_rate": 4.990547275476007e-06, "loss": 0.4478, "step": 350 }, { "epoch": 0.17, "learning_rate": 4.990432423149269e-06, "loss": 0.4169, "step": 351 }, { "epoch": 0.17, "learning_rate": 4.990316878628698e-06, "loss": 0.389, "step": 352 }, { "epoch": 0.17, "learning_rate": 4.990200641946408e-06, "loss": 0.3922, "step": 353 }, { "epoch": 0.17, "learning_rate": 4.990083713134705e-06, "loss": 0.447, "step": 354 }, { "epoch": 0.17, "learning_rate": 4.989966092226088e-06, "loss": 0.4348, "step": 355 }, { "epoch": 0.17, "learning_rate": 4.989847779253251e-06, "loss": 0.4529, "step": 356 }, { "epoch": 0.17, "learning_rate": 4.989728774249075e-06, "loss": 0.4711, "step": 357 }, { "epoch": 0.17, "learning_rate": 4.98960907724664e-06, "loss": 0.5187, "step": 358 }, { "epoch": 0.18, "learning_rate": 4.9894886882792124e-06, "loss": 0.4954, "step": 359 }, { "epoch": 0.18, "learning_rate": 4.989367607380253e-06, "loss": 0.4053, "step": 360 }, { "epoch": 0.18, "learning_rate": 4.9892458345834164e-06, "loss": 0.4374, "step": 361 }, { "epoch": 0.18, "learning_rate": 4.989123369922547e-06, "loss": 0.4016, "step": 362 }, { "epoch": 0.18, "learning_rate": 4.989000213431684e-06, "loss": 0.3845, "step": 363 }, { "epoch": 0.18, "learning_rate": 4.9888763651450564e-06, "loss": 0.425, "step": 364 }, { "epoch": 0.18, "learning_rate": 4.988751825097088e-06, "loss": 0.453, "step": 365 }, { "epoch": 0.18, "learning_rate": 4.988626593322393e-06, "loss": 0.3881, "step": 366 }, { "epoch": 0.18, "learning_rate": 4.988500669855779e-06, "loss": 0.3953, "step": 367 }, { "epoch": 0.18, "learning_rate": 4.988374054732244e-06, "loss": 0.432, "step": 368 }, { "epoch": 0.18, "learning_rate": 4.988246747986982e-06, "loss": 0.4619, "step": 369 }, { "epoch": 0.18, "learning_rate": 4.988118749655374e-06, "loss": 0.4419, "step": 370 }, { "epoch": 0.18, "learning_rate": 4.987990059772998e-06, "loss": 0.3835, "step": 371 }, { "epoch": 0.18, "learning_rate": 4.987860678375621e-06, "loss": 0.3745, "step": 372 }, { "epoch": 0.18, "learning_rate": 4.987730605499204e-06, "loss": 0.4469, "step": 373 }, { "epoch": 0.18, "learning_rate": 4.987599841179899e-06, "loss": 0.391, "step": 374 }, { "epoch": 0.18, "learning_rate": 4.987468385454052e-06, "loss": 0.4308, "step": 375 }, { "epoch": 0.18, "learning_rate": 4.987336238358199e-06, "loss": 0.4339, "step": 376 }, { "epoch": 0.18, "learning_rate": 4.987203399929069e-06, "loss": 0.4437, "step": 377 }, { "epoch": 0.18, "learning_rate": 4.987069870203585e-06, "loss": 0.4314, "step": 378 }, { "epoch": 0.19, "learning_rate": 4.986935649218858e-06, "loss": 0.4377, "step": 379 }, { "epoch": 0.19, "learning_rate": 4.9868007370121954e-06, "loss": 0.4061, "step": 380 }, { "epoch": 0.19, "learning_rate": 4.986665133621093e-06, "loss": 0.4563, "step": 381 }, { "epoch": 0.19, "learning_rate": 4.986528839083242e-06, "loss": 0.4905, "step": 382 }, { "epoch": 0.19, "learning_rate": 4.986391853436525e-06, "loss": 0.4289, "step": 383 }, { "epoch": 0.19, "learning_rate": 4.986254176719014e-06, "loss": 0.4321, "step": 384 }, { "epoch": 0.19, "learning_rate": 4.986115808968975e-06, "loss": 0.4296, "step": 385 }, { "epoch": 0.19, "learning_rate": 4.985976750224869e-06, "loss": 0.4418, "step": 386 }, { "epoch": 0.19, "learning_rate": 4.9858370005253435e-06, "loss": 0.4089, "step": 387 }, { "epoch": 0.19, "learning_rate": 4.985696559909241e-06, "loss": 0.4706, "step": 388 }, { "epoch": 0.19, "learning_rate": 4.985555428415596e-06, "loss": 0.5151, "step": 389 }, { "epoch": 0.19, "learning_rate": 4.985413606083635e-06, "loss": 0.4304, "step": 390 }, { "epoch": 0.19, "learning_rate": 4.985271092952777e-06, "loss": 0.4006, "step": 391 }, { "epoch": 0.19, "learning_rate": 4.985127889062631e-06, "loss": 0.4468, "step": 392 }, { "epoch": 0.19, "learning_rate": 4.984983994453e-06, "loss": 0.4603, "step": 393 }, { "epoch": 0.19, "learning_rate": 4.984839409163877e-06, "loss": 0.4394, "step": 394 }, { "epoch": 0.19, "learning_rate": 4.9846941332354494e-06, "loss": 0.4464, "step": 395 }, { "epoch": 0.19, "learning_rate": 4.9845481667080966e-06, "loss": 0.4536, "step": 396 }, { "epoch": 0.19, "learning_rate": 4.984401509622385e-06, "loss": 0.3927, "step": 397 }, { "epoch": 0.19, "learning_rate": 4.984254162019081e-06, "loss": 0.4542, "step": 398 }, { "epoch": 0.19, "learning_rate": 4.984106123939136e-06, "loss": 0.4125, "step": 399 }, { "epoch": 0.2, "learning_rate": 4.983957395423695e-06, "loss": 0.3903, "step": 400 }, { "epoch": 0.2, "learning_rate": 4.983807976514098e-06, "loss": 0.3999, "step": 401 }, { "epoch": 0.2, "learning_rate": 4.983657867251874e-06, "loss": 0.3809, "step": 402 }, { "epoch": 0.2, "learning_rate": 4.983507067678744e-06, "loss": 0.4349, "step": 403 }, { "epoch": 0.2, "learning_rate": 4.983355577836622e-06, "loss": 0.3832, "step": 404 }, { "epoch": 0.2, "learning_rate": 4.983203397767613e-06, "loss": 0.4244, "step": 405 }, { "epoch": 0.2, "learning_rate": 4.983050527514014e-06, "loss": 0.4189, "step": 406 }, { "epoch": 0.2, "learning_rate": 4.982896967118315e-06, "loss": 0.3846, "step": 407 }, { "epoch": 0.2, "learning_rate": 4.9827427166231944e-06, "loss": 0.3708, "step": 408 }, { "epoch": 0.2, "learning_rate": 4.982587776071528e-06, "loss": 0.4561, "step": 409 }, { "epoch": 0.2, "learning_rate": 4.9824321455063774e-06, "loss": 0.4493, "step": 410 }, { "epoch": 0.2, "learning_rate": 4.982275824971e-06, "loss": 0.4442, "step": 411 }, { "epoch": 0.2, "learning_rate": 4.982118814508843e-06, "loss": 0.4126, "step": 412 }, { "epoch": 0.2, "learning_rate": 4.981961114163547e-06, "loss": 0.3865, "step": 413 }, { "epoch": 0.2, "learning_rate": 4.981802723978943e-06, "loss": 0.4252, "step": 414 }, { "epoch": 0.2, "learning_rate": 4.981643643999054e-06, "loss": 0.4823, "step": 415 }, { "epoch": 0.2, "learning_rate": 4.981483874268095e-06, "loss": 0.427, "step": 416 }, { "epoch": 0.2, "learning_rate": 4.981323414830473e-06, "loss": 0.3969, "step": 417 }, { "epoch": 0.2, "learning_rate": 4.981162265730786e-06, "loss": 0.4527, "step": 418 }, { "epoch": 0.2, "learning_rate": 4.981000427013824e-06, "loss": 0.4484, "step": 419 }, { "epoch": 0.21, "learning_rate": 4.980837898724568e-06, "loss": 0.3863, "step": 420 }, { "epoch": 0.21, "learning_rate": 4.980674680908192e-06, "loss": 0.4155, "step": 421 }, { "epoch": 0.21, "learning_rate": 4.98051077361006e-06, "loss": 0.4127, "step": 422 }, { "epoch": 0.21, "learning_rate": 4.9803461768757315e-06, "loss": 0.4199, "step": 423 }, { "epoch": 0.21, "learning_rate": 4.980180890750951e-06, "loss": 0.4192, "step": 424 }, { "epoch": 0.21, "learning_rate": 4.98001491528166e-06, "loss": 0.4034, "step": 425 }, { "epoch": 0.21, "learning_rate": 4.979848250513991e-06, "loss": 0.4477, "step": 426 }, { "epoch": 0.21, "learning_rate": 4.979680896494265e-06, "loss": 0.4105, "step": 427 }, { "epoch": 0.21, "learning_rate": 4.979512853268998e-06, "loss": 0.4158, "step": 428 }, { "epoch": 0.21, "learning_rate": 4.979344120884895e-06, "loss": 0.4476, "step": 429 }, { "epoch": 0.21, "learning_rate": 4.9791746993888545e-06, "loss": 0.4206, "step": 430 }, { "epoch": 0.21, "learning_rate": 4.979004588827966e-06, "loss": 0.4193, "step": 431 }, { "epoch": 0.21, "learning_rate": 4.97883378924951e-06, "loss": 0.3915, "step": 432 }, { "epoch": 0.21, "learning_rate": 4.978662300700957e-06, "loss": 0.4401, "step": 433 }, { "epoch": 0.21, "learning_rate": 4.978490123229974e-06, "loss": 0.4163, "step": 434 }, { "epoch": 0.21, "learning_rate": 4.978317256884414e-06, "loss": 0.3473, "step": 435 }, { "epoch": 0.21, "learning_rate": 4.978143701712323e-06, "loss": 0.4248, "step": 436 }, { "epoch": 0.21, "learning_rate": 4.977969457761942e-06, "loss": 0.416, "step": 437 }, { "epoch": 0.21, "learning_rate": 4.9777945250816975e-06, "loss": 0.4205, "step": 438 }, { "epoch": 0.21, "learning_rate": 4.977618903720213e-06, "loss": 0.4605, "step": 439 }, { "epoch": 0.21, "learning_rate": 4.977442593726299e-06, "loss": 0.431, "step": 440 }, { "epoch": 0.22, "learning_rate": 4.9772655951489605e-06, "loss": 0.3997, "step": 441 }, { "epoch": 0.22, "learning_rate": 4.977087908037392e-06, "loss": 0.3999, "step": 442 }, { "epoch": 0.22, "learning_rate": 4.97690953244098e-06, "loss": 0.3805, "step": 443 }, { "epoch": 0.22, "learning_rate": 4.976730468409303e-06, "loss": 0.4143, "step": 444 }, { "epoch": 0.22, "learning_rate": 4.976550715992131e-06, "loss": 0.4593, "step": 445 }, { "epoch": 0.22, "learning_rate": 4.9763702752394225e-06, "loss": 0.3892, "step": 446 }, { "epoch": 0.22, "learning_rate": 4.976189146201331e-06, "loss": 0.3875, "step": 447 }, { "epoch": 0.22, "learning_rate": 4.9760073289282005e-06, "loss": 0.4199, "step": 448 }, { "epoch": 0.22, "learning_rate": 4.975824823470563e-06, "loss": 0.4468, "step": 449 }, { "epoch": 0.22, "learning_rate": 4.975641629879146e-06, "loss": 0.4104, "step": 450 }, { "epoch": 0.22, "learning_rate": 4.9754577482048664e-06, "loss": 0.4239, "step": 451 }, { "epoch": 0.22, "learning_rate": 4.975273178498832e-06, "loss": 0.4851, "step": 452 }, { "epoch": 0.22, "learning_rate": 4.975087920812343e-06, "loss": 0.3546, "step": 453 }, { "epoch": 0.22, "learning_rate": 4.974901975196888e-06, "loss": 0.4759, "step": 454 }, { "epoch": 0.22, "learning_rate": 4.9747153417041525e-06, "loss": 0.3462, "step": 455 }, { "epoch": 0.22, "learning_rate": 4.974528020386008e-06, "loss": 0.4248, "step": 456 }, { "epoch": 0.22, "learning_rate": 4.974340011294519e-06, "loss": 0.4202, "step": 457 }, { "epoch": 0.22, "learning_rate": 4.974151314481939e-06, "loss": 0.3681, "step": 458 }, { "epoch": 0.22, "learning_rate": 4.973961930000717e-06, "loss": 0.427, "step": 459 }, { "epoch": 0.22, "learning_rate": 4.973771857903491e-06, "loss": 0.4257, "step": 460 }, { "epoch": 0.23, "learning_rate": 4.973581098243087e-06, "loss": 0.3916, "step": 461 }, { "epoch": 0.23, "learning_rate": 4.973389651072528e-06, "loss": 0.3811, "step": 462 }, { "epoch": 0.23, "learning_rate": 4.973197516445024e-06, "loss": 0.4156, "step": 463 }, { "epoch": 0.23, "learning_rate": 4.973004694413978e-06, "loss": 0.4035, "step": 464 }, { "epoch": 0.23, "learning_rate": 4.97281118503298e-06, "loss": 0.4199, "step": 465 }, { "epoch": 0.23, "learning_rate": 4.972616988355818e-06, "loss": 0.4001, "step": 466 }, { "epoch": 0.23, "learning_rate": 4.972422104436466e-06, "loss": 0.3771, "step": 467 }, { "epoch": 0.23, "learning_rate": 4.972226533329089e-06, "loss": 0.407, "step": 468 }, { "epoch": 0.23, "learning_rate": 4.9720302750880455e-06, "loss": 0.4248, "step": 469 }, { "epoch": 0.23, "learning_rate": 4.971833329767884e-06, "loss": 0.4352, "step": 470 }, { "epoch": 0.23, "learning_rate": 4.971635697423344e-06, "loss": 0.469, "step": 471 }, { "epoch": 0.23, "learning_rate": 4.971437378109354e-06, "loss": 0.4292, "step": 472 }, { "epoch": 0.23, "learning_rate": 4.971238371881036e-06, "loss": 0.4597, "step": 473 }, { "epoch": 0.23, "learning_rate": 4.971038678793702e-06, "loss": 0.3716, "step": 474 }, { "epoch": 0.23, "learning_rate": 4.9708382989028555e-06, "loss": 0.4073, "step": 475 }, { "epoch": 0.23, "learning_rate": 4.9706372322641894e-06, "loss": 0.3622, "step": 476 }, { "epoch": 0.23, "learning_rate": 4.97043547893359e-06, "loss": 0.454, "step": 477 }, { "epoch": 0.23, "learning_rate": 4.9702330389671304e-06, "loss": 0.4582, "step": 478 }, { "epoch": 0.23, "learning_rate": 4.970029912421079e-06, "loss": 0.3761, "step": 479 }, { "epoch": 0.23, "learning_rate": 4.969826099351892e-06, "loss": 0.3887, "step": 480 }, { "epoch": 0.23, "learning_rate": 4.969621599816219e-06, "loss": 0.444, "step": 481 }, { "epoch": 0.24, "learning_rate": 4.969416413870897e-06, "loss": 0.4518, "step": 482 }, { "epoch": 0.24, "learning_rate": 4.969210541572955e-06, "loss": 0.4268, "step": 483 }, { "epoch": 0.24, "learning_rate": 4.969003982979617e-06, "loss": 0.3539, "step": 484 }, { "epoch": 0.24, "learning_rate": 4.9687967381482905e-06, "loss": 0.3913, "step": 485 }, { "epoch": 0.24, "learning_rate": 4.968588807136579e-06, "loss": 0.438, "step": 486 }, { "epoch": 0.24, "learning_rate": 4.968380190002275e-06, "loss": 0.4723, "step": 487 }, { "epoch": 0.24, "learning_rate": 4.968170886803361e-06, "loss": 0.4043, "step": 488 }, { "epoch": 0.24, "learning_rate": 4.967960897598013e-06, "loss": 0.3728, "step": 489 }, { "epoch": 0.24, "learning_rate": 4.967750222444594e-06, "loss": 0.4349, "step": 490 }, { "epoch": 0.24, "learning_rate": 4.967538861401659e-06, "loss": 0.3845, "step": 491 }, { "epoch": 0.24, "learning_rate": 4.9673268145279556e-06, "loss": 0.3874, "step": 492 }, { "epoch": 0.24, "learning_rate": 4.967114081882419e-06, "loss": 0.437, "step": 493 }, { "epoch": 0.24, "learning_rate": 4.9669006635241755e-06, "loss": 0.3619, "step": 494 }, { "epoch": 0.24, "learning_rate": 4.966686559512545e-06, "loss": 0.3997, "step": 495 }, { "epoch": 0.24, "learning_rate": 4.966471769907034e-06, "loss": 0.4071, "step": 496 }, { "epoch": 0.24, "learning_rate": 4.9662562947673435e-06, "loss": 0.3641, "step": 497 }, { "epoch": 0.24, "learning_rate": 4.966040134153362e-06, "loss": 0.4115, "step": 498 }, { "epoch": 0.24, "learning_rate": 4.9658232881251675e-06, "loss": 0.4321, "step": 499 }, { "epoch": 0.24, "learning_rate": 4.965605756743034e-06, "loss": 0.3709, "step": 500 }, { "epoch": 0.24, "learning_rate": 4.965387540067419e-06, "loss": 0.4686, "step": 501 }, { "epoch": 0.25, "learning_rate": 4.9651686381589765e-06, "loss": 0.378, "step": 502 }, { "epoch": 0.25, "learning_rate": 4.964949051078547e-06, "loss": 0.4261, "step": 503 }, { "epoch": 0.25, "learning_rate": 4.964728778887163e-06, "loss": 0.3621, "step": 504 }, { "epoch": 0.25, "learning_rate": 4.964507821646047e-06, "loss": 0.4107, "step": 505 }, { "epoch": 0.25, "learning_rate": 4.964286179416613e-06, "loss": 0.3918, "step": 506 }, { "epoch": 0.25, "learning_rate": 4.964063852260464e-06, "loss": 0.3995, "step": 507 }, { "epoch": 0.25, "learning_rate": 4.963840840239393e-06, "loss": 0.4022, "step": 508 }, { "epoch": 0.25, "learning_rate": 4.963617143415387e-06, "loss": 0.4314, "step": 509 }, { "epoch": 0.25, "learning_rate": 4.963392761850617e-06, "loss": 0.4186, "step": 510 }, { "epoch": 0.25, "learning_rate": 4.96316769560745e-06, "loss": 0.4661, "step": 511 }, { "epoch": 0.25, "learning_rate": 4.962941944748441e-06, "loss": 0.3671, "step": 512 }, { "epoch": 0.25, "learning_rate": 4.962715509336335e-06, "loss": 0.3799, "step": 513 }, { "epoch": 0.25, "learning_rate": 4.9624883894340684e-06, "loss": 0.4323, "step": 514 }, { "epoch": 0.25, "learning_rate": 4.962260585104766e-06, "loss": 0.3761, "step": 515 }, { "epoch": 0.25, "learning_rate": 4.962032096411745e-06, "loss": 0.3878, "step": 516 }, { "epoch": 0.25, "learning_rate": 4.9618029234185116e-06, "loss": 0.374, "step": 517 }, { "epoch": 0.25, "learning_rate": 4.961573066188762e-06, "loss": 0.4724, "step": 518 }, { "epoch": 0.25, "learning_rate": 4.961342524786384e-06, "loss": 0.4064, "step": 519 }, { "epoch": 0.25, "learning_rate": 4.961111299275454e-06, "loss": 0.3983, "step": 520 }, { "epoch": 0.25, "learning_rate": 4.960879389720239e-06, "loss": 0.4632, "step": 521 }, { "epoch": 0.25, "learning_rate": 4.960646796185196e-06, "loss": 0.35, "step": 522 }, { "epoch": 0.26, "learning_rate": 4.960413518734972e-06, "loss": 0.456, "step": 523 }, { "epoch": 0.26, "learning_rate": 4.960179557434406e-06, "loss": 0.4232, "step": 524 }, { "epoch": 0.26, "learning_rate": 4.9599449123485244e-06, "loss": 0.4042, "step": 525 }, { "epoch": 0.26, "learning_rate": 4.959709583542545e-06, "loss": 0.4151, "step": 526 }, { "epoch": 0.26, "learning_rate": 4.959473571081874e-06, "loss": 0.4168, "step": 527 }, { "epoch": 0.26, "learning_rate": 4.959236875032112e-06, "loss": 0.425, "step": 528 }, { "epoch": 0.26, "learning_rate": 4.958999495459043e-06, "loss": 0.4083, "step": 529 }, { "epoch": 0.26, "learning_rate": 4.958761432428648e-06, "loss": 0.4446, "step": 530 }, { "epoch": 0.26, "learning_rate": 4.958522686007093e-06, "loss": 0.3909, "step": 531 }, { "epoch": 0.26, "learning_rate": 4.958283256260734e-06, "loss": 0.3823, "step": 532 }, { "epoch": 0.26, "learning_rate": 4.958043143256121e-06, "loss": 0.4513, "step": 533 }, { "epoch": 0.26, "learning_rate": 4.957802347059989e-06, "loss": 0.3739, "step": 534 }, { "epoch": 0.26, "learning_rate": 4.957560867739267e-06, "loss": 0.3863, "step": 535 }, { "epoch": 0.26, "learning_rate": 4.95731870536107e-06, "loss": 0.4655, "step": 536 }, { "epoch": 0.26, "learning_rate": 4.957075859992708e-06, "loss": 0.341, "step": 537 }, { "epoch": 0.26, "learning_rate": 4.9568323317016755e-06, "loss": 0.377, "step": 538 }, { "epoch": 0.26, "learning_rate": 4.95658812055566e-06, "loss": 0.39, "step": 539 }, { "epoch": 0.26, "learning_rate": 4.956343226622536e-06, "loss": 0.4019, "step": 540 }, { "epoch": 0.26, "learning_rate": 4.956097649970372e-06, "loss": 0.4961, "step": 541 }, { "epoch": 0.26, "learning_rate": 4.955851390667423e-06, "loss": 0.4261, "step": 542 }, { "epoch": 0.27, "learning_rate": 4.955604448782134e-06, "loss": 0.4368, "step": 543 }, { "epoch": 0.27, "learning_rate": 4.955356824383141e-06, "loss": 0.3332, "step": 544 }, { "epoch": 0.27, "learning_rate": 4.955108517539269e-06, "loss": 0.402, "step": 545 }, { "epoch": 0.27, "learning_rate": 4.9548595283195325e-06, "loss": 0.3775, "step": 546 }, { "epoch": 0.27, "learning_rate": 4.9546098567931355e-06, "loss": 0.34, "step": 547 }, { "epoch": 0.27, "learning_rate": 4.954359503029473e-06, "loss": 0.4037, "step": 548 }, { "epoch": 0.27, "learning_rate": 4.954108467098128e-06, "loss": 0.3834, "step": 549 }, { "epoch": 0.27, "learning_rate": 4.953856749068874e-06, "loss": 0.3723, "step": 550 }, { "epoch": 0.27, "learning_rate": 4.953604349011673e-06, "loss": 0.3995, "step": 551 }, { "epoch": 0.27, "learning_rate": 4.953351266996677e-06, "loss": 0.3719, "step": 552 }, { "epoch": 0.27, "learning_rate": 4.953097503094231e-06, "loss": 0.4188, "step": 553 }, { "epoch": 0.27, "learning_rate": 4.952843057374864e-06, "loss": 0.4214, "step": 554 }, { "epoch": 0.27, "learning_rate": 4.952587929909296e-06, "loss": 0.399, "step": 555 }, { "epoch": 0.27, "learning_rate": 4.95233212076844e-06, "loss": 0.4085, "step": 556 }, { "epoch": 0.27, "learning_rate": 4.952075630023394e-06, "loss": 0.4025, "step": 557 }, { "epoch": 0.27, "learning_rate": 4.951818457745448e-06, "loss": 0.4082, "step": 558 }, { "epoch": 0.27, "learning_rate": 4.951560604006082e-06, "loss": 0.4408, "step": 559 }, { "epoch": 0.27, "learning_rate": 4.951302068876962e-06, "loss": 0.4184, "step": 560 }, { "epoch": 0.27, "learning_rate": 4.951042852429946e-06, "loss": 0.3406, "step": 561 }, { "epoch": 0.27, "learning_rate": 4.950782954737082e-06, "loss": 0.4055, "step": 562 }, { "epoch": 0.27, "learning_rate": 4.9505223758706065e-06, "loss": 0.4059, "step": 563 }, { "epoch": 0.28, "learning_rate": 4.950261115902943e-06, "loss": 0.3794, "step": 564 }, { "epoch": 0.28, "learning_rate": 4.949999174906709e-06, "loss": 0.4309, "step": 565 }, { "epoch": 0.28, "learning_rate": 4.949736552954707e-06, "loss": 0.3223, "step": 566 }, { "epoch": 0.28, "learning_rate": 4.949473250119931e-06, "loss": 0.4274, "step": 567 }, { "epoch": 0.28, "learning_rate": 4.949209266475564e-06, "loss": 0.3785, "step": 568 }, { "epoch": 0.28, "learning_rate": 4.948944602094977e-06, "loss": 0.3944, "step": 569 }, { "epoch": 0.28, "learning_rate": 4.948679257051732e-06, "loss": 0.4036, "step": 570 }, { "epoch": 0.28, "learning_rate": 4.948413231419579e-06, "loss": 0.4109, "step": 571 }, { "epoch": 0.28, "learning_rate": 4.948146525272458e-06, "loss": 0.3896, "step": 572 }, { "epoch": 0.28, "learning_rate": 4.947879138684497e-06, "loss": 0.4246, "step": 573 }, { "epoch": 0.28, "learning_rate": 4.947611071730013e-06, "loss": 0.4343, "step": 574 }, { "epoch": 0.28, "learning_rate": 4.947342324483515e-06, "loss": 0.4234, "step": 575 }, { "epoch": 0.28, "learning_rate": 4.947072897019698e-06, "loss": 0.3931, "step": 576 }, { "epoch": 0.28, "learning_rate": 4.946802789413445e-06, "loss": 0.3764, "step": 577 }, { "epoch": 0.28, "learning_rate": 4.946532001739834e-06, "loss": 0.4258, "step": 578 }, { "epoch": 0.28, "learning_rate": 4.9462605340741244e-06, "loss": 0.3912, "step": 579 }, { "epoch": 0.28, "learning_rate": 4.94598838649177e-06, "loss": 0.4328, "step": 580 }, { "epoch": 0.28, "learning_rate": 4.945715559068412e-06, "loss": 0.3733, "step": 581 }, { "epoch": 0.28, "learning_rate": 4.945442051879879e-06, "loss": 0.4016, "step": 582 }, { "epoch": 0.28, "learning_rate": 4.945167865002191e-06, "loss": 0.3454, "step": 583 }, { "epoch": 0.29, "learning_rate": 4.944892998511556e-06, "loss": 0.3404, "step": 584 }, { "epoch": 0.29, "learning_rate": 4.94461745248437e-06, "loss": 0.3661, "step": 585 }, { "epoch": 0.29, "learning_rate": 4.94434122699722e-06, "loss": 0.3562, "step": 586 }, { "epoch": 0.29, "learning_rate": 4.944064322126879e-06, "loss": 0.4064, "step": 587 }, { "epoch": 0.29, "learning_rate": 4.9437867379503105e-06, "loss": 0.4142, "step": 588 }, { "epoch": 0.29, "learning_rate": 4.943508474544667e-06, "loss": 0.3898, "step": 589 }, { "epoch": 0.29, "learning_rate": 4.943229531987289e-06, "loss": 0.3557, "step": 590 }, { "epoch": 0.29, "learning_rate": 4.942949910355707e-06, "loss": 0.3144, "step": 591 }, { "epoch": 0.29, "learning_rate": 4.942669609727638e-06, "loss": 0.4012, "step": 592 }, { "epoch": 0.29, "learning_rate": 4.94238863018099e-06, "loss": 0.3886, "step": 593 }, { "epoch": 0.29, "learning_rate": 4.942106971793858e-06, "loss": 0.3923, "step": 594 }, { "epoch": 0.29, "learning_rate": 4.941824634644528e-06, "loss": 0.4662, "step": 595 }, { "epoch": 0.29, "learning_rate": 4.941541618811472e-06, "loss": 0.4758, "step": 596 }, { "epoch": 0.29, "learning_rate": 4.941257924373352e-06, "loss": 0.4638, "step": 597 }, { "epoch": 0.29, "learning_rate": 4.940973551409018e-06, "loss": 0.386, "step": 598 }, { "epoch": 0.29, "learning_rate": 4.94068849999751e-06, "loss": 0.4026, "step": 599 }, { "epoch": 0.29, "learning_rate": 4.940402770218054e-06, "loss": 0.3864, "step": 600 }, { "epoch": 0.29, "learning_rate": 4.940116362150067e-06, "loss": 0.4108, "step": 601 }, { "epoch": 0.29, "learning_rate": 4.939829275873154e-06, "loss": 0.3548, "step": 602 }, { "epoch": 0.29, "learning_rate": 4.9395415114671076e-06, "loss": 0.3938, "step": 603 }, { "epoch": 0.29, "learning_rate": 4.93925306901191e-06, "loss": 0.3382, "step": 604 }, { "epoch": 0.3, "learning_rate": 4.93896394858773e-06, "loss": 0.4329, "step": 605 }, { "epoch": 0.3, "learning_rate": 4.938674150274927e-06, "loss": 0.4135, "step": 606 }, { "epoch": 0.3, "learning_rate": 4.938383674154047e-06, "loss": 0.4016, "step": 607 }, { "epoch": 0.3, "learning_rate": 4.938092520305827e-06, "loss": 0.3471, "step": 608 }, { "epoch": 0.3, "learning_rate": 4.937800688811189e-06, "loss": 0.3962, "step": 609 }, { "epoch": 0.3, "learning_rate": 4.937508179751246e-06, "loss": 0.3584, "step": 610 }, { "epoch": 0.3, "learning_rate": 4.937214993207298e-06, "loss": 0.3676, "step": 611 }, { "epoch": 0.3, "learning_rate": 4.936921129260834e-06, "loss": 0.3993, "step": 612 }, { "epoch": 0.3, "learning_rate": 4.936626587993529e-06, "loss": 0.4219, "step": 613 }, { "epoch": 0.3, "learning_rate": 4.936331369487251e-06, "loss": 0.3978, "step": 614 }, { "epoch": 0.3, "learning_rate": 4.936035473824052e-06, "loss": 0.3678, "step": 615 }, { "epoch": 0.3, "learning_rate": 4.935738901086173e-06, "loss": 0.3601, "step": 616 }, { "epoch": 0.3, "learning_rate": 4.935441651356045e-06, "loss": 0.3902, "step": 617 }, { "epoch": 0.3, "learning_rate": 4.935143724716285e-06, "loss": 0.4075, "step": 618 }, { "epoch": 0.3, "learning_rate": 4.9348451212497e-06, "loss": 0.3948, "step": 619 }, { "epoch": 0.3, "learning_rate": 4.934545841039283e-06, "loss": 0.4108, "step": 620 }, { "epoch": 0.3, "learning_rate": 4.9342458841682175e-06, "loss": 0.3611, "step": 621 }, { "epoch": 0.3, "learning_rate": 4.933945250719873e-06, "loss": 0.3708, "step": 622 }, { "epoch": 0.3, "learning_rate": 4.933643940777809e-06, "loss": 0.374, "step": 623 }, { "epoch": 0.3, "learning_rate": 4.933341954425771e-06, "loss": 0.4692, "step": 624 }, { "epoch": 0.31, "learning_rate": 4.933039291747693e-06, "loss": 0.4299, "step": 625 }, { "epoch": 0.31, "learning_rate": 4.932735952827699e-06, "loss": 0.3711, "step": 626 }, { "epoch": 0.31, "learning_rate": 4.932431937750098e-06, "loss": 0.4455, "step": 627 }, { "epoch": 0.31, "learning_rate": 4.93212724659939e-06, "loss": 0.4294, "step": 628 }, { "epoch": 0.31, "learning_rate": 4.931821879460259e-06, "loss": 0.3974, "step": 629 }, { "epoch": 0.31, "learning_rate": 4.93151583641758e-06, "loss": 0.3896, "step": 630 }, { "epoch": 0.31, "learning_rate": 4.931209117556416e-06, "loss": 0.3448, "step": 631 }, { "epoch": 0.31, "learning_rate": 4.930901722962016e-06, "loss": 0.401, "step": 632 }, { "epoch": 0.31, "learning_rate": 4.930593652719817e-06, "loss": 0.3783, "step": 633 }, { "epoch": 0.31, "learning_rate": 4.930284906915445e-06, "loss": 0.3864, "step": 634 }, { "epoch": 0.31, "learning_rate": 4.929975485634714e-06, "loss": 0.4151, "step": 635 }, { "epoch": 0.31, "learning_rate": 4.929665388963624e-06, "loss": 0.3662, "step": 636 }, { "epoch": 0.31, "learning_rate": 4.929354616988364e-06, "loss": 0.3422, "step": 637 }, { "epoch": 0.31, "learning_rate": 4.92904316979531e-06, "loss": 0.4146, "step": 638 }, { "epoch": 0.31, "learning_rate": 4.928731047471026e-06, "loss": 0.4504, "step": 639 }, { "epoch": 0.31, "learning_rate": 4.928418250102264e-06, "loss": 0.4033, "step": 640 }, { "epoch": 0.31, "learning_rate": 4.928104777775963e-06, "loss": 0.3757, "step": 641 }, { "epoch": 0.31, "learning_rate": 4.92779063057925e-06, "loss": 0.3799, "step": 642 }, { "epoch": 0.31, "learning_rate": 4.9274758085994395e-06, "loss": 0.3683, "step": 643 }, { "epoch": 0.31, "learning_rate": 4.927160311924034e-06, "loss": 0.3636, "step": 644 }, { "epoch": 0.31, "learning_rate": 4.9268441406407216e-06, "loss": 0.445, "step": 645 }, { "epoch": 0.32, "learning_rate": 4.9265272948373805e-06, "loss": 0.3768, "step": 646 }, { "epoch": 0.32, "learning_rate": 4.926209774602076e-06, "loss": 0.4589, "step": 647 }, { "epoch": 0.32, "learning_rate": 4.925891580023057e-06, "loss": 0.3872, "step": 648 }, { "epoch": 0.32, "learning_rate": 4.925572711188766e-06, "loss": 0.361, "step": 649 }, { "epoch": 0.32, "learning_rate": 4.925253168187829e-06, "loss": 0.3858, "step": 650 }, { "epoch": 0.32, "learning_rate": 4.9249329511090596e-06, "loss": 0.3781, "step": 651 }, { "epoch": 0.32, "learning_rate": 4.924612060041459e-06, "loss": 0.3356, "step": 652 }, { "epoch": 0.32, "learning_rate": 4.924290495074217e-06, "loss": 0.4053, "step": 653 }, { "epoch": 0.32, "learning_rate": 4.92396825629671e-06, "loss": 0.4141, "step": 654 }, { "epoch": 0.32, "learning_rate": 4.923645343798501e-06, "loss": 0.4271, "step": 655 }, { "epoch": 0.32, "learning_rate": 4.923321757669341e-06, "loss": 0.3798, "step": 656 }, { "epoch": 0.32, "learning_rate": 4.922997497999166e-06, "loss": 0.4502, "step": 657 }, { "epoch": 0.32, "learning_rate": 4.9226725648781024e-06, "loss": 0.385, "step": 658 }, { "epoch": 0.32, "learning_rate": 4.922346958396464e-06, "loss": 0.3646, "step": 659 }, { "epoch": 0.32, "learning_rate": 4.922020678644748e-06, "loss": 0.3895, "step": 660 }, { "epoch": 0.32, "learning_rate": 4.921693725713643e-06, "loss": 0.4043, "step": 661 }, { "epoch": 0.32, "learning_rate": 4.921366099694021e-06, "loss": 0.4124, "step": 662 }, { "epoch": 0.32, "learning_rate": 4.9210378006769435e-06, "loss": 0.3867, "step": 663 }, { "epoch": 0.32, "learning_rate": 4.920708828753658e-06, "loss": 0.4002, "step": 664 }, { "epoch": 0.32, "learning_rate": 4.9203791840156e-06, "loss": 0.35, "step": 665 }, { "epoch": 0.33, "learning_rate": 4.920048866554391e-06, "loss": 0.3389, "step": 666 }, { "epoch": 0.33, "learning_rate": 4.919717876461839e-06, "loss": 0.3749, "step": 667 }, { "epoch": 0.33, "learning_rate": 4.919386213829942e-06, "loss": 0.3884, "step": 668 }, { "epoch": 0.33, "learning_rate": 4.919053878750881e-06, "loss": 0.4048, "step": 669 }, { "epoch": 0.33, "learning_rate": 4.918720871317025e-06, "loss": 0.341, "step": 670 }, { "epoch": 0.33, "learning_rate": 4.918387191620933e-06, "loss": 0.3874, "step": 671 }, { "epoch": 0.33, "learning_rate": 4.918052839755345e-06, "loss": 0.3748, "step": 672 }, { "epoch": 0.33, "learning_rate": 4.917717815813194e-06, "loss": 0.4169, "step": 673 }, { "epoch": 0.33, "learning_rate": 4.917382119887596e-06, "loss": 0.3617, "step": 674 }, { "epoch": 0.33, "learning_rate": 4.917045752071854e-06, "loss": 0.378, "step": 675 }, { "epoch": 0.33, "learning_rate": 4.91670871245946e-06, "loss": 0.3507, "step": 676 }, { "epoch": 0.33, "learning_rate": 4.91637100114409e-06, "loss": 0.3629, "step": 677 }, { "epoch": 0.33, "learning_rate": 4.916032618219608e-06, "loss": 0.4028, "step": 678 }, { "epoch": 0.33, "learning_rate": 4.9156935637800655e-06, "loss": 0.3934, "step": 679 }, { "epoch": 0.33, "learning_rate": 4.915353837919698e-06, "loss": 0.3591, "step": 680 }, { "epoch": 0.33, "learning_rate": 4.915013440732931e-06, "loss": 0.3936, "step": 681 }, { "epoch": 0.33, "learning_rate": 4.9146723723143745e-06, "loss": 0.345, "step": 682 }, { "epoch": 0.33, "learning_rate": 4.9143306327588245e-06, "loss": 0.3834, "step": 683 }, { "epoch": 0.33, "learning_rate": 4.9139882221612655e-06, "loss": 0.367, "step": 684 }, { "epoch": 0.33, "learning_rate": 4.9136451406168675e-06, "loss": 0.373, "step": 685 }, { "epoch": 0.33, "learning_rate": 4.913301388220986e-06, "loss": 0.3912, "step": 686 }, { "epoch": 0.34, "learning_rate": 4.912956965069165e-06, "loss": 0.3555, "step": 687 }, { "epoch": 0.34, "learning_rate": 4.912611871257134e-06, "loss": 0.4183, "step": 688 }, { "epoch": 0.34, "learning_rate": 4.9122661068808084e-06, "loss": 0.3569, "step": 689 }, { "epoch": 0.34, "learning_rate": 4.911919672036291e-06, "loss": 0.4036, "step": 690 }, { "epoch": 0.34, "learning_rate": 4.911572566819868e-06, "loss": 0.4132, "step": 691 }, { "epoch": 0.34, "learning_rate": 4.911224791328017e-06, "loss": 0.4111, "step": 692 }, { "epoch": 0.34, "learning_rate": 4.910876345657398e-06, "loss": 0.4107, "step": 693 }, { "epoch": 0.34, "learning_rate": 4.9105272299048584e-06, "loss": 0.4048, "step": 694 }, { "epoch": 0.34, "learning_rate": 4.910177444167432e-06, "loss": 0.3116, "step": 695 }, { "epoch": 0.34, "learning_rate": 4.909826988542339e-06, "loss": 0.4885, "step": 696 }, { "epoch": 0.34, "learning_rate": 4.909475863126984e-06, "loss": 0.4724, "step": 697 }, { "epoch": 0.34, "learning_rate": 4.909124068018962e-06, "loss": 0.3794, "step": 698 }, { "epoch": 0.34, "learning_rate": 4.908771603316049e-06, "loss": 0.3716, "step": 699 }, { "epoch": 0.34, "learning_rate": 4.90841846911621e-06, "loss": 0.4525, "step": 700 }, { "epoch": 0.34, "learning_rate": 4.908064665517595e-06, "loss": 0.4065, "step": 701 }, { "epoch": 0.34, "learning_rate": 4.907710192618543e-06, "loss": 0.3527, "step": 702 }, { "epoch": 0.34, "learning_rate": 4.9073550505175736e-06, "loss": 0.4207, "step": 703 }, { "epoch": 0.34, "learning_rate": 4.906999239313397e-06, "loss": 0.4087, "step": 704 }, { "epoch": 0.34, "learning_rate": 4.906642759104908e-06, "loss": 0.3745, "step": 705 }, { "epoch": 0.34, "learning_rate": 4.906285609991187e-06, "loss": 0.3573, "step": 706 }, { "epoch": 0.35, "learning_rate": 4.9059277920715e-06, "loss": 0.4269, "step": 707 }, { "epoch": 0.35, "learning_rate": 4.905569305445299e-06, "loss": 0.3995, "step": 708 }, { "epoch": 0.35, "learning_rate": 4.905210150212223e-06, "loss": 0.3642, "step": 709 }, { "epoch": 0.35, "learning_rate": 4.904850326472095e-06, "loss": 0.3228, "step": 710 }, { "epoch": 0.35, "learning_rate": 4.904489834324927e-06, "loss": 0.3517, "step": 711 }, { "epoch": 0.35, "learning_rate": 4.904128673870913e-06, "loss": 0.3687, "step": 712 }, { "epoch": 0.35, "learning_rate": 4.903766845210434e-06, "loss": 0.3444, "step": 713 }, { "epoch": 0.35, "learning_rate": 4.903404348444058e-06, "loss": 0.4164, "step": 714 }, { "epoch": 0.35, "learning_rate": 4.903041183672538e-06, "loss": 0.3757, "step": 715 }, { "epoch": 0.35, "learning_rate": 4.902677350996812e-06, "loss": 0.3268, "step": 716 }, { "epoch": 0.35, "learning_rate": 4.902312850518004e-06, "loss": 0.3785, "step": 717 }, { "epoch": 0.35, "learning_rate": 4.901947682337424e-06, "loss": 0.3892, "step": 718 }, { "epoch": 0.35, "learning_rate": 4.901581846556566e-06, "loss": 0.3476, "step": 719 }, { "epoch": 0.35, "learning_rate": 4.901215343277113e-06, "loss": 0.4171, "step": 720 }, { "epoch": 0.35, "learning_rate": 4.9008481726009305e-06, "loss": 0.4012, "step": 721 }, { "epoch": 0.35, "learning_rate": 4.90048033463007e-06, "loss": 0.4406, "step": 722 }, { "epoch": 0.35, "learning_rate": 4.900111829466768e-06, "loss": 0.4155, "step": 723 }, { "epoch": 0.35, "learning_rate": 4.89974265721345e-06, "loss": 0.3863, "step": 724 }, { "epoch": 0.35, "learning_rate": 4.899372817972721e-06, "loss": 0.3756, "step": 725 }, { "epoch": 0.35, "learning_rate": 4.8990023118473765e-06, "loss": 0.3723, "step": 726 }, { "epoch": 0.35, "learning_rate": 4.898631138940395e-06, "loss": 0.3852, "step": 727 }, { "epoch": 0.36, "learning_rate": 4.8982592993549416e-06, "loss": 0.352, "step": 728 }, { "epoch": 0.36, "learning_rate": 4.897886793194364e-06, "loss": 0.3896, "step": 729 }, { "epoch": 0.36, "learning_rate": 4.897513620562199e-06, "loss": 0.3939, "step": 730 }, { "epoch": 0.36, "learning_rate": 4.897139781562165e-06, "loss": 0.3136, "step": 731 }, { "epoch": 0.36, "learning_rate": 4.8967652762981676e-06, "loss": 0.3619, "step": 732 }, { "epoch": 0.36, "learning_rate": 4.896390104874298e-06, "loss": 0.3867, "step": 733 }, { "epoch": 0.36, "learning_rate": 4.89601426739483e-06, "loss": 0.4029, "step": 734 }, { "epoch": 0.36, "learning_rate": 4.8956377639642285e-06, "loss": 0.4096, "step": 735 }, { "epoch": 0.36, "learning_rate": 4.895260594687135e-06, "loss": 0.4419, "step": 736 }, { "epoch": 0.36, "learning_rate": 4.894882759668382e-06, "loss": 0.4091, "step": 737 }, { "epoch": 0.36, "learning_rate": 4.894504259012986e-06, "loss": 0.3921, "step": 738 }, { "epoch": 0.36, "learning_rate": 4.894125092826145e-06, "loss": 0.375, "step": 739 }, { "epoch": 0.36, "learning_rate": 4.89374526121325e-06, "loss": 0.3987, "step": 740 }, { "epoch": 0.36, "learning_rate": 4.893364764279867e-06, "loss": 0.3686, "step": 741 }, { "epoch": 0.36, "learning_rate": 4.892983602131756e-06, "loss": 0.3812, "step": 742 }, { "epoch": 0.36, "learning_rate": 4.892601774874854e-06, "loss": 0.4315, "step": 743 }, { "epoch": 0.36, "learning_rate": 4.892219282615288e-06, "loss": 0.3872, "step": 744 }, { "epoch": 0.36, "learning_rate": 4.89183612545937e-06, "loss": 0.3768, "step": 745 }, { "epoch": 0.36, "learning_rate": 4.891452303513592e-06, "loss": 0.3601, "step": 746 }, { "epoch": 0.36, "learning_rate": 4.891067816884636e-06, "loss": 0.4734, "step": 747 }, { "epoch": 0.37, "learning_rate": 4.890682665679366e-06, "loss": 0.3477, "step": 748 }, { "epoch": 0.37, "learning_rate": 4.890296850004832e-06, "loss": 0.3832, "step": 749 }, { "epoch": 0.37, "learning_rate": 4.889910369968267e-06, "loss": 0.4002, "step": 750 }, { "epoch": 0.37, "learning_rate": 4.889523225677091e-06, "loss": 0.2856, "step": 751 }, { "epoch": 0.37, "learning_rate": 4.889135417238907e-06, "loss": 0.3994, "step": 752 }, { "epoch": 0.37, "learning_rate": 4.888746944761503e-06, "loss": 0.3921, "step": 753 }, { "epoch": 0.37, "learning_rate": 4.88835780835285e-06, "loss": 0.3072, "step": 754 }, { "epoch": 0.37, "learning_rate": 4.8879680081211075e-06, "loss": 0.3892, "step": 755 }, { "epoch": 0.37, "learning_rate": 4.887577544174616e-06, "loss": 0.3564, "step": 756 }, { "epoch": 0.37, "learning_rate": 4.887186416621901e-06, "loss": 0.3938, "step": 757 }, { "epoch": 0.37, "learning_rate": 4.886794625571673e-06, "loss": 0.4171, "step": 758 }, { "epoch": 0.37, "learning_rate": 4.886402171132827e-06, "loss": 0.3411, "step": 759 }, { "epoch": 0.37, "learning_rate": 4.886009053414443e-06, "loss": 0.3998, "step": 760 }, { "epoch": 0.37, "learning_rate": 4.8856152725257845e-06, "loss": 0.3748, "step": 761 }, { "epoch": 0.37, "learning_rate": 4.885220828576299e-06, "loss": 0.3608, "step": 762 }, { "epoch": 0.37, "learning_rate": 4.884825721675619e-06, "loss": 0.3953, "step": 763 }, { "epoch": 0.37, "learning_rate": 4.88442995193356e-06, "loss": 0.3684, "step": 764 }, { "epoch": 0.37, "learning_rate": 4.884033519460124e-06, "loss": 0.3917, "step": 765 }, { "epoch": 0.37, "learning_rate": 4.883636424365495e-06, "loss": 0.3391, "step": 766 }, { "epoch": 0.37, "learning_rate": 4.883238666760043e-06, "loss": 0.3638, "step": 767 }, { "epoch": 0.38, "learning_rate": 4.882840246754321e-06, "loss": 0.4018, "step": 768 }, { "epoch": 0.38, "learning_rate": 4.8824411644590655e-06, "loss": 0.3584, "step": 769 }, { "epoch": 0.38, "learning_rate": 4.882041419985198e-06, "loss": 0.4108, "step": 770 }, { "epoch": 0.38, "learning_rate": 4.881641013443826e-06, "loss": 0.3562, "step": 771 }, { "epoch": 0.38, "learning_rate": 4.881239944946237e-06, "loss": 0.3862, "step": 772 }, { "epoch": 0.38, "learning_rate": 4.880838214603905e-06, "loss": 0.3911, "step": 773 }, { "epoch": 0.38, "learning_rate": 4.880435822528487e-06, "loss": 0.4321, "step": 774 }, { "epoch": 0.38, "learning_rate": 4.880032768831825e-06, "loss": 0.4477, "step": 775 }, { "epoch": 0.38, "learning_rate": 4.879629053625944e-06, "loss": 0.384, "step": 776 }, { "epoch": 0.38, "learning_rate": 4.879224677023053e-06, "loss": 0.3415, "step": 777 }, { "epoch": 0.38, "learning_rate": 4.878819639135545e-06, "loss": 0.3649, "step": 778 }, { "epoch": 0.38, "learning_rate": 4.878413940075997e-06, "loss": 0.3708, "step": 779 }, { "epoch": 0.38, "learning_rate": 4.8780075799571695e-06, "loss": 0.3647, "step": 780 }, { "epoch": 0.38, "learning_rate": 4.8776005588920055e-06, "loss": 0.3696, "step": 781 }, { "epoch": 0.38, "learning_rate": 4.8771928769936345e-06, "loss": 0.3682, "step": 782 }, { "epoch": 0.38, "learning_rate": 4.876784534375367e-06, "loss": 0.3597, "step": 783 }, { "epoch": 0.38, "learning_rate": 4.8763755311507e-06, "loss": 0.3665, "step": 784 }, { "epoch": 0.38, "learning_rate": 4.87596586743331e-06, "loss": 0.3817, "step": 785 }, { "epoch": 0.38, "learning_rate": 4.87555554333706e-06, "loss": 0.3903, "step": 786 }, { "epoch": 0.38, "learning_rate": 4.875144558975997e-06, "loss": 0.3624, "step": 787 }, { "epoch": 0.38, "learning_rate": 4.874732914464351e-06, "loss": 0.3353, "step": 788 }, { "epoch": 0.39, "learning_rate": 4.874320609916533e-06, "loss": 0.3493, "step": 789 }, { "epoch": 0.39, "learning_rate": 4.873907645447141e-06, "loss": 0.4011, "step": 790 }, { "epoch": 0.39, "learning_rate": 4.8734940211709535e-06, "loss": 0.3659, "step": 791 }, { "epoch": 0.39, "learning_rate": 4.873079737202935e-06, "loss": 0.3741, "step": 792 }, { "epoch": 0.39, "learning_rate": 4.8726647936582315e-06, "loss": 0.3772, "step": 793 }, { "epoch": 0.39, "learning_rate": 4.872249190652173e-06, "loss": 0.3955, "step": 794 }, { "epoch": 0.39, "learning_rate": 4.871832928300273e-06, "loss": 0.3666, "step": 795 }, { "epoch": 0.39, "learning_rate": 4.871416006718227e-06, "loss": 0.4264, "step": 796 }, { "epoch": 0.39, "learning_rate": 4.870998426021917e-06, "loss": 0.3415, "step": 797 }, { "epoch": 0.39, "learning_rate": 4.870580186327402e-06, "loss": 0.3935, "step": 798 }, { "epoch": 0.39, "learning_rate": 4.870161287750932e-06, "loss": 0.3963, "step": 799 }, { "epoch": 0.39, "learning_rate": 4.869741730408934e-06, "loss": 0.3459, "step": 800 }, { "epoch": 0.39, "learning_rate": 4.869321514418021e-06, "loss": 0.373, "step": 801 }, { "epoch": 0.39, "learning_rate": 4.868900639894987e-06, "loss": 0.4031, "step": 802 }, { "epoch": 0.39, "learning_rate": 4.868479106956813e-06, "loss": 0.3953, "step": 803 }, { "epoch": 0.39, "learning_rate": 4.868056915720658e-06, "loss": 0.3591, "step": 804 }, { "epoch": 0.39, "learning_rate": 4.8676340663038675e-06, "loss": 0.3726, "step": 805 }, { "epoch": 0.39, "learning_rate": 4.867210558823968e-06, "loss": 0.3841, "step": 806 }, { "epoch": 0.39, "learning_rate": 4.866786393398671e-06, "loss": 0.393, "step": 807 }, { "epoch": 0.39, "learning_rate": 4.866361570145869e-06, "loss": 0.3563, "step": 808 }, { "epoch": 0.4, "learning_rate": 4.865936089183637e-06, "loss": 0.3631, "step": 809 }, { "epoch": 0.4, "learning_rate": 4.865509950630234e-06, "loss": 0.3913, "step": 810 }, { "epoch": 0.4, "learning_rate": 4.865083154604103e-06, "loss": 0.3902, "step": 811 }, { "epoch": 0.4, "learning_rate": 4.864655701223866e-06, "loss": 0.3503, "step": 812 }, { "epoch": 0.4, "learning_rate": 4.864227590608332e-06, "loss": 0.3364, "step": 813 }, { "epoch": 0.4, "learning_rate": 4.863798822876489e-06, "loss": 0.4052, "step": 814 }, { "epoch": 0.4, "learning_rate": 4.863369398147509e-06, "loss": 0.3629, "step": 815 }, { "epoch": 0.4, "learning_rate": 4.862939316540749e-06, "loss": 0.3342, "step": 816 }, { "epoch": 0.4, "learning_rate": 4.862508578175744e-06, "loss": 0.3833, "step": 817 }, { "epoch": 0.4, "learning_rate": 4.862077183172215e-06, "loss": 0.3865, "step": 818 }, { "epoch": 0.4, "learning_rate": 4.861645131650064e-06, "loss": 0.3907, "step": 819 }, { "epoch": 0.4, "learning_rate": 4.8612124237293775e-06, "loss": 0.3594, "step": 820 }, { "epoch": 0.4, "learning_rate": 4.860779059530422e-06, "loss": 0.3903, "step": 821 }, { "epoch": 0.4, "learning_rate": 4.860345039173646e-06, "loss": 0.4215, "step": 822 }, { "epoch": 0.4, "learning_rate": 4.859910362779683e-06, "loss": 0.3225, "step": 823 }, { "epoch": 0.4, "learning_rate": 4.859475030469347e-06, "loss": 0.3888, "step": 824 }, { "epoch": 0.4, "learning_rate": 4.859039042363635e-06, "loss": 0.3688, "step": 825 }, { "epoch": 0.4, "learning_rate": 4.858602398583726e-06, "loss": 0.3346, "step": 826 }, { "epoch": 0.4, "learning_rate": 4.858165099250981e-06, "loss": 0.3361, "step": 827 }, { "epoch": 0.4, "learning_rate": 4.857727144486944e-06, "loss": 0.3876, "step": 828 }, { "epoch": 0.4, "learning_rate": 4.857288534413341e-06, "loss": 0.369, "step": 829 }, { "epoch": 0.41, "learning_rate": 4.856849269152079e-06, "loss": 0.3625, "step": 830 }, { "epoch": 0.41, "learning_rate": 4.856409348825248e-06, "loss": 0.376, "step": 831 }, { "epoch": 0.41, "learning_rate": 4.855968773555121e-06, "loss": 0.3856, "step": 832 }, { "epoch": 0.41, "learning_rate": 4.85552754346415e-06, "loss": 0.3494, "step": 833 }, { "epoch": 0.41, "learning_rate": 4.855085658674973e-06, "loss": 0.362, "step": 834 }, { "epoch": 0.41, "learning_rate": 4.854643119310407e-06, "loss": 0.3398, "step": 835 }, { "epoch": 0.41, "learning_rate": 4.854199925493453e-06, "loss": 0.3806, "step": 836 }, { "epoch": 0.41, "learning_rate": 4.853756077347291e-06, "loss": 0.3925, "step": 837 }, { "epoch": 0.41, "learning_rate": 4.853311574995287e-06, "loss": 0.3752, "step": 838 }, { "epoch": 0.41, "learning_rate": 4.8528664185609845e-06, "loss": 0.4205, "step": 839 }, { "epoch": 0.41, "learning_rate": 4.852420608168111e-06, "loss": 0.3441, "step": 840 }, { "epoch": 0.41, "learning_rate": 4.851974143940578e-06, "loss": 0.4078, "step": 841 }, { "epoch": 0.41, "learning_rate": 4.851527026002473e-06, "loss": 0.3636, "step": 842 }, { "epoch": 0.41, "learning_rate": 4.851079254478071e-06, "loss": 0.3383, "step": 843 }, { "epoch": 0.41, "learning_rate": 4.8506308294918234e-06, "loss": 0.3362, "step": 844 }, { "epoch": 0.41, "learning_rate": 4.850181751168369e-06, "loss": 0.3467, "step": 845 }, { "epoch": 0.41, "learning_rate": 4.849732019632524e-06, "loss": 0.3931, "step": 846 }, { "epoch": 0.41, "learning_rate": 4.849281635009288e-06, "loss": 0.3769, "step": 847 }, { "epoch": 0.41, "learning_rate": 4.848830597423839e-06, "loss": 0.3854, "step": 848 }, { "epoch": 0.41, "learning_rate": 4.848378907001542e-06, "loss": 0.388, "step": 849 }, { "epoch": 0.42, "learning_rate": 4.847926563867938e-06, "loss": 0.3904, "step": 850 }, { "epoch": 0.42, "learning_rate": 4.847473568148754e-06, "loss": 0.3097, "step": 851 }, { "epoch": 0.42, "learning_rate": 4.8470199199698945e-06, "loss": 0.3734, "step": 852 }, { "epoch": 0.42, "learning_rate": 4.846565619457447e-06, "loss": 0.3561, "step": 853 }, { "epoch": 0.42, "learning_rate": 4.8461106667376814e-06, "loss": 0.3363, "step": 854 }, { "epoch": 0.42, "learning_rate": 4.8456550619370475e-06, "loss": 0.3322, "step": 855 }, { "epoch": 0.42, "learning_rate": 4.845198805182176e-06, "loss": 0.3628, "step": 856 }, { "epoch": 0.42, "learning_rate": 4.844741896599881e-06, "loss": 0.3115, "step": 857 }, { "epoch": 0.42, "learning_rate": 4.844284336317154e-06, "loss": 0.3607, "step": 858 }, { "epoch": 0.42, "learning_rate": 4.843826124461172e-06, "loss": 0.3287, "step": 859 }, { "epoch": 0.42, "learning_rate": 4.8433672611592894e-06, "loss": 0.4005, "step": 860 }, { "epoch": 0.42, "learning_rate": 4.842907746539044e-06, "loss": 0.3456, "step": 861 }, { "epoch": 0.42, "learning_rate": 4.842447580728154e-06, "loss": 0.3817, "step": 862 }, { "epoch": 0.42, "learning_rate": 4.841986763854518e-06, "loss": 0.4091, "step": 863 }, { "epoch": 0.42, "learning_rate": 4.841525296046215e-06, "loss": 0.4196, "step": 864 }, { "epoch": 0.42, "learning_rate": 4.841063177431509e-06, "loss": 0.3634, "step": 865 }, { "epoch": 0.42, "learning_rate": 4.840600408138839e-06, "loss": 0.3268, "step": 866 }, { "epoch": 0.42, "learning_rate": 4.840136988296829e-06, "loss": 0.4083, "step": 867 }, { "epoch": 0.42, "learning_rate": 4.839672918034281e-06, "loss": 0.4146, "step": 868 }, { "epoch": 0.42, "learning_rate": 4.839208197480181e-06, "loss": 0.3999, "step": 869 }, { "epoch": 0.42, "learning_rate": 4.838742826763693e-06, "loss": 0.3416, "step": 870 }, { "epoch": 0.43, "learning_rate": 4.838276806014163e-06, "loss": 0.3258, "step": 871 }, { "epoch": 0.43, "learning_rate": 4.837810135361117e-06, "loss": 0.3622, "step": 872 }, { "epoch": 0.43, "learning_rate": 4.837342814934263e-06, "loss": 0.3261, "step": 873 }, { "epoch": 0.43, "learning_rate": 4.8368748448634874e-06, "loss": 0.35, "step": 874 }, { "epoch": 0.43, "learning_rate": 4.836406225278859e-06, "loss": 0.371, "step": 875 }, { "epoch": 0.43, "learning_rate": 4.835936956310628e-06, "loss": 0.3535, "step": 876 }, { "epoch": 0.43, "learning_rate": 4.83546703808922e-06, "loss": 0.3669, "step": 877 }, { "epoch": 0.43, "learning_rate": 4.834996470745248e-06, "loss": 0.3223, "step": 878 }, { "epoch": 0.43, "learning_rate": 4.8345252544095e-06, "loss": 0.3706, "step": 879 }, { "epoch": 0.43, "learning_rate": 4.834053389212947e-06, "loss": 0.4136, "step": 880 }, { "epoch": 0.43, "learning_rate": 4.833580875286741e-06, "loss": 0.4552, "step": 881 }, { "epoch": 0.43, "learning_rate": 4.8331077127622115e-06, "loss": 0.3458, "step": 882 }, { "epoch": 0.43, "learning_rate": 4.8326339017708705e-06, "loss": 0.4058, "step": 883 }, { "epoch": 0.43, "learning_rate": 4.8321594424444095e-06, "loss": 0.3678, "step": 884 }, { "epoch": 0.43, "learning_rate": 4.8316843349147e-06, "loss": 0.3426, "step": 885 }, { "epoch": 0.43, "learning_rate": 4.8312085793137955e-06, "loss": 0.3457, "step": 886 }, { "epoch": 0.43, "learning_rate": 4.830732175773926e-06, "loss": 0.3613, "step": 887 }, { "epoch": 0.43, "learning_rate": 4.8302551244275056e-06, "loss": 0.3644, "step": 888 }, { "epoch": 0.43, "learning_rate": 4.8297774254071235e-06, "loss": 0.4067, "step": 889 }, { "epoch": 0.43, "learning_rate": 4.8292990788455565e-06, "loss": 0.4008, "step": 890 }, { "epoch": 0.44, "learning_rate": 4.828820084875753e-06, "loss": 0.4085, "step": 891 }, { "epoch": 0.44, "learning_rate": 4.828340443630847e-06, "loss": 0.3686, "step": 892 }, { "epoch": 0.44, "learning_rate": 4.827860155244149e-06, "loss": 0.3442, "step": 893 }, { "epoch": 0.44, "learning_rate": 4.827379219849153e-06, "loss": 0.3606, "step": 894 }, { "epoch": 0.44, "learning_rate": 4.82689763757953e-06, "loss": 0.3921, "step": 895 }, { "epoch": 0.44, "learning_rate": 4.826415408569131e-06, "loss": 0.3707, "step": 896 }, { "epoch": 0.44, "learning_rate": 4.825932532951987e-06, "loss": 0.3584, "step": 897 }, { "epoch": 0.44, "learning_rate": 4.825449010862311e-06, "loss": 0.3547, "step": 898 }, { "epoch": 0.44, "learning_rate": 4.824964842434491e-06, "loss": 0.3568, "step": 899 }, { "epoch": 0.44, "learning_rate": 4.8244800278031e-06, "loss": 0.316, "step": 900 }, { "epoch": 0.44, "learning_rate": 4.823994567102887e-06, "loss": 0.3857, "step": 901 }, { "epoch": 0.44, "learning_rate": 4.82350846046878e-06, "loss": 0.3126, "step": 902 }, { "epoch": 0.44, "learning_rate": 4.82302170803589e-06, "loss": 0.4434, "step": 903 }, { "epoch": 0.44, "learning_rate": 4.822534309939505e-06, "loss": 0.3168, "step": 904 }, { "epoch": 0.44, "learning_rate": 4.8220462663150925e-06, "loss": 0.3501, "step": 905 }, { "epoch": 0.44, "learning_rate": 4.821557577298302e-06, "loss": 0.3061, "step": 906 }, { "epoch": 0.44, "learning_rate": 4.8210682430249565e-06, "loss": 0.3628, "step": 907 }, { "epoch": 0.44, "learning_rate": 4.820578263631066e-06, "loss": 0.35, "step": 908 }, { "epoch": 0.44, "learning_rate": 4.820087639252814e-06, "loss": 0.3736, "step": 909 }, { "epoch": 0.44, "learning_rate": 4.819596370026565e-06, "loss": 0.3314, "step": 910 }, { "epoch": 0.44, "learning_rate": 4.819104456088864e-06, "loss": 0.3813, "step": 911 }, { "epoch": 0.45, "learning_rate": 4.818611897576434e-06, "loss": 0.3728, "step": 912 }, { "epoch": 0.45, "learning_rate": 4.8181186946261775e-06, "loss": 0.3234, "step": 913 }, { "epoch": 0.45, "learning_rate": 4.817624847375175e-06, "loss": 0.423, "step": 914 }, { "epoch": 0.45, "learning_rate": 4.817130355960687e-06, "loss": 0.3394, "step": 915 }, { "epoch": 0.45, "learning_rate": 4.816635220520154e-06, "loss": 0.3622, "step": 916 }, { "epoch": 0.45, "learning_rate": 4.8161394411911946e-06, "loss": 0.3111, "step": 917 }, { "epoch": 0.45, "learning_rate": 4.815643018111605e-06, "loss": 0.3525, "step": 918 }, { "epoch": 0.45, "learning_rate": 4.815145951419363e-06, "loss": 0.3477, "step": 919 }, { "epoch": 0.45, "learning_rate": 4.814648241252623e-06, "loss": 0.4184, "step": 920 }, { "epoch": 0.45, "learning_rate": 4.8141498877497195e-06, "loss": 0.3172, "step": 921 }, { "epoch": 0.45, "learning_rate": 4.813650891049166e-06, "loss": 0.3657, "step": 922 }, { "epoch": 0.45, "learning_rate": 4.813151251289653e-06, "loss": 0.3745, "step": 923 }, { "epoch": 0.45, "learning_rate": 4.812650968610053e-06, "loss": 0.3635, "step": 924 }, { "epoch": 0.45, "learning_rate": 4.812150043149413e-06, "loss": 0.3146, "step": 925 }, { "epoch": 0.45, "learning_rate": 4.8116484750469625e-06, "loss": 0.3357, "step": 926 }, { "epoch": 0.45, "learning_rate": 4.811146264442107e-06, "loss": 0.4396, "step": 927 }, { "epoch": 0.45, "learning_rate": 4.810643411474431e-06, "loss": 0.3127, "step": 928 }, { "epoch": 0.45, "learning_rate": 4.8101399162836996e-06, "loss": 0.3322, "step": 929 }, { "epoch": 0.45, "learning_rate": 4.8096357790098535e-06, "loss": 0.3435, "step": 930 }, { "epoch": 0.45, "learning_rate": 4.809130999793015e-06, "loss": 0.3141, "step": 931 }, { "epoch": 0.46, "learning_rate": 4.808625578773481e-06, "loss": 0.3122, "step": 932 }, { "epoch": 0.46, "learning_rate": 4.808119516091729e-06, "loss": 0.3566, "step": 933 }, { "epoch": 0.46, "learning_rate": 4.807612811888417e-06, "loss": 0.3831, "step": 934 }, { "epoch": 0.46, "learning_rate": 4.807105466304377e-06, "loss": 0.3591, "step": 935 }, { "epoch": 0.46, "learning_rate": 4.8065974794806215e-06, "loss": 0.3308, "step": 936 }, { "epoch": 0.46, "learning_rate": 4.80608885155834e-06, "loss": 0.4023, "step": 937 }, { "epoch": 0.46, "learning_rate": 4.8055795826789046e-06, "loss": 0.3372, "step": 938 }, { "epoch": 0.46, "learning_rate": 4.805069672983858e-06, "loss": 0.3316, "step": 939 }, { "epoch": 0.46, "learning_rate": 4.8045591226149276e-06, "loss": 0.3577, "step": 940 }, { "epoch": 0.46, "learning_rate": 4.804047931714015e-06, "loss": 0.3469, "step": 941 }, { "epoch": 0.46, "learning_rate": 4.803536100423203e-06, "loss": 0.4036, "step": 942 }, { "epoch": 0.46, "learning_rate": 4.803023628884749e-06, "loss": 0.309, "step": 943 }, { "epoch": 0.46, "learning_rate": 4.80251051724109e-06, "loss": 0.3579, "step": 944 }, { "epoch": 0.46, "learning_rate": 4.801996765634843e-06, "loss": 0.3805, "step": 945 }, { "epoch": 0.46, "learning_rate": 4.801482374208798e-06, "loss": 0.377, "step": 946 }, { "epoch": 0.46, "learning_rate": 4.800967343105927e-06, "loss": 0.3853, "step": 947 }, { "epoch": 0.46, "learning_rate": 4.8004516724693774e-06, "loss": 0.3281, "step": 948 }, { "epoch": 0.46, "learning_rate": 4.799935362442477e-06, "loss": 0.3376, "step": 949 }, { "epoch": 0.46, "learning_rate": 4.799418413168728e-06, "loss": 0.3087, "step": 950 }, { "epoch": 0.46, "learning_rate": 4.7989008247918135e-06, "loss": 0.3784, "step": 951 }, { "epoch": 0.46, "learning_rate": 4.798382597455591e-06, "loss": 0.3527, "step": 952 }, { "epoch": 0.47, "learning_rate": 4.7978637313040985e-06, "loss": 0.3317, "step": 953 }, { "epoch": 0.47, "learning_rate": 4.797344226481549e-06, "loss": 0.3897, "step": 954 }, { "epoch": 0.47, "learning_rate": 4.796824083132337e-06, "loss": 0.3176, "step": 955 }, { "epoch": 0.47, "learning_rate": 4.796303301401027e-06, "loss": 0.3513, "step": 956 }, { "epoch": 0.47, "learning_rate": 4.795781881432371e-06, "loss": 0.3848, "step": 957 }, { "epoch": 0.47, "learning_rate": 4.795259823371289e-06, "loss": 0.3806, "step": 958 }, { "epoch": 0.47, "learning_rate": 4.794737127362885e-06, "loss": 0.3404, "step": 959 }, { "epoch": 0.47, "learning_rate": 4.794213793552437e-06, "loss": 0.3432, "step": 960 }, { "epoch": 0.47, "learning_rate": 4.793689822085401e-06, "loss": 0.428, "step": 961 }, { "epoch": 0.47, "learning_rate": 4.79316521310741e-06, "loss": 0.3293, "step": 962 }, { "epoch": 0.47, "learning_rate": 4.7926399667642744e-06, "loss": 0.3779, "step": 963 }, { "epoch": 0.47, "learning_rate": 4.7921140832019816e-06, "loss": 0.4038, "step": 964 }, { "epoch": 0.47, "learning_rate": 4.791587562566698e-06, "loss": 0.3913, "step": 965 }, { "epoch": 0.47, "learning_rate": 4.791060405004764e-06, "loss": 0.401, "step": 966 }, { "epoch": 0.47, "learning_rate": 4.790532610662698e-06, "loss": 0.3662, "step": 967 }, { "epoch": 0.47, "learning_rate": 4.790004179687197e-06, "loss": 0.3539, "step": 968 }, { "epoch": 0.47, "learning_rate": 4.789475112225133e-06, "loss": 0.3611, "step": 969 }, { "epoch": 0.47, "learning_rate": 4.788945408423555e-06, "loss": 0.3872, "step": 970 }, { "epoch": 0.47, "learning_rate": 4.788415068429691e-06, "loss": 0.4192, "step": 971 }, { "epoch": 0.47, "learning_rate": 4.7878840923909445e-06, "loss": 0.322, "step": 972 }, { "epoch": 0.48, "learning_rate": 4.787352480454894e-06, "loss": 0.3133, "step": 973 }, { "epoch": 0.48, "learning_rate": 4.786820232769297e-06, "loss": 0.334, "step": 974 }, { "epoch": 0.48, "learning_rate": 4.786287349482088e-06, "loss": 0.3595, "step": 975 }, { "epoch": 0.48, "learning_rate": 4.785753830741376e-06, "loss": 0.3657, "step": 976 }, { "epoch": 0.48, "learning_rate": 4.785219676695448e-06, "loss": 0.3593, "step": 977 }, { "epoch": 0.48, "learning_rate": 4.784684887492769e-06, "loss": 0.3306, "step": 978 }, { "epoch": 0.48, "learning_rate": 4.7841494632819765e-06, "loss": 0.3291, "step": 979 }, { "epoch": 0.48, "learning_rate": 4.783613404211889e-06, "loss": 0.3526, "step": 980 }, { "epoch": 0.48, "learning_rate": 4.783076710431498e-06, "loss": 0.3477, "step": 981 }, { "epoch": 0.48, "learning_rate": 4.782539382089974e-06, "loss": 0.3419, "step": 982 }, { "epoch": 0.48, "learning_rate": 4.782001419336661e-06, "loss": 0.3798, "step": 983 }, { "epoch": 0.48, "learning_rate": 4.781462822321084e-06, "loss": 0.3553, "step": 984 }, { "epoch": 0.48, "learning_rate": 4.780923591192938e-06, "loss": 0.327, "step": 985 }, { "epoch": 0.48, "learning_rate": 4.780383726102099e-06, "loss": 0.3571, "step": 986 }, { "epoch": 0.48, "learning_rate": 4.77984322719862e-06, "loss": 0.3266, "step": 987 }, { "epoch": 0.48, "learning_rate": 4.779302094632723e-06, "loss": 0.3451, "step": 988 }, { "epoch": 0.48, "learning_rate": 4.778760328554815e-06, "loss": 0.43, "step": 989 }, { "epoch": 0.48, "learning_rate": 4.778217929115474e-06, "loss": 0.3327, "step": 990 }, { "epoch": 0.48, "learning_rate": 4.777674896465455e-06, "loss": 0.3641, "step": 991 }, { "epoch": 0.48, "learning_rate": 4.7771312307556885e-06, "loss": 0.3291, "step": 992 }, { "epoch": 0.48, "learning_rate": 4.7765869321372835e-06, "loss": 0.394, "step": 993 }, { "epoch": 0.49, "learning_rate": 4.776042000761522e-06, "loss": 0.3564, "step": 994 }, { "epoch": 0.49, "learning_rate": 4.775496436779861e-06, "loss": 0.3496, "step": 995 }, { "epoch": 0.49, "learning_rate": 4.7749502403439385e-06, "loss": 0.3735, "step": 996 }, { "epoch": 0.49, "learning_rate": 4.774403411605563e-06, "loss": 0.4356, "step": 997 }, { "epoch": 0.49, "learning_rate": 4.773855950716722e-06, "loss": 0.2835, "step": 998 }, { "epoch": 0.49, "learning_rate": 4.773307857829575e-06, "loss": 0.3768, "step": 999 }, { "epoch": 0.49, "learning_rate": 4.772759133096462e-06, "loss": 0.404, "step": 1000 }, { "epoch": 0.49, "learning_rate": 4.772209776669896e-06, "loss": 0.3639, "step": 1001 }, { "epoch": 0.49, "learning_rate": 4.771659788702565e-06, "loss": 0.3532, "step": 1002 }, { "epoch": 0.49, "learning_rate": 4.771109169347334e-06, "loss": 0.3834, "step": 1003 }, { "epoch": 0.49, "learning_rate": 4.770557918757242e-06, "loss": 0.3562, "step": 1004 }, { "epoch": 0.49, "learning_rate": 4.770006037085505e-06, "loss": 0.3253, "step": 1005 }, { "epoch": 0.49, "learning_rate": 4.7694535244855136e-06, "loss": 0.305, "step": 1006 }, { "epoch": 0.49, "learning_rate": 4.768900381110833e-06, "loss": 0.3396, "step": 1007 }, { "epoch": 0.49, "learning_rate": 4.768346607115205e-06, "loss": 0.3614, "step": 1008 }, { "epoch": 0.49, "learning_rate": 4.767792202652547e-06, "loss": 0.349, "step": 1009 }, { "epoch": 0.49, "learning_rate": 4.76723716787695e-06, "loss": 0.3577, "step": 1010 }, { "epoch": 0.49, "learning_rate": 4.766681502942681e-06, "loss": 0.3868, "step": 1011 }, { "epoch": 0.49, "learning_rate": 4.7661252080041834e-06, "loss": 0.3853, "step": 1012 }, { "epoch": 0.49, "learning_rate": 4.765568283216073e-06, "loss": 0.2934, "step": 1013 }, { "epoch": 0.5, "learning_rate": 4.765010728733143e-06, "loss": 0.3531, "step": 1014 }, { "epoch": 0.5, "learning_rate": 4.7644525447103605e-06, "loss": 0.3692, "step": 1015 }, { "epoch": 0.5, "learning_rate": 4.763893731302867e-06, "loss": 0.3811, "step": 1016 }, { "epoch": 0.5, "learning_rate": 4.763334288665982e-06, "loss": 0.3321, "step": 1017 }, { "epoch": 0.5, "learning_rate": 4.762774216955195e-06, "loss": 0.3067, "step": 1018 }, { "epoch": 0.5, "learning_rate": 4.762213516326175e-06, "loss": 0.3938, "step": 1019 }, { "epoch": 0.5, "learning_rate": 4.761652186934763e-06, "loss": 0.392, "step": 1020 }, { "epoch": 0.5, "learning_rate": 4.761090228936976e-06, "loss": 0.306, "step": 1021 }, { "epoch": 0.5, "learning_rate": 4.7605276424890045e-06, "loss": 0.3734, "step": 1022 }, { "epoch": 0.5, "learning_rate": 4.759964427747214e-06, "loss": 0.3478, "step": 1023 }, { "epoch": 0.5, "learning_rate": 4.759400584868148e-06, "loss": 0.3586, "step": 1024 }, { "epoch": 0.5, "learning_rate": 4.758836114008519e-06, "loss": 0.3159, "step": 1025 }, { "epoch": 0.5, "learning_rate": 4.758271015325218e-06, "loss": 0.3126, "step": 1026 }, { "epoch": 0.5, "learning_rate": 4.7577052889753075e-06, "loss": 0.383, "step": 1027 }, { "epoch": 0.5, "learning_rate": 4.757138935116028e-06, "loss": 0.4335, "step": 1028 }, { "epoch": 0.5, "learning_rate": 4.756571953904792e-06, "loss": 0.364, "step": 1029 }, { "epoch": 0.5, "learning_rate": 4.756004345499187e-06, "loss": 0.3279, "step": 1030 }, { "epoch": 0.5, "learning_rate": 4.755436110056975e-06, "loss": 0.3709, "step": 1031 }, { "epoch": 0.5, "learning_rate": 4.754867247736091e-06, "loss": 0.4228, "step": 1032 }, { "epoch": 0.5, "learning_rate": 4.754297758694646e-06, "loss": 0.3603, "step": 1033 }, { "epoch": 0.5, "learning_rate": 4.753727643090925e-06, "loss": 0.3944, "step": 1034 }, { "epoch": 0.51, "learning_rate": 4.753156901083385e-06, "loss": 0.3216, "step": 1035 }, { "epoch": 0.51, "learning_rate": 4.752585532830661e-06, "loss": 0.3306, "step": 1036 }, { "epoch": 0.51, "learning_rate": 4.752013538491559e-06, "loss": 0.3337, "step": 1037 }, { "epoch": 0.51, "learning_rate": 4.751440918225059e-06, "loss": 0.3582, "step": 1038 }, { "epoch": 0.51, "learning_rate": 4.750867672190316e-06, "loss": 0.3224, "step": 1039 }, { "epoch": 0.51, "learning_rate": 4.750293800546659e-06, "loss": 0.3731, "step": 1040 }, { "epoch": 0.51, "learning_rate": 4.74971930345359e-06, "loss": 0.3467, "step": 1041 }, { "epoch": 0.51, "learning_rate": 4.7491441810707874e-06, "loss": 0.3251, "step": 1042 }, { "epoch": 0.51, "learning_rate": 4.748568433558099e-06, "loss": 0.3429, "step": 1043 }, { "epoch": 0.51, "learning_rate": 4.747992061075549e-06, "loss": 0.3564, "step": 1044 }, { "epoch": 0.51, "learning_rate": 4.7474150637833375e-06, "loss": 0.3171, "step": 1045 }, { "epoch": 0.51, "learning_rate": 4.746837441841833e-06, "loss": 0.3278, "step": 1046 }, { "epoch": 0.51, "learning_rate": 4.746259195411582e-06, "loss": 0.3236, "step": 1047 }, { "epoch": 0.51, "learning_rate": 4.7456803246533025e-06, "loss": 0.3083, "step": 1048 }, { "epoch": 0.51, "learning_rate": 4.745100829727887e-06, "loss": 0.3831, "step": 1049 }, { "epoch": 0.51, "learning_rate": 4.7445207107964e-06, "loss": 0.3312, "step": 1050 }, { "epoch": 0.51, "learning_rate": 4.743939968020081e-06, "loss": 0.3215, "step": 1051 }, { "epoch": 0.51, "learning_rate": 4.743358601560343e-06, "loss": 0.3722, "step": 1052 }, { "epoch": 0.51, "learning_rate": 4.742776611578771e-06, "loss": 0.3557, "step": 1053 }, { "epoch": 0.51, "learning_rate": 4.742193998237125e-06, "loss": 0.3536, "step": 1054 }, { "epoch": 0.52, "learning_rate": 4.741610761697335e-06, "loss": 0.3548, "step": 1055 }, { "epoch": 0.52, "learning_rate": 4.74102690212151e-06, "loss": 0.3375, "step": 1056 }, { "epoch": 0.52, "learning_rate": 4.740442419671924e-06, "loss": 0.3722, "step": 1057 }, { "epoch": 0.52, "learning_rate": 4.739857314511033e-06, "loss": 0.37, "step": 1058 }, { "epoch": 0.52, "learning_rate": 4.739271586801461e-06, "loss": 0.3403, "step": 1059 }, { "epoch": 0.52, "learning_rate": 4.738685236706003e-06, "loss": 0.3568, "step": 1060 }, { "epoch": 0.52, "learning_rate": 4.738098264387634e-06, "loss": 0.3936, "step": 1061 }, { "epoch": 0.52, "learning_rate": 4.737510670009494e-06, "loss": 0.3211, "step": 1062 }, { "epoch": 0.52, "learning_rate": 4.736922453734902e-06, "loss": 0.3288, "step": 1063 }, { "epoch": 0.52, "learning_rate": 4.7363336157273476e-06, "loss": 0.3352, "step": 1064 }, { "epoch": 0.52, "learning_rate": 4.735744156150492e-06, "loss": 0.3156, "step": 1065 }, { "epoch": 0.52, "learning_rate": 4.735154075168171e-06, "loss": 0.3908, "step": 1066 }, { "epoch": 0.52, "learning_rate": 4.734563372944393e-06, "loss": 0.358, "step": 1067 }, { "epoch": 0.52, "learning_rate": 4.733972049643337e-06, "loss": 0.3507, "step": 1068 }, { "epoch": 0.52, "learning_rate": 4.733380105429358e-06, "loss": 0.3217, "step": 1069 }, { "epoch": 0.52, "learning_rate": 4.732787540466979e-06, "loss": 0.3316, "step": 1070 }, { "epoch": 0.52, "learning_rate": 4.732194354920901e-06, "loss": 0.3513, "step": 1071 }, { "epoch": 0.52, "learning_rate": 4.731600548955993e-06, "loss": 0.3847, "step": 1072 }, { "epoch": 0.52, "learning_rate": 4.731006122737299e-06, "loss": 0.3311, "step": 1073 }, { "epoch": 0.52, "learning_rate": 4.730411076430035e-06, "loss": 0.3762, "step": 1074 }, { "epoch": 0.52, "learning_rate": 4.729815410199588e-06, "loss": 0.3657, "step": 1075 }, { "epoch": 0.53, "learning_rate": 4.729219124211517e-06, "loss": 0.2979, "step": 1076 }, { "epoch": 0.53, "learning_rate": 4.728622218631557e-06, "loss": 0.3488, "step": 1077 }, { "epoch": 0.53, "learning_rate": 4.728024693625611e-06, "loss": 0.3418, "step": 1078 }, { "epoch": 0.53, "learning_rate": 4.727426549359756e-06, "loss": 0.3268, "step": 1079 }, { "epoch": 0.53, "learning_rate": 4.726827786000242e-06, "loss": 0.3189, "step": 1080 }, { "epoch": 0.53, "learning_rate": 4.72622840371349e-06, "loss": 0.4037, "step": 1081 }, { "epoch": 0.53, "learning_rate": 4.725628402666091e-06, "loss": 0.3245, "step": 1082 }, { "epoch": 0.53, "learning_rate": 4.7250277830248115e-06, "loss": 0.335, "step": 1083 }, { "epoch": 0.53, "learning_rate": 4.724426544956589e-06, "loss": 0.3407, "step": 1084 }, { "epoch": 0.53, "learning_rate": 4.723824688628531e-06, "loss": 0.3508, "step": 1085 }, { "epoch": 0.53, "learning_rate": 4.72322221420792e-06, "loss": 0.3321, "step": 1086 }, { "epoch": 0.53, "learning_rate": 4.722619121862206e-06, "loss": 0.3417, "step": 1087 }, { "epoch": 0.53, "learning_rate": 4.722015411759015e-06, "loss": 0.3317, "step": 1088 }, { "epoch": 0.53, "learning_rate": 4.721411084066143e-06, "loss": 0.3838, "step": 1089 }, { "epoch": 0.53, "learning_rate": 4.720806138951557e-06, "loss": 0.3316, "step": 1090 }, { "epoch": 0.53, "learning_rate": 4.720200576583396e-06, "loss": 0.3497, "step": 1091 }, { "epoch": 0.53, "learning_rate": 4.719594397129971e-06, "loss": 0.3781, "step": 1092 }, { "epoch": 0.53, "learning_rate": 4.718987600759764e-06, "loss": 0.3779, "step": 1093 }, { "epoch": 0.53, "learning_rate": 4.718380187641429e-06, "loss": 0.3527, "step": 1094 }, { "epoch": 0.53, "learning_rate": 4.717772157943792e-06, "loss": 0.3344, "step": 1095 }, { "epoch": 0.54, "learning_rate": 4.717163511835848e-06, "loss": 0.3884, "step": 1096 }, { "epoch": 0.54, "learning_rate": 4.7165542494867655e-06, "loss": 0.3408, "step": 1097 }, { "epoch": 0.54, "learning_rate": 4.715944371065884e-06, "loss": 0.3224, "step": 1098 }, { "epoch": 0.54, "learning_rate": 4.715333876742712e-06, "loss": 0.352, "step": 1099 }, { "epoch": 0.54, "learning_rate": 4.714722766686934e-06, "loss": 0.3654, "step": 1100 }, { "epoch": 0.54, "learning_rate": 4.714111041068401e-06, "loss": 0.3129, "step": 1101 }, { "epoch": 0.54, "learning_rate": 4.713498700057136e-06, "loss": 0.3319, "step": 1102 }, { "epoch": 0.54, "learning_rate": 4.712885743823336e-06, "loss": 0.3575, "step": 1103 }, { "epoch": 0.54, "learning_rate": 4.712272172537365e-06, "loss": 0.3167, "step": 1104 }, { "epoch": 0.54, "learning_rate": 4.71165798636976e-06, "loss": 0.3674, "step": 1105 }, { "epoch": 0.54, "learning_rate": 4.71104318549123e-06, "loss": 0.3948, "step": 1106 }, { "epoch": 0.54, "learning_rate": 4.710427770072652e-06, "loss": 0.3471, "step": 1107 }, { "epoch": 0.54, "learning_rate": 4.709811740285075e-06, "loss": 0.3698, "step": 1108 }, { "epoch": 0.54, "learning_rate": 4.709195096299721e-06, "loss": 0.3373, "step": 1109 }, { "epoch": 0.54, "learning_rate": 4.708577838287979e-06, "loss": 0.3433, "step": 1110 }, { "epoch": 0.54, "learning_rate": 4.707959966421412e-06, "loss": 0.3264, "step": 1111 }, { "epoch": 0.54, "learning_rate": 4.70734148087175e-06, "loss": 0.3502, "step": 1112 }, { "epoch": 0.54, "learning_rate": 4.706722381810897e-06, "loss": 0.3026, "step": 1113 }, { "epoch": 0.54, "learning_rate": 4.706102669410926e-06, "loss": 0.3355, "step": 1114 }, { "epoch": 0.54, "learning_rate": 4.70548234384408e-06, "loss": 0.3791, "step": 1115 }, { "epoch": 0.54, "learning_rate": 4.704861405282775e-06, "loss": 0.3809, "step": 1116 }, { "epoch": 0.55, "learning_rate": 4.704239853899593e-06, "loss": 0.3568, "step": 1117 }, { "epoch": 0.55, "learning_rate": 4.70361768986729e-06, "loss": 0.341, "step": 1118 }, { "epoch": 0.55, "learning_rate": 4.7029949133587915e-06, "loss": 0.2819, "step": 1119 }, { "epoch": 0.55, "learning_rate": 4.7023715245471914e-06, "loss": 0.3773, "step": 1120 }, { "epoch": 0.55, "learning_rate": 4.701747523605756e-06, "loss": 0.3841, "step": 1121 }, { "epoch": 0.55, "learning_rate": 4.7011229107079206e-06, "loss": 0.3187, "step": 1122 }, { "epoch": 0.55, "learning_rate": 4.700497686027291e-06, "loss": 0.3196, "step": 1123 }, { "epoch": 0.55, "learning_rate": 4.699871849737643e-06, "loss": 0.3504, "step": 1124 }, { "epoch": 0.55, "learning_rate": 4.699245402012922e-06, "loss": 0.2842, "step": 1125 }, { "epoch": 0.55, "learning_rate": 4.698618343027244e-06, "loss": 0.3346, "step": 1126 }, { "epoch": 0.55, "learning_rate": 4.697990672954896e-06, "loss": 0.327, "step": 1127 }, { "epoch": 0.55, "learning_rate": 4.697362391970329e-06, "loss": 0.3963, "step": 1128 }, { "epoch": 0.55, "learning_rate": 4.6967335002481724e-06, "loss": 0.3879, "step": 1129 }, { "epoch": 0.55, "learning_rate": 4.696103997963219e-06, "loss": 0.3429, "step": 1130 }, { "epoch": 0.55, "learning_rate": 4.695473885290434e-06, "loss": 0.3073, "step": 1131 }, { "epoch": 0.55, "learning_rate": 4.694843162404954e-06, "loss": 0.3254, "step": 1132 }, { "epoch": 0.55, "learning_rate": 4.694211829482079e-06, "loss": 0.3318, "step": 1133 }, { "epoch": 0.55, "learning_rate": 4.693579886697284e-06, "loss": 0.3435, "step": 1134 }, { "epoch": 0.55, "learning_rate": 4.692947334226212e-06, "loss": 0.3955, "step": 1135 }, { "epoch": 0.55, "learning_rate": 4.692314172244677e-06, "loss": 0.3394, "step": 1136 }, { "epoch": 0.56, "learning_rate": 4.691680400928658e-06, "loss": 0.3346, "step": 1137 }, { "epoch": 0.56, "learning_rate": 4.691046020454307e-06, "loss": 0.3617, "step": 1138 }, { "epoch": 0.56, "learning_rate": 4.690411030997947e-06, "loss": 0.3497, "step": 1139 }, { "epoch": 0.56, "learning_rate": 4.689775432736064e-06, "loss": 0.3568, "step": 1140 }, { "epoch": 0.56, "learning_rate": 4.689139225845319e-06, "loss": 0.2902, "step": 1141 }, { "epoch": 0.56, "learning_rate": 4.68850241050254e-06, "loss": 0.396, "step": 1142 }, { "epoch": 0.56, "learning_rate": 4.6878649868847245e-06, "loss": 0.3633, "step": 1143 }, { "epoch": 0.56, "learning_rate": 4.6872269551690375e-06, "loss": 0.343, "step": 1144 }, { "epoch": 0.56, "learning_rate": 4.686588315532815e-06, "loss": 0.3294, "step": 1145 }, { "epoch": 0.56, "learning_rate": 4.685949068153563e-06, "loss": 0.3471, "step": 1146 }, { "epoch": 0.56, "learning_rate": 4.685309213208952e-06, "loss": 0.3893, "step": 1147 }, { "epoch": 0.56, "learning_rate": 4.684668750876825e-06, "loss": 0.3033, "step": 1148 }, { "epoch": 0.56, "learning_rate": 4.684027681335194e-06, "loss": 0.2937, "step": 1149 }, { "epoch": 0.56, "learning_rate": 4.683386004762237e-06, "loss": 0.3765, "step": 1150 }, { "epoch": 0.56, "learning_rate": 4.682743721336304e-06, "loss": 0.3484, "step": 1151 }, { "epoch": 0.56, "learning_rate": 4.68210083123591e-06, "loss": 0.3321, "step": 1152 }, { "epoch": 0.56, "learning_rate": 4.681457334639743e-06, "loss": 0.3493, "step": 1153 }, { "epoch": 0.56, "learning_rate": 4.6808132317266556e-06, "loss": 0.3698, "step": 1154 }, { "epoch": 0.56, "learning_rate": 4.680168522675671e-06, "loss": 0.3758, "step": 1155 }, { "epoch": 0.56, "learning_rate": 4.67952320766598e-06, "loss": 0.3901, "step": 1156 }, { "epoch": 0.56, "learning_rate": 4.678877286876943e-06, "loss": 0.3359, "step": 1157 }, { "epoch": 0.57, "learning_rate": 4.678230760488088e-06, "loss": 0.3421, "step": 1158 }, { "epoch": 0.57, "learning_rate": 4.67758362867911e-06, "loss": 0.3558, "step": 1159 }, { "epoch": 0.57, "learning_rate": 4.676935891629876e-06, "loss": 0.3503, "step": 1160 }, { "epoch": 0.57, "learning_rate": 4.676287549520416e-06, "loss": 0.3527, "step": 1161 }, { "epoch": 0.57, "learning_rate": 4.675638602530934e-06, "loss": 0.4095, "step": 1162 }, { "epoch": 0.57, "learning_rate": 4.674989050841796e-06, "loss": 0.3266, "step": 1163 }, { "epoch": 0.57, "learning_rate": 4.674338894633542e-06, "loss": 0.3699, "step": 1164 }, { "epoch": 0.57, "learning_rate": 4.673688134086875e-06, "loss": 0.324, "step": 1165 }, { "epoch": 0.57, "learning_rate": 4.673036769382671e-06, "loss": 0.3688, "step": 1166 }, { "epoch": 0.57, "learning_rate": 4.672384800701967e-06, "loss": 0.3325, "step": 1167 }, { "epoch": 0.57, "learning_rate": 4.671732228225976e-06, "loss": 0.346, "step": 1168 }, { "epoch": 0.57, "learning_rate": 4.6710790521360715e-06, "loss": 0.3046, "step": 1169 }, { "epoch": 0.57, "learning_rate": 4.6704252726138e-06, "loss": 0.3329, "step": 1170 }, { "epoch": 0.57, "learning_rate": 4.6697708898408745e-06, "loss": 0.3152, "step": 1171 }, { "epoch": 0.57, "learning_rate": 4.669115903999173e-06, "loss": 0.3559, "step": 1172 }, { "epoch": 0.57, "learning_rate": 4.668460315270744e-06, "loss": 0.3495, "step": 1173 }, { "epoch": 0.57, "learning_rate": 4.667804123837802e-06, "loss": 0.3238, "step": 1174 }, { "epoch": 0.57, "learning_rate": 4.667147329882731e-06, "loss": 0.3952, "step": 1175 }, { "epoch": 0.57, "learning_rate": 4.66648993358808e-06, "loss": 0.4112, "step": 1176 }, { "epoch": 0.57, "learning_rate": 4.665831935136567e-06, "loss": 0.3302, "step": 1177 }, { "epoch": 0.58, "learning_rate": 4.665173334711076e-06, "loss": 0.3687, "step": 1178 }, { "epoch": 0.58, "learning_rate": 4.6645141324946605e-06, "loss": 0.3686, "step": 1179 }, { "epoch": 0.58, "learning_rate": 4.663854328670539e-06, "loss": 0.2894, "step": 1180 }, { "epoch": 0.58, "learning_rate": 4.663193923422098e-06, "loss": 0.3124, "step": 1181 }, { "epoch": 0.58, "learning_rate": 4.6625329169328935e-06, "loss": 0.3534, "step": 1182 }, { "epoch": 0.58, "learning_rate": 4.6618713093866445e-06, "loss": 0.3855, "step": 1183 }, { "epoch": 0.58, "learning_rate": 4.661209100967239e-06, "loss": 0.3289, "step": 1184 }, { "epoch": 0.58, "learning_rate": 4.660546291858732e-06, "loss": 0.3697, "step": 1185 }, { "epoch": 0.58, "learning_rate": 4.659882882245346e-06, "loss": 0.3103, "step": 1186 }, { "epoch": 0.58, "learning_rate": 4.659218872311469e-06, "loss": 0.367, "step": 1187 }, { "epoch": 0.58, "learning_rate": 4.658554262241659e-06, "loss": 0.3451, "step": 1188 }, { "epoch": 0.58, "learning_rate": 4.657889052220636e-06, "loss": 0.2667, "step": 1189 }, { "epoch": 0.58, "learning_rate": 4.6572232424332915e-06, "loss": 0.3265, "step": 1190 }, { "epoch": 0.58, "learning_rate": 4.6565568330646795e-06, "loss": 0.3557, "step": 1191 }, { "epoch": 0.58, "learning_rate": 4.655889824300023e-06, "loss": 0.3585, "step": 1192 }, { "epoch": 0.58, "learning_rate": 4.655222216324713e-06, "loss": 0.3173, "step": 1193 }, { "epoch": 0.58, "learning_rate": 4.6545540093243025e-06, "loss": 0.3944, "step": 1194 }, { "epoch": 0.58, "learning_rate": 4.653885203484516e-06, "loss": 0.3023, "step": 1195 }, { "epoch": 0.58, "learning_rate": 4.653215798991241e-06, "loss": 0.3653, "step": 1196 }, { "epoch": 0.58, "learning_rate": 4.6525457960305335e-06, "loss": 0.341, "step": 1197 }, { "epoch": 0.58, "learning_rate": 4.651875194788613e-06, "loss": 0.3694, "step": 1198 }, { "epoch": 0.59, "learning_rate": 4.651203995451871e-06, "loss": 0.3786, "step": 1199 }, { "epoch": 0.59, "learning_rate": 4.650532198206857e-06, "loss": 0.362, "step": 1200 }, { "epoch": 0.59, "learning_rate": 4.649859803240295e-06, "loss": 0.3318, "step": 1201 }, { "epoch": 0.59, "learning_rate": 4.649186810739069e-06, "loss": 0.3111, "step": 1202 }, { "epoch": 0.59, "learning_rate": 4.648513220890231e-06, "loss": 0.3347, "step": 1203 }, { "epoch": 0.59, "learning_rate": 4.647839033881002e-06, "loss": 0.3385, "step": 1204 }, { "epoch": 0.59, "learning_rate": 4.647164249898763e-06, "loss": 0.3406, "step": 1205 }, { "epoch": 0.59, "learning_rate": 4.646488869131067e-06, "loss": 0.3697, "step": 1206 }, { "epoch": 0.59, "learning_rate": 4.64581289176563e-06, "loss": 0.3038, "step": 1207 }, { "epoch": 0.59, "learning_rate": 4.645136317990331e-06, "loss": 0.3689, "step": 1208 }, { "epoch": 0.59, "learning_rate": 4.644459147993221e-06, "loss": 0.3444, "step": 1209 }, { "epoch": 0.59, "learning_rate": 4.643781381962512e-06, "loss": 0.3625, "step": 1210 }, { "epoch": 0.59, "learning_rate": 4.643103020086584e-06, "loss": 0.4175, "step": 1211 }, { "epoch": 0.59, "learning_rate": 4.642424062553981e-06, "loss": 0.3268, "step": 1212 }, { "epoch": 0.59, "learning_rate": 4.641744509553413e-06, "loss": 0.3255, "step": 1213 }, { "epoch": 0.59, "learning_rate": 4.641064361273757e-06, "loss": 0.3937, "step": 1214 }, { "epoch": 0.59, "learning_rate": 4.6403836179040525e-06, "loss": 0.3448, "step": 1215 }, { "epoch": 0.59, "learning_rate": 4.639702279633508e-06, "loss": 0.3476, "step": 1216 }, { "epoch": 0.59, "learning_rate": 4.6390203466514936e-06, "loss": 0.3623, "step": 1217 }, { "epoch": 0.59, "learning_rate": 4.638337819147548e-06, "loss": 0.35, "step": 1218 }, { "epoch": 0.6, "learning_rate": 4.637654697311374e-06, "loss": 0.3656, "step": 1219 }, { "epoch": 0.6, "learning_rate": 4.6369709813328385e-06, "loss": 0.3292, "step": 1220 }, { "epoch": 0.6, "learning_rate": 4.6362866714019735e-06, "loss": 0.3014, "step": 1221 }, { "epoch": 0.6, "learning_rate": 4.63560176770898e-06, "loss": 0.3522, "step": 1222 }, { "epoch": 0.6, "learning_rate": 4.6349162704442165e-06, "loss": 0.3595, "step": 1223 }, { "epoch": 0.6, "learning_rate": 4.634230179798215e-06, "loss": 0.3343, "step": 1224 }, { "epoch": 0.6, "learning_rate": 4.633543495961666e-06, "loss": 0.3615, "step": 1225 }, { "epoch": 0.6, "learning_rate": 4.632856219125428e-06, "loss": 0.3346, "step": 1226 }, { "epoch": 0.6, "learning_rate": 4.632168349480523e-06, "loss": 0.3325, "step": 1227 }, { "epoch": 0.6, "learning_rate": 4.631479887218141e-06, "loss": 0.3329, "step": 1228 }, { "epoch": 0.6, "learning_rate": 4.630790832529631e-06, "loss": 0.3224, "step": 1229 }, { "epoch": 0.6, "learning_rate": 4.630101185606509e-06, "loss": 0.3451, "step": 1230 }, { "epoch": 0.6, "learning_rate": 4.629410946640459e-06, "loss": 0.3537, "step": 1231 }, { "epoch": 0.6, "learning_rate": 4.628720115823327e-06, "loss": 0.3497, "step": 1232 }, { "epoch": 0.6, "learning_rate": 4.628028693347121e-06, "loss": 0.3245, "step": 1233 }, { "epoch": 0.6, "learning_rate": 4.627336679404016e-06, "loss": 0.3538, "step": 1234 }, { "epoch": 0.6, "learning_rate": 4.6266440741863535e-06, "loss": 0.3505, "step": 1235 }, { "epoch": 0.6, "learning_rate": 4.625950877886636e-06, "loss": 0.3177, "step": 1236 }, { "epoch": 0.6, "learning_rate": 4.62525709069753e-06, "loss": 0.3159, "step": 1237 }, { "epoch": 0.6, "learning_rate": 4.624562712811869e-06, "loss": 0.3321, "step": 1238 }, { "epoch": 0.6, "learning_rate": 4.623867744422649e-06, "loss": 0.3547, "step": 1239 }, { "epoch": 0.61, "learning_rate": 4.62317218572303e-06, "loss": 0.3619, "step": 1240 }, { "epoch": 0.61, "learning_rate": 4.622476036906337e-06, "loss": 0.3107, "step": 1241 }, { "epoch": 0.61, "learning_rate": 4.621779298166058e-06, "loss": 0.3189, "step": 1242 }, { "epoch": 0.61, "learning_rate": 4.621081969695845e-06, "loss": 0.3312, "step": 1243 }, { "epoch": 0.61, "learning_rate": 4.620384051689516e-06, "loss": 0.2945, "step": 1244 }, { "epoch": 0.61, "learning_rate": 4.619685544341049e-06, "loss": 0.3935, "step": 1245 }, { "epoch": 0.61, "learning_rate": 4.618986447844589e-06, "loss": 0.3198, "step": 1246 }, { "epoch": 0.61, "learning_rate": 4.618286762394444e-06, "loss": 0.2997, "step": 1247 }, { "epoch": 0.61, "learning_rate": 4.617586488185085e-06, "loss": 0.3217, "step": 1248 }, { "epoch": 0.61, "learning_rate": 4.616885625411149e-06, "loss": 0.2808, "step": 1249 }, { "epoch": 0.61, "learning_rate": 4.61618417426743e-06, "loss": 0.3139, "step": 1250 }, { "epoch": 0.61, "learning_rate": 4.6154821349488945e-06, "loss": 0.3316, "step": 1251 }, { "epoch": 0.61, "learning_rate": 4.614779507650667e-06, "loss": 0.3903, "step": 1252 }, { "epoch": 0.61, "learning_rate": 4.614076292568036e-06, "loss": 0.2892, "step": 1253 }, { "epoch": 0.61, "learning_rate": 4.613372489896454e-06, "loss": 0.409, "step": 1254 }, { "epoch": 0.61, "learning_rate": 4.612668099831538e-06, "loss": 0.3555, "step": 1255 }, { "epoch": 0.61, "learning_rate": 4.6119631225690654e-06, "loss": 0.3392, "step": 1256 }, { "epoch": 0.61, "learning_rate": 4.611257558304979e-06, "loss": 0.3643, "step": 1257 }, { "epoch": 0.61, "learning_rate": 4.610551407235385e-06, "loss": 0.3477, "step": 1258 }, { "epoch": 0.61, "learning_rate": 4.609844669556551e-06, "loss": 0.2805, "step": 1259 }, { "epoch": 0.62, "learning_rate": 4.609137345464908e-06, "loss": 0.2933, "step": 1260 }, { "epoch": 0.62, "learning_rate": 4.6084294351570526e-06, "loss": 0.2898, "step": 1261 }, { "epoch": 0.62, "learning_rate": 4.60772093882974e-06, "loss": 0.3751, "step": 1262 }, { "epoch": 0.62, "learning_rate": 4.607011856679892e-06, "loss": 0.3338, "step": 1263 }, { "epoch": 0.62, "learning_rate": 4.6063021889045915e-06, "loss": 0.3803, "step": 1264 }, { "epoch": 0.62, "learning_rate": 4.605591935701084e-06, "loss": 0.3238, "step": 1265 }, { "epoch": 0.62, "learning_rate": 4.604881097266778e-06, "loss": 0.3187, "step": 1266 }, { "epoch": 0.62, "learning_rate": 4.604169673799246e-06, "loss": 0.3286, "step": 1267 }, { "epoch": 0.62, "learning_rate": 4.60345766549622e-06, "loss": 0.3652, "step": 1268 }, { "epoch": 0.62, "learning_rate": 4.602745072555598e-06, "loss": 0.3383, "step": 1269 }, { "epoch": 0.62, "learning_rate": 4.602031895175439e-06, "loss": 0.3566, "step": 1270 }, { "epoch": 0.62, "learning_rate": 4.601318133553963e-06, "loss": 0.3132, "step": 1271 }, { "epoch": 0.62, "learning_rate": 4.600603787889554e-06, "loss": 0.3477, "step": 1272 }, { "epoch": 0.62, "learning_rate": 4.59988885838076e-06, "loss": 0.354, "step": 1273 }, { "epoch": 0.62, "learning_rate": 4.5991733452262875e-06, "loss": 0.4003, "step": 1274 }, { "epoch": 0.62, "learning_rate": 4.598457248625008e-06, "loss": 0.3145, "step": 1275 }, { "epoch": 0.62, "learning_rate": 4.597740568775953e-06, "loss": 0.2867, "step": 1276 }, { "epoch": 0.62, "learning_rate": 4.59702330587832e-06, "loss": 0.3476, "step": 1277 }, { "epoch": 0.62, "learning_rate": 4.596305460131464e-06, "loss": 0.3306, "step": 1278 }, { "epoch": 0.62, "learning_rate": 4.595587031734904e-06, "loss": 0.3639, "step": 1279 }, { "epoch": 0.62, "learning_rate": 4.59486802088832e-06, "loss": 0.3186, "step": 1280 }, { "epoch": 0.63, "learning_rate": 4.594148427791557e-06, "loss": 0.3076, "step": 1281 }, { "epoch": 0.63, "learning_rate": 4.593428252644619e-06, "loss": 0.3178, "step": 1282 }, { "epoch": 0.63, "learning_rate": 4.59270749564767e-06, "loss": 0.3676, "step": 1283 }, { "epoch": 0.63, "learning_rate": 4.591986157001041e-06, "loss": 0.3107, "step": 1284 }, { "epoch": 0.63, "learning_rate": 4.5912642369052206e-06, "loss": 0.3426, "step": 1285 }, { "epoch": 0.63, "learning_rate": 4.590541735560859e-06, "loss": 0.3675, "step": 1286 }, { "epoch": 0.63, "learning_rate": 4.589818653168772e-06, "loss": 0.3573, "step": 1287 }, { "epoch": 0.63, "learning_rate": 4.589094989929931e-06, "loss": 0.3587, "step": 1288 }, { "epoch": 0.63, "learning_rate": 4.588370746045474e-06, "loss": 0.2947, "step": 1289 }, { "epoch": 0.63, "learning_rate": 4.5876459217166965e-06, "loss": 0.3282, "step": 1290 }, { "epoch": 0.63, "learning_rate": 4.586920517145059e-06, "loss": 0.3365, "step": 1291 }, { "epoch": 0.63, "learning_rate": 4.58619453253218e-06, "loss": 0.3679, "step": 1292 }, { "epoch": 0.63, "learning_rate": 4.58546796807984e-06, "loss": 0.3682, "step": 1293 }, { "epoch": 0.63, "learning_rate": 4.584740823989982e-06, "loss": 0.3391, "step": 1294 }, { "epoch": 0.63, "learning_rate": 4.58401310046471e-06, "loss": 0.3243, "step": 1295 }, { "epoch": 0.63, "learning_rate": 4.583284797706288e-06, "loss": 0.3847, "step": 1296 }, { "epoch": 0.63, "learning_rate": 4.58255591591714e-06, "loss": 0.3818, "step": 1297 }, { "epoch": 0.63, "learning_rate": 4.581826455299855e-06, "loss": 0.28, "step": 1298 }, { "epoch": 0.63, "learning_rate": 4.581096416057177e-06, "loss": 0.3588, "step": 1299 }, { "epoch": 0.63, "learning_rate": 4.580365798392016e-06, "loss": 0.3754, "step": 1300 }, { "epoch": 0.64, "learning_rate": 4.57963460250744e-06, "loss": 0.3222, "step": 1301 }, { "epoch": 0.64, "learning_rate": 4.578902828606679e-06, "loss": 0.3594, "step": 1302 }, { "epoch": 0.64, "learning_rate": 4.578170476893123e-06, "loss": 0.3833, "step": 1303 }, { "epoch": 0.64, "learning_rate": 4.577437547570323e-06, "loss": 0.3461, "step": 1304 }, { "epoch": 0.64, "learning_rate": 4.57670404084199e-06, "loss": 0.3228, "step": 1305 }, { "epoch": 0.64, "learning_rate": 4.575969956911994e-06, "loss": 0.3395, "step": 1306 }, { "epoch": 0.64, "learning_rate": 4.5752352959843705e-06, "loss": 0.3237, "step": 1307 }, { "epoch": 0.64, "learning_rate": 4.5745000582633095e-06, "loss": 0.3439, "step": 1308 }, { "epoch": 0.64, "learning_rate": 4.5737642439531655e-06, "loss": 0.3518, "step": 1309 }, { "epoch": 0.64, "learning_rate": 4.57302785325845e-06, "loss": 0.3049, "step": 1310 }, { "epoch": 0.64, "learning_rate": 4.5722908863838385e-06, "loss": 0.3202, "step": 1311 }, { "epoch": 0.64, "learning_rate": 4.571553343534162e-06, "loss": 0.3403, "step": 1312 }, { "epoch": 0.64, "learning_rate": 4.570815224914417e-06, "loss": 0.3288, "step": 1313 }, { "epoch": 0.64, "learning_rate": 4.5700765307297535e-06, "loss": 0.294, "step": 1314 }, { "epoch": 0.64, "learning_rate": 4.569337261185488e-06, "loss": 0.3652, "step": 1315 }, { "epoch": 0.64, "learning_rate": 4.568597416487092e-06, "loss": 0.3412, "step": 1316 }, { "epoch": 0.64, "learning_rate": 4.567856996840201e-06, "loss": 0.3344, "step": 1317 }, { "epoch": 0.64, "learning_rate": 4.5671160024506055e-06, "loss": 0.3557, "step": 1318 }, { "epoch": 0.64, "learning_rate": 4.566374433524261e-06, "loss": 0.3288, "step": 1319 }, { "epoch": 0.64, "learning_rate": 4.565632290267279e-06, "loss": 0.3257, "step": 1320 }, { "epoch": 0.65, "learning_rate": 4.56488957288593e-06, "loss": 0.3125, "step": 1321 }, { "epoch": 0.65, "learning_rate": 4.564146281586649e-06, "loss": 0.3457, "step": 1322 }, { "epoch": 0.65, "learning_rate": 4.563402416576025e-06, "loss": 0.3288, "step": 1323 }, { "epoch": 0.65, "learning_rate": 4.562657978060809e-06, "loss": 0.3307, "step": 1324 }, { "epoch": 0.65, "learning_rate": 4.561912966247912e-06, "loss": 0.3356, "step": 1325 }, { "epoch": 0.65, "learning_rate": 4.5611673813444025e-06, "loss": 0.3171, "step": 1326 }, { "epoch": 0.65, "learning_rate": 4.56042122355751e-06, "loss": 0.2928, "step": 1327 }, { "epoch": 0.65, "learning_rate": 4.559674493094623e-06, "loss": 0.3237, "step": 1328 }, { "epoch": 0.65, "learning_rate": 4.558927190163288e-06, "loss": 0.339, "step": 1329 }, { "epoch": 0.65, "learning_rate": 4.5581793149712105e-06, "loss": 0.3187, "step": 1330 }, { "epoch": 0.65, "learning_rate": 4.557430867726257e-06, "loss": 0.3337, "step": 1331 }, { "epoch": 0.65, "learning_rate": 4.556681848636451e-06, "loss": 0.3161, "step": 1332 }, { "epoch": 0.65, "learning_rate": 4.555932257909977e-06, "loss": 0.3484, "step": 1333 }, { "epoch": 0.65, "learning_rate": 4.5551820957551764e-06, "loss": 0.4016, "step": 1334 }, { "epoch": 0.65, "learning_rate": 4.55443136238055e-06, "loss": 0.3251, "step": 1335 }, { "epoch": 0.65, "learning_rate": 4.553680057994757e-06, "loss": 0.3826, "step": 1336 }, { "epoch": 0.65, "learning_rate": 4.552928182806618e-06, "loss": 0.3579, "step": 1337 }, { "epoch": 0.65, "learning_rate": 4.552175737025108e-06, "loss": 0.3417, "step": 1338 }, { "epoch": 0.65, "learning_rate": 4.551422720859363e-06, "loss": 0.3067, "step": 1339 }, { "epoch": 0.65, "learning_rate": 4.5506691345186785e-06, "loss": 0.3626, "step": 1340 }, { "epoch": 0.65, "learning_rate": 4.549914978212506e-06, "loss": 0.3227, "step": 1341 }, { "epoch": 0.66, "learning_rate": 4.5491602521504574e-06, "loss": 0.2925, "step": 1342 }, { "epoch": 0.66, "learning_rate": 4.548404956542302e-06, "loss": 0.3123, "step": 1343 }, { "epoch": 0.66, "learning_rate": 4.547649091597967e-06, "loss": 0.2678, "step": 1344 }, { "epoch": 0.66, "learning_rate": 4.546892657527538e-06, "loss": 0.3094, "step": 1345 }, { "epoch": 0.66, "learning_rate": 4.54613565454126e-06, "loss": 0.3366, "step": 1346 }, { "epoch": 0.66, "learning_rate": 4.545378082849536e-06, "loss": 0.3068, "step": 1347 }, { "epoch": 0.66, "learning_rate": 4.544619942662926e-06, "loss": 0.3863, "step": 1348 }, { "epoch": 0.66, "learning_rate": 4.543861234192148e-06, "loss": 0.3472, "step": 1349 }, { "epoch": 0.66, "learning_rate": 4.5431019576480784e-06, "loss": 0.3449, "step": 1350 }, { "epoch": 0.66, "learning_rate": 4.542342113241752e-06, "loss": 0.3787, "step": 1351 }, { "epoch": 0.66, "learning_rate": 4.5415817011843595e-06, "loss": 0.2638, "step": 1352 }, { "epoch": 0.66, "learning_rate": 4.540820721687253e-06, "loss": 0.3656, "step": 1353 }, { "epoch": 0.66, "learning_rate": 4.5400591749619375e-06, "loss": 0.286, "step": 1354 }, { "epoch": 0.66, "learning_rate": 4.5392970612200805e-06, "loss": 0.3192, "step": 1355 }, { "epoch": 0.66, "learning_rate": 4.5385343806735035e-06, "loss": 0.3352, "step": 1356 }, { "epoch": 0.66, "learning_rate": 4.537771133534187e-06, "loss": 0.2636, "step": 1357 }, { "epoch": 0.66, "learning_rate": 4.537007320014269e-06, "loss": 0.3249, "step": 1358 }, { "epoch": 0.66, "learning_rate": 4.536242940326045e-06, "loss": 0.3308, "step": 1359 }, { "epoch": 0.66, "learning_rate": 4.535477994681968e-06, "loss": 0.2934, "step": 1360 }, { "epoch": 0.66, "learning_rate": 4.534712483294647e-06, "loss": 0.3442, "step": 1361 }, { "epoch": 0.67, "learning_rate": 4.533946406376849e-06, "loss": 0.3199, "step": 1362 }, { "epoch": 0.67, "learning_rate": 4.5331797641414985e-06, "loss": 0.3298, "step": 1363 }, { "epoch": 0.67, "learning_rate": 4.532412556801679e-06, "loss": 0.3242, "step": 1364 }, { "epoch": 0.67, "learning_rate": 4.5316447845706255e-06, "loss": 0.2658, "step": 1365 }, { "epoch": 0.67, "learning_rate": 4.530876447661738e-06, "loss": 0.3539, "step": 1366 }, { "epoch": 0.67, "learning_rate": 4.530107546288564e-06, "loss": 0.3442, "step": 1367 }, { "epoch": 0.67, "learning_rate": 4.529338080664816e-06, "loss": 0.3249, "step": 1368 }, { "epoch": 0.67, "learning_rate": 4.528568051004361e-06, "loss": 0.3409, "step": 1369 }, { "epoch": 0.67, "learning_rate": 4.527797457521219e-06, "loss": 0.2993, "step": 1370 }, { "epoch": 0.67, "learning_rate": 4.527026300429571e-06, "loss": 0.29, "step": 1371 }, { "epoch": 0.67, "learning_rate": 4.5262545799437555e-06, "loss": 0.3146, "step": 1372 }, { "epoch": 0.67, "learning_rate": 4.525482296278262e-06, "loss": 0.3241, "step": 1373 }, { "epoch": 0.67, "learning_rate": 4.524709449647742e-06, "loss": 0.301, "step": 1374 }, { "epoch": 0.67, "learning_rate": 4.523936040267e-06, "loss": 0.3291, "step": 1375 }, { "epoch": 0.67, "learning_rate": 4.5231620683510005e-06, "loss": 0.2796, "step": 1376 }, { "epoch": 0.67, "learning_rate": 4.52238753411486e-06, "loss": 0.2901, "step": 1377 }, { "epoch": 0.67, "learning_rate": 4.521612437773855e-06, "loss": 0.3101, "step": 1378 }, { "epoch": 0.67, "learning_rate": 4.520836779543417e-06, "loss": 0.3199, "step": 1379 }, { "epoch": 0.67, "learning_rate": 4.520060559639132e-06, "loss": 0.3047, "step": 1380 }, { "epoch": 0.67, "learning_rate": 4.519283778276744e-06, "loss": 0.3682, "step": 1381 }, { "epoch": 0.67, "learning_rate": 4.5185064356721534e-06, "loss": 0.3247, "step": 1382 }, { "epoch": 0.68, "learning_rate": 4.517728532041414e-06, "loss": 0.3559, "step": 1383 }, { "epoch": 0.68, "learning_rate": 4.516950067600739e-06, "loss": 0.3168, "step": 1384 }, { "epoch": 0.68, "learning_rate": 4.516171042566495e-06, "loss": 0.2867, "step": 1385 }, { "epoch": 0.68, "learning_rate": 4.515391457155204e-06, "loss": 0.3476, "step": 1386 }, { "epoch": 0.68, "learning_rate": 4.514611311583548e-06, "loss": 0.2895, "step": 1387 }, { "epoch": 0.68, "learning_rate": 4.513830606068359e-06, "loss": 0.3855, "step": 1388 }, { "epoch": 0.68, "learning_rate": 4.513049340826628e-06, "loss": 0.3089, "step": 1389 }, { "epoch": 0.68, "learning_rate": 4.512267516075501e-06, "loss": 0.3576, "step": 1390 }, { "epoch": 0.68, "learning_rate": 4.5114851320322785e-06, "loss": 0.3247, "step": 1391 }, { "epoch": 0.68, "learning_rate": 4.510702188914418e-06, "loss": 0.3312, "step": 1392 }, { "epoch": 0.68, "learning_rate": 4.509918686939533e-06, "loss": 0.3153, "step": 1393 }, { "epoch": 0.68, "learning_rate": 4.509134626325389e-06, "loss": 0.3329, "step": 1394 }, { "epoch": 0.68, "learning_rate": 4.508350007289909e-06, "loss": 0.3139, "step": 1395 }, { "epoch": 0.68, "learning_rate": 4.507564830051171e-06, "loss": 0.3532, "step": 1396 }, { "epoch": 0.68, "learning_rate": 4.506779094827409e-06, "loss": 0.3437, "step": 1397 }, { "epoch": 0.68, "learning_rate": 4.505992801837011e-06, "loss": 0.3004, "step": 1398 }, { "epoch": 0.68, "learning_rate": 4.50520595129852e-06, "loss": 0.3304, "step": 1399 }, { "epoch": 0.68, "learning_rate": 4.504418543430634e-06, "loss": 0.3153, "step": 1400 }, { "epoch": 0.68, "learning_rate": 4.5036305784522065e-06, "loss": 0.3361, "step": 1401 }, { "epoch": 0.68, "learning_rate": 4.502842056582244e-06, "loss": 0.3702, "step": 1402 }, { "epoch": 0.69, "learning_rate": 4.502052978039911e-06, "loss": 0.2996, "step": 1403 }, { "epoch": 0.69, "learning_rate": 4.501263343044524e-06, "loss": 0.3398, "step": 1404 }, { "epoch": 0.69, "learning_rate": 4.500473151815556e-06, "loss": 0.3792, "step": 1405 }, { "epoch": 0.69, "learning_rate": 4.499682404572634e-06, "loss": 0.288, "step": 1406 }, { "epoch": 0.69, "learning_rate": 4.4988911015355375e-06, "loss": 0.2636, "step": 1407 }, { "epoch": 0.69, "learning_rate": 4.498099242924203e-06, "loss": 0.3198, "step": 1408 }, { "epoch": 0.69, "learning_rate": 4.497306828958722e-06, "loss": 0.3109, "step": 1409 }, { "epoch": 0.69, "learning_rate": 4.496513859859337e-06, "loss": 0.3024, "step": 1410 }, { "epoch": 0.69, "learning_rate": 4.495720335846447e-06, "loss": 0.2968, "step": 1411 }, { "epoch": 0.69, "learning_rate": 4.494926257140607e-06, "loss": 0.2826, "step": 1412 }, { "epoch": 0.69, "learning_rate": 4.4941316239625224e-06, "loss": 0.3014, "step": 1413 }, { "epoch": 0.69, "learning_rate": 4.493336436533056e-06, "loss": 0.3398, "step": 1414 }, { "epoch": 0.69, "learning_rate": 4.492540695073221e-06, "loss": 0.2963, "step": 1415 }, { "epoch": 0.69, "learning_rate": 4.491744399804188e-06, "loss": 0.3465, "step": 1416 }, { "epoch": 0.69, "learning_rate": 4.49094755094728e-06, "loss": 0.3386, "step": 1417 }, { "epoch": 0.69, "learning_rate": 4.490150148723975e-06, "loss": 0.3103, "step": 1418 }, { "epoch": 0.69, "learning_rate": 4.489352193355902e-06, "loss": 0.3491, "step": 1419 }, { "epoch": 0.69, "learning_rate": 4.488553685064849e-06, "loss": 0.3092, "step": 1420 }, { "epoch": 0.69, "learning_rate": 4.487754624072751e-06, "loss": 0.3551, "step": 1421 }, { "epoch": 0.69, "learning_rate": 4.486955010601702e-06, "loss": 0.3426, "step": 1422 }, { "epoch": 0.69, "learning_rate": 4.486154844873947e-06, "loss": 0.3042, "step": 1423 }, { "epoch": 0.7, "learning_rate": 4.485354127111884e-06, "loss": 0.3383, "step": 1424 }, { "epoch": 0.7, "learning_rate": 4.484552857538067e-06, "loss": 0.3635, "step": 1425 }, { "epoch": 0.7, "learning_rate": 4.483751036375201e-06, "loss": 0.3234, "step": 1426 }, { "epoch": 0.7, "learning_rate": 4.482948663846146e-06, "loss": 0.3463, "step": 1427 }, { "epoch": 0.7, "learning_rate": 4.482145740173914e-06, "loss": 0.3707, "step": 1428 }, { "epoch": 0.7, "learning_rate": 4.4813422655816695e-06, "loss": 0.3181, "step": 1429 }, { "epoch": 0.7, "learning_rate": 4.480538240292733e-06, "loss": 0.2865, "step": 1430 }, { "epoch": 0.7, "learning_rate": 4.479733664530575e-06, "loss": 0.3253, "step": 1431 }, { "epoch": 0.7, "learning_rate": 4.478928538518821e-06, "loss": 0.3608, "step": 1432 }, { "epoch": 0.7, "learning_rate": 4.478122862481248e-06, "loss": 0.2915, "step": 1433 }, { "epoch": 0.7, "learning_rate": 4.477316636641788e-06, "loss": 0.2952, "step": 1434 }, { "epoch": 0.7, "learning_rate": 4.476509861224522e-06, "loss": 0.3635, "step": 1435 }, { "epoch": 0.7, "learning_rate": 4.475702536453688e-06, "loss": 0.3121, "step": 1436 }, { "epoch": 0.7, "learning_rate": 4.474894662553674e-06, "loss": 0.3546, "step": 1437 }, { "epoch": 0.7, "learning_rate": 4.474086239749022e-06, "loss": 0.3097, "step": 1438 }, { "epoch": 0.7, "learning_rate": 4.473277268264426e-06, "loss": 0.3231, "step": 1439 }, { "epoch": 0.7, "learning_rate": 4.472467748324732e-06, "loss": 0.3458, "step": 1440 }, { "epoch": 0.7, "learning_rate": 4.47165768015494e-06, "loss": 0.3483, "step": 1441 }, { "epoch": 0.7, "learning_rate": 4.470847063980201e-06, "loss": 0.2737, "step": 1442 }, { "epoch": 0.7, "learning_rate": 4.4700359000258165e-06, "loss": 0.3231, "step": 1443 }, { "epoch": 0.71, "learning_rate": 4.469224188517245e-06, "loss": 0.3775, "step": 1444 }, { "epoch": 0.71, "learning_rate": 4.4684119296800944e-06, "loss": 0.3241, "step": 1445 }, { "epoch": 0.71, "learning_rate": 4.4675991237401235e-06, "loss": 0.3061, "step": 1446 }, { "epoch": 0.71, "learning_rate": 4.4667857709232464e-06, "loss": 0.2996, "step": 1447 }, { "epoch": 0.71, "learning_rate": 4.465971871455525e-06, "loss": 0.3216, "step": 1448 }, { "epoch": 0.71, "learning_rate": 4.465157425563179e-06, "loss": 0.2885, "step": 1449 }, { "epoch": 0.71, "learning_rate": 4.464342433472573e-06, "loss": 0.3299, "step": 1450 }, { "epoch": 0.71, "learning_rate": 4.463526895410228e-06, "loss": 0.3831, "step": 1451 }, { "epoch": 0.71, "learning_rate": 4.462710811602816e-06, "loss": 0.3131, "step": 1452 }, { "epoch": 0.71, "learning_rate": 4.461894182277161e-06, "loss": 0.2987, "step": 1453 }, { "epoch": 0.71, "learning_rate": 4.4610770076602364e-06, "loss": 0.286, "step": 1454 }, { "epoch": 0.71, "learning_rate": 4.46025928797917e-06, "loss": 0.3799, "step": 1455 }, { "epoch": 0.71, "learning_rate": 4.4594410234612385e-06, "loss": 0.2857, "step": 1456 }, { "epoch": 0.71, "learning_rate": 4.458622214333873e-06, "loss": 0.3196, "step": 1457 }, { "epoch": 0.71, "learning_rate": 4.457802860824653e-06, "loss": 0.2922, "step": 1458 }, { "epoch": 0.71, "learning_rate": 4.4569829631613115e-06, "loss": 0.3782, "step": 1459 }, { "epoch": 0.71, "learning_rate": 4.456162521571732e-06, "loss": 0.319, "step": 1460 }, { "epoch": 0.71, "learning_rate": 4.455341536283947e-06, "loss": 0.3478, "step": 1461 }, { "epoch": 0.71, "learning_rate": 4.454520007526144e-06, "loss": 0.3658, "step": 1462 }, { "epoch": 0.71, "learning_rate": 4.45369793552666e-06, "loss": 0.3531, "step": 1463 }, { "epoch": 0.71, "learning_rate": 4.452875320513982e-06, "loss": 0.3351, "step": 1464 }, { "epoch": 0.72, "learning_rate": 4.452052162716748e-06, "loss": 0.326, "step": 1465 }, { "epoch": 0.72, "learning_rate": 4.451228462363749e-06, "loss": 0.2959, "step": 1466 }, { "epoch": 0.72, "learning_rate": 4.450404219683924e-06, "loss": 0.3685, "step": 1467 }, { "epoch": 0.72, "learning_rate": 4.449579434906364e-06, "loss": 0.329, "step": 1468 }, { "epoch": 0.72, "learning_rate": 4.4487541082603115e-06, "loss": 0.3721, "step": 1469 }, { "epoch": 0.72, "learning_rate": 4.447928239975159e-06, "loss": 0.3311, "step": 1470 }, { "epoch": 0.72, "learning_rate": 4.447101830280448e-06, "loss": 0.3317, "step": 1471 }, { "epoch": 0.72, "learning_rate": 4.446274879405873e-06, "loss": 0.2964, "step": 1472 }, { "epoch": 0.72, "learning_rate": 4.445447387581277e-06, "loss": 0.3113, "step": 1473 }, { "epoch": 0.72, "learning_rate": 4.444619355036654e-06, "loss": 0.3177, "step": 1474 }, { "epoch": 0.72, "learning_rate": 4.44379078200215e-06, "loss": 0.283, "step": 1475 }, { "epoch": 0.72, "learning_rate": 4.442961668708058e-06, "loss": 0.3711, "step": 1476 }, { "epoch": 0.72, "learning_rate": 4.442132015384823e-06, "loss": 0.3277, "step": 1477 }, { "epoch": 0.72, "learning_rate": 4.44130182226304e-06, "loss": 0.3314, "step": 1478 }, { "epoch": 0.72, "learning_rate": 4.440471089573455e-06, "loss": 0.3042, "step": 1479 }, { "epoch": 0.72, "learning_rate": 4.43963981754696e-06, "loss": 0.3248, "step": 1480 }, { "epoch": 0.72, "learning_rate": 4.4388080064146025e-06, "loss": 0.3474, "step": 1481 }, { "epoch": 0.72, "learning_rate": 4.437975656407576e-06, "loss": 0.2617, "step": 1482 }, { "epoch": 0.72, "learning_rate": 4.437142767757225e-06, "loss": 0.3175, "step": 1483 }, { "epoch": 0.72, "learning_rate": 4.436309340695044e-06, "loss": 0.288, "step": 1484 }, { "epoch": 0.73, "learning_rate": 4.435475375452676e-06, "loss": 0.3054, "step": 1485 }, { "epoch": 0.73, "learning_rate": 4.4346408722619135e-06, "loss": 0.2833, "step": 1486 }, { "epoch": 0.73, "learning_rate": 4.433805831354703e-06, "loss": 0.3356, "step": 1487 }, { "epoch": 0.73, "learning_rate": 4.432970252963132e-06, "loss": 0.3309, "step": 1488 }, { "epoch": 0.73, "learning_rate": 4.432134137319446e-06, "loss": 0.3039, "step": 1489 }, { "epoch": 0.73, "learning_rate": 4.431297484656034e-06, "loss": 0.3458, "step": 1490 }, { "epoch": 0.73, "learning_rate": 4.430460295205437e-06, "loss": 0.2949, "step": 1491 }, { "epoch": 0.73, "learning_rate": 4.4296225692003436e-06, "loss": 0.3524, "step": 1492 }, { "epoch": 0.73, "learning_rate": 4.428784306873594e-06, "loss": 0.3183, "step": 1493 }, { "epoch": 0.73, "learning_rate": 4.4279455084581736e-06, "loss": 0.2692, "step": 1494 }, { "epoch": 0.73, "learning_rate": 4.4271061741872215e-06, "loss": 0.2982, "step": 1495 }, { "epoch": 0.73, "learning_rate": 4.426266304294021e-06, "loss": 0.3605, "step": 1496 }, { "epoch": 0.73, "learning_rate": 4.4254258990120084e-06, "loss": 0.3153, "step": 1497 }, { "epoch": 0.73, "learning_rate": 4.424584958574766e-06, "loss": 0.2756, "step": 1498 }, { "epoch": 0.73, "learning_rate": 4.423743483216026e-06, "loss": 0.3608, "step": 1499 }, { "epoch": 0.73, "learning_rate": 4.422901473169669e-06, "loss": 0.3246, "step": 1500 }, { "epoch": 0.73, "learning_rate": 4.422058928669725e-06, "loss": 0.358, "step": 1501 }, { "epoch": 0.73, "learning_rate": 4.421215849950371e-06, "loss": 0.3301, "step": 1502 }, { "epoch": 0.73, "learning_rate": 4.420372237245934e-06, "loss": 0.3098, "step": 1503 }, { "epoch": 0.73, "learning_rate": 4.419528090790888e-06, "loss": 0.3206, "step": 1504 }, { "epoch": 0.73, "learning_rate": 4.418683410819857e-06, "loss": 0.337, "step": 1505 }, { "epoch": 0.74, "learning_rate": 4.417838197567611e-06, "loss": 0.2601, "step": 1506 }, { "epoch": 0.74, "learning_rate": 4.416992451269071e-06, "loss": 0.3014, "step": 1507 }, { "epoch": 0.74, "learning_rate": 4.416146172159305e-06, "loss": 0.3239, "step": 1508 }, { "epoch": 0.74, "learning_rate": 4.415299360473527e-06, "loss": 0.3695, "step": 1509 }, { "epoch": 0.74, "learning_rate": 4.4144520164471015e-06, "loss": 0.3486, "step": 1510 }, { "epoch": 0.74, "learning_rate": 4.4136041403155415e-06, "loss": 0.3375, "step": 1511 }, { "epoch": 0.74, "learning_rate": 4.412755732314506e-06, "loss": 0.3124, "step": 1512 }, { "epoch": 0.74, "learning_rate": 4.411906792679803e-06, "loss": 0.3227, "step": 1513 }, { "epoch": 0.74, "learning_rate": 4.411057321647387e-06, "loss": 0.3348, "step": 1514 }, { "epoch": 0.74, "learning_rate": 4.410207319453361e-06, "loss": 0.2932, "step": 1515 }, { "epoch": 0.74, "learning_rate": 4.409356786333977e-06, "loss": 0.3465, "step": 1516 }, { "epoch": 0.74, "learning_rate": 4.408505722525632e-06, "loss": 0.3216, "step": 1517 }, { "epoch": 0.74, "learning_rate": 4.407654128264871e-06, "loss": 0.288, "step": 1518 }, { "epoch": 0.74, "learning_rate": 4.40680200378839e-06, "loss": 0.3569, "step": 1519 }, { "epoch": 0.74, "learning_rate": 4.405949349333026e-06, "loss": 0.3234, "step": 1520 }, { "epoch": 0.74, "learning_rate": 4.405096165135769e-06, "loss": 0.3328, "step": 1521 }, { "epoch": 0.74, "learning_rate": 4.404242451433754e-06, "loss": 0.2993, "step": 1522 }, { "epoch": 0.74, "learning_rate": 4.403388208464263e-06, "loss": 0.3293, "step": 1523 }, { "epoch": 0.74, "learning_rate": 4.402533436464724e-06, "loss": 0.3133, "step": 1524 }, { "epoch": 0.74, "learning_rate": 4.401678135672715e-06, "loss": 0.2805, "step": 1525 }, { "epoch": 0.75, "learning_rate": 4.400822306325959e-06, "loss": 0.2929, "step": 1526 }, { "epoch": 0.75, "learning_rate": 4.3999659486623255e-06, "loss": 0.3287, "step": 1527 }, { "epoch": 0.75, "learning_rate": 4.399109062919834e-06, "loss": 0.2963, "step": 1528 }, { "epoch": 0.75, "learning_rate": 4.398251649336645e-06, "loss": 0.3435, "step": 1529 }, { "epoch": 0.75, "learning_rate": 4.39739370815107e-06, "loss": 0.3267, "step": 1530 }, { "epoch": 0.75, "learning_rate": 4.3965352396015685e-06, "loss": 0.3542, "step": 1531 }, { "epoch": 0.75, "learning_rate": 4.395676243926742e-06, "loss": 0.289, "step": 1532 }, { "epoch": 0.75, "learning_rate": 4.394816721365341e-06, "loss": 0.3308, "step": 1533 }, { "epoch": 0.75, "learning_rate": 4.393956672156263e-06, "loss": 0.3114, "step": 1534 }, { "epoch": 0.75, "learning_rate": 4.393096096538552e-06, "loss": 0.2711, "step": 1535 }, { "epoch": 0.75, "learning_rate": 4.392234994751394e-06, "loss": 0.3155, "step": 1536 }, { "epoch": 0.75, "learning_rate": 4.391373367034129e-06, "loss": 0.3029, "step": 1537 }, { "epoch": 0.75, "learning_rate": 4.3905112136262365e-06, "loss": 0.2843, "step": 1538 }, { "epoch": 0.75, "learning_rate": 4.389648534767344e-06, "loss": 0.3096, "step": 1539 }, { "epoch": 0.75, "learning_rate": 4.388785330697227e-06, "loss": 0.33, "step": 1540 }, { "epoch": 0.75, "learning_rate": 4.387921601655805e-06, "loss": 0.3119, "step": 1541 }, { "epoch": 0.75, "learning_rate": 4.387057347883143e-06, "loss": 0.31, "step": 1542 }, { "epoch": 0.75, "learning_rate": 4.386192569619454e-06, "loss": 0.3025, "step": 1543 }, { "epoch": 0.75, "learning_rate": 4.385327267105095e-06, "loss": 0.3494, "step": 1544 }, { "epoch": 0.75, "learning_rate": 4.384461440580568e-06, "loss": 0.2962, "step": 1545 }, { "epoch": 0.75, "learning_rate": 4.383595090286525e-06, "loss": 0.2938, "step": 1546 }, { "epoch": 0.76, "learning_rate": 4.382728216463758e-06, "loss": 0.2532, "step": 1547 }, { "epoch": 0.76, "learning_rate": 4.3818608193532074e-06, "loss": 0.3171, "step": 1548 }, { "epoch": 0.76, "learning_rate": 4.380992899195959e-06, "loss": 0.315, "step": 1549 }, { "epoch": 0.76, "learning_rate": 4.380124456233243e-06, "loss": 0.3257, "step": 1550 }, { "epoch": 0.76, "learning_rate": 4.3792554907064355e-06, "loss": 0.2972, "step": 1551 }, { "epoch": 0.76, "learning_rate": 4.378386002857059e-06, "loss": 0.3018, "step": 1552 }, { "epoch": 0.76, "learning_rate": 4.377515992926778e-06, "loss": 0.3046, "step": 1553 }, { "epoch": 0.76, "learning_rate": 4.376645461157407e-06, "loss": 0.3195, "step": 1554 }, { "epoch": 0.76, "learning_rate": 4.375774407790901e-06, "loss": 0.3608, "step": 1555 }, { "epoch": 0.76, "learning_rate": 4.374902833069361e-06, "loss": 0.3101, "step": 1556 }, { "epoch": 0.76, "learning_rate": 4.374030737235035e-06, "loss": 0.2824, "step": 1557 }, { "epoch": 0.76, "learning_rate": 4.373158120530312e-06, "loss": 0.3126, "step": 1558 }, { "epoch": 0.76, "learning_rate": 4.37228498319773e-06, "loss": 0.3002, "step": 1559 }, { "epoch": 0.76, "learning_rate": 4.371411325479971e-06, "loss": 0.2833, "step": 1560 }, { "epoch": 0.76, "learning_rate": 4.370537147619858e-06, "loss": 0.3179, "step": 1561 }, { "epoch": 0.76, "learning_rate": 4.369662449860362e-06, "loss": 0.258, "step": 1562 }, { "epoch": 0.76, "learning_rate": 4.368787232444598e-06, "loss": 0.3206, "step": 1563 }, { "epoch": 0.76, "learning_rate": 4.367911495615825e-06, "loss": 0.3662, "step": 1564 }, { "epoch": 0.76, "learning_rate": 4.367035239617446e-06, "loss": 0.2906, "step": 1565 }, { "epoch": 0.76, "learning_rate": 4.366158464693009e-06, "loss": 0.2953, "step": 1566 }, { "epoch": 0.77, "learning_rate": 4.365281171086205e-06, "loss": 0.3116, "step": 1567 }, { "epoch": 0.77, "learning_rate": 4.364403359040871e-06, "loss": 0.3105, "step": 1568 }, { "epoch": 0.77, "learning_rate": 4.363525028800987e-06, "loss": 0.326, "step": 1569 }, { "epoch": 0.77, "learning_rate": 4.362646180610677e-06, "loss": 0.3589, "step": 1570 }, { "epoch": 0.77, "learning_rate": 4.3617668147142084e-06, "loss": 0.2922, "step": 1571 }, { "epoch": 0.77, "learning_rate": 4.3608869313559946e-06, "loss": 0.2722, "step": 1572 }, { "epoch": 0.77, "learning_rate": 4.360006530780591e-06, "loss": 0.3343, "step": 1573 }, { "epoch": 0.77, "learning_rate": 4.359125613232697e-06, "loss": 0.2708, "step": 1574 }, { "epoch": 0.77, "learning_rate": 4.358244178957156e-06, "loss": 0.298, "step": 1575 }, { "epoch": 0.77, "learning_rate": 4.357362228198955e-06, "loss": 0.2906, "step": 1576 }, { "epoch": 0.77, "learning_rate": 4.356479761203224e-06, "loss": 0.2781, "step": 1577 }, { "epoch": 0.77, "learning_rate": 4.355596778215237e-06, "loss": 0.3047, "step": 1578 }, { "epoch": 0.77, "learning_rate": 4.354713279480413e-06, "loss": 0.2571, "step": 1579 }, { "epoch": 0.77, "learning_rate": 4.353829265244311e-06, "loss": 0.2896, "step": 1580 }, { "epoch": 0.77, "learning_rate": 4.352944735752635e-06, "loss": 0.2994, "step": 1581 }, { "epoch": 0.77, "learning_rate": 4.352059691251232e-06, "loss": 0.2967, "step": 1582 }, { "epoch": 0.77, "learning_rate": 4.351174131986095e-06, "loss": 0.3211, "step": 1583 }, { "epoch": 0.77, "learning_rate": 4.350288058203353e-06, "loss": 0.3062, "step": 1584 }, { "epoch": 0.77, "learning_rate": 4.349401470149287e-06, "loss": 0.3207, "step": 1585 }, { "epoch": 0.77, "learning_rate": 4.348514368070313e-06, "loss": 0.3428, "step": 1586 }, { "epoch": 0.77, "learning_rate": 4.347626752212994e-06, "loss": 0.2859, "step": 1587 }, { "epoch": 0.78, "learning_rate": 4.346738622824036e-06, "loss": 0.2735, "step": 1588 }, { "epoch": 0.78, "learning_rate": 4.345849980150287e-06, "loss": 0.2568, "step": 1589 }, { "epoch": 0.78, "learning_rate": 4.344960824438736e-06, "loss": 0.3318, "step": 1590 }, { "epoch": 0.78, "learning_rate": 4.344071155936517e-06, "loss": 0.3053, "step": 1591 }, { "epoch": 0.78, "learning_rate": 4.343180974890905e-06, "loss": 0.3319, "step": 1592 }, { "epoch": 0.78, "learning_rate": 4.3422902815493175e-06, "loss": 0.2838, "step": 1593 }, { "epoch": 0.78, "learning_rate": 4.341399076159317e-06, "loss": 0.3294, "step": 1594 }, { "epoch": 0.78, "learning_rate": 4.340507358968604e-06, "loss": 0.3159, "step": 1595 }, { "epoch": 0.78, "learning_rate": 4.339615130225025e-06, "loss": 0.3083, "step": 1596 }, { "epoch": 0.78, "learning_rate": 4.3387223901765655e-06, "loss": 0.3286, "step": 1597 }, { "epoch": 0.78, "learning_rate": 4.337829139071357e-06, "loss": 0.2916, "step": 1598 }, { "epoch": 0.78, "learning_rate": 4.336935377157668e-06, "loss": 0.3134, "step": 1599 }, { "epoch": 0.78, "learning_rate": 4.336041104683914e-06, "loss": 0.2856, "step": 1600 }, { "epoch": 0.78, "learning_rate": 4.335146321898651e-06, "loss": 0.3155, "step": 1601 }, { "epoch": 0.78, "learning_rate": 4.334251029050573e-06, "loss": 0.272, "step": 1602 }, { "epoch": 0.78, "learning_rate": 4.33335522638852e-06, "loss": 0.2747, "step": 1603 }, { "epoch": 0.78, "learning_rate": 4.332458914161474e-06, "loss": 0.2945, "step": 1604 }, { "epoch": 0.78, "learning_rate": 4.3315620926185554e-06, "loss": 0.2826, "step": 1605 }, { "epoch": 0.78, "learning_rate": 4.330664762009028e-06, "loss": 0.3203, "step": 1606 }, { "epoch": 0.78, "learning_rate": 4.329766922582298e-06, "loss": 0.3293, "step": 1607 }, { "epoch": 0.79, "learning_rate": 4.328868574587912e-06, "loss": 0.2793, "step": 1608 }, { "epoch": 0.79, "learning_rate": 4.327969718275556e-06, "loss": 0.3247, "step": 1609 }, { "epoch": 0.79, "learning_rate": 4.3270703538950605e-06, "loss": 0.2929, "step": 1610 }, { "epoch": 0.79, "learning_rate": 4.326170481696398e-06, "loss": 0.3038, "step": 1611 }, { "epoch": 0.79, "learning_rate": 4.325270101929676e-06, "loss": 0.3049, "step": 1612 }, { "epoch": 0.79, "learning_rate": 4.32436921484515e-06, "loss": 0.2492, "step": 1613 }, { "epoch": 0.79, "learning_rate": 4.3234678206932125e-06, "loss": 0.3155, "step": 1614 }, { "epoch": 0.79, "learning_rate": 4.322565919724398e-06, "loss": 0.3439, "step": 1615 }, { "epoch": 0.79, "learning_rate": 4.321663512189383e-06, "loss": 0.3505, "step": 1616 }, { "epoch": 0.79, "learning_rate": 4.320760598338983e-06, "loss": 0.3451, "step": 1617 }, { "epoch": 0.79, "learning_rate": 4.319857178424156e-06, "loss": 0.327, "step": 1618 }, { "epoch": 0.79, "learning_rate": 4.318953252695998e-06, "loss": 0.3341, "step": 1619 }, { "epoch": 0.79, "learning_rate": 4.318048821405748e-06, "loss": 0.3203, "step": 1620 }, { "epoch": 0.79, "learning_rate": 4.3171438848047856e-06, "loss": 0.326, "step": 1621 }, { "epoch": 0.79, "learning_rate": 4.316238443144628e-06, "loss": 0.39, "step": 1622 }, { "epoch": 0.79, "learning_rate": 4.315332496676936e-06, "loss": 0.2807, "step": 1623 }, { "epoch": 0.79, "learning_rate": 4.31442604565351e-06, "loss": 0.3146, "step": 1624 }, { "epoch": 0.79, "learning_rate": 4.313519090326289e-06, "loss": 0.3016, "step": 1625 }, { "epoch": 0.79, "learning_rate": 4.312611630947354e-06, "loss": 0.3116, "step": 1626 }, { "epoch": 0.79, "learning_rate": 4.311703667768925e-06, "loss": 0.301, "step": 1627 }, { "epoch": 0.79, "learning_rate": 4.310795201043362e-06, "loss": 0.2737, "step": 1628 }, { "epoch": 0.8, "learning_rate": 4.309886231023166e-06, "loss": 0.2929, "step": 1629 }, { "epoch": 0.8, "learning_rate": 4.308976757960978e-06, "loss": 0.2936, "step": 1630 }, { "epoch": 0.8, "learning_rate": 4.308066782109577e-06, "loss": 0.3266, "step": 1631 }, { "epoch": 0.8, "learning_rate": 4.307156303721883e-06, "loss": 0.3304, "step": 1632 }, { "epoch": 0.8, "learning_rate": 4.306245323050954e-06, "loss": 0.2907, "step": 1633 }, { "epoch": 0.8, "learning_rate": 4.3053338403499915e-06, "loss": 0.3193, "step": 1634 }, { "epoch": 0.8, "learning_rate": 4.304421855872333e-06, "loss": 0.2836, "step": 1635 }, { "epoch": 0.8, "learning_rate": 4.303509369871457e-06, "loss": 0.2834, "step": 1636 }, { "epoch": 0.8, "learning_rate": 4.302596382600981e-06, "loss": 0.3089, "step": 1637 }, { "epoch": 0.8, "learning_rate": 4.301682894314661e-06, "loss": 0.3285, "step": 1638 }, { "epoch": 0.8, "learning_rate": 4.300768905266393e-06, "loss": 0.2939, "step": 1639 }, { "epoch": 0.8, "learning_rate": 4.299854415710214e-06, "loss": 0.2943, "step": 1640 }, { "epoch": 0.8, "learning_rate": 4.298939425900296e-06, "loss": 0.3041, "step": 1641 }, { "epoch": 0.8, "learning_rate": 4.298023936090954e-06, "loss": 0.294, "step": 1642 }, { "epoch": 0.8, "learning_rate": 4.297107946536641e-06, "loss": 0.2714, "step": 1643 }, { "epoch": 0.8, "learning_rate": 4.296191457491945e-06, "loss": 0.2864, "step": 1644 }, { "epoch": 0.8, "learning_rate": 4.2952744692116e-06, "loss": 0.3318, "step": 1645 }, { "epoch": 0.8, "learning_rate": 4.294356981950471e-06, "loss": 0.344, "step": 1646 }, { "epoch": 0.8, "learning_rate": 4.293438995963569e-06, "loss": 0.2862, "step": 1647 }, { "epoch": 0.8, "learning_rate": 4.292520511506039e-06, "loss": 0.3258, "step": 1648 }, { "epoch": 0.81, "learning_rate": 4.291601528833165e-06, "loss": 0.315, "step": 1649 }, { "epoch": 0.81, "learning_rate": 4.29068204820037e-06, "loss": 0.3425, "step": 1650 }, { "epoch": 0.81, "learning_rate": 4.289762069863217e-06, "loss": 0.3259, "step": 1651 }, { "epoch": 0.81, "learning_rate": 4.288841594077405e-06, "loss": 0.2768, "step": 1652 }, { "epoch": 0.81, "learning_rate": 4.287920621098772e-06, "loss": 0.3143, "step": 1653 }, { "epoch": 0.81, "learning_rate": 4.286999151183294e-06, "loss": 0.2688, "step": 1654 }, { "epoch": 0.81, "learning_rate": 4.286077184587088e-06, "loss": 0.3313, "step": 1655 }, { "epoch": 0.81, "learning_rate": 4.285154721566402e-06, "loss": 0.2895, "step": 1656 }, { "epoch": 0.81, "learning_rate": 4.284231762377631e-06, "loss": 0.3131, "step": 1657 }, { "epoch": 0.81, "learning_rate": 4.283308307277301e-06, "loss": 0.3394, "step": 1658 }, { "epoch": 0.81, "learning_rate": 4.282384356522078e-06, "loss": 0.3079, "step": 1659 }, { "epoch": 0.81, "learning_rate": 4.2814599103687686e-06, "loss": 0.2712, "step": 1660 }, { "epoch": 0.81, "learning_rate": 4.28053496907431e-06, "loss": 0.2958, "step": 1661 }, { "epoch": 0.81, "learning_rate": 4.2796095328957855e-06, "loss": 0.3225, "step": 1662 }, { "epoch": 0.81, "learning_rate": 4.278683602090411e-06, "loss": 0.3095, "step": 1663 }, { "epoch": 0.81, "learning_rate": 4.2777571769155406e-06, "loss": 0.3443, "step": 1664 }, { "epoch": 0.81, "learning_rate": 4.276830257628665e-06, "loss": 0.3088, "step": 1665 }, { "epoch": 0.81, "learning_rate": 4.2759028444874155e-06, "loss": 0.2733, "step": 1666 }, { "epoch": 0.81, "learning_rate": 4.274974937749555e-06, "loss": 0.429, "step": 1667 }, { "epoch": 0.81, "learning_rate": 4.274046537672991e-06, "loss": 0.2812, "step": 1668 }, { "epoch": 0.81, "learning_rate": 4.273117644515762e-06, "loss": 0.3432, "step": 1669 }, { "epoch": 0.82, "learning_rate": 4.2721882585360456e-06, "loss": 0.3301, "step": 1670 }, { "epoch": 0.82, "learning_rate": 4.271258379992157e-06, "loss": 0.2996, "step": 1671 }, { "epoch": 0.82, "learning_rate": 4.270328009142547e-06, "loss": 0.3271, "step": 1672 }, { "epoch": 0.82, "learning_rate": 4.269397146245805e-06, "loss": 0.323, "step": 1673 }, { "epoch": 0.82, "learning_rate": 4.268465791560656e-06, "loss": 0.311, "step": 1674 }, { "epoch": 0.82, "learning_rate": 4.267533945345962e-06, "loss": 0.3062, "step": 1675 }, { "epoch": 0.82, "learning_rate": 4.266601607860721e-06, "loss": 0.3169, "step": 1676 }, { "epoch": 0.82, "learning_rate": 4.265668779364069e-06, "loss": 0.3224, "step": 1677 }, { "epoch": 0.82, "learning_rate": 4.2647354601152756e-06, "loss": 0.3184, "step": 1678 }, { "epoch": 0.82, "learning_rate": 4.26380165037375e-06, "loss": 0.3217, "step": 1679 }, { "epoch": 0.82, "learning_rate": 4.262867350399037e-06, "loss": 0.2842, "step": 1680 }, { "epoch": 0.82, "learning_rate": 4.261932560450816e-06, "loss": 0.3213, "step": 1681 }, { "epoch": 0.82, "learning_rate": 4.2609972807889034e-06, "loss": 0.3391, "step": 1682 }, { "epoch": 0.82, "learning_rate": 4.260061511673252e-06, "loss": 0.303, "step": 1683 }, { "epoch": 0.82, "learning_rate": 4.259125253363952e-06, "loss": 0.261, "step": 1684 }, { "epoch": 0.82, "learning_rate": 4.258188506121227e-06, "loss": 0.2662, "step": 1685 }, { "epoch": 0.82, "learning_rate": 4.257251270205439e-06, "loss": 0.2808, "step": 1686 }, { "epoch": 0.82, "learning_rate": 4.256313545877082e-06, "loss": 0.2859, "step": 1687 }, { "epoch": 0.82, "learning_rate": 4.255375333396791e-06, "loss": 0.2825, "step": 1688 }, { "epoch": 0.82, "learning_rate": 4.254436633025332e-06, "loss": 0.2951, "step": 1689 }, { "epoch": 0.83, "learning_rate": 4.253497445023609e-06, "loss": 0.2877, "step": 1690 }, { "epoch": 0.83, "learning_rate": 4.252557769652662e-06, "loss": 0.3023, "step": 1691 }, { "epoch": 0.83, "learning_rate": 4.251617607173665e-06, "loss": 0.286, "step": 1692 }, { "epoch": 0.83, "learning_rate": 4.2506769578479265e-06, "loss": 0.3014, "step": 1693 }, { "epoch": 0.83, "learning_rate": 4.249735821936893e-06, "loss": 0.2727, "step": 1694 }, { "epoch": 0.83, "learning_rate": 4.248794199702146e-06, "loss": 0.2949, "step": 1695 }, { "epoch": 0.83, "learning_rate": 4.247852091405399e-06, "loss": 0.2764, "step": 1696 }, { "epoch": 0.83, "learning_rate": 4.246909497308504e-06, "loss": 0.3302, "step": 1697 }, { "epoch": 0.83, "learning_rate": 4.245966417673447e-06, "loss": 0.3704, "step": 1698 }, { "epoch": 0.83, "learning_rate": 4.245022852762347e-06, "loss": 0.3109, "step": 1699 }, { "epoch": 0.83, "learning_rate": 4.244078802837462e-06, "loss": 0.3271, "step": 1700 }, { "epoch": 0.83, "learning_rate": 4.2431342681611805e-06, "loss": 0.2751, "step": 1701 }, { "epoch": 0.83, "learning_rate": 4.242189248996029e-06, "loss": 0.2791, "step": 1702 }, { "epoch": 0.83, "learning_rate": 4.241243745604666e-06, "loss": 0.3274, "step": 1703 }, { "epoch": 0.83, "learning_rate": 4.240297758249887e-06, "loss": 0.318, "step": 1704 }, { "epoch": 0.83, "learning_rate": 4.23935128719462e-06, "loss": 0.3408, "step": 1705 }, { "epoch": 0.83, "learning_rate": 4.2384043327019286e-06, "loss": 0.3191, "step": 1706 }, { "epoch": 0.83, "learning_rate": 4.23745689503501e-06, "loss": 0.2782, "step": 1707 }, { "epoch": 0.83, "learning_rate": 4.2365089744571985e-06, "loss": 0.2642, "step": 1708 }, { "epoch": 0.83, "learning_rate": 4.2355605712319565e-06, "loss": 0.2558, "step": 1709 }, { "epoch": 0.83, "learning_rate": 4.234611685622888e-06, "loss": 0.3342, "step": 1710 }, { "epoch": 0.84, "learning_rate": 4.233662317893725e-06, "loss": 0.2487, "step": 1711 }, { "epoch": 0.84, "learning_rate": 4.232712468308336e-06, "loss": 0.2933, "step": 1712 }, { "epoch": 0.84, "learning_rate": 4.231762137130726e-06, "loss": 0.2995, "step": 1713 }, { "epoch": 0.84, "learning_rate": 4.230811324625028e-06, "loss": 0.2758, "step": 1714 }, { "epoch": 0.84, "learning_rate": 4.229860031055514e-06, "loss": 0.3101, "step": 1715 }, { "epoch": 0.84, "learning_rate": 4.228908256686586e-06, "loss": 0.2764, "step": 1716 }, { "epoch": 0.84, "learning_rate": 4.227956001782784e-06, "loss": 0.3105, "step": 1717 }, { "epoch": 0.84, "learning_rate": 4.227003266608775e-06, "loss": 0.2915, "step": 1718 }, { "epoch": 0.84, "learning_rate": 4.226050051429367e-06, "loss": 0.3173, "step": 1719 }, { "epoch": 0.84, "learning_rate": 4.225096356509497e-06, "loss": 0.3352, "step": 1720 }, { "epoch": 0.84, "learning_rate": 4.224142182114234e-06, "loss": 0.3175, "step": 1721 }, { "epoch": 0.84, "learning_rate": 4.223187528508786e-06, "loss": 0.3, "step": 1722 }, { "epoch": 0.84, "learning_rate": 4.222232395958486e-06, "loss": 0.287, "step": 1723 }, { "epoch": 0.84, "learning_rate": 4.2212767847288085e-06, "loss": 0.3145, "step": 1724 }, { "epoch": 0.84, "learning_rate": 4.220320695085357e-06, "loss": 0.2845, "step": 1725 }, { "epoch": 0.84, "learning_rate": 4.219364127293865e-06, "loss": 0.3047, "step": 1726 }, { "epoch": 0.84, "learning_rate": 4.218407081620206e-06, "loss": 0.3105, "step": 1727 }, { "epoch": 0.84, "learning_rate": 4.21744955833038e-06, "loss": 0.3127, "step": 1728 }, { "epoch": 0.84, "learning_rate": 4.216491557690523e-06, "loss": 0.2803, "step": 1729 }, { "epoch": 0.84, "learning_rate": 4.215533079966903e-06, "loss": 0.2902, "step": 1730 }, { "epoch": 0.85, "learning_rate": 4.214574125425921e-06, "loss": 0.3422, "step": 1731 }, { "epoch": 0.85, "learning_rate": 4.213614694334109e-06, "loss": 0.2876, "step": 1732 }, { "epoch": 0.85, "learning_rate": 4.212654786958133e-06, "loss": 0.3172, "step": 1733 }, { "epoch": 0.85, "learning_rate": 4.21169440356479e-06, "loss": 0.2532, "step": 1734 }, { "epoch": 0.85, "learning_rate": 4.210733544421011e-06, "loss": 0.2962, "step": 1735 }, { "epoch": 0.85, "learning_rate": 4.209772209793857e-06, "loss": 0.2863, "step": 1736 }, { "epoch": 0.85, "learning_rate": 4.208810399950525e-06, "loss": 0.2767, "step": 1737 }, { "epoch": 0.85, "learning_rate": 4.207848115158339e-06, "loss": 0.3534, "step": 1738 }, { "epoch": 0.85, "learning_rate": 4.20688535568476e-06, "loss": 0.3138, "step": 1739 }, { "epoch": 0.85, "learning_rate": 4.205922121797377e-06, "loss": 0.3085, "step": 1740 }, { "epoch": 0.85, "learning_rate": 4.204958413763913e-06, "loss": 0.3181, "step": 1741 }, { "epoch": 0.85, "learning_rate": 4.203994231852222e-06, "loss": 0.3223, "step": 1742 }, { "epoch": 0.85, "learning_rate": 4.2030295763302894e-06, "loss": 0.3217, "step": 1743 }, { "epoch": 0.85, "learning_rate": 4.2020644474662335e-06, "loss": 0.3132, "step": 1744 }, { "epoch": 0.85, "learning_rate": 4.201098845528303e-06, "loss": 0.3038, "step": 1745 }, { "epoch": 0.85, "learning_rate": 4.200132770784878e-06, "loss": 0.2753, "step": 1746 }, { "epoch": 0.85, "learning_rate": 4.199166223504473e-06, "loss": 0.3208, "step": 1747 }, { "epoch": 0.85, "learning_rate": 4.198199203955727e-06, "loss": 0.2605, "step": 1748 }, { "epoch": 0.85, "learning_rate": 4.1972317124074195e-06, "loss": 0.3011, "step": 1749 }, { "epoch": 0.85, "learning_rate": 4.196263749128452e-06, "loss": 0.2893, "step": 1750 }, { "epoch": 0.85, "learning_rate": 4.195295314387864e-06, "loss": 0.3046, "step": 1751 }, { "epoch": 0.86, "learning_rate": 4.194326408454822e-06, "loss": 0.2961, "step": 1752 }, { "epoch": 0.86, "learning_rate": 4.193357031598626e-06, "loss": 0.2798, "step": 1753 }, { "epoch": 0.86, "learning_rate": 4.1923871840887055e-06, "loss": 0.3333, "step": 1754 }, { "epoch": 0.86, "learning_rate": 4.1914168661946204e-06, "loss": 0.3374, "step": 1755 }, { "epoch": 0.86, "learning_rate": 4.190446078186063e-06, "loss": 0.3569, "step": 1756 }, { "epoch": 0.86, "learning_rate": 4.189474820332855e-06, "loss": 0.3, "step": 1757 }, { "epoch": 0.86, "learning_rate": 4.1885030929049476e-06, "loss": 0.2959, "step": 1758 }, { "epoch": 0.86, "learning_rate": 4.187530896172427e-06, "loss": 0.2899, "step": 1759 }, { "epoch": 0.86, "learning_rate": 4.1865582304055045e-06, "loss": 0.288, "step": 1760 }, { "epoch": 0.86, "learning_rate": 4.1855850958745234e-06, "loss": 0.2935, "step": 1761 }, { "epoch": 0.86, "learning_rate": 4.184611492849959e-06, "loss": 0.2663, "step": 1762 }, { "epoch": 0.86, "learning_rate": 4.183637421602417e-06, "loss": 0.3094, "step": 1763 }, { "epoch": 0.86, "learning_rate": 4.182662882402629e-06, "loss": 0.2688, "step": 1764 }, { "epoch": 0.86, "learning_rate": 4.181687875521463e-06, "loss": 0.2695, "step": 1765 }, { "epoch": 0.86, "learning_rate": 4.18071240122991e-06, "loss": 0.2968, "step": 1766 }, { "epoch": 0.86, "learning_rate": 4.179736459799098e-06, "loss": 0.3122, "step": 1767 }, { "epoch": 0.86, "learning_rate": 4.178760051500279e-06, "loss": 0.2875, "step": 1768 }, { "epoch": 0.86, "learning_rate": 4.177783176604838e-06, "loss": 0.295, "step": 1769 }, { "epoch": 0.86, "learning_rate": 4.176805835384289e-06, "loss": 0.3148, "step": 1770 }, { "epoch": 0.86, "learning_rate": 4.175828028110274e-06, "loss": 0.2493, "step": 1771 }, { "epoch": 0.87, "learning_rate": 4.174849755054567e-06, "loss": 0.2924, "step": 1772 }, { "epoch": 0.87, "learning_rate": 4.173871016489072e-06, "loss": 0.2772, "step": 1773 }, { "epoch": 0.87, "learning_rate": 4.172891812685818e-06, "loss": 0.3247, "step": 1774 }, { "epoch": 0.87, "learning_rate": 4.171912143916967e-06, "loss": 0.2756, "step": 1775 }, { "epoch": 0.87, "learning_rate": 4.17093201045481e-06, "loss": 0.3027, "step": 1776 }, { "epoch": 0.87, "learning_rate": 4.169951412571767e-06, "loss": 0.3504, "step": 1777 }, { "epoch": 0.87, "learning_rate": 4.168970350540385e-06, "loss": 0.2738, "step": 1778 }, { "epoch": 0.87, "learning_rate": 4.167988824633342e-06, "loss": 0.2965, "step": 1779 }, { "epoch": 0.87, "learning_rate": 4.1670068351234445e-06, "loss": 0.2996, "step": 1780 }, { "epoch": 0.87, "learning_rate": 4.166024382283629e-06, "loss": 0.3178, "step": 1781 }, { "epoch": 0.87, "learning_rate": 4.165041466386959e-06, "loss": 0.2752, "step": 1782 }, { "epoch": 0.87, "learning_rate": 4.164058087706626e-06, "loss": 0.333, "step": 1783 }, { "epoch": 0.87, "learning_rate": 4.163074246515953e-06, "loss": 0.3039, "step": 1784 }, { "epoch": 0.87, "learning_rate": 4.1620899430883896e-06, "loss": 0.2739, "step": 1785 }, { "epoch": 0.87, "learning_rate": 4.161105177697515e-06, "loss": 0.2945, "step": 1786 }, { "epoch": 0.87, "learning_rate": 4.1601199506170355e-06, "loss": 0.2895, "step": 1787 }, { "epoch": 0.87, "learning_rate": 4.159134262120786e-06, "loss": 0.2905, "step": 1788 }, { "epoch": 0.87, "learning_rate": 4.158148112482731e-06, "loss": 0.2713, "step": 1789 }, { "epoch": 0.87, "learning_rate": 4.15716150197696e-06, "loss": 0.2768, "step": 1790 }, { "epoch": 0.87, "learning_rate": 4.156174430877694e-06, "loss": 0.2714, "step": 1791 }, { "epoch": 0.88, "learning_rate": 4.155186899459282e-06, "loss": 0.3331, "step": 1792 }, { "epoch": 0.88, "learning_rate": 4.1541989079961975e-06, "loss": 0.2495, "step": 1793 }, { "epoch": 0.88, "learning_rate": 4.153210456763046e-06, "loss": 0.2462, "step": 1794 }, { "epoch": 0.88, "learning_rate": 4.152221546034557e-06, "loss": 0.3135, "step": 1795 }, { "epoch": 0.88, "learning_rate": 4.151232176085592e-06, "loss": 0.2729, "step": 1796 }, { "epoch": 0.88, "learning_rate": 4.150242347191135e-06, "loss": 0.2515, "step": 1797 }, { "epoch": 0.88, "learning_rate": 4.149252059626302e-06, "loss": 0.2502, "step": 1798 }, { "epoch": 0.88, "learning_rate": 4.148261313666334e-06, "loss": 0.274, "step": 1799 }, { "epoch": 0.88, "learning_rate": 4.1472701095866e-06, "loss": 0.2574, "step": 1800 }, { "epoch": 0.88, "learning_rate": 4.146278447662597e-06, "loss": 0.2607, "step": 1801 }, { "epoch": 0.88, "learning_rate": 4.145286328169948e-06, "loss": 0.3134, "step": 1802 }, { "epoch": 0.88, "learning_rate": 4.144293751384406e-06, "loss": 0.2749, "step": 1803 }, { "epoch": 0.88, "learning_rate": 4.143300717581846e-06, "loss": 0.2805, "step": 1804 }, { "epoch": 0.88, "learning_rate": 4.142307227038275e-06, "loss": 0.3217, "step": 1805 }, { "epoch": 0.88, "learning_rate": 4.141313280029824e-06, "loss": 0.4118, "step": 1806 }, { "epoch": 0.88, "learning_rate": 4.140318876832753e-06, "loss": 0.3065, "step": 1807 }, { "epoch": 0.88, "learning_rate": 4.139324017723447e-06, "loss": 0.3183, "step": 1808 }, { "epoch": 0.88, "learning_rate": 4.138328702978418e-06, "loss": 0.2878, "step": 1809 }, { "epoch": 0.88, "learning_rate": 4.137332932874305e-06, "loss": 0.2771, "step": 1810 }, { "epoch": 0.88, "learning_rate": 4.136336707687874e-06, "loss": 0.2606, "step": 1811 }, { "epoch": 0.88, "learning_rate": 4.135340027696017e-06, "loss": 0.3194, "step": 1812 }, { "epoch": 0.89, "learning_rate": 4.1343428931757526e-06, "loss": 0.2371, "step": 1813 }, { "epoch": 0.89, "learning_rate": 4.133345304404225e-06, "loss": 0.2795, "step": 1814 }, { "epoch": 0.89, "learning_rate": 4.132347261658706e-06, "loss": 0.3096, "step": 1815 }, { "epoch": 0.89, "learning_rate": 4.131348765216592e-06, "loss": 0.2885, "step": 1816 }, { "epoch": 0.89, "learning_rate": 4.130349815355407e-06, "loss": 0.3172, "step": 1817 }, { "epoch": 0.89, "learning_rate": 4.129350412352799e-06, "loss": 0.2708, "step": 1818 }, { "epoch": 0.89, "learning_rate": 4.128350556486546e-06, "loss": 0.3117, "step": 1819 }, { "epoch": 0.89, "learning_rate": 4.127350248034546e-06, "loss": 0.3629, "step": 1820 }, { "epoch": 0.89, "learning_rate": 4.1263494872748286e-06, "loss": 0.3165, "step": 1821 }, { "epoch": 0.89, "learning_rate": 4.125348274485545e-06, "loss": 0.3211, "step": 1822 }, { "epoch": 0.89, "learning_rate": 4.124346609944974e-06, "loss": 0.2939, "step": 1823 }, { "epoch": 0.89, "learning_rate": 4.123344493931519e-06, "loss": 0.2784, "step": 1824 }, { "epoch": 0.89, "learning_rate": 4.12234192672371e-06, "loss": 0.3376, "step": 1825 }, { "epoch": 0.89, "learning_rate": 4.121338908600202e-06, "loss": 0.3165, "step": 1826 }, { "epoch": 0.89, "learning_rate": 4.1203354398397735e-06, "loss": 0.2866, "step": 1827 }, { "epoch": 0.89, "learning_rate": 4.119331520721331e-06, "loss": 0.288, "step": 1828 }, { "epoch": 0.89, "learning_rate": 4.118327151523905e-06, "loss": 0.296, "step": 1829 }, { "epoch": 0.89, "learning_rate": 4.117322332526652e-06, "loss": 0.3269, "step": 1830 }, { "epoch": 0.89, "learning_rate": 4.116317064008851e-06, "loss": 0.3241, "step": 1831 }, { "epoch": 0.89, "learning_rate": 4.115311346249907e-06, "loss": 0.3368, "step": 1832 }, { "epoch": 0.9, "learning_rate": 4.114305179529353e-06, "loss": 0.2781, "step": 1833 }, { "epoch": 0.9, "learning_rate": 4.113298564126842e-06, "loss": 0.2757, "step": 1834 }, { "epoch": 0.9, "learning_rate": 4.112291500322155e-06, "loss": 0.2703, "step": 1835 }, { "epoch": 0.9, "learning_rate": 4.111283988395196e-06, "loss": 0.3343, "step": 1836 }, { "epoch": 0.9, "learning_rate": 4.110276028625995e-06, "loss": 0.2883, "step": 1837 }, { "epoch": 0.9, "learning_rate": 4.109267621294704e-06, "loss": 0.3261, "step": 1838 }, { "epoch": 0.9, "learning_rate": 4.108258766681603e-06, "loss": 0.2982, "step": 1839 }, { "epoch": 0.9, "learning_rate": 4.107249465067092e-06, "loss": 0.3113, "step": 1840 }, { "epoch": 0.9, "learning_rate": 4.1062397167316995e-06, "loss": 0.28, "step": 1841 }, { "epoch": 0.9, "learning_rate": 4.105229521956075e-06, "loss": 0.3141, "step": 1842 }, { "epoch": 0.9, "learning_rate": 4.1042188810209935e-06, "loss": 0.3262, "step": 1843 }, { "epoch": 0.9, "learning_rate": 4.1032077942073546e-06, "loss": 0.2839, "step": 1844 }, { "epoch": 0.9, "learning_rate": 4.10219626179618e-06, "loss": 0.2817, "step": 1845 }, { "epoch": 0.9, "learning_rate": 4.101184284068616e-06, "loss": 0.3167, "step": 1846 }, { "epoch": 0.9, "learning_rate": 4.1001718613059335e-06, "loss": 0.3185, "step": 1847 }, { "epoch": 0.9, "learning_rate": 4.099158993789528e-06, "loss": 0.3232, "step": 1848 }, { "epoch": 0.9, "learning_rate": 4.098145681800914e-06, "loss": 0.3054, "step": 1849 }, { "epoch": 0.9, "learning_rate": 4.0971319256217355e-06, "loss": 0.2556, "step": 1850 }, { "epoch": 0.9, "learning_rate": 4.0961177255337556e-06, "loss": 0.2713, "step": 1851 }, { "epoch": 0.9, "learning_rate": 4.095103081818863e-06, "loss": 0.2841, "step": 1852 }, { "epoch": 0.9, "learning_rate": 4.094087994759068e-06, "loss": 0.3034, "step": 1853 }, { "epoch": 0.91, "learning_rate": 4.093072464636507e-06, "loss": 0.2964, "step": 1854 }, { "epoch": 0.91, "learning_rate": 4.092056491733435e-06, "loss": 0.2763, "step": 1855 }, { "epoch": 0.91, "learning_rate": 4.0910400763322355e-06, "loss": 0.2563, "step": 1856 }, { "epoch": 0.91, "learning_rate": 4.090023218715411e-06, "loss": 0.2938, "step": 1857 }, { "epoch": 0.91, "learning_rate": 4.0890059191655874e-06, "loss": 0.3074, "step": 1858 }, { "epoch": 0.91, "learning_rate": 4.087988177965515e-06, "loss": 0.2551, "step": 1859 }, { "epoch": 0.91, "learning_rate": 4.086969995398067e-06, "loss": 0.3146, "step": 1860 }, { "epoch": 0.91, "learning_rate": 4.085951371746236e-06, "loss": 0.3219, "step": 1861 }, { "epoch": 0.91, "learning_rate": 4.084932307293141e-06, "loss": 0.2774, "step": 1862 }, { "epoch": 0.91, "learning_rate": 4.083912802322023e-06, "loss": 0.2855, "step": 1863 }, { "epoch": 0.91, "learning_rate": 4.082892857116243e-06, "loss": 0.3045, "step": 1864 }, { "epoch": 0.91, "learning_rate": 4.081872471959285e-06, "loss": 0.2942, "step": 1865 }, { "epoch": 0.91, "learning_rate": 4.0808516471347585e-06, "loss": 0.2882, "step": 1866 }, { "epoch": 0.91, "learning_rate": 4.07983038292639e-06, "loss": 0.3437, "step": 1867 }, { "epoch": 0.91, "learning_rate": 4.078808679618034e-06, "loss": 0.2669, "step": 1868 }, { "epoch": 0.91, "learning_rate": 4.077786537493662e-06, "loss": 0.3427, "step": 1869 }, { "epoch": 0.91, "learning_rate": 4.076763956837368e-06, "loss": 0.2729, "step": 1870 }, { "epoch": 0.91, "learning_rate": 4.075740937933373e-06, "loss": 0.2657, "step": 1871 }, { "epoch": 0.91, "learning_rate": 4.074717481066014e-06, "loss": 0.2912, "step": 1872 }, { "epoch": 0.91, "learning_rate": 4.073693586519753e-06, "loss": 0.2982, "step": 1873 }, { "epoch": 0.92, "learning_rate": 4.07266925457917e-06, "loss": 0.3364, "step": 1874 }, { "epoch": 0.92, "learning_rate": 4.071644485528972e-06, "loss": 0.2935, "step": 1875 }, { "epoch": 0.92, "learning_rate": 4.070619279653982e-06, "loss": 0.3372, "step": 1876 }, { "epoch": 0.92, "learning_rate": 4.069593637239148e-06, "loss": 0.3007, "step": 1877 }, { "epoch": 0.92, "learning_rate": 4.0685675585695405e-06, "loss": 0.3115, "step": 1878 }, { "epoch": 0.92, "learning_rate": 4.067541043930346e-06, "loss": 0.2766, "step": 1879 }, { "epoch": 0.92, "learning_rate": 4.0665140936068755e-06, "loss": 0.2661, "step": 1880 }, { "epoch": 0.92, "learning_rate": 4.065486707884564e-06, "loss": 0.3301, "step": 1881 }, { "epoch": 0.92, "learning_rate": 4.06445888704896e-06, "loss": 0.2954, "step": 1882 }, { "epoch": 0.92, "learning_rate": 4.063430631385739e-06, "loss": 0.2912, "step": 1883 }, { "epoch": 0.92, "learning_rate": 4.062401941180697e-06, "loss": 0.3036, "step": 1884 }, { "epoch": 0.92, "learning_rate": 4.061372816719749e-06, "loss": 0.3246, "step": 1885 }, { "epoch": 0.92, "learning_rate": 4.060343258288929e-06, "loss": 0.2958, "step": 1886 }, { "epoch": 0.92, "learning_rate": 4.0593132661743955e-06, "loss": 0.2696, "step": 1887 }, { "epoch": 0.92, "learning_rate": 4.058282840662426e-06, "loss": 0.25, "step": 1888 }, { "epoch": 0.92, "learning_rate": 4.057251982039417e-06, "loss": 0.3499, "step": 1889 }, { "epoch": 0.92, "learning_rate": 4.056220690591888e-06, "loss": 0.3077, "step": 1890 }, { "epoch": 0.92, "learning_rate": 4.055188966606476e-06, "loss": 0.3264, "step": 1891 }, { "epoch": 0.92, "learning_rate": 4.054156810369941e-06, "loss": 0.2562, "step": 1892 }, { "epoch": 0.92, "learning_rate": 4.05312422216916e-06, "loss": 0.3345, "step": 1893 }, { "epoch": 0.92, "learning_rate": 4.052091202291134e-06, "loss": 0.3275, "step": 1894 }, { "epoch": 0.93, "learning_rate": 4.051057751022981e-06, "loss": 0.266, "step": 1895 }, { "epoch": 0.93, "learning_rate": 4.050023868651938e-06, "loss": 0.2831, "step": 1896 }, { "epoch": 0.93, "learning_rate": 4.048989555465366e-06, "loss": 0.3168, "step": 1897 }, { "epoch": 0.93, "learning_rate": 4.047954811750742e-06, "loss": 0.2936, "step": 1898 }, { "epoch": 0.93, "learning_rate": 4.046919637795664e-06, "loss": 0.3045, "step": 1899 }, { "epoch": 0.93, "learning_rate": 4.04588403388785e-06, "loss": 0.2884, "step": 1900 }, { "epoch": 0.93, "learning_rate": 4.044848000315137e-06, "loss": 0.3137, "step": 1901 }, { "epoch": 0.93, "learning_rate": 4.04381153736548e-06, "loss": 0.2969, "step": 1902 }, { "epoch": 0.93, "learning_rate": 4.042774645326957e-06, "loss": 0.2881, "step": 1903 }, { "epoch": 0.93, "learning_rate": 4.0417373244877614e-06, "loss": 0.2577, "step": 1904 }, { "epoch": 0.93, "learning_rate": 4.040699575136208e-06, "loss": 0.2977, "step": 1905 }, { "epoch": 0.93, "learning_rate": 4.03966139756073e-06, "loss": 0.2543, "step": 1906 }, { "epoch": 0.93, "learning_rate": 4.03862279204988e-06, "loss": 0.2662, "step": 1907 }, { "epoch": 0.93, "learning_rate": 4.0375837588923296e-06, "loss": 0.2699, "step": 1908 }, { "epoch": 0.93, "learning_rate": 4.0365442983768684e-06, "loss": 0.2474, "step": 1909 }, { "epoch": 0.93, "learning_rate": 4.035504410792405e-06, "loss": 0.2604, "step": 1910 }, { "epoch": 0.93, "learning_rate": 4.034464096427968e-06, "loss": 0.2888, "step": 1911 }, { "epoch": 0.93, "learning_rate": 4.033423355572702e-06, "loss": 0.2982, "step": 1912 }, { "epoch": 0.93, "learning_rate": 4.0323821885158735e-06, "loss": 0.2985, "step": 1913 }, { "epoch": 0.93, "learning_rate": 4.0313405955468655e-06, "loss": 0.3531, "step": 1914 }, { "epoch": 0.94, "learning_rate": 4.030298576955179e-06, "loss": 0.2669, "step": 1915 }, { "epoch": 0.94, "learning_rate": 4.029256133030434e-06, "loss": 0.3031, "step": 1916 }, { "epoch": 0.94, "learning_rate": 4.028213264062368e-06, "loss": 0.303, "step": 1917 }, { "epoch": 0.94, "learning_rate": 4.0271699703408396e-06, "loss": 0.2868, "step": 1918 }, { "epoch": 0.94, "learning_rate": 4.026126252155821e-06, "loss": 0.3203, "step": 1919 }, { "epoch": 0.94, "learning_rate": 4.0250821097974045e-06, "loss": 0.2896, "step": 1920 }, { "epoch": 0.94, "learning_rate": 4.024037543555802e-06, "loss": 0.2698, "step": 1921 }, { "epoch": 0.94, "learning_rate": 4.02299255372134e-06, "loss": 0.2796, "step": 1922 }, { "epoch": 0.94, "learning_rate": 4.0219471405844645e-06, "loss": 0.2906, "step": 1923 }, { "epoch": 0.94, "learning_rate": 4.0209013044357396e-06, "loss": 0.2947, "step": 1924 }, { "epoch": 0.94, "learning_rate": 4.019855045565845e-06, "loss": 0.2696, "step": 1925 }, { "epoch": 0.94, "learning_rate": 4.01880836426558e-06, "loss": 0.2801, "step": 1926 }, { "epoch": 0.94, "learning_rate": 4.017761260825861e-06, "loss": 0.2845, "step": 1927 }, { "epoch": 0.94, "learning_rate": 4.016713735537721e-06, "loss": 0.2711, "step": 1928 }, { "epoch": 0.94, "learning_rate": 4.015665788692309e-06, "loss": 0.2983, "step": 1929 }, { "epoch": 0.94, "learning_rate": 4.014617420580895e-06, "loss": 0.2903, "step": 1930 }, { "epoch": 0.94, "learning_rate": 4.013568631494861e-06, "loss": 0.2904, "step": 1931 }, { "epoch": 0.94, "learning_rate": 4.012519421725711e-06, "loss": 0.271, "step": 1932 }, { "epoch": 0.94, "learning_rate": 4.011469791565063e-06, "loss": 0.2213, "step": 1933 }, { "epoch": 0.94, "learning_rate": 4.010419741304652e-06, "loss": 0.3369, "step": 1934 }, { "epoch": 0.94, "learning_rate": 4.009369271236331e-06, "loss": 0.2647, "step": 1935 }, { "epoch": 0.95, "learning_rate": 4.008318381652068e-06, "loss": 0.2406, "step": 1936 }, { "epoch": 0.95, "learning_rate": 4.007267072843949e-06, "loss": 0.3048, "step": 1937 }, { "epoch": 0.95, "learning_rate": 4.0062153451041765e-06, "loss": 0.2716, "step": 1938 }, { "epoch": 0.95, "learning_rate": 4.005163198725068e-06, "loss": 0.2714, "step": 1939 }, { "epoch": 0.95, "learning_rate": 4.004110633999059e-06, "loss": 0.2876, "step": 1940 }, { "epoch": 0.95, "learning_rate": 4.003057651218701e-06, "loss": 0.2928, "step": 1941 }, { "epoch": 0.95, "learning_rate": 4.002004250676661e-06, "loss": 0.3099, "step": 1942 }, { "epoch": 0.95, "learning_rate": 4.000950432665721e-06, "loss": 0.3335, "step": 1943 }, { "epoch": 0.95, "learning_rate": 3.999896197478783e-06, "loss": 0.2778, "step": 1944 }, { "epoch": 0.95, "learning_rate": 3.998841545408861e-06, "loss": 0.2468, "step": 1945 }, { "epoch": 0.95, "learning_rate": 3.997786476749087e-06, "loss": 0.2546, "step": 1946 }, { "epoch": 0.95, "learning_rate": 3.9967309917927064e-06, "loss": 0.2527, "step": 1947 }, { "epoch": 0.95, "learning_rate": 3.995675090833085e-06, "loss": 0.2629, "step": 1948 }, { "epoch": 0.95, "learning_rate": 3.994618774163697e-06, "loss": 0.2937, "step": 1949 }, { "epoch": 0.95, "learning_rate": 3.99356204207814e-06, "loss": 0.2999, "step": 1950 }, { "epoch": 0.95, "learning_rate": 3.992504894870122e-06, "loss": 0.276, "step": 1951 }, { "epoch": 0.95, "learning_rate": 3.991447332833468e-06, "loss": 0.3162, "step": 1952 }, { "epoch": 0.95, "learning_rate": 3.990389356262118e-06, "loss": 0.2611, "step": 1953 }, { "epoch": 0.95, "learning_rate": 3.9893309654501265e-06, "loss": 0.3002, "step": 1954 }, { "epoch": 0.95, "learning_rate": 3.988272160691665e-06, "loss": 0.2957, "step": 1955 }, { "epoch": 0.96, "learning_rate": 3.987212942281019e-06, "loss": 0.2617, "step": 1956 }, { "epoch": 0.96, "learning_rate": 3.986153310512589e-06, "loss": 0.2913, "step": 1957 }, { "epoch": 0.96, "learning_rate": 3.98509326568089e-06, "loss": 0.2745, "step": 1958 }, { "epoch": 0.96, "learning_rate": 3.984032808080551e-06, "loss": 0.3019, "step": 1959 }, { "epoch": 0.96, "learning_rate": 3.98297193800632e-06, "loss": 0.2591, "step": 1960 }, { "epoch": 0.96, "learning_rate": 3.981910655753053e-06, "loss": 0.331, "step": 1961 }, { "epoch": 0.96, "learning_rate": 3.980848961615726e-06, "loss": 0.2701, "step": 1962 }, { "epoch": 0.96, "learning_rate": 3.9797868558894285e-06, "loss": 0.261, "step": 1963 }, { "epoch": 0.96, "learning_rate": 3.9787243388693596e-06, "loss": 0.2385, "step": 1964 }, { "epoch": 0.96, "learning_rate": 3.977661410850842e-06, "loss": 0.2874, "step": 1965 }, { "epoch": 0.96, "learning_rate": 3.976598072129302e-06, "loss": 0.3073, "step": 1966 }, { "epoch": 0.96, "learning_rate": 3.975534323000289e-06, "loss": 0.2541, "step": 1967 }, { "epoch": 0.96, "learning_rate": 3.974470163759461e-06, "loss": 0.2777, "step": 1968 }, { "epoch": 0.96, "learning_rate": 3.973405594702593e-06, "loss": 0.3482, "step": 1969 }, { "epoch": 0.96, "learning_rate": 3.972340616125571e-06, "loss": 0.2965, "step": 1970 }, { "epoch": 0.96, "learning_rate": 3.971275228324397e-06, "loss": 0.2884, "step": 1971 }, { "epoch": 0.96, "learning_rate": 3.970209431595187e-06, "loss": 0.2863, "step": 1972 }, { "epoch": 0.96, "learning_rate": 3.969143226234168e-06, "loss": 0.3713, "step": 1973 }, { "epoch": 0.96, "learning_rate": 3.968076612537684e-06, "loss": 0.3033, "step": 1974 }, { "epoch": 0.96, "learning_rate": 3.967009590802192e-06, "loss": 0.3109, "step": 1975 }, { "epoch": 0.96, "learning_rate": 3.965942161324259e-06, "loss": 0.2943, "step": 1976 }, { "epoch": 0.97, "learning_rate": 3.964874324400568e-06, "loss": 0.3014, "step": 1977 }, { "epoch": 0.97, "learning_rate": 3.963806080327915e-06, "loss": 0.3141, "step": 1978 }, { "epoch": 0.97, "learning_rate": 3.962737429403211e-06, "loss": 0.3356, "step": 1979 }, { "epoch": 0.97, "learning_rate": 3.961668371923476e-06, "loss": 0.2716, "step": 1980 }, { "epoch": 0.97, "learning_rate": 3.960598908185846e-06, "loss": 0.2646, "step": 1981 }, { "epoch": 0.97, "learning_rate": 3.959529038487569e-06, "loss": 0.2754, "step": 1982 }, { "epoch": 0.97, "learning_rate": 3.958458763126005e-06, "loss": 0.2532, "step": 1983 }, { "epoch": 0.97, "learning_rate": 3.957388082398629e-06, "loss": 0.3119, "step": 1984 }, { "epoch": 0.97, "learning_rate": 3.9563169966030264e-06, "loss": 0.3228, "step": 1985 }, { "epoch": 0.97, "learning_rate": 3.955245506036896e-06, "loss": 0.2822, "step": 1986 }, { "epoch": 0.97, "learning_rate": 3.954173610998051e-06, "loss": 0.2742, "step": 1987 }, { "epoch": 0.97, "learning_rate": 3.953101311784413e-06, "loss": 0.2596, "step": 1988 }, { "epoch": 0.97, "learning_rate": 3.952028608694019e-06, "loss": 0.2558, "step": 1989 }, { "epoch": 0.97, "learning_rate": 3.950955502025018e-06, "loss": 0.3142, "step": 1990 }, { "epoch": 0.97, "learning_rate": 3.949881992075669e-06, "loss": 0.2906, "step": 1991 }, { "epoch": 0.97, "learning_rate": 3.948808079144346e-06, "loss": 0.2826, "step": 1992 }, { "epoch": 0.97, "learning_rate": 3.947733763529533e-06, "loss": 0.3244, "step": 1993 }, { "epoch": 0.97, "learning_rate": 3.946659045529827e-06, "loss": 0.2939, "step": 1994 }, { "epoch": 0.97, "learning_rate": 3.945583925443937e-06, "loss": 0.3021, "step": 1995 }, { "epoch": 0.97, "learning_rate": 3.944508403570682e-06, "loss": 0.3078, "step": 1996 }, { "epoch": 0.98, "learning_rate": 3.943432480208994e-06, "loss": 0.2754, "step": 1997 }, { "epoch": 0.98, "learning_rate": 3.942356155657917e-06, "loss": 0.3166, "step": 1998 }, { "epoch": 0.98, "learning_rate": 3.941279430216606e-06, "loss": 0.2554, "step": 1999 }, { "epoch": 0.98, "learning_rate": 3.940202304184327e-06, "loss": 0.2808, "step": 2000 }, { "epoch": 0.98, "learning_rate": 3.939124777860458e-06, "loss": 0.3083, "step": 2001 }, { "epoch": 0.98, "learning_rate": 3.938046851544489e-06, "loss": 0.2828, "step": 2002 }, { "epoch": 0.98, "learning_rate": 3.936968525536018e-06, "loss": 0.3195, "step": 2003 }, { "epoch": 0.98, "learning_rate": 3.935889800134757e-06, "loss": 0.2932, "step": 2004 }, { "epoch": 0.98, "learning_rate": 3.934810675640529e-06, "loss": 0.2746, "step": 2005 }, { "epoch": 0.98, "learning_rate": 3.9337311523532674e-06, "loss": 0.3305, "step": 2006 }, { "epoch": 0.98, "learning_rate": 3.932651230573014e-06, "loss": 0.2978, "step": 2007 }, { "epoch": 0.98, "learning_rate": 3.931570910599927e-06, "loss": 0.3042, "step": 2008 }, { "epoch": 0.98, "learning_rate": 3.930490192734269e-06, "loss": 0.2795, "step": 2009 }, { "epoch": 0.98, "learning_rate": 3.929409077276417e-06, "loss": 0.2743, "step": 2010 }, { "epoch": 0.98, "learning_rate": 3.928327564526858e-06, "loss": 0.2692, "step": 2011 }, { "epoch": 0.98, "learning_rate": 3.927245654786188e-06, "loss": 0.2748, "step": 2012 }, { "epoch": 0.98, "learning_rate": 3.926163348355115e-06, "loss": 0.3227, "step": 2013 }, { "epoch": 0.98, "learning_rate": 3.925080645534457e-06, "loss": 0.2883, "step": 2014 }, { "epoch": 0.98, "learning_rate": 3.923997546625141e-06, "loss": 0.2675, "step": 2015 }, { "epoch": 0.98, "learning_rate": 3.9229140519282045e-06, "loss": 0.3109, "step": 2016 }, { "epoch": 0.98, "learning_rate": 3.921830161744798e-06, "loss": 0.2771, "step": 2017 }, { "epoch": 0.99, "learning_rate": 3.9207458763761745e-06, "loss": 0.2998, "step": 2018 }, { "epoch": 0.99, "learning_rate": 3.919661196123706e-06, "loss": 0.2976, "step": 2019 }, { "epoch": 0.99, "learning_rate": 3.918576121288867e-06, "loss": 0.2611, "step": 2020 }, { "epoch": 0.99, "learning_rate": 3.917490652173247e-06, "loss": 0.2861, "step": 2021 }, { "epoch": 0.99, "learning_rate": 3.91640478907854e-06, "loss": 0.2897, "step": 2022 }, { "epoch": 0.99, "learning_rate": 3.915318532306553e-06, "loss": 0.3079, "step": 2023 }, { "epoch": 0.99, "learning_rate": 3.914231882159203e-06, "loss": 0.2721, "step": 2024 }, { "epoch": 0.99, "learning_rate": 3.913144838938514e-06, "loss": 0.3027, "step": 2025 }, { "epoch": 0.99, "learning_rate": 3.91205740294662e-06, "loss": 0.2517, "step": 2026 }, { "epoch": 0.99, "learning_rate": 3.9109695744857635e-06, "loss": 0.3028, "step": 2027 }, { "epoch": 0.99, "learning_rate": 3.909881353858297e-06, "loss": 0.3066, "step": 2028 }, { "epoch": 0.99, "learning_rate": 3.9087927413666835e-06, "loss": 0.2923, "step": 2029 }, { "epoch": 0.99, "learning_rate": 3.907703737313492e-06, "loss": 0.2958, "step": 2030 }, { "epoch": 0.99, "learning_rate": 3.9066143420014016e-06, "loss": 0.304, "step": 2031 }, { "epoch": 0.99, "learning_rate": 3.9055245557332006e-06, "loss": 0.2479, "step": 2032 }, { "epoch": 0.99, "learning_rate": 3.904434378811785e-06, "loss": 0.2528, "step": 2033 }, { "epoch": 0.99, "learning_rate": 3.903343811540161e-06, "loss": 0.2826, "step": 2034 }, { "epoch": 0.99, "learning_rate": 3.902252854221441e-06, "loss": 0.2643, "step": 2035 }, { "epoch": 0.99, "learning_rate": 3.901161507158847e-06, "loss": 0.3002, "step": 2036 }, { "epoch": 0.99, "learning_rate": 3.900069770655711e-06, "loss": 0.2902, "step": 2037 }, { "epoch": 1.0, "learning_rate": 3.898977645015469e-06, "loss": 0.3173, "step": 2038 }, { "epoch": 1.0, "learning_rate": 3.8978851305416696e-06, "loss": 0.2884, "step": 2039 }, { "epoch": 1.0, "learning_rate": 3.896792227537968e-06, "loss": 0.2529, "step": 2040 }, { "epoch": 1.0, "learning_rate": 3.895698936308125e-06, "loss": 0.2833, "step": 2041 }, { "epoch": 1.0, "learning_rate": 3.894605257156013e-06, "loss": 0.2625, "step": 2042 }, { "epoch": 1.0, "learning_rate": 3.8935111903856094e-06, "loss": 0.3095, "step": 2043 }, { "epoch": 1.0, "learning_rate": 3.8924167363010016e-06, "loss": 0.2641, "step": 2044 }, { "epoch": 1.0, "learning_rate": 3.891321895206383e-06, "loss": 0.2537, "step": 2045 }, { "epoch": 1.0, "learning_rate": 3.890226667406054e-06, "loss": 0.2579, "step": 2046 }, { "epoch": 1.0, "learning_rate": 3.889131053204426e-06, "loss": 0.2768, "step": 2047 }, { "epoch": 1.0, "learning_rate": 3.888035052906013e-06, "loss": 0.272, "step": 2048 }, { "epoch": 1.0, "learning_rate": 3.88693866681544e-06, "loss": 0.2629, "step": 2049 }, { "epoch": 1.0, "learning_rate": 3.885841895237436e-06, "loss": 0.2932, "step": 2050 }, { "epoch": 1.0, "learning_rate": 3.884744738476842e-06, "loss": 0.3574, "step": 2051 }, { "epoch": 1.0, "learning_rate": 3.883647196838602e-06, "loss": 0.2566, "step": 2052 }, { "epoch": 1.0, "learning_rate": 3.8825492706277665e-06, "loss": 0.2662, "step": 2053 }, { "epoch": 1.0, "learning_rate": 3.881450960149496e-06, "loss": 0.3145, "step": 2054 }, { "epoch": 1.0, "learning_rate": 3.880352265709056e-06, "loss": 0.3009, "step": 2055 }, { "epoch": 1.0, "learning_rate": 3.879253187611818e-06, "loss": 0.2962, "step": 2056 }, { "epoch": 1.0, "learning_rate": 3.878153726163263e-06, "loss": 0.3036, "step": 2057 }, { "epoch": 1.0, "learning_rate": 3.877053881668974e-06, "loss": 0.3103, "step": 2058 }, { "epoch": 1.01, "learning_rate": 3.875953654434647e-06, "loss": 0.2492, "step": 2059 }, { "epoch": 1.01, "learning_rate": 3.874853044766076e-06, "loss": 0.3266, "step": 2060 }, { "epoch": 1.01, "learning_rate": 3.873752052969169e-06, "loss": 0.2852, "step": 2061 }, { "epoch": 1.01, "learning_rate": 3.872650679349938e-06, "loss": 0.272, "step": 2062 }, { "epoch": 1.01, "learning_rate": 3.871548924214495e-06, "loss": 0.282, "step": 2063 }, { "epoch": 1.01, "learning_rate": 3.8704467878690675e-06, "loss": 0.2946, "step": 2064 }, { "epoch": 1.01, "learning_rate": 3.8693442706199835e-06, "loss": 0.2577, "step": 2065 }, { "epoch": 1.01, "learning_rate": 3.8682413727736784e-06, "loss": 0.249, "step": 2066 }, { "epoch": 1.01, "learning_rate": 3.867138094636692e-06, "loss": 0.2629, "step": 2067 }, { "epoch": 1.01, "learning_rate": 3.866034436515671e-06, "loss": 0.2757, "step": 2068 }, { "epoch": 1.01, "learning_rate": 3.864930398717369e-06, "loss": 0.2602, "step": 2069 }, { "epoch": 1.01, "learning_rate": 3.863825981548641e-06, "loss": 0.2587, "step": 2070 }, { "epoch": 1.01, "learning_rate": 3.862721185316453e-06, "loss": 0.3508, "step": 2071 }, { "epoch": 1.01, "learning_rate": 3.86161601032787e-06, "loss": 0.2461, "step": 2072 }, { "epoch": 1.01, "learning_rate": 3.8605104568900685e-06, "loss": 0.3248, "step": 2073 }, { "epoch": 1.01, "learning_rate": 3.859404525310327e-06, "loss": 0.2763, "step": 2074 }, { "epoch": 1.01, "learning_rate": 3.858298215896028e-06, "loss": 0.3026, "step": 2075 }, { "epoch": 1.01, "learning_rate": 3.857191528954661e-06, "loss": 0.2894, "step": 2076 }, { "epoch": 1.01, "learning_rate": 3.856084464793822e-06, "loss": 0.2641, "step": 2077 }, { "epoch": 1.01, "learning_rate": 3.854977023721207e-06, "loss": 0.2807, "step": 2078 }, { "epoch": 1.02, "learning_rate": 3.853869206044621e-06, "loss": 0.291, "step": 2079 }, { "epoch": 1.02, "learning_rate": 3.852761012071972e-06, "loss": 0.3087, "step": 2080 }, { "epoch": 1.02, "learning_rate": 3.851652442111271e-06, "loss": 0.2929, "step": 2081 }, { "epoch": 1.02, "learning_rate": 3.850543496470639e-06, "loss": 0.2496, "step": 2082 }, { "epoch": 1.02, "learning_rate": 3.849434175458294e-06, "loss": 0.2681, "step": 2083 }, { "epoch": 1.02, "learning_rate": 3.8483244793825654e-06, "loss": 0.2701, "step": 2084 }, { "epoch": 1.02, "learning_rate": 3.84721440855188e-06, "loss": 0.2908, "step": 2085 }, { "epoch": 1.02, "learning_rate": 3.8461039632747745e-06, "loss": 0.2839, "step": 2086 }, { "epoch": 1.02, "learning_rate": 3.844993143859887e-06, "loss": 0.2797, "step": 2087 }, { "epoch": 1.02, "learning_rate": 3.843881950615961e-06, "loss": 0.2898, "step": 2088 }, { "epoch": 1.02, "learning_rate": 3.84277038385184e-06, "loss": 0.3077, "step": 2089 }, { "epoch": 1.02, "learning_rate": 3.841658443876477e-06, "loss": 0.2795, "step": 2090 }, { "epoch": 1.02, "learning_rate": 3.840546130998926e-06, "loss": 0.2708, "step": 2091 }, { "epoch": 1.02, "learning_rate": 3.839433445528342e-06, "loss": 0.2872, "step": 2092 }, { "epoch": 1.02, "learning_rate": 3.838320387773988e-06, "loss": 0.2881, "step": 2093 }, { "epoch": 1.02, "learning_rate": 3.8372069580452285e-06, "loss": 0.2938, "step": 2094 }, { "epoch": 1.02, "learning_rate": 3.8360931566515316e-06, "loss": 0.2952, "step": 2095 }, { "epoch": 1.02, "learning_rate": 3.834978983902469e-06, "loss": 0.2557, "step": 2096 }, { "epoch": 1.02, "learning_rate": 3.833864440107713e-06, "loss": 0.2706, "step": 2097 }, { "epoch": 1.02, "learning_rate": 3.832749525577044e-06, "loss": 0.2722, "step": 2098 }, { "epoch": 1.02, "learning_rate": 3.831634240620342e-06, "loss": 0.2915, "step": 2099 }, { "epoch": 1.03, "learning_rate": 3.83051858554759e-06, "loss": 0.2733, "step": 2100 }, { "epoch": 1.03, "learning_rate": 3.829402560668875e-06, "loss": 0.2531, "step": 2101 }, { "epoch": 1.03, "learning_rate": 3.828286166294386e-06, "loss": 0.3155, "step": 2102 }, { "epoch": 1.03, "learning_rate": 3.827169402734415e-06, "loss": 0.2678, "step": 2103 }, { "epoch": 1.03, "learning_rate": 3.826052270299356e-06, "loss": 0.2714, "step": 2104 }, { "epoch": 1.03, "learning_rate": 3.824934769299709e-06, "loss": 0.2946, "step": 2105 }, { "epoch": 1.03, "learning_rate": 3.82381690004607e-06, "loss": 0.2517, "step": 2106 }, { "epoch": 1.03, "learning_rate": 3.822698662849143e-06, "loss": 0.2582, "step": 2107 }, { "epoch": 1.03, "learning_rate": 3.821580058019732e-06, "loss": 0.2694, "step": 2108 }, { "epoch": 1.03, "learning_rate": 3.820461085868743e-06, "loss": 0.2706, "step": 2109 }, { "epoch": 1.03, "learning_rate": 3.819341746707184e-06, "loss": 0.2714, "step": 2110 }, { "epoch": 1.03, "learning_rate": 3.818222040846167e-06, "loss": 0.2568, "step": 2111 }, { "epoch": 1.03, "learning_rate": 3.817101968596902e-06, "loss": 0.3001, "step": 2112 }, { "epoch": 1.03, "learning_rate": 3.815981530270707e-06, "loss": 0.2764, "step": 2113 }, { "epoch": 1.03, "learning_rate": 3.814860726178994e-06, "loss": 0.2171, "step": 2114 }, { "epoch": 1.03, "learning_rate": 3.813739556633283e-06, "loss": 0.2824, "step": 2115 }, { "epoch": 1.03, "learning_rate": 3.8126180219451932e-06, "loss": 0.2328, "step": 2116 }, { "epoch": 1.03, "learning_rate": 3.8114961224264434e-06, "loss": 0.3161, "step": 2117 }, { "epoch": 1.03, "learning_rate": 3.8103738583888582e-06, "loss": 0.2542, "step": 2118 }, { "epoch": 1.03, "learning_rate": 3.8092512301443594e-06, "loss": 0.2939, "step": 2119 }, { "epoch": 1.04, "learning_rate": 3.8081282380049735e-06, "loss": 0.2878, "step": 2120 }, { "epoch": 1.04, "learning_rate": 3.807004882282824e-06, "loss": 0.2588, "step": 2121 }, { "epoch": 1.04, "learning_rate": 3.8058811632901387e-06, "loss": 0.2993, "step": 2122 }, { "epoch": 1.04, "learning_rate": 3.804757081339245e-06, "loss": 0.2843, "step": 2123 }, { "epoch": 1.04, "learning_rate": 3.8036326367425736e-06, "loss": 0.2856, "step": 2124 }, { "epoch": 1.04, "learning_rate": 3.802507829812652e-06, "loss": 0.2593, "step": 2125 }, { "epoch": 1.04, "learning_rate": 3.801382660862111e-06, "loss": 0.2611, "step": 2126 }, { "epoch": 1.04, "learning_rate": 3.8002571302036805e-06, "loss": 0.2632, "step": 2127 }, { "epoch": 1.04, "learning_rate": 3.799131238150194e-06, "loss": 0.2785, "step": 2128 }, { "epoch": 1.04, "learning_rate": 3.7980049850145805e-06, "loss": 0.2506, "step": 2129 }, { "epoch": 1.04, "learning_rate": 3.7968783711098754e-06, "loss": 0.2563, "step": 2130 }, { "epoch": 1.04, "learning_rate": 3.7957513967492087e-06, "loss": 0.2536, "step": 2131 }, { "epoch": 1.04, "learning_rate": 3.7946240622458135e-06, "loss": 0.289, "step": 2132 }, { "epoch": 1.04, "learning_rate": 3.793496367913024e-06, "loss": 0.2563, "step": 2133 }, { "epoch": 1.04, "learning_rate": 3.7923683140642707e-06, "loss": 0.2461, "step": 2134 }, { "epoch": 1.04, "learning_rate": 3.791239901013088e-06, "loss": 0.2721, "step": 2135 }, { "epoch": 1.04, "learning_rate": 3.7901111290731074e-06, "loss": 0.249, "step": 2136 }, { "epoch": 1.04, "learning_rate": 3.7889819985580623e-06, "loss": 0.2631, "step": 2137 }, { "epoch": 1.04, "learning_rate": 3.7878525097817832e-06, "loss": 0.2543, "step": 2138 }, { "epoch": 1.04, "learning_rate": 3.7867226630582016e-06, "loss": 0.2717, "step": 2139 }, { "epoch": 1.04, "learning_rate": 3.7855924587013502e-06, "loss": 0.2508, "step": 2140 }, { "epoch": 1.05, "learning_rate": 3.7844618970253577e-06, "loss": 0.2721, "step": 2141 }, { "epoch": 1.05, "learning_rate": 3.783330978344454e-06, "loss": 0.2644, "step": 2142 }, { "epoch": 1.05, "learning_rate": 3.7821997029729698e-06, "loss": 0.2224, "step": 2143 }, { "epoch": 1.05, "learning_rate": 3.78106807122533e-06, "loss": 0.2426, "step": 2144 }, { "epoch": 1.05, "learning_rate": 3.779936083416065e-06, "loss": 0.2738, "step": 2145 }, { "epoch": 1.05, "learning_rate": 3.7788037398597993e-06, "loss": 0.2612, "step": 2146 }, { "epoch": 1.05, "learning_rate": 3.777671040871257e-06, "loss": 0.2905, "step": 2147 }, { "epoch": 1.05, "learning_rate": 3.776537986765263e-06, "loss": 0.2547, "step": 2148 }, { "epoch": 1.05, "learning_rate": 3.7754045778567394e-06, "loss": 0.2475, "step": 2149 }, { "epoch": 1.05, "learning_rate": 3.774270814460708e-06, "loss": 0.2299, "step": 2150 }, { "epoch": 1.05, "learning_rate": 3.773136696892288e-06, "loss": 0.2544, "step": 2151 }, { "epoch": 1.05, "learning_rate": 3.772002225466696e-06, "loss": 0.2712, "step": 2152 }, { "epoch": 1.05, "learning_rate": 3.7708674004992498e-06, "loss": 0.2479, "step": 2153 }, { "epoch": 1.05, "learning_rate": 3.769732222305364e-06, "loss": 0.29, "step": 2154 }, { "epoch": 1.05, "learning_rate": 3.7685966912005518e-06, "loss": 0.2596, "step": 2155 }, { "epoch": 1.05, "learning_rate": 3.7674608075004226e-06, "loss": 0.2469, "step": 2156 }, { "epoch": 1.05, "learning_rate": 3.7663245715206865e-06, "loss": 0.2839, "step": 2157 }, { "epoch": 1.05, "learning_rate": 3.7651879835771506e-06, "loss": 0.2989, "step": 2158 }, { "epoch": 1.05, "learning_rate": 3.764051043985718e-06, "loss": 0.2633, "step": 2159 }, { "epoch": 1.05, "learning_rate": 3.7629137530623927e-06, "loss": 0.2392, "step": 2160 }, { "epoch": 1.06, "learning_rate": 3.761776111123274e-06, "loss": 0.2584, "step": 2161 }, { "epoch": 1.06, "learning_rate": 3.760638118484559e-06, "loss": 0.2525, "step": 2162 }, { "epoch": 1.06, "learning_rate": 3.759499775462544e-06, "loss": 0.2501, "step": 2163 }, { "epoch": 1.06, "learning_rate": 3.7583610823736207e-06, "loss": 0.2681, "step": 2164 }, { "epoch": 1.06, "learning_rate": 3.7572220395342785e-06, "loss": 0.2731, "step": 2165 }, { "epoch": 1.06, "learning_rate": 3.756082647261105e-06, "loss": 0.2365, "step": 2166 }, { "epoch": 1.06, "learning_rate": 3.754942905870784e-06, "loss": 0.2488, "step": 2167 }, { "epoch": 1.06, "learning_rate": 3.7538028156800977e-06, "loss": 0.2253, "step": 2168 }, { "epoch": 1.06, "learning_rate": 3.7526623770059223e-06, "loss": 0.308, "step": 2169 }, { "epoch": 1.06, "learning_rate": 3.7515215901652334e-06, "loss": 0.2361, "step": 2170 }, { "epoch": 1.06, "learning_rate": 3.7503804554751034e-06, "loss": 0.2153, "step": 2171 }, { "epoch": 1.06, "learning_rate": 3.7492389732526994e-06, "loss": 0.2659, "step": 2172 }, { "epoch": 1.06, "learning_rate": 3.748097143815288e-06, "loss": 0.242, "step": 2173 }, { "epoch": 1.06, "learning_rate": 3.7469549674802297e-06, "loss": 0.2644, "step": 2174 }, { "epoch": 1.06, "learning_rate": 3.745812444564982e-06, "loss": 0.2222, "step": 2175 }, { "epoch": 1.06, "learning_rate": 3.7446695753870993e-06, "loss": 0.2398, "step": 2176 }, { "epoch": 1.06, "learning_rate": 3.7435263602642325e-06, "loss": 0.25, "step": 2177 }, { "epoch": 1.06, "learning_rate": 3.7423827995141286e-06, "loss": 0.2795, "step": 2178 }, { "epoch": 1.06, "learning_rate": 3.7412388934546295e-06, "loss": 0.2332, "step": 2179 }, { "epoch": 1.06, "learning_rate": 3.7400946424036737e-06, "loss": 0.2487, "step": 2180 }, { "epoch": 1.06, "learning_rate": 3.7389500466792967e-06, "loss": 0.2928, "step": 2181 }, { "epoch": 1.07, "learning_rate": 3.737805106599628e-06, "loss": 0.2529, "step": 2182 }, { "epoch": 1.07, "learning_rate": 3.736659822482894e-06, "loss": 0.2463, "step": 2183 }, { "epoch": 1.07, "learning_rate": 3.735514194647416e-06, "loss": 0.2299, "step": 2184 }, { "epoch": 1.07, "learning_rate": 3.734368223411612e-06, "loss": 0.2467, "step": 2185 }, { "epoch": 1.07, "learning_rate": 3.733221909093994e-06, "loss": 0.2705, "step": 2186 }, { "epoch": 1.07, "learning_rate": 3.7320752520131686e-06, "loss": 0.2221, "step": 2187 }, { "epoch": 1.07, "learning_rate": 3.730928252487843e-06, "loss": 0.2375, "step": 2188 }, { "epoch": 1.07, "learning_rate": 3.7297809108368112e-06, "loss": 0.271, "step": 2189 }, { "epoch": 1.07, "learning_rate": 3.7286332273789694e-06, "loss": 0.2094, "step": 2190 }, { "epoch": 1.07, "learning_rate": 3.727485202433306e-06, "loss": 0.2688, "step": 2191 }, { "epoch": 1.07, "learning_rate": 3.726336836318904e-06, "loss": 0.2413, "step": 2192 }, { "epoch": 1.07, "learning_rate": 3.7251881293549406e-06, "loss": 0.2183, "step": 2193 }, { "epoch": 1.07, "learning_rate": 3.724039081860691e-06, "loss": 0.2173, "step": 2194 }, { "epoch": 1.07, "learning_rate": 3.7228896941555217e-06, "loss": 0.2377, "step": 2195 }, { "epoch": 1.07, "learning_rate": 3.7217399665588937e-06, "loss": 0.2301, "step": 2196 }, { "epoch": 1.07, "learning_rate": 3.7205898993903665e-06, "loss": 0.2338, "step": 2197 }, { "epoch": 1.07, "learning_rate": 3.719439492969589e-06, "loss": 0.2446, "step": 2198 }, { "epoch": 1.07, "learning_rate": 3.718288747616307e-06, "loss": 0.2098, "step": 2199 }, { "epoch": 1.07, "learning_rate": 3.7171376636503607e-06, "loss": 0.1772, "step": 2200 }, { "epoch": 1.07, "learning_rate": 3.7159862413916837e-06, "loss": 0.2732, "step": 2201 }, { "epoch": 1.08, "learning_rate": 3.7148344811603027e-06, "loss": 0.215, "step": 2202 }, { "epoch": 1.08, "learning_rate": 3.71368238327634e-06, "loss": 0.2694, "step": 2203 }, { "epoch": 1.08, "learning_rate": 3.7125299480600117e-06, "loss": 0.2346, "step": 2204 }, { "epoch": 1.08, "learning_rate": 3.711377175831626e-06, "loss": 0.2132, "step": 2205 }, { "epoch": 1.08, "learning_rate": 3.7102240669115865e-06, "loss": 0.2132, "step": 2206 }, { "epoch": 1.08, "learning_rate": 3.709070621620389e-06, "loss": 0.2553, "step": 2207 }, { "epoch": 1.08, "learning_rate": 3.707916840278625e-06, "loss": 0.2196, "step": 2208 }, { "epoch": 1.08, "learning_rate": 3.7067627232069757e-06, "loss": 0.2382, "step": 2209 }, { "epoch": 1.08, "learning_rate": 3.7056082707262197e-06, "loss": 0.2103, "step": 2210 }, { "epoch": 1.08, "learning_rate": 3.704453483157226e-06, "loss": 0.2456, "step": 2211 }, { "epoch": 1.08, "learning_rate": 3.7032983608209585e-06, "loss": 0.1812, "step": 2212 }, { "epoch": 1.08, "learning_rate": 3.7021429040384717e-06, "loss": 0.2276, "step": 2213 }, { "epoch": 1.08, "learning_rate": 3.700987113130916e-06, "loss": 0.2369, "step": 2214 }, { "epoch": 1.08, "learning_rate": 3.6998309884195336e-06, "loss": 0.2371, "step": 2215 }, { "epoch": 1.08, "learning_rate": 3.6986745302256585e-06, "loss": 0.2255, "step": 2216 }, { "epoch": 1.08, "learning_rate": 3.697517738870718e-06, "loss": 0.2406, "step": 2217 }, { "epoch": 1.08, "learning_rate": 3.696360614676232e-06, "loss": 0.2005, "step": 2218 }, { "epoch": 1.08, "learning_rate": 3.6952031579638144e-06, "loss": 0.2114, "step": 2219 }, { "epoch": 1.08, "learning_rate": 3.694045369055168e-06, "loss": 0.1916, "step": 2220 }, { "epoch": 1.08, "learning_rate": 3.6928872482720917e-06, "loss": 0.2342, "step": 2221 }, { "epoch": 1.08, "learning_rate": 3.691728795936475e-06, "loss": 0.2118, "step": 2222 }, { "epoch": 1.09, "learning_rate": 3.690570012370298e-06, "loss": 0.1994, "step": 2223 }, { "epoch": 1.09, "learning_rate": 3.689410897895636e-06, "loss": 0.2109, "step": 2224 }, { "epoch": 1.09, "learning_rate": 3.688251452834655e-06, "loss": 0.2382, "step": 2225 }, { "epoch": 1.09, "learning_rate": 3.6870916775096098e-06, "loss": 0.2253, "step": 2226 }, { "epoch": 1.09, "learning_rate": 3.6859315722428536e-06, "loss": 0.1987, "step": 2227 }, { "epoch": 1.09, "learning_rate": 3.684771137356824e-06, "loss": 0.2592, "step": 2228 }, { "epoch": 1.09, "learning_rate": 3.683610373174056e-06, "loss": 0.193, "step": 2229 }, { "epoch": 1.09, "learning_rate": 3.6824492800171733e-06, "loss": 0.2224, "step": 2230 }, { "epoch": 1.09, "learning_rate": 3.681287858208891e-06, "loss": 0.2071, "step": 2231 }, { "epoch": 1.09, "learning_rate": 3.680126108072017e-06, "loss": 0.1923, "step": 2232 }, { "epoch": 1.09, "learning_rate": 3.678964029929448e-06, "loss": 0.2222, "step": 2233 }, { "epoch": 1.09, "learning_rate": 3.677801624104176e-06, "loss": 0.2122, "step": 2234 }, { "epoch": 1.09, "learning_rate": 3.6766388909192784e-06, "loss": 0.2132, "step": 2235 }, { "epoch": 1.09, "learning_rate": 3.675475830697929e-06, "loss": 0.237, "step": 2236 }, { "epoch": 1.09, "learning_rate": 3.6743124437633893e-06, "loss": 0.2562, "step": 2237 }, { "epoch": 1.09, "learning_rate": 3.6731487304390112e-06, "loss": 0.1977, "step": 2238 }, { "epoch": 1.09, "learning_rate": 3.6719846910482417e-06, "loss": 0.1831, "step": 2239 }, { "epoch": 1.09, "learning_rate": 3.6708203259146135e-06, "loss": 0.2269, "step": 2240 }, { "epoch": 1.09, "learning_rate": 3.6696556353617507e-06, "loss": 0.2285, "step": 2241 }, { "epoch": 1.09, "learning_rate": 3.668490619713371e-06, "loss": 0.1961, "step": 2242 }, { "epoch": 1.1, "learning_rate": 3.667325279293278e-06, "loss": 0.1784, "step": 2243 }, { "epoch": 1.1, "learning_rate": 3.6661596144253698e-06, "loss": 0.1879, "step": 2244 }, { "epoch": 1.1, "learning_rate": 3.6649936254336323e-06, "loss": 0.2005, "step": 2245 }, { "epoch": 1.1, "learning_rate": 3.6638273126421407e-06, "loss": 0.1953, "step": 2246 }, { "epoch": 1.1, "learning_rate": 3.662660676375062e-06, "loss": 0.2087, "step": 2247 }, { "epoch": 1.1, "learning_rate": 3.6614937169566535e-06, "loss": 0.2077, "step": 2248 }, { "epoch": 1.1, "learning_rate": 3.6603264347112606e-06, "loss": 0.2161, "step": 2249 }, { "epoch": 1.1, "learning_rate": 3.6591588299633184e-06, "loss": 0.2386, "step": 2250 }, { "epoch": 1.1, "learning_rate": 3.657990903037354e-06, "loss": 0.2392, "step": 2251 }, { "epoch": 1.1, "learning_rate": 3.656822654257981e-06, "loss": 0.2234, "step": 2252 }, { "epoch": 1.1, "learning_rate": 3.6556540839499042e-06, "loss": 0.2195, "step": 2253 }, { "epoch": 1.1, "learning_rate": 3.654485192437919e-06, "loss": 0.2022, "step": 2254 }, { "epoch": 1.1, "learning_rate": 3.6533159800469063e-06, "loss": 0.1842, "step": 2255 }, { "epoch": 1.1, "learning_rate": 3.6521464471018385e-06, "loss": 0.2355, "step": 2256 }, { "epoch": 1.1, "learning_rate": 3.6509765939277797e-06, "loss": 0.2298, "step": 2257 }, { "epoch": 1.1, "learning_rate": 3.6498064208498775e-06, "loss": 0.1955, "step": 2258 }, { "epoch": 1.1, "learning_rate": 3.648635928193372e-06, "loss": 0.2415, "step": 2259 }, { "epoch": 1.1, "learning_rate": 3.647465116283592e-06, "loss": 0.22, "step": 2260 }, { "epoch": 1.1, "learning_rate": 3.6462939854459546e-06, "loss": 0.2025, "step": 2261 }, { "epoch": 1.1, "learning_rate": 3.645122536005964e-06, "loss": 0.2549, "step": 2262 }, { "epoch": 1.1, "learning_rate": 3.643950768289215e-06, "loss": 0.218, "step": 2263 }, { "epoch": 1.11, "learning_rate": 3.6427786826213906e-06, "loss": 0.1949, "step": 2264 }, { "epoch": 1.11, "learning_rate": 3.641606279328261e-06, "loss": 0.2378, "step": 2265 }, { "epoch": 1.11, "learning_rate": 3.6404335587356853e-06, "loss": 0.2194, "step": 2266 }, { "epoch": 1.11, "learning_rate": 3.6392605211696123e-06, "loss": 0.226, "step": 2267 }, { "epoch": 1.11, "learning_rate": 3.638087166956076e-06, "loss": 0.2232, "step": 2268 }, { "epoch": 1.11, "learning_rate": 3.6369134964212e-06, "loss": 0.2009, "step": 2269 }, { "epoch": 1.11, "learning_rate": 3.6357395098911962e-06, "loss": 0.2043, "step": 2270 }, { "epoch": 1.11, "learning_rate": 3.6345652076923643e-06, "loss": 0.2256, "step": 2271 }, { "epoch": 1.11, "learning_rate": 3.6333905901510914e-06, "loss": 0.2195, "step": 2272 }, { "epoch": 1.11, "learning_rate": 3.6322156575938506e-06, "loss": 0.2294, "step": 2273 }, { "epoch": 1.11, "learning_rate": 3.6310404103472046e-06, "loss": 0.1995, "step": 2274 }, { "epoch": 1.11, "learning_rate": 3.629864848737804e-06, "loss": 0.226, "step": 2275 }, { "epoch": 1.11, "learning_rate": 3.6286889730923857e-06, "loss": 0.1707, "step": 2276 }, { "epoch": 1.11, "learning_rate": 3.6275127837377728e-06, "loss": 0.1975, "step": 2277 }, { "epoch": 1.11, "learning_rate": 3.626336281000878e-06, "loss": 0.2062, "step": 2278 }, { "epoch": 1.11, "learning_rate": 3.6251594652086995e-06, "loss": 0.2068, "step": 2279 }, { "epoch": 1.11, "learning_rate": 3.623982336688323e-06, "loss": 0.1956, "step": 2280 }, { "epoch": 1.11, "learning_rate": 3.622804895766921e-06, "loss": 0.2164, "step": 2281 }, { "epoch": 1.11, "learning_rate": 3.621627142771753e-06, "loss": 0.2213, "step": 2282 }, { "epoch": 1.11, "learning_rate": 3.620449078030165e-06, "loss": 0.1948, "step": 2283 }, { "epoch": 1.12, "learning_rate": 3.6192707018695896e-06, "loss": 0.2315, "step": 2284 }, { "epoch": 1.12, "learning_rate": 3.6180920146175456e-06, "loss": 0.1908, "step": 2285 }, { "epoch": 1.12, "learning_rate": 3.61691301660164e-06, "loss": 0.2163, "step": 2286 }, { "epoch": 1.12, "learning_rate": 3.6157337081495647e-06, "loss": 0.2263, "step": 2287 }, { "epoch": 1.12, "learning_rate": 3.6145540895890973e-06, "loss": 0.2291, "step": 2288 }, { "epoch": 1.12, "learning_rate": 3.6133741612481033e-06, "loss": 0.2151, "step": 2289 }, { "epoch": 1.12, "learning_rate": 3.6121939234545323e-06, "loss": 0.2028, "step": 2290 }, { "epoch": 1.12, "learning_rate": 3.6110133765364226e-06, "loss": 0.2251, "step": 2291 }, { "epoch": 1.12, "learning_rate": 3.6098325208218955e-06, "loss": 0.2006, "step": 2292 }, { "epoch": 1.12, "learning_rate": 3.60865135663916e-06, "loss": 0.173, "step": 2293 }, { "epoch": 1.12, "learning_rate": 3.6074698843165114e-06, "loss": 0.2005, "step": 2294 }, { "epoch": 1.12, "learning_rate": 3.6062881041823274e-06, "loss": 0.2426, "step": 2295 }, { "epoch": 1.12, "learning_rate": 3.605106016565074e-06, "loss": 0.2225, "step": 2296 }, { "epoch": 1.12, "learning_rate": 3.603923621793304e-06, "loss": 0.2401, "step": 2297 }, { "epoch": 1.12, "learning_rate": 3.6027409201956515e-06, "loss": 0.2386, "step": 2298 }, { "epoch": 1.12, "learning_rate": 3.6015579121008386e-06, "loss": 0.2047, "step": 2299 }, { "epoch": 1.12, "learning_rate": 3.6003745978376723e-06, "loss": 0.2281, "step": 2300 }, { "epoch": 1.12, "learning_rate": 3.5991909777350454e-06, "loss": 0.2132, "step": 2301 }, { "epoch": 1.12, "learning_rate": 3.5980070521219324e-06, "loss": 0.2461, "step": 2302 }, { "epoch": 1.12, "learning_rate": 3.5968228213273957e-06, "loss": 0.206, "step": 2303 }, { "epoch": 1.12, "learning_rate": 3.595638285680584e-06, "loss": 0.2214, "step": 2304 }, { "epoch": 1.13, "learning_rate": 3.5944534455107254e-06, "loss": 0.229, "step": 2305 }, { "epoch": 1.13, "learning_rate": 3.593268301147139e-06, "loss": 0.24, "step": 2306 }, { "epoch": 1.13, "learning_rate": 3.592082852919223e-06, "loss": 0.208, "step": 2307 }, { "epoch": 1.13, "learning_rate": 3.5908971011564624e-06, "loss": 0.2239, "step": 2308 }, { "epoch": 1.13, "learning_rate": 3.589711046188428e-06, "loss": 0.1865, "step": 2309 }, { "epoch": 1.13, "learning_rate": 3.5885246883447723e-06, "loss": 0.2018, "step": 2310 }, { "epoch": 1.13, "learning_rate": 3.587338027955233e-06, "loss": 0.2087, "step": 2311 }, { "epoch": 1.13, "learning_rate": 3.5861510653496322e-06, "loss": 0.2343, "step": 2312 }, { "epoch": 1.13, "learning_rate": 3.584963800857876e-06, "loss": 0.1764, "step": 2313 }, { "epoch": 1.13, "learning_rate": 3.5837762348099535e-06, "loss": 0.2718, "step": 2314 }, { "epoch": 1.13, "learning_rate": 3.582588367535939e-06, "loss": 0.2203, "step": 2315 }, { "epoch": 1.13, "learning_rate": 3.5814001993659895e-06, "loss": 0.2167, "step": 2316 }, { "epoch": 1.13, "learning_rate": 3.580211730630345e-06, "loss": 0.191, "step": 2317 }, { "epoch": 1.13, "learning_rate": 3.579022961659332e-06, "loss": 0.2155, "step": 2318 }, { "epoch": 1.13, "learning_rate": 3.577833892783357e-06, "loss": 0.2105, "step": 2319 }, { "epoch": 1.13, "learning_rate": 3.57664452433291e-06, "loss": 0.1843, "step": 2320 }, { "epoch": 1.13, "learning_rate": 3.5754548566385684e-06, "loss": 0.1877, "step": 2321 }, { "epoch": 1.13, "learning_rate": 3.574264890030988e-06, "loss": 0.2179, "step": 2322 }, { "epoch": 1.13, "learning_rate": 3.5730746248409097e-06, "loss": 0.2402, "step": 2323 }, { "epoch": 1.13, "learning_rate": 3.571884061399158e-06, "loss": 0.2048, "step": 2324 }, { "epoch": 1.14, "learning_rate": 3.570693200036639e-06, "loss": 0.22, "step": 2325 }, { "epoch": 1.14, "learning_rate": 3.5695020410843424e-06, "loss": 0.2153, "step": 2326 }, { "epoch": 1.14, "learning_rate": 3.56831058487334e-06, "loss": 0.1982, "step": 2327 }, { "epoch": 1.14, "learning_rate": 3.567118831734787e-06, "loss": 0.2128, "step": 2328 }, { "epoch": 1.14, "learning_rate": 3.565926781999921e-06, "loss": 0.2251, "step": 2329 }, { "epoch": 1.14, "learning_rate": 3.564734436000061e-06, "loss": 0.2521, "step": 2330 }, { "epoch": 1.14, "learning_rate": 3.56354179406661e-06, "loss": 0.1896, "step": 2331 }, { "epoch": 1.14, "learning_rate": 3.5623488565310506e-06, "loss": 0.2035, "step": 2332 }, { "epoch": 1.14, "learning_rate": 3.5611556237249517e-06, "loss": 0.211, "step": 2333 }, { "epoch": 1.14, "learning_rate": 3.559962095979961e-06, "loss": 0.189, "step": 2334 }, { "epoch": 1.14, "learning_rate": 3.5587682736278074e-06, "loss": 0.2198, "step": 2335 }, { "epoch": 1.14, "learning_rate": 3.557574157000306e-06, "loss": 0.2355, "step": 2336 }, { "epoch": 1.14, "learning_rate": 3.556379746429349e-06, "loss": 0.1783, "step": 2337 }, { "epoch": 1.14, "learning_rate": 3.5551850422469135e-06, "loss": 0.181, "step": 2338 }, { "epoch": 1.14, "learning_rate": 3.5539900447850573e-06, "loss": 0.1865, "step": 2339 }, { "epoch": 1.14, "learning_rate": 3.552794754375918e-06, "loss": 0.2262, "step": 2340 }, { "epoch": 1.14, "learning_rate": 3.551599171351716e-06, "loss": 0.2156, "step": 2341 }, { "epoch": 1.14, "learning_rate": 3.5504032960447555e-06, "loss": 0.2321, "step": 2342 }, { "epoch": 1.14, "learning_rate": 3.549207128787418e-06, "loss": 0.1852, "step": 2343 }, { "epoch": 1.14, "learning_rate": 3.548010669912167e-06, "loss": 0.2066, "step": 2344 }, { "epoch": 1.15, "learning_rate": 3.5468139197515485e-06, "loss": 0.2338, "step": 2345 }, { "epoch": 1.15, "learning_rate": 3.5456168786381894e-06, "loss": 0.2277, "step": 2346 }, { "epoch": 1.15, "learning_rate": 3.5444195469047954e-06, "loss": 0.2155, "step": 2347 }, { "epoch": 1.15, "learning_rate": 3.5432219248841556e-06, "loss": 0.2153, "step": 2348 }, { "epoch": 1.15, "learning_rate": 3.542024012909138e-06, "loss": 0.2062, "step": 2349 }, { "epoch": 1.15, "learning_rate": 3.5408258113126904e-06, "loss": 0.2003, "step": 2350 }, { "epoch": 1.15, "learning_rate": 3.5396273204278448e-06, "loss": 0.202, "step": 2351 }, { "epoch": 1.15, "learning_rate": 3.5384285405877096e-06, "loss": 0.2559, "step": 2352 }, { "epoch": 1.15, "learning_rate": 3.537229472125475e-06, "loss": 0.213, "step": 2353 }, { "epoch": 1.15, "learning_rate": 3.5360301153744137e-06, "loss": 0.2078, "step": 2354 }, { "epoch": 1.15, "learning_rate": 3.534830470667874e-06, "loss": 0.2422, "step": 2355 }, { "epoch": 1.15, "learning_rate": 3.533630538339288e-06, "loss": 0.1882, "step": 2356 }, { "epoch": 1.15, "learning_rate": 3.532430318722165e-06, "loss": 0.1968, "step": 2357 }, { "epoch": 1.15, "learning_rate": 3.531229812150097e-06, "loss": 0.2222, "step": 2358 }, { "epoch": 1.15, "learning_rate": 3.530029018956753e-06, "loss": 0.217, "step": 2359 }, { "epoch": 1.15, "learning_rate": 3.528827939475884e-06, "loss": 0.2461, "step": 2360 }, { "epoch": 1.15, "learning_rate": 3.5276265740413195e-06, "loss": 0.2165, "step": 2361 }, { "epoch": 1.15, "learning_rate": 3.5264249229869672e-06, "loss": 0.2069, "step": 2362 }, { "epoch": 1.15, "learning_rate": 3.5252229866468167e-06, "loss": 0.1938, "step": 2363 }, { "epoch": 1.15, "learning_rate": 3.524020765354935e-06, "loss": 0.2014, "step": 2364 }, { "epoch": 1.15, "learning_rate": 3.5228182594454697e-06, "loss": 0.2259, "step": 2365 }, { "epoch": 1.16, "learning_rate": 3.521615469252646e-06, "loss": 0.2085, "step": 2366 }, { "epoch": 1.16, "learning_rate": 3.520412395110769e-06, "loss": 0.1676, "step": 2367 }, { "epoch": 1.16, "learning_rate": 3.519209037354222e-06, "loss": 0.213, "step": 2368 }, { "epoch": 1.16, "learning_rate": 3.5180053963174693e-06, "loss": 0.203, "step": 2369 }, { "epoch": 1.16, "learning_rate": 3.5168014723350517e-06, "loss": 0.2215, "step": 2370 }, { "epoch": 1.16, "learning_rate": 3.515597265741589e-06, "loss": 0.1991, "step": 2371 }, { "epoch": 1.16, "learning_rate": 3.51439277687178e-06, "loss": 0.2089, "step": 2372 }, { "epoch": 1.16, "learning_rate": 3.513188006060402e-06, "loss": 0.2113, "step": 2373 }, { "epoch": 1.16, "learning_rate": 3.5119829536423096e-06, "loss": 0.1946, "step": 2374 }, { "epoch": 1.16, "learning_rate": 3.5107776199524374e-06, "loss": 0.2018, "step": 2375 }, { "epoch": 1.16, "learning_rate": 3.5095720053257975e-06, "loss": 0.1843, "step": 2376 }, { "epoch": 1.16, "learning_rate": 3.5083661100974793e-06, "loss": 0.2035, "step": 2377 }, { "epoch": 1.16, "learning_rate": 3.507159934602651e-06, "loss": 0.2113, "step": 2378 }, { "epoch": 1.16, "learning_rate": 3.505953479176558e-06, "loss": 0.1823, "step": 2379 }, { "epoch": 1.16, "learning_rate": 3.5047467441545257e-06, "loss": 0.2123, "step": 2380 }, { "epoch": 1.16, "learning_rate": 3.5035397298719535e-06, "loss": 0.1997, "step": 2381 }, { "epoch": 1.16, "learning_rate": 3.502332436664322e-06, "loss": 0.2134, "step": 2382 }, { "epoch": 1.16, "learning_rate": 3.501124864867186e-06, "loss": 0.2181, "step": 2383 }, { "epoch": 1.16, "learning_rate": 3.499917014816181e-06, "loss": 0.1943, "step": 2384 }, { "epoch": 1.16, "learning_rate": 3.4987088868470176e-06, "loss": 0.2123, "step": 2385 }, { "epoch": 1.17, "learning_rate": 3.497500481295485e-06, "loss": 0.2378, "step": 2386 }, { "epoch": 1.17, "learning_rate": 3.4962917984974476e-06, "loss": 0.2371, "step": 2387 }, { "epoch": 1.17, "learning_rate": 3.4950828387888503e-06, "loss": 0.1964, "step": 2388 }, { "epoch": 1.17, "learning_rate": 3.4938736025057107e-06, "loss": 0.2149, "step": 2389 }, { "epoch": 1.17, "learning_rate": 3.492664089984127e-06, "loss": 0.1851, "step": 2390 }, { "epoch": 1.17, "learning_rate": 3.4914543015602713e-06, "loss": 0.1713, "step": 2391 }, { "epoch": 1.17, "learning_rate": 3.490244237570395e-06, "loss": 0.1973, "step": 2392 }, { "epoch": 1.17, "learning_rate": 3.489033898350823e-06, "loss": 0.2114, "step": 2393 }, { "epoch": 1.17, "learning_rate": 3.48782328423796e-06, "loss": 0.1722, "step": 2394 }, { "epoch": 1.17, "learning_rate": 3.486612395568285e-06, "loss": 0.2179, "step": 2395 }, { "epoch": 1.17, "learning_rate": 3.4854012326783543e-06, "loss": 0.2117, "step": 2396 }, { "epoch": 1.17, "learning_rate": 3.4841897959047993e-06, "loss": 0.2189, "step": 2397 }, { "epoch": 1.17, "learning_rate": 3.4829780855843286e-06, "loss": 0.2002, "step": 2398 }, { "epoch": 1.17, "learning_rate": 3.4817661020537264e-06, "loss": 0.1951, "step": 2399 }, { "epoch": 1.17, "learning_rate": 3.4805538456498534e-06, "loss": 0.1823, "step": 2400 }, { "epoch": 1.17, "learning_rate": 3.4793413167096453e-06, "loss": 0.1849, "step": 2401 }, { "epoch": 1.17, "learning_rate": 3.478128515570112e-06, "loss": 0.2049, "step": 2402 }, { "epoch": 1.17, "learning_rate": 3.4769154425683443e-06, "loss": 0.204, "step": 2403 }, { "epoch": 1.17, "learning_rate": 3.475702098041503e-06, "loss": 0.2261, "step": 2404 }, { "epoch": 1.17, "learning_rate": 3.4744884823268267e-06, "loss": 0.2254, "step": 2405 }, { "epoch": 1.17, "learning_rate": 3.4732745957616306e-06, "loss": 0.2667, "step": 2406 }, { "epoch": 1.18, "learning_rate": 3.4720604386833024e-06, "loss": 0.2274, "step": 2407 }, { "epoch": 1.18, "learning_rate": 3.4708460114293064e-06, "loss": 0.2003, "step": 2408 }, { "epoch": 1.18, "learning_rate": 3.4696313143371823e-06, "loss": 0.2223, "step": 2409 }, { "epoch": 1.18, "learning_rate": 3.468416347744546e-06, "loss": 0.1764, "step": 2410 }, { "epoch": 1.18, "learning_rate": 3.4672011119890837e-06, "loss": 0.175, "step": 2411 }, { "epoch": 1.18, "learning_rate": 3.4659856074085625e-06, "loss": 0.2035, "step": 2412 }, { "epoch": 1.18, "learning_rate": 3.4647698343408197e-06, "loss": 0.2134, "step": 2413 }, { "epoch": 1.18, "learning_rate": 3.463553793123768e-06, "loss": 0.1668, "step": 2414 }, { "epoch": 1.18, "learning_rate": 3.4623374840953982e-06, "loss": 0.1855, "step": 2415 }, { "epoch": 1.18, "learning_rate": 3.46112090759377e-06, "loss": 0.213, "step": 2416 }, { "epoch": 1.18, "learning_rate": 3.4599040639570216e-06, "loss": 0.2332, "step": 2417 }, { "epoch": 1.18, "learning_rate": 3.458686953523363e-06, "loss": 0.2199, "step": 2418 }, { "epoch": 1.18, "learning_rate": 3.45746957663108e-06, "loss": 0.1872, "step": 2419 }, { "epoch": 1.18, "learning_rate": 3.4562519336185313e-06, "loss": 0.1712, "step": 2420 }, { "epoch": 1.18, "learning_rate": 3.455034024824151e-06, "loss": 0.2204, "step": 2421 }, { "epoch": 1.18, "learning_rate": 3.453815850586446e-06, "loss": 0.1865, "step": 2422 }, { "epoch": 1.18, "learning_rate": 3.4525974112439957e-06, "loss": 0.1858, "step": 2423 }, { "epoch": 1.18, "learning_rate": 3.4513787071354567e-06, "loss": 0.2175, "step": 2424 }, { "epoch": 1.18, "learning_rate": 3.4501597385995556e-06, "loss": 0.2151, "step": 2425 }, { "epoch": 1.18, "learning_rate": 3.448940505975094e-06, "loss": 0.2153, "step": 2426 }, { "epoch": 1.19, "learning_rate": 3.447721009600949e-06, "loss": 0.2129, "step": 2427 }, { "epoch": 1.19, "learning_rate": 3.4465012498160664e-06, "loss": 0.1844, "step": 2428 }, { "epoch": 1.19, "learning_rate": 3.4452812269594683e-06, "loss": 0.2113, "step": 2429 }, { "epoch": 1.19, "learning_rate": 3.4440609413702502e-06, "loss": 0.2371, "step": 2430 }, { "epoch": 1.19, "learning_rate": 3.442840393387579e-06, "loss": 0.1856, "step": 2431 }, { "epoch": 1.19, "learning_rate": 3.441619583350696e-06, "loss": 0.2002, "step": 2432 }, { "epoch": 1.19, "learning_rate": 3.440398511598914e-06, "loss": 0.2116, "step": 2433 }, { "epoch": 1.19, "learning_rate": 3.4391771784716195e-06, "loss": 0.2021, "step": 2434 }, { "epoch": 1.19, "learning_rate": 3.4379555843082706e-06, "loss": 0.1995, "step": 2435 }, { "epoch": 1.19, "learning_rate": 3.4367337294484e-06, "loss": 0.21, "step": 2436 }, { "epoch": 1.19, "learning_rate": 3.435511614231611e-06, "loss": 0.2304, "step": 2437 }, { "epoch": 1.19, "learning_rate": 3.434289238997579e-06, "loss": 0.2142, "step": 2438 }, { "epoch": 1.19, "learning_rate": 3.4330666040860532e-06, "loss": 0.201, "step": 2439 }, { "epoch": 1.19, "learning_rate": 3.4318437098368545e-06, "loss": 0.2311, "step": 2440 }, { "epoch": 1.19, "learning_rate": 3.430620556589875e-06, "loss": 0.2161, "step": 2441 }, { "epoch": 1.19, "learning_rate": 3.4293971446850794e-06, "loss": 0.2133, "step": 2442 }, { "epoch": 1.19, "learning_rate": 3.428173474462505e-06, "loss": 0.1923, "step": 2443 }, { "epoch": 1.19, "learning_rate": 3.4269495462622593e-06, "loss": 0.1882, "step": 2444 }, { "epoch": 1.19, "learning_rate": 3.4257253604245233e-06, "loss": 0.2024, "step": 2445 }, { "epoch": 1.19, "learning_rate": 3.4245009172895483e-06, "loss": 0.2419, "step": 2446 }, { "epoch": 1.19, "learning_rate": 3.4232762171976576e-06, "loss": 0.1936, "step": 2447 }, { "epoch": 1.2, "learning_rate": 3.4220512604892454e-06, "loss": 0.2028, "step": 2448 }, { "epoch": 1.2, "learning_rate": 3.4208260475047783e-06, "loss": 0.1991, "step": 2449 }, { "epoch": 1.2, "learning_rate": 3.419600578584793e-06, "loss": 0.1801, "step": 2450 }, { "epoch": 1.2, "learning_rate": 3.418374854069898e-06, "loss": 0.2092, "step": 2451 }, { "epoch": 1.2, "learning_rate": 3.417148874300773e-06, "loss": 0.1865, "step": 2452 }, { "epoch": 1.2, "learning_rate": 3.415922639618168e-06, "loss": 0.1922, "step": 2453 }, { "epoch": 1.2, "learning_rate": 3.4146961503629035e-06, "loss": 0.2297, "step": 2454 }, { "epoch": 1.2, "learning_rate": 3.413469406875873e-06, "loss": 0.1917, "step": 2455 }, { "epoch": 1.2, "learning_rate": 3.4122424094980377e-06, "loss": 0.1816, "step": 2456 }, { "epoch": 1.2, "learning_rate": 3.411015158570431e-06, "loss": 0.2221, "step": 2457 }, { "epoch": 1.2, "learning_rate": 3.4097876544341567e-06, "loss": 0.2331, "step": 2458 }, { "epoch": 1.2, "learning_rate": 3.40855989743039e-06, "loss": 0.2233, "step": 2459 }, { "epoch": 1.2, "learning_rate": 3.407331887900373e-06, "loss": 0.2037, "step": 2460 }, { "epoch": 1.2, "learning_rate": 3.406103626185422e-06, "loss": 0.1877, "step": 2461 }, { "epoch": 1.2, "learning_rate": 3.40487511262692e-06, "loss": 0.2023, "step": 2462 }, { "epoch": 1.2, "learning_rate": 3.403646347566323e-06, "loss": 0.2061, "step": 2463 }, { "epoch": 1.2, "learning_rate": 3.4024173313451546e-06, "loss": 0.1726, "step": 2464 }, { "epoch": 1.2, "learning_rate": 3.401188064305009e-06, "loss": 0.1886, "step": 2465 }, { "epoch": 1.2, "learning_rate": 3.3999585467875507e-06, "loss": 0.2197, "step": 2466 }, { "epoch": 1.2, "learning_rate": 3.3987287791345136e-06, "loss": 0.2369, "step": 2467 }, { "epoch": 1.21, "learning_rate": 3.3974987616877e-06, "loss": 0.176, "step": 2468 }, { "epoch": 1.21, "learning_rate": 3.396268494788982e-06, "loss": 0.1842, "step": 2469 }, { "epoch": 1.21, "learning_rate": 3.3950379787803033e-06, "loss": 0.2132, "step": 2470 }, { "epoch": 1.21, "learning_rate": 3.3938072140036733e-06, "loss": 0.208, "step": 2471 }, { "epoch": 1.21, "learning_rate": 3.3925762008011733e-06, "loss": 0.2011, "step": 2472 }, { "epoch": 1.21, "learning_rate": 3.3913449395149512e-06, "loss": 0.1961, "step": 2473 }, { "epoch": 1.21, "learning_rate": 3.390113430487228e-06, "loss": 0.2162, "step": 2474 }, { "epoch": 1.21, "learning_rate": 3.388881674060288e-06, "loss": 0.2035, "step": 2475 }, { "epoch": 1.21, "learning_rate": 3.3876496705764885e-06, "loss": 0.1855, "step": 2476 }, { "epoch": 1.21, "learning_rate": 3.386417420378254e-06, "loss": 0.2176, "step": 2477 }, { "epoch": 1.21, "learning_rate": 3.385184923808077e-06, "loss": 0.1975, "step": 2478 }, { "epoch": 1.21, "learning_rate": 3.3839521812085202e-06, "loss": 0.2083, "step": 2479 }, { "epoch": 1.21, "learning_rate": 3.3827191929222126e-06, "loss": 0.1785, "step": 2480 }, { "epoch": 1.21, "learning_rate": 3.3814859592918525e-06, "loss": 0.1887, "step": 2481 }, { "epoch": 1.21, "learning_rate": 3.3802524806602068e-06, "loss": 0.2186, "step": 2482 }, { "epoch": 1.21, "learning_rate": 3.37901875737011e-06, "loss": 0.1615, "step": 2483 }, { "epoch": 1.21, "learning_rate": 3.3777847897644645e-06, "loss": 0.1967, "step": 2484 }, { "epoch": 1.21, "learning_rate": 3.3765505781862406e-06, "loss": 0.1908, "step": 2485 }, { "epoch": 1.21, "learning_rate": 3.375316122978477e-06, "loss": 0.1997, "step": 2486 }, { "epoch": 1.21, "learning_rate": 3.3740814244842796e-06, "loss": 0.2287, "step": 2487 }, { "epoch": 1.21, "learning_rate": 3.3728464830468217e-06, "loss": 0.2007, "step": 2488 }, { "epoch": 1.22, "learning_rate": 3.3716112990093454e-06, "loss": 0.2223, "step": 2489 }, { "epoch": 1.22, "learning_rate": 3.3703758727151576e-06, "loss": 0.1731, "step": 2490 }, { "epoch": 1.22, "learning_rate": 3.369140204507635e-06, "loss": 0.2011, "step": 2491 }, { "epoch": 1.22, "learning_rate": 3.367904294730222e-06, "loss": 0.186, "step": 2492 }, { "epoch": 1.22, "learning_rate": 3.366668143726426e-06, "loss": 0.2356, "step": 2493 }, { "epoch": 1.22, "learning_rate": 3.3654317518398273e-06, "loss": 0.1947, "step": 2494 }, { "epoch": 1.22, "learning_rate": 3.3641951194140688e-06, "loss": 0.1774, "step": 2495 }, { "epoch": 1.22, "learning_rate": 3.362958246792861e-06, "loss": 0.1812, "step": 2496 }, { "epoch": 1.22, "learning_rate": 3.361721134319983e-06, "loss": 0.1852, "step": 2497 }, { "epoch": 1.22, "learning_rate": 3.3604837823392787e-06, "loss": 0.174, "step": 2498 }, { "epoch": 1.22, "learning_rate": 3.3592461911946595e-06, "loss": 0.2124, "step": 2499 }, { "epoch": 1.22, "learning_rate": 3.3580083612301033e-06, "loss": 0.2474, "step": 2500 }, { "epoch": 1.22, "learning_rate": 3.3567702927896532e-06, "loss": 0.1563, "step": 2501 }, { "epoch": 1.22, "learning_rate": 3.3555319862174203e-06, "loss": 0.2321, "step": 2502 }, { "epoch": 1.22, "learning_rate": 3.3542934418575802e-06, "loss": 0.1766, "step": 2503 }, { "epoch": 1.22, "learning_rate": 3.353054660054376e-06, "loss": 0.1961, "step": 2504 }, { "epoch": 1.22, "learning_rate": 3.3518156411521162e-06, "loss": 0.2247, "step": 2505 }, { "epoch": 1.22, "learning_rate": 3.350576385495175e-06, "loss": 0.199, "step": 2506 }, { "epoch": 1.22, "learning_rate": 3.3493368934279936e-06, "loss": 0.2173, "step": 2507 }, { "epoch": 1.22, "learning_rate": 3.3480971652950757e-06, "loss": 0.2091, "step": 2508 }, { "epoch": 1.23, "learning_rate": 3.3468572014409956e-06, "loss": 0.1771, "step": 2509 }, { "epoch": 1.23, "learning_rate": 3.3456170022103884e-06, "loss": 0.1832, "step": 2510 }, { "epoch": 1.23, "learning_rate": 3.3443765679479573e-06, "loss": 0.2088, "step": 2511 }, { "epoch": 1.23, "learning_rate": 3.343135898998471e-06, "loss": 0.1951, "step": 2512 }, { "epoch": 1.23, "learning_rate": 3.3418949957067604e-06, "loss": 0.2142, "step": 2513 }, { "epoch": 1.23, "learning_rate": 3.340653858417725e-06, "loss": 0.1819, "step": 2514 }, { "epoch": 1.23, "learning_rate": 3.339412487476328e-06, "loss": 0.1818, "step": 2515 }, { "epoch": 1.23, "learning_rate": 3.3381708832275978e-06, "loss": 0.1875, "step": 2516 }, { "epoch": 1.23, "learning_rate": 3.336929046016627e-06, "loss": 0.191, "step": 2517 }, { "epoch": 1.23, "learning_rate": 3.335686976188573e-06, "loss": 0.2007, "step": 2518 }, { "epoch": 1.23, "learning_rate": 3.334444674088659e-06, "loss": 0.2482, "step": 2519 }, { "epoch": 1.23, "learning_rate": 3.3332021400621716e-06, "loss": 0.2058, "step": 2520 }, { "epoch": 1.23, "learning_rate": 3.3319593744544622e-06, "loss": 0.219, "step": 2521 }, { "epoch": 1.23, "learning_rate": 3.330716377610947e-06, "loss": 0.1881, "step": 2522 }, { "epoch": 1.23, "learning_rate": 3.3294731498771043e-06, "loss": 0.207, "step": 2523 }, { "epoch": 1.23, "learning_rate": 3.3282296915984813e-06, "loss": 0.1739, "step": 2524 }, { "epoch": 1.23, "learning_rate": 3.3269860031206842e-06, "loss": 0.2298, "step": 2525 }, { "epoch": 1.23, "learning_rate": 3.325742084789385e-06, "loss": 0.2085, "step": 2526 }, { "epoch": 1.23, "learning_rate": 3.324497936950322e-06, "loss": 0.18, "step": 2527 }, { "epoch": 1.23, "learning_rate": 3.3232535599492933e-06, "loss": 0.1852, "step": 2528 }, { "epoch": 1.23, "learning_rate": 3.322008954132163e-06, "loss": 0.2201, "step": 2529 }, { "epoch": 1.24, "learning_rate": 3.3207641198448586e-06, "loss": 0.2266, "step": 2530 }, { "epoch": 1.24, "learning_rate": 3.3195190574333714e-06, "loss": 0.2397, "step": 2531 }, { "epoch": 1.24, "learning_rate": 3.3182737672437535e-06, "loss": 0.1686, "step": 2532 }, { "epoch": 1.24, "learning_rate": 3.317028249622125e-06, "loss": 0.1883, "step": 2533 }, { "epoch": 1.24, "learning_rate": 3.3157825049146653e-06, "loss": 0.2167, "step": 2534 }, { "epoch": 1.24, "learning_rate": 3.314536533467618e-06, "loss": 0.2389, "step": 2535 }, { "epoch": 1.24, "learning_rate": 3.3132903356272905e-06, "loss": 0.2027, "step": 2536 }, { "epoch": 1.24, "learning_rate": 3.3120439117400526e-06, "loss": 0.1702, "step": 2537 }, { "epoch": 1.24, "learning_rate": 3.310797262152336e-06, "loss": 0.2065, "step": 2538 }, { "epoch": 1.24, "learning_rate": 3.3095503872106365e-06, "loss": 0.1577, "step": 2539 }, { "epoch": 1.24, "learning_rate": 3.3083032872615124e-06, "loss": 0.1685, "step": 2540 }, { "epoch": 1.24, "learning_rate": 3.3070559626515837e-06, "loss": 0.2154, "step": 2541 }, { "epoch": 1.24, "learning_rate": 3.305808413727534e-06, "loss": 0.1759, "step": 2542 }, { "epoch": 1.24, "learning_rate": 3.304560640836108e-06, "loss": 0.2056, "step": 2543 }, { "epoch": 1.24, "learning_rate": 3.3033126443241133e-06, "loss": 0.187, "step": 2544 }, { "epoch": 1.24, "learning_rate": 3.3020644245384193e-06, "loss": 0.1817, "step": 2545 }, { "epoch": 1.24, "learning_rate": 3.300815981825959e-06, "loss": 0.192, "step": 2546 }, { "epoch": 1.24, "learning_rate": 3.2995673165337237e-06, "loss": 0.2091, "step": 2547 }, { "epoch": 1.24, "learning_rate": 3.298318429008771e-06, "loss": 0.1922, "step": 2548 }, { "epoch": 1.24, "learning_rate": 3.2970693195982182e-06, "loss": 0.2478, "step": 2549 }, { "epoch": 1.25, "learning_rate": 3.2958199886492435e-06, "loss": 0.1637, "step": 2550 }, { "epoch": 1.25, "learning_rate": 3.2945704365090876e-06, "loss": 0.2397, "step": 2551 }, { "epoch": 1.25, "learning_rate": 3.2933206635250533e-06, "loss": 0.1827, "step": 2552 }, { "epoch": 1.25, "learning_rate": 3.2920706700445026e-06, "loss": 0.2001, "step": 2553 }, { "epoch": 1.25, "learning_rate": 3.2908204564148606e-06, "loss": 0.1945, "step": 2554 }, { "epoch": 1.25, "learning_rate": 3.289570022983614e-06, "loss": 0.2067, "step": 2555 }, { "epoch": 1.25, "learning_rate": 3.28831937009831e-06, "loss": 0.2127, "step": 2556 }, { "epoch": 1.25, "learning_rate": 3.2870684981065547e-06, "loss": 0.2115, "step": 2557 }, { "epoch": 1.25, "learning_rate": 3.28581740735602e-06, "loss": 0.1977, "step": 2558 }, { "epoch": 1.25, "learning_rate": 3.284566098194433e-06, "loss": 0.2336, "step": 2559 }, { "epoch": 1.25, "learning_rate": 3.2833145709695847e-06, "loss": 0.1701, "step": 2560 }, { "epoch": 1.25, "learning_rate": 3.2820628260293276e-06, "loss": 0.1741, "step": 2561 }, { "epoch": 1.25, "learning_rate": 3.280810863721571e-06, "loss": 0.2261, "step": 2562 }, { "epoch": 1.25, "learning_rate": 3.2795586843942874e-06, "loss": 0.183, "step": 2563 }, { "epoch": 1.25, "learning_rate": 3.278306288395511e-06, "loss": 0.1851, "step": 2564 }, { "epoch": 1.25, "learning_rate": 3.277053676073331e-06, "loss": 0.189, "step": 2565 }, { "epoch": 1.25, "learning_rate": 3.2758008477759028e-06, "loss": 0.2364, "step": 2566 }, { "epoch": 1.25, "learning_rate": 3.274547803851438e-06, "loss": 0.1788, "step": 2567 }, { "epoch": 1.25, "learning_rate": 3.2732945446482095e-06, "loss": 0.1795, "step": 2568 }, { "epoch": 1.25, "learning_rate": 3.272041070514549e-06, "loss": 0.2249, "step": 2569 }, { "epoch": 1.25, "learning_rate": 3.270787381798848e-06, "loss": 0.1914, "step": 2570 }, { "epoch": 1.26, "learning_rate": 3.2695334788495612e-06, "loss": 0.2185, "step": 2571 }, { "epoch": 1.26, "learning_rate": 3.2682793620151966e-06, "loss": 0.1931, "step": 2572 }, { "epoch": 1.26, "learning_rate": 3.267025031644327e-06, "loss": 0.2114, "step": 2573 }, { "epoch": 1.26, "learning_rate": 3.2657704880855823e-06, "loss": 0.205, "step": 2574 }, { "epoch": 1.26, "learning_rate": 3.2645157316876495e-06, "loss": 0.2091, "step": 2575 }, { "epoch": 1.26, "learning_rate": 3.2632607627992814e-06, "loss": 0.1969, "step": 2576 }, { "epoch": 1.26, "learning_rate": 3.2620055817692818e-06, "loss": 0.2147, "step": 2577 }, { "epoch": 1.26, "learning_rate": 3.2607501889465186e-06, "loss": 0.1836, "step": 2578 }, { "epoch": 1.26, "learning_rate": 3.2594945846799186e-06, "loss": 0.2067, "step": 2579 }, { "epoch": 1.26, "learning_rate": 3.258238769318464e-06, "loss": 0.1655, "step": 2580 }, { "epoch": 1.26, "learning_rate": 3.2569827432111984e-06, "loss": 0.218, "step": 2581 }, { "epoch": 1.26, "learning_rate": 3.255726506707223e-06, "loss": 0.1746, "step": 2582 }, { "epoch": 1.26, "learning_rate": 3.254470060155699e-06, "loss": 0.1769, "step": 2583 }, { "epoch": 1.26, "learning_rate": 3.253213403905842e-06, "loss": 0.2217, "step": 2584 }, { "epoch": 1.26, "learning_rate": 3.2519565383069313e-06, "loss": 0.1548, "step": 2585 }, { "epoch": 1.26, "learning_rate": 3.2506994637083007e-06, "loss": 0.196, "step": 2586 }, { "epoch": 1.26, "learning_rate": 3.249442180459341e-06, "loss": 0.184, "step": 2587 }, { "epoch": 1.26, "learning_rate": 3.248184688909507e-06, "loss": 0.1873, "step": 2588 }, { "epoch": 1.26, "learning_rate": 3.2469269894083056e-06, "loss": 0.2284, "step": 2589 }, { "epoch": 1.26, "learning_rate": 3.2456690823053016e-06, "loss": 0.1924, "step": 2590 }, { "epoch": 1.27, "learning_rate": 3.244410967950122e-06, "loss": 0.2184, "step": 2591 }, { "epoch": 1.27, "learning_rate": 3.243152646692446e-06, "loss": 0.1477, "step": 2592 }, { "epoch": 1.27, "learning_rate": 3.2418941188820153e-06, "loss": 0.2165, "step": 2593 }, { "epoch": 1.27, "learning_rate": 3.2406353848686263e-06, "loss": 0.1901, "step": 2594 }, { "epoch": 1.27, "learning_rate": 3.2393764450021314e-06, "loss": 0.1736, "step": 2595 }, { "epoch": 1.27, "learning_rate": 3.2381172996324432e-06, "loss": 0.1908, "step": 2596 }, { "epoch": 1.27, "learning_rate": 3.23685794910953e-06, "loss": 0.1878, "step": 2597 }, { "epoch": 1.27, "learning_rate": 3.235598393783418e-06, "loss": 0.1832, "step": 2598 }, { "epoch": 1.27, "learning_rate": 3.234338634004187e-06, "loss": 0.2121, "step": 2599 }, { "epoch": 1.27, "learning_rate": 3.2330786701219786e-06, "loss": 0.1887, "step": 2600 }, { "epoch": 1.27, "learning_rate": 3.2318185024869886e-06, "loss": 0.2069, "step": 2601 }, { "epoch": 1.27, "learning_rate": 3.2305581314494673e-06, "loss": 0.204, "step": 2602 }, { "epoch": 1.27, "learning_rate": 3.229297557359727e-06, "loss": 0.1879, "step": 2603 }, { "epoch": 1.27, "learning_rate": 3.2280367805681307e-06, "loss": 0.2266, "step": 2604 }, { "epoch": 1.27, "learning_rate": 3.2267758014251004e-06, "loss": 0.1993, "step": 2605 }, { "epoch": 1.27, "learning_rate": 3.2255146202811162e-06, "loss": 0.2093, "step": 2606 }, { "epoch": 1.27, "learning_rate": 3.22425323748671e-06, "loss": 0.1853, "step": 2607 }, { "epoch": 1.27, "learning_rate": 3.222991653392474e-06, "loss": 0.2097, "step": 2608 }, { "epoch": 1.27, "learning_rate": 3.221729868349053e-06, "loss": 0.1557, "step": 2609 }, { "epoch": 1.27, "learning_rate": 3.2204678827071494e-06, "loss": 0.2185, "step": 2610 }, { "epoch": 1.27, "learning_rate": 3.219205696817521e-06, "loss": 0.2011, "step": 2611 }, { "epoch": 1.28, "learning_rate": 3.217943311030982e-06, "loss": 0.199, "step": 2612 }, { "epoch": 1.28, "learning_rate": 3.216680725698401e-06, "loss": 0.2045, "step": 2613 }, { "epoch": 1.28, "learning_rate": 3.215417941170702e-06, "loss": 0.1584, "step": 2614 }, { "epoch": 1.28, "learning_rate": 3.2141549577988656e-06, "loss": 0.1988, "step": 2615 }, { "epoch": 1.28, "learning_rate": 3.212891775933927e-06, "loss": 0.1876, "step": 2616 }, { "epoch": 1.28, "learning_rate": 3.2116283959269756e-06, "loss": 0.1771, "step": 2617 }, { "epoch": 1.28, "learning_rate": 3.210364818129158e-06, "loss": 0.2195, "step": 2618 }, { "epoch": 1.28, "learning_rate": 3.2091010428916734e-06, "loss": 0.1875, "step": 2619 }, { "epoch": 1.28, "learning_rate": 3.2078370705657774e-06, "loss": 0.1946, "step": 2620 }, { "epoch": 1.28, "learning_rate": 3.2065729015027807e-06, "loss": 0.1948, "step": 2621 }, { "epoch": 1.28, "learning_rate": 3.2053085360540476e-06, "loss": 0.2372, "step": 2622 }, { "epoch": 1.28, "learning_rate": 3.2040439745709966e-06, "loss": 0.2127, "step": 2623 }, { "epoch": 1.28, "learning_rate": 3.202779217405102e-06, "loss": 0.189, "step": 2624 }, { "epoch": 1.28, "learning_rate": 3.2015142649078934e-06, "loss": 0.1861, "step": 2625 }, { "epoch": 1.28, "learning_rate": 3.200249117430951e-06, "loss": 0.2021, "step": 2626 }, { "epoch": 1.28, "learning_rate": 3.1989837753259123e-06, "loss": 0.195, "step": 2627 }, { "epoch": 1.28, "learning_rate": 3.197718238944468e-06, "loss": 0.226, "step": 2628 }, { "epoch": 1.28, "learning_rate": 3.1964525086383637e-06, "loss": 0.1759, "step": 2629 }, { "epoch": 1.28, "learning_rate": 3.195186584759396e-06, "loss": 0.206, "step": 2630 }, { "epoch": 1.28, "learning_rate": 3.19392046765942e-06, "loss": 0.1641, "step": 2631 }, { "epoch": 1.29, "learning_rate": 3.192654157690339e-06, "loss": 0.161, "step": 2632 }, { "epoch": 1.29, "learning_rate": 3.191387655204115e-06, "loss": 0.173, "step": 2633 }, { "epoch": 1.29, "learning_rate": 3.1901209605527605e-06, "loss": 0.1697, "step": 2634 }, { "epoch": 1.29, "learning_rate": 3.1888540740883423e-06, "loss": 0.2208, "step": 2635 }, { "epoch": 1.29, "learning_rate": 3.187586996162979e-06, "loss": 0.1915, "step": 2636 }, { "epoch": 1.29, "learning_rate": 3.1863197271288455e-06, "loss": 0.1827, "step": 2637 }, { "epoch": 1.29, "learning_rate": 3.1850522673381677e-06, "loss": 0.1787, "step": 2638 }, { "epoch": 1.29, "learning_rate": 3.1837846171432247e-06, "loss": 0.1586, "step": 2639 }, { "epoch": 1.29, "learning_rate": 3.182516776896349e-06, "loss": 0.1914, "step": 2640 }, { "epoch": 1.29, "learning_rate": 3.181248746949925e-06, "loss": 0.1892, "step": 2641 }, { "epoch": 1.29, "learning_rate": 3.179980527656391e-06, "loss": 0.1747, "step": 2642 }, { "epoch": 1.29, "learning_rate": 3.178712119368238e-06, "loss": 0.2205, "step": 2643 }, { "epoch": 1.29, "learning_rate": 3.177443522438008e-06, "loss": 0.2375, "step": 2644 }, { "epoch": 1.29, "learning_rate": 3.1761747372182964e-06, "loss": 0.2126, "step": 2645 }, { "epoch": 1.29, "learning_rate": 3.174905764061752e-06, "loss": 0.1801, "step": 2646 }, { "epoch": 1.29, "learning_rate": 3.1736366033210737e-06, "loss": 0.1907, "step": 2647 }, { "epoch": 1.29, "learning_rate": 3.172367255349014e-06, "loss": 0.1894, "step": 2648 }, { "epoch": 1.29, "learning_rate": 3.171097720498376e-06, "loss": 0.1975, "step": 2649 }, { "epoch": 1.29, "learning_rate": 3.169827999122018e-06, "loss": 0.1722, "step": 2650 }, { "epoch": 1.29, "learning_rate": 3.168558091572845e-06, "loss": 0.1887, "step": 2651 }, { "epoch": 1.29, "learning_rate": 3.1672879982038194e-06, "loss": 0.1548, "step": 2652 }, { "epoch": 1.3, "learning_rate": 3.1660177193679514e-06, "loss": 0.2115, "step": 2653 }, { "epoch": 1.3, "learning_rate": 3.164747255418303e-06, "loss": 0.1755, "step": 2654 }, { "epoch": 1.3, "learning_rate": 3.1634766067079903e-06, "loss": 0.1931, "step": 2655 }, { "epoch": 1.3, "learning_rate": 3.1622057735901775e-06, "loss": 0.1789, "step": 2656 }, { "epoch": 1.3, "learning_rate": 3.1609347564180815e-06, "loss": 0.2088, "step": 2657 }, { "epoch": 1.3, "learning_rate": 3.1596635555449717e-06, "loss": 0.1874, "step": 2658 }, { "epoch": 1.3, "learning_rate": 3.158392171324166e-06, "loss": 0.1754, "step": 2659 }, { "epoch": 1.3, "learning_rate": 3.1571206041090348e-06, "loss": 0.1927, "step": 2660 }, { "epoch": 1.3, "learning_rate": 3.1558488542529993e-06, "loss": 0.212, "step": 2661 }, { "epoch": 1.3, "learning_rate": 3.1545769221095313e-06, "loss": 0.1855, "step": 2662 }, { "epoch": 1.3, "learning_rate": 3.153304808032152e-06, "loss": 0.189, "step": 2663 }, { "epoch": 1.3, "learning_rate": 3.1520325123744354e-06, "loss": 0.1962, "step": 2664 }, { "epoch": 1.3, "learning_rate": 3.150760035490006e-06, "loss": 0.1885, "step": 2665 }, { "epoch": 1.3, "learning_rate": 3.149487377732534e-06, "loss": 0.195, "step": 2666 }, { "epoch": 1.3, "learning_rate": 3.148214539455748e-06, "loss": 0.1882, "step": 2667 }, { "epoch": 1.3, "learning_rate": 3.1469415210134196e-06, "loss": 0.1999, "step": 2668 }, { "epoch": 1.3, "learning_rate": 3.145668322759372e-06, "loss": 0.1817, "step": 2669 }, { "epoch": 1.3, "learning_rate": 3.144394945047483e-06, "loss": 0.1734, "step": 2670 }, { "epoch": 1.3, "learning_rate": 3.1431213882316734e-06, "loss": 0.1804, "step": 2671 }, { "epoch": 1.3, "learning_rate": 3.1418476526659193e-06, "loss": 0.2391, "step": 2672 }, { "epoch": 1.31, "learning_rate": 3.1405737387042434e-06, "loss": 0.2182, "step": 2673 }, { "epoch": 1.31, "learning_rate": 3.1392996467007185e-06, "loss": 0.2164, "step": 2674 }, { "epoch": 1.31, "learning_rate": 3.1380253770094676e-06, "loss": 0.2251, "step": 2675 }, { "epoch": 1.31, "learning_rate": 3.136750929984663e-06, "loss": 0.2008, "step": 2676 }, { "epoch": 1.31, "learning_rate": 3.1354763059805273e-06, "loss": 0.1939, "step": 2677 }, { "epoch": 1.31, "learning_rate": 3.1342015053513288e-06, "loss": 0.1556, "step": 2678 }, { "epoch": 1.31, "learning_rate": 3.1329265284513883e-06, "loss": 0.1753, "step": 2679 }, { "epoch": 1.31, "learning_rate": 3.1316513756350747e-06, "loss": 0.2184, "step": 2680 }, { "epoch": 1.31, "learning_rate": 3.130376047256804e-06, "loss": 0.1767, "step": 2681 }, { "epoch": 1.31, "learning_rate": 3.1291005436710447e-06, "loss": 0.2173, "step": 2682 }, { "epoch": 1.31, "learning_rate": 3.1278248652323113e-06, "loss": 0.1967, "step": 2683 }, { "epoch": 1.31, "learning_rate": 3.126549012295165e-06, "loss": 0.1859, "step": 2684 }, { "epoch": 1.31, "learning_rate": 3.1252729852142215e-06, "loss": 0.1732, "step": 2685 }, { "epoch": 1.31, "learning_rate": 3.1239967843441387e-06, "loss": 0.2054, "step": 2686 }, { "epoch": 1.31, "learning_rate": 3.122720410039627e-06, "loss": 0.2201, "step": 2687 }, { "epoch": 1.31, "learning_rate": 3.121443862655443e-06, "loss": 0.2037, "step": 2688 }, { "epoch": 1.31, "learning_rate": 3.1201671425463907e-06, "loss": 0.1785, "step": 2689 }, { "epoch": 1.31, "learning_rate": 3.1188902500673246e-06, "loss": 0.198, "step": 2690 }, { "epoch": 1.31, "learning_rate": 3.1176131855731456e-06, "loss": 0.1616, "step": 2691 }, { "epoch": 1.31, "learning_rate": 3.1163359494188027e-06, "loss": 0.1891, "step": 2692 }, { "epoch": 1.31, "learning_rate": 3.1150585419592914e-06, "loss": 0.2221, "step": 2693 }, { "epoch": 1.32, "learning_rate": 3.113780963549657e-06, "loss": 0.2055, "step": 2694 }, { "epoch": 1.32, "learning_rate": 3.1125032145449906e-06, "loss": 0.2319, "step": 2695 }, { "epoch": 1.32, "learning_rate": 3.111225295300431e-06, "loss": 0.184, "step": 2696 }, { "epoch": 1.32, "learning_rate": 3.1099472061711667e-06, "loss": 0.172, "step": 2697 }, { "epoch": 1.32, "learning_rate": 3.108668947512429e-06, "loss": 0.1691, "step": 2698 }, { "epoch": 1.32, "learning_rate": 3.107390519679499e-06, "loss": 0.1689, "step": 2699 }, { "epoch": 1.32, "learning_rate": 3.106111923027706e-06, "loss": 0.1631, "step": 2700 }, { "epoch": 1.32, "learning_rate": 3.104833157912423e-06, "loss": 0.2248, "step": 2701 }, { "epoch": 1.32, "learning_rate": 3.103554224689072e-06, "loss": 0.2111, "step": 2702 }, { "epoch": 1.32, "learning_rate": 3.102275123713122e-06, "loss": 0.2015, "step": 2703 }, { "epoch": 1.32, "learning_rate": 3.1009958553400875e-06, "loss": 0.1843, "step": 2704 }, { "epoch": 1.32, "learning_rate": 3.0997164199255297e-06, "loss": 0.1969, "step": 2705 }, { "epoch": 1.32, "learning_rate": 3.098436817825057e-06, "loss": 0.1993, "step": 2706 }, { "epoch": 1.32, "learning_rate": 3.0971570493943228e-06, "loss": 0.1523, "step": 2707 }, { "epoch": 1.32, "learning_rate": 3.0958771149890275e-06, "loss": 0.1903, "step": 2708 }, { "epoch": 1.32, "learning_rate": 3.0945970149649183e-06, "loss": 0.1963, "step": 2709 }, { "epoch": 1.32, "learning_rate": 3.093316749677788e-06, "loss": 0.2038, "step": 2710 }, { "epoch": 1.32, "learning_rate": 3.0920363194834736e-06, "loss": 0.179, "step": 2711 }, { "epoch": 1.32, "learning_rate": 3.0907557247378606e-06, "loss": 0.2146, "step": 2712 }, { "epoch": 1.32, "learning_rate": 3.089474965796879e-06, "loss": 0.178, "step": 2713 }, { "epoch": 1.33, "learning_rate": 3.088194043016504e-06, "loss": 0.1852, "step": 2714 }, { "epoch": 1.33, "learning_rate": 3.0869129567527578e-06, "loss": 0.1701, "step": 2715 }, { "epoch": 1.33, "learning_rate": 3.0856317073617064e-06, "loss": 0.2023, "step": 2716 }, { "epoch": 1.33, "learning_rate": 3.0843502951994613e-06, "loss": 0.2233, "step": 2717 }, { "epoch": 1.33, "learning_rate": 3.083068720622181e-06, "loss": 0.1755, "step": 2718 }, { "epoch": 1.33, "learning_rate": 3.0817869839860677e-06, "loss": 0.1829, "step": 2719 }, { "epoch": 1.33, "learning_rate": 3.0805050856473675e-06, "loss": 0.1708, "step": 2720 }, { "epoch": 1.33, "learning_rate": 3.079223025962374e-06, "loss": 0.2083, "step": 2721 }, { "epoch": 1.33, "learning_rate": 3.077940805287425e-06, "loss": 0.1686, "step": 2722 }, { "epoch": 1.33, "learning_rate": 3.0766584239789005e-06, "loss": 0.1861, "step": 2723 }, { "epoch": 1.33, "learning_rate": 3.0753758823932285e-06, "loss": 0.1588, "step": 2724 }, { "epoch": 1.33, "learning_rate": 3.0740931808868805e-06, "loss": 0.1596, "step": 2725 }, { "epoch": 1.33, "learning_rate": 3.072810319816371e-06, "loss": 0.2358, "step": 2726 }, { "epoch": 1.33, "learning_rate": 3.0715272995382604e-06, "loss": 0.1766, "step": 2727 }, { "epoch": 1.33, "learning_rate": 3.0702441204091536e-06, "loss": 0.179, "step": 2728 }, { "epoch": 1.33, "learning_rate": 3.068960782785699e-06, "loss": 0.1994, "step": 2729 }, { "epoch": 1.33, "learning_rate": 3.0676772870245874e-06, "loss": 0.1661, "step": 2730 }, { "epoch": 1.33, "learning_rate": 3.066393633482556e-06, "loss": 0.2004, "step": 2731 }, { "epoch": 1.33, "learning_rate": 3.065109822516386e-06, "loss": 0.1948, "step": 2732 }, { "epoch": 1.33, "learning_rate": 3.0638258544829004e-06, "loss": 0.1898, "step": 2733 }, { "epoch": 1.33, "learning_rate": 3.0625417297389676e-06, "loss": 0.1741, "step": 2734 }, { "epoch": 1.34, "learning_rate": 3.061257448641498e-06, "loss": 0.2037, "step": 2735 }, { "epoch": 1.34, "learning_rate": 3.0599730115474467e-06, "loss": 0.2073, "step": 2736 }, { "epoch": 1.34, "learning_rate": 3.058688418813812e-06, "loss": 0.1519, "step": 2737 }, { "epoch": 1.34, "learning_rate": 3.057403670797634e-06, "loss": 0.2146, "step": 2738 }, { "epoch": 1.34, "learning_rate": 3.056118767855998e-06, "loss": 0.2405, "step": 2739 }, { "epoch": 1.34, "learning_rate": 3.0548337103460325e-06, "loss": 0.1577, "step": 2740 }, { "epoch": 1.34, "learning_rate": 3.0535484986249058e-06, "loss": 0.2064, "step": 2741 }, { "epoch": 1.34, "learning_rate": 3.0522631330498326e-06, "loss": 0.184, "step": 2742 }, { "epoch": 1.34, "learning_rate": 3.050977613978068e-06, "loss": 0.157, "step": 2743 }, { "epoch": 1.34, "learning_rate": 3.049691941766913e-06, "loss": 0.2709, "step": 2744 }, { "epoch": 1.34, "learning_rate": 3.0484061167737062e-06, "loss": 0.1952, "step": 2745 }, { "epoch": 1.34, "learning_rate": 3.0471201393558325e-06, "loss": 0.176, "step": 2746 }, { "epoch": 1.34, "learning_rate": 3.0458340098707183e-06, "loss": 0.1907, "step": 2747 }, { "epoch": 1.34, "learning_rate": 3.0445477286758312e-06, "loss": 0.2111, "step": 2748 }, { "epoch": 1.34, "learning_rate": 3.0432612961286833e-06, "loss": 0.1851, "step": 2749 }, { "epoch": 1.34, "learning_rate": 3.0419747125868253e-06, "loss": 0.1906, "step": 2750 }, { "epoch": 1.34, "learning_rate": 3.040687978407853e-06, "loss": 0.1988, "step": 2751 }, { "epoch": 1.34, "learning_rate": 3.0394010939494034e-06, "loss": 0.1945, "step": 2752 }, { "epoch": 1.34, "learning_rate": 3.0381140595691532e-06, "loss": 0.1804, "step": 2753 }, { "epoch": 1.34, "learning_rate": 3.0368268756248232e-06, "loss": 0.1784, "step": 2754 }, { "epoch": 1.35, "learning_rate": 3.035539542474174e-06, "loss": 0.1959, "step": 2755 }, { "epoch": 1.35, "learning_rate": 3.034252060475011e-06, "loss": 0.1833, "step": 2756 }, { "epoch": 1.35, "learning_rate": 3.032964429985175e-06, "loss": 0.1882, "step": 2757 }, { "epoch": 1.35, "learning_rate": 3.0316766513625545e-06, "loss": 0.163, "step": 2758 }, { "epoch": 1.35, "learning_rate": 3.0303887249650755e-06, "loss": 0.1861, "step": 2759 }, { "epoch": 1.35, "learning_rate": 3.0291006511507035e-06, "loss": 0.1772, "step": 2760 }, { "epoch": 1.35, "learning_rate": 3.0278124302774516e-06, "loss": 0.1761, "step": 2761 }, { "epoch": 1.35, "learning_rate": 3.0265240627033664e-06, "loss": 0.2182, "step": 2762 }, { "epoch": 1.35, "learning_rate": 3.0252355487865388e-06, "loss": 0.1967, "step": 2763 }, { "epoch": 1.35, "learning_rate": 3.023946888885101e-06, "loss": 0.1591, "step": 2764 }, { "epoch": 1.35, "learning_rate": 3.0226580833572234e-06, "loss": 0.1927, "step": 2765 }, { "epoch": 1.35, "learning_rate": 3.021369132561119e-06, "loss": 0.2029, "step": 2766 }, { "epoch": 1.35, "learning_rate": 3.020080036855041e-06, "loss": 0.1715, "step": 2767 }, { "epoch": 1.35, "learning_rate": 3.01879079659728e-06, "loss": 0.2069, "step": 2768 }, { "epoch": 1.35, "learning_rate": 3.017501412146171e-06, "loss": 0.1807, "step": 2769 }, { "epoch": 1.35, "learning_rate": 3.016211883860086e-06, "loss": 0.2346, "step": 2770 }, { "epoch": 1.35, "learning_rate": 3.01492221209744e-06, "loss": 0.1903, "step": 2771 }, { "epoch": 1.35, "learning_rate": 3.0136323972166832e-06, "loss": 0.1792, "step": 2772 }, { "epoch": 1.35, "learning_rate": 3.01234243957631e-06, "loss": 0.1829, "step": 2773 }, { "epoch": 1.35, "learning_rate": 3.011052339534853e-06, "loss": 0.1917, "step": 2774 }, { "epoch": 1.35, "learning_rate": 3.0097620974508824e-06, "loss": 0.2005, "step": 2775 }, { "epoch": 1.36, "learning_rate": 3.0084717136830116e-06, "loss": 0.1938, "step": 2776 }, { "epoch": 1.36, "learning_rate": 3.0071811885898917e-06, "loss": 0.1929, "step": 2777 }, { "epoch": 1.36, "learning_rate": 3.00589052253021e-06, "loss": 0.194, "step": 2778 }, { "epoch": 1.36, "learning_rate": 3.0045997158626987e-06, "loss": 0.163, "step": 2779 }, { "epoch": 1.36, "learning_rate": 3.003308768946125e-06, "loss": 0.1685, "step": 2780 }, { "epoch": 1.36, "learning_rate": 3.0020176821392965e-06, "loss": 0.1886, "step": 2781 }, { "epoch": 1.36, "learning_rate": 3.0007264558010597e-06, "loss": 0.1969, "step": 2782 }, { "epoch": 1.36, "learning_rate": 2.9994350902902986e-06, "loss": 0.1897, "step": 2783 }, { "epoch": 1.36, "learning_rate": 2.998143585965938e-06, "loss": 0.226, "step": 2784 }, { "epoch": 1.36, "learning_rate": 2.9968519431869392e-06, "loss": 0.1991, "step": 2785 }, { "epoch": 1.36, "learning_rate": 2.9955601623123043e-06, "loss": 0.1801, "step": 2786 }, { "epoch": 1.36, "learning_rate": 2.9942682437010718e-06, "loss": 0.1943, "step": 2787 }, { "epoch": 1.36, "learning_rate": 2.992976187712318e-06, "loss": 0.1888, "step": 2788 }, { "epoch": 1.36, "learning_rate": 2.9916839947051607e-06, "loss": 0.1972, "step": 2789 }, { "epoch": 1.36, "learning_rate": 2.990391665038752e-06, "loss": 0.1978, "step": 2790 }, { "epoch": 1.36, "learning_rate": 2.989099199072284e-06, "loss": 0.2169, "step": 2791 }, { "epoch": 1.36, "learning_rate": 2.9878065971649873e-06, "loss": 0.1795, "step": 2792 }, { "epoch": 1.36, "learning_rate": 2.9865138596761268e-06, "loss": 0.1864, "step": 2793 }, { "epoch": 1.36, "learning_rate": 2.98522098696501e-06, "loss": 0.1658, "step": 2794 }, { "epoch": 1.36, "learning_rate": 2.983927979390978e-06, "loss": 0.2464, "step": 2795 }, { "epoch": 1.37, "learning_rate": 2.9826348373134116e-06, "loss": 0.1646, "step": 2796 }, { "epoch": 1.37, "learning_rate": 2.9813415610917286e-06, "loss": 0.1824, "step": 2797 }, { "epoch": 1.37, "learning_rate": 2.9800481510853827e-06, "loss": 0.1815, "step": 2798 }, { "epoch": 1.37, "learning_rate": 2.978754607653867e-06, "loss": 0.1436, "step": 2799 }, { "epoch": 1.37, "learning_rate": 2.9774609311567097e-06, "loss": 0.1958, "step": 2800 }, { "epoch": 1.37, "learning_rate": 2.9761671219534775e-06, "loss": 0.199, "step": 2801 }, { "epoch": 1.37, "learning_rate": 2.9748731804037728e-06, "loss": 0.1584, "step": 2802 }, { "epoch": 1.37, "learning_rate": 2.9735791068672355e-06, "loss": 0.1962, "step": 2803 }, { "epoch": 1.37, "learning_rate": 2.9722849017035427e-06, "loss": 0.1843, "step": 2804 }, { "epoch": 1.37, "learning_rate": 2.9709905652724054e-06, "loss": 0.1971, "step": 2805 }, { "epoch": 1.37, "learning_rate": 2.969696097933575e-06, "loss": 0.2024, "step": 2806 }, { "epoch": 1.37, "learning_rate": 2.9684015000468356e-06, "loss": 0.1636, "step": 2807 }, { "epoch": 1.37, "learning_rate": 2.967106771972012e-06, "loss": 0.2121, "step": 2808 }, { "epoch": 1.37, "learning_rate": 2.9658119140689593e-06, "loss": 0.2016, "step": 2809 }, { "epoch": 1.37, "learning_rate": 2.9645169266975743e-06, "loss": 0.1878, "step": 2810 }, { "epoch": 1.37, "learning_rate": 2.963221810217786e-06, "loss": 0.1933, "step": 2811 }, { "epoch": 1.37, "learning_rate": 2.9619265649895613e-06, "loss": 0.1875, "step": 2812 }, { "epoch": 1.37, "learning_rate": 2.9606311913729023e-06, "loss": 0.1845, "step": 2813 }, { "epoch": 1.37, "learning_rate": 2.9593356897278465e-06, "loss": 0.1639, "step": 2814 }, { "epoch": 1.37, "learning_rate": 2.958040060414467e-06, "loss": 0.1841, "step": 2815 }, { "epoch": 1.38, "learning_rate": 2.9567443037928735e-06, "loss": 0.1948, "step": 2816 }, { "epoch": 1.38, "learning_rate": 2.955448420223209e-06, "loss": 0.1871, "step": 2817 }, { "epoch": 1.38, "learning_rate": 2.954152410065653e-06, "loss": 0.2131, "step": 2818 }, { "epoch": 1.38, "learning_rate": 2.952856273680422e-06, "loss": 0.194, "step": 2819 }, { "epoch": 1.38, "learning_rate": 2.9515600114277632e-06, "loss": 0.1748, "step": 2820 }, { "epoch": 1.38, "learning_rate": 2.9502636236679628e-06, "loss": 0.1872, "step": 2821 }, { "epoch": 1.38, "learning_rate": 2.9489671107613394e-06, "loss": 0.1953, "step": 2822 }, { "epoch": 1.38, "learning_rate": 2.9476704730682494e-06, "loss": 0.2058, "step": 2823 }, { "epoch": 1.38, "learning_rate": 2.9463737109490797e-06, "loss": 0.1943, "step": 2824 }, { "epoch": 1.38, "learning_rate": 2.9450768247642547e-06, "loss": 0.1933, "step": 2825 }, { "epoch": 1.38, "learning_rate": 2.9437798148742332e-06, "loss": 0.1839, "step": 2826 }, { "epoch": 1.38, "learning_rate": 2.9424826816395056e-06, "loss": 0.1811, "step": 2827 }, { "epoch": 1.38, "learning_rate": 2.9411854254206025e-06, "loss": 0.1843, "step": 2828 }, { "epoch": 1.38, "learning_rate": 2.939888046578081e-06, "loss": 0.183, "step": 2829 }, { "epoch": 1.38, "learning_rate": 2.938590545472538e-06, "loss": 0.2055, "step": 2830 }, { "epoch": 1.38, "learning_rate": 2.937292922464603e-06, "loss": 0.1703, "step": 2831 }, { "epoch": 1.38, "learning_rate": 2.9359951779149376e-06, "loss": 0.1814, "step": 2832 }, { "epoch": 1.38, "learning_rate": 2.9346973121842394e-06, "loss": 0.1922, "step": 2833 }, { "epoch": 1.38, "learning_rate": 2.933399325633239e-06, "loss": 0.1917, "step": 2834 }, { "epoch": 1.38, "learning_rate": 2.9321012186226986e-06, "loss": 0.1863, "step": 2835 }, { "epoch": 1.38, "learning_rate": 2.930802991513418e-06, "loss": 0.1706, "step": 2836 }, { "epoch": 1.39, "learning_rate": 2.929504644666226e-06, "loss": 0.1788, "step": 2837 }, { "epoch": 1.39, "learning_rate": 2.928206178441989e-06, "loss": 0.2145, "step": 2838 }, { "epoch": 1.39, "learning_rate": 2.926907593201602e-06, "loss": 0.1865, "step": 2839 }, { "epoch": 1.39, "learning_rate": 2.925608889305997e-06, "loss": 0.1905, "step": 2840 }, { "epoch": 1.39, "learning_rate": 2.9243100671161366e-06, "loss": 0.1975, "step": 2841 }, { "epoch": 1.39, "learning_rate": 2.9230111269930162e-06, "loss": 0.2034, "step": 2842 }, { "epoch": 1.39, "learning_rate": 2.921712069297667e-06, "loss": 0.1716, "step": 2843 }, { "epoch": 1.39, "learning_rate": 2.9204128943911492e-06, "loss": 0.1952, "step": 2844 }, { "epoch": 1.39, "learning_rate": 2.919113602634556e-06, "loss": 0.1586, "step": 2845 }, { "epoch": 1.39, "learning_rate": 2.917814194389017e-06, "loss": 0.2056, "step": 2846 }, { "epoch": 1.39, "learning_rate": 2.916514670015689e-06, "loss": 0.2257, "step": 2847 }, { "epoch": 1.39, "learning_rate": 2.915215029875764e-06, "loss": 0.1827, "step": 2848 }, { "epoch": 1.39, "learning_rate": 2.913915274330466e-06, "loss": 0.203, "step": 2849 }, { "epoch": 1.39, "learning_rate": 2.912615403741051e-06, "loss": 0.1864, "step": 2850 }, { "epoch": 1.39, "learning_rate": 2.9113154184688054e-06, "loss": 0.2007, "step": 2851 }, { "epoch": 1.39, "learning_rate": 2.9100153188750485e-06, "loss": 0.1595, "step": 2852 }, { "epoch": 1.39, "learning_rate": 2.9087151053211337e-06, "loss": 0.189, "step": 2853 }, { "epoch": 1.39, "learning_rate": 2.907414778168441e-06, "loss": 0.1817, "step": 2854 }, { "epoch": 1.39, "learning_rate": 2.906114337778388e-06, "loss": 0.2025, "step": 2855 }, { "epoch": 1.39, "learning_rate": 2.904813784512419e-06, "loss": 0.1669, "step": 2856 }, { "epoch": 1.4, "learning_rate": 2.9035131187320103e-06, "loss": 0.1833, "step": 2857 }, { "epoch": 1.4, "learning_rate": 2.9022123407986735e-06, "loss": 0.1914, "step": 2858 }, { "epoch": 1.4, "learning_rate": 2.9009114510739457e-06, "loss": 0.2002, "step": 2859 }, { "epoch": 1.4, "learning_rate": 2.899610449919399e-06, "loss": 0.1757, "step": 2860 }, { "epoch": 1.4, "learning_rate": 2.898309337696635e-06, "loss": 0.1679, "step": 2861 }, { "epoch": 1.4, "learning_rate": 2.8970081147672864e-06, "loss": 0.1883, "step": 2862 }, { "epoch": 1.4, "learning_rate": 2.895706781493017e-06, "loss": 0.1737, "step": 2863 }, { "epoch": 1.4, "learning_rate": 2.8944053382355203e-06, "loss": 0.1604, "step": 2864 }, { "epoch": 1.4, "learning_rate": 2.893103785356522e-06, "loss": 0.213, "step": 2865 }, { "epoch": 1.4, "learning_rate": 2.891802123217776e-06, "loss": 0.2004, "step": 2866 }, { "epoch": 1.4, "learning_rate": 2.890500352181069e-06, "loss": 0.2029, "step": 2867 }, { "epoch": 1.4, "learning_rate": 2.8891984726082173e-06, "loss": 0.1753, "step": 2868 }, { "epoch": 1.4, "learning_rate": 2.8878964848610648e-06, "loss": 0.2076, "step": 2869 }, { "epoch": 1.4, "learning_rate": 2.88659438930149e-06, "loss": 0.2019, "step": 2870 }, { "epoch": 1.4, "learning_rate": 2.8852921862913975e-06, "loss": 0.1479, "step": 2871 }, { "epoch": 1.4, "learning_rate": 2.883989876192722e-06, "loss": 0.1884, "step": 2872 }, { "epoch": 1.4, "learning_rate": 2.8826874593674326e-06, "loss": 0.1795, "step": 2873 }, { "epoch": 1.4, "learning_rate": 2.8813849361775216e-06, "loss": 0.1656, "step": 2874 }, { "epoch": 1.4, "learning_rate": 2.8800823069850148e-06, "loss": 0.1714, "step": 2875 }, { "epoch": 1.4, "learning_rate": 2.8787795721519673e-06, "loss": 0.1944, "step": 2876 }, { "epoch": 1.4, "learning_rate": 2.8774767320404618e-06, "loss": 0.184, "step": 2877 }, { "epoch": 1.41, "learning_rate": 2.8761737870126115e-06, "loss": 0.1806, "step": 2878 }, { "epoch": 1.41, "learning_rate": 2.8748707374305585e-06, "loss": 0.1718, "step": 2879 }, { "epoch": 1.41, "learning_rate": 2.8735675836564752e-06, "loss": 0.1873, "step": 2880 }, { "epoch": 1.41, "learning_rate": 2.87226432605256e-06, "loss": 0.1817, "step": 2881 }, { "epoch": 1.41, "learning_rate": 2.870960964981043e-06, "loss": 0.1718, "step": 2882 }, { "epoch": 1.41, "learning_rate": 2.8696575008041818e-06, "loss": 0.1628, "step": 2883 }, { "epoch": 1.41, "learning_rate": 2.868353933884262e-06, "loss": 0.1817, "step": 2884 }, { "epoch": 1.41, "learning_rate": 2.8670502645835994e-06, "loss": 0.1851, "step": 2885 }, { "epoch": 1.41, "learning_rate": 2.865746493264538e-06, "loss": 0.1941, "step": 2886 }, { "epoch": 1.41, "learning_rate": 2.864442620289447e-06, "loss": 0.2081, "step": 2887 }, { "epoch": 1.41, "learning_rate": 2.8631386460207307e-06, "loss": 0.1852, "step": 2888 }, { "epoch": 1.41, "learning_rate": 2.8618345708208136e-06, "loss": 0.2153, "step": 2889 }, { "epoch": 1.41, "learning_rate": 2.8605303950521533e-06, "loss": 0.1979, "step": 2890 }, { "epoch": 1.41, "learning_rate": 2.8592261190772347e-06, "loss": 0.1841, "step": 2891 }, { "epoch": 1.41, "learning_rate": 2.8579217432585686e-06, "loss": 0.1597, "step": 2892 }, { "epoch": 1.41, "learning_rate": 2.8566172679586956e-06, "loss": 0.1719, "step": 2893 }, { "epoch": 1.41, "learning_rate": 2.8553126935401828e-06, "loss": 0.1796, "step": 2894 }, { "epoch": 1.41, "learning_rate": 2.8540080203656256e-06, "loss": 0.1851, "step": 2895 }, { "epoch": 1.41, "learning_rate": 2.852703248797646e-06, "loss": 0.2031, "step": 2896 }, { "epoch": 1.41, "learning_rate": 2.8513983791988936e-06, "loss": 0.172, "step": 2897 }, { "epoch": 1.42, "learning_rate": 2.8500934119320466e-06, "loss": 0.206, "step": 2898 }, { "epoch": 1.42, "learning_rate": 2.848788347359808e-06, "loss": 0.1597, "step": 2899 }, { "epoch": 1.42, "learning_rate": 2.847483185844909e-06, "loss": 0.1878, "step": 2900 }, { "epoch": 1.42, "learning_rate": 2.8461779277501095e-06, "loss": 0.193, "step": 2901 }, { "epoch": 1.42, "learning_rate": 2.8448725734381926e-06, "loss": 0.1681, "step": 2902 }, { "epoch": 1.42, "learning_rate": 2.843567123271971e-06, "loss": 0.1729, "step": 2903 }, { "epoch": 1.42, "learning_rate": 2.8422615776142824e-06, "loss": 0.1805, "step": 2904 }, { "epoch": 1.42, "learning_rate": 2.8409559368279927e-06, "loss": 0.1725, "step": 2905 }, { "epoch": 1.42, "learning_rate": 2.839650201275993e-06, "loss": 0.1783, "step": 2906 }, { "epoch": 1.42, "learning_rate": 2.838344371321201e-06, "loss": 0.1794, "step": 2907 }, { "epoch": 1.42, "learning_rate": 2.83703844732656e-06, "loss": 0.2099, "step": 2908 }, { "epoch": 1.42, "learning_rate": 2.835732429655041e-06, "loss": 0.1824, "step": 2909 }, { "epoch": 1.42, "learning_rate": 2.8344263186696404e-06, "loss": 0.1771, "step": 2910 }, { "epoch": 1.42, "learning_rate": 2.833120114733379e-06, "loss": 0.2001, "step": 2911 }, { "epoch": 1.42, "learning_rate": 2.8318138182093053e-06, "loss": 0.2261, "step": 2912 }, { "epoch": 1.42, "learning_rate": 2.830507429460494e-06, "loss": 0.1777, "step": 2913 }, { "epoch": 1.42, "learning_rate": 2.829200948850042e-06, "loss": 0.1742, "step": 2914 }, { "epoch": 1.42, "learning_rate": 2.8278943767410767e-06, "loss": 0.1977, "step": 2915 }, { "epoch": 1.42, "learning_rate": 2.8265877134967467e-06, "loss": 0.1992, "step": 2916 }, { "epoch": 1.42, "learning_rate": 2.8252809594802287e-06, "loss": 0.2194, "step": 2917 }, { "epoch": 1.42, "learning_rate": 2.8239741150547228e-06, "loss": 0.1863, "step": 2918 }, { "epoch": 1.43, "learning_rate": 2.822667180583455e-06, "loss": 0.16, "step": 2919 }, { "epoch": 1.43, "learning_rate": 2.8213601564296766e-06, "loss": 0.182, "step": 2920 }, { "epoch": 1.43, "learning_rate": 2.820053042956662e-06, "loss": 0.1637, "step": 2921 }, { "epoch": 1.43, "learning_rate": 2.8187458405277147e-06, "loss": 0.1894, "step": 2922 }, { "epoch": 1.43, "learning_rate": 2.8174385495061584e-06, "loss": 0.1839, "step": 2923 }, { "epoch": 1.43, "learning_rate": 2.8161311702553417e-06, "loss": 0.1704, "step": 2924 }, { "epoch": 1.43, "learning_rate": 2.8148237031386426e-06, "loss": 0.1901, "step": 2925 }, { "epoch": 1.43, "learning_rate": 2.8135161485194577e-06, "loss": 0.1717, "step": 2926 }, { "epoch": 1.43, "learning_rate": 2.8122085067612117e-06, "loss": 0.2, "step": 2927 }, { "epoch": 1.43, "learning_rate": 2.8109007782273505e-06, "loss": 0.2094, "step": 2928 }, { "epoch": 1.43, "learning_rate": 2.8095929632813472e-06, "loss": 0.2505, "step": 2929 }, { "epoch": 1.43, "learning_rate": 2.808285062286697e-06, "loss": 0.1872, "step": 2930 }, { "epoch": 1.43, "learning_rate": 2.8069770756069194e-06, "loss": 0.2044, "step": 2931 }, { "epoch": 1.43, "learning_rate": 2.805669003605558e-06, "loss": 0.2025, "step": 2932 }, { "epoch": 1.43, "learning_rate": 2.8043608466461793e-06, "loss": 0.1599, "step": 2933 }, { "epoch": 1.43, "learning_rate": 2.803052605092375e-06, "loss": 0.1684, "step": 2934 }, { "epoch": 1.43, "learning_rate": 2.8017442793077593e-06, "loss": 0.1874, "step": 2935 }, { "epoch": 1.43, "learning_rate": 2.8004358696559685e-06, "loss": 0.1847, "step": 2936 }, { "epoch": 1.43, "learning_rate": 2.7991273765006653e-06, "loss": 0.2134, "step": 2937 }, { "epoch": 1.43, "learning_rate": 2.797818800205534e-06, "loss": 0.1886, "step": 2938 }, { "epoch": 1.44, "learning_rate": 2.7965101411342794e-06, "loss": 0.1952, "step": 2939 }, { "epoch": 1.44, "learning_rate": 2.795201399650635e-06, "loss": 0.197, "step": 2940 }, { "epoch": 1.44, "learning_rate": 2.7938925761183515e-06, "loss": 0.1699, "step": 2941 }, { "epoch": 1.44, "learning_rate": 2.7925836709012062e-06, "loss": 0.1826, "step": 2942 }, { "epoch": 1.44, "learning_rate": 2.791274684362998e-06, "loss": 0.2037, "step": 2943 }, { "epoch": 1.44, "learning_rate": 2.789965616867547e-06, "loss": 0.2068, "step": 2944 }, { "epoch": 1.44, "learning_rate": 2.788656468778698e-06, "loss": 0.1614, "step": 2945 }, { "epoch": 1.44, "learning_rate": 2.7873472404603165e-06, "loss": 0.1604, "step": 2946 }, { "epoch": 1.44, "learning_rate": 2.7860379322762925e-06, "loss": 0.1741, "step": 2947 }, { "epoch": 1.44, "learning_rate": 2.784728544590535e-06, "loss": 0.1464, "step": 2948 }, { "epoch": 1.44, "learning_rate": 2.7834190777669777e-06, "loss": 0.232, "step": 2949 }, { "epoch": 1.44, "learning_rate": 2.782109532169576e-06, "loss": 0.1726, "step": 2950 }, { "epoch": 1.44, "learning_rate": 2.7807999081623044e-06, "loss": 0.2322, "step": 2951 }, { "epoch": 1.44, "learning_rate": 2.779490206109164e-06, "loss": 0.1567, "step": 2952 }, { "epoch": 1.44, "learning_rate": 2.778180426374173e-06, "loss": 0.1905, "step": 2953 }, { "epoch": 1.44, "learning_rate": 2.7768705693213744e-06, "loss": 0.1632, "step": 2954 }, { "epoch": 1.44, "learning_rate": 2.7755606353148318e-06, "loss": 0.1999, "step": 2955 }, { "epoch": 1.44, "learning_rate": 2.7742506247186286e-06, "loss": 0.1823, "step": 2956 }, { "epoch": 1.44, "learning_rate": 2.772940537896871e-06, "loss": 0.1778, "step": 2957 }, { "epoch": 1.44, "learning_rate": 2.7716303752136865e-06, "loss": 0.1689, "step": 2958 }, { "epoch": 1.44, "learning_rate": 2.770320137033224e-06, "loss": 0.1926, "step": 2959 }, { "epoch": 1.45, "learning_rate": 2.7690098237196513e-06, "loss": 0.1871, "step": 2960 }, { "epoch": 1.45, "learning_rate": 2.767699435637159e-06, "loss": 0.1421, "step": 2961 }, { "epoch": 1.45, "learning_rate": 2.7663889731499587e-06, "loss": 0.1983, "step": 2962 }, { "epoch": 1.45, "learning_rate": 2.765078436622281e-06, "loss": 0.1712, "step": 2963 }, { "epoch": 1.45, "learning_rate": 2.763767826418378e-06, "loss": 0.1875, "step": 2964 }, { "epoch": 1.45, "learning_rate": 2.7624571429025238e-06, "loss": 0.1391, "step": 2965 }, { "epoch": 1.45, "learning_rate": 2.7611463864390088e-06, "loss": 0.1699, "step": 2966 }, { "epoch": 1.45, "learning_rate": 2.7598355573921487e-06, "loss": 0.1635, "step": 2967 }, { "epoch": 1.45, "learning_rate": 2.7585246561262757e-06, "loss": 0.2266, "step": 2968 }, { "epoch": 1.45, "learning_rate": 2.757213683005743e-06, "loss": 0.1776, "step": 2969 }, { "epoch": 1.45, "learning_rate": 2.7559026383949255e-06, "loss": 0.1743, "step": 2970 }, { "epoch": 1.45, "learning_rate": 2.7545915226582156e-06, "loss": 0.1763, "step": 2971 }, { "epoch": 1.45, "learning_rate": 2.753280336160026e-06, "loss": 0.1813, "step": 2972 }, { "epoch": 1.45, "learning_rate": 2.7519690792647904e-06, "loss": 0.1617, "step": 2973 }, { "epoch": 1.45, "learning_rate": 2.7506577523369605e-06, "loss": 0.1671, "step": 2974 }, { "epoch": 1.45, "learning_rate": 2.7493463557410083e-06, "loss": 0.2491, "step": 2975 }, { "epoch": 1.45, "learning_rate": 2.7480348898414243e-06, "loss": 0.1547, "step": 2976 }, { "epoch": 1.45, "learning_rate": 2.746723355002721e-06, "loss": 0.1682, "step": 2977 }, { "epoch": 1.45, "learning_rate": 2.7454117515894256e-06, "loss": 0.1696, "step": 2978 }, { "epoch": 1.45, "learning_rate": 2.744100079966088e-06, "loss": 0.1587, "step": 2979 }, { "epoch": 1.46, "learning_rate": 2.7427883404972764e-06, "loss": 0.1574, "step": 2980 }, { "epoch": 1.46, "learning_rate": 2.7414765335475757e-06, "loss": 0.1673, "step": 2981 }, { "epoch": 1.46, "learning_rate": 2.740164659481592e-06, "loss": 0.1965, "step": 2982 }, { "epoch": 1.46, "learning_rate": 2.7388527186639484e-06, "loss": 0.1764, "step": 2983 }, { "epoch": 1.46, "learning_rate": 2.7375407114592884e-06, "loss": 0.1663, "step": 2984 }, { "epoch": 1.46, "learning_rate": 2.7362286382322734e-06, "loss": 0.213, "step": 2985 }, { "epoch": 1.46, "learning_rate": 2.734916499347581e-06, "loss": 0.1765, "step": 2986 }, { "epoch": 1.46, "learning_rate": 2.7336042951699093e-06, "loss": 0.1619, "step": 2987 }, { "epoch": 1.46, "learning_rate": 2.732292026063974e-06, "loss": 0.1877, "step": 2988 }, { "epoch": 1.46, "learning_rate": 2.730979692394509e-06, "loss": 0.1763, "step": 2989 }, { "epoch": 1.46, "learning_rate": 2.729667294526266e-06, "loss": 0.2079, "step": 2990 }, { "epoch": 1.46, "learning_rate": 2.728354832824014e-06, "loss": 0.1564, "step": 2991 }, { "epoch": 1.46, "learning_rate": 2.7270423076525408e-06, "loss": 0.1841, "step": 2992 }, { "epoch": 1.46, "learning_rate": 2.725729719376651e-06, "loss": 0.2058, "step": 2993 }, { "epoch": 1.46, "learning_rate": 2.7244170683611657e-06, "loss": 0.1867, "step": 2994 }, { "epoch": 1.46, "learning_rate": 2.723104354970927e-06, "loss": 0.2043, "step": 2995 }, { "epoch": 1.46, "learning_rate": 2.721791579570791e-06, "loss": 0.1701, "step": 2996 }, { "epoch": 1.46, "learning_rate": 2.720478742525632e-06, "loss": 0.1654, "step": 2997 }, { "epoch": 1.46, "learning_rate": 2.719165844200341e-06, "loss": 0.1622, "step": 2998 }, { "epoch": 1.46, "learning_rate": 2.7178528849598277e-06, "loss": 0.2029, "step": 2999 }, { "epoch": 1.46, "learning_rate": 2.716539865169016e-06, "loss": 0.175, "step": 3000 }, { "epoch": 1.47, "learning_rate": 2.715226785192851e-06, "loss": 0.168, "step": 3001 }, { "epoch": 1.47, "learning_rate": 2.713913645396289e-06, "loss": 0.1738, "step": 3002 }, { "epoch": 1.47, "learning_rate": 2.7126004461443067e-06, "loss": 0.1601, "step": 3003 }, { "epoch": 1.47, "learning_rate": 2.7112871878018975e-06, "loss": 0.1743, "step": 3004 }, { "epoch": 1.47, "learning_rate": 2.7099738707340677e-06, "loss": 0.2197, "step": 3005 }, { "epoch": 1.47, "learning_rate": 2.7086604953058436e-06, "loss": 0.1986, "step": 3006 }, { "epoch": 1.47, "learning_rate": 2.707347061882267e-06, "loss": 0.1756, "step": 3007 }, { "epoch": 1.47, "learning_rate": 2.7060335708283936e-06, "loss": 0.1648, "step": 3008 }, { "epoch": 1.47, "learning_rate": 2.7047200225092973e-06, "loss": 0.2381, "step": 3009 }, { "epoch": 1.47, "learning_rate": 2.7034064172900682e-06, "loss": 0.1581, "step": 3010 }, { "epoch": 1.47, "learning_rate": 2.7020927555358106e-06, "loss": 0.2017, "step": 3011 }, { "epoch": 1.47, "learning_rate": 2.700779037611645e-06, "loss": 0.1972, "step": 3012 }, { "epoch": 1.47, "learning_rate": 2.699465263882708e-06, "loss": 0.1981, "step": 3013 }, { "epoch": 1.47, "learning_rate": 2.6981514347141526e-06, "loss": 0.2114, "step": 3014 }, { "epoch": 1.47, "learning_rate": 2.6968375504711432e-06, "loss": 0.1789, "step": 3015 }, { "epoch": 1.47, "learning_rate": 2.6955236115188653e-06, "loss": 0.1954, "step": 3016 }, { "epoch": 1.47, "learning_rate": 2.6942096182225163e-06, "loss": 0.1817, "step": 3017 }, { "epoch": 1.47, "learning_rate": 2.692895570947307e-06, "loss": 0.2061, "step": 3018 }, { "epoch": 1.47, "learning_rate": 2.691581470058468e-06, "loss": 0.2097, "step": 3019 }, { "epoch": 1.47, "learning_rate": 2.6902673159212407e-06, "loss": 0.1479, "step": 3020 }, { "epoch": 1.48, "learning_rate": 2.688953108900882e-06, "loss": 0.1641, "step": 3021 }, { "epoch": 1.48, "learning_rate": 2.6876388493626666e-06, "loss": 0.1636, "step": 3022 }, { "epoch": 1.48, "learning_rate": 2.6863245376718784e-06, "loss": 0.1618, "step": 3023 }, { "epoch": 1.48, "learning_rate": 2.6850101741938206e-06, "loss": 0.1864, "step": 3024 }, { "epoch": 1.48, "learning_rate": 2.683695759293809e-06, "loss": 0.1703, "step": 3025 }, { "epoch": 1.48, "learning_rate": 2.682381293337174e-06, "loss": 0.1772, "step": 3026 }, { "epoch": 1.48, "learning_rate": 2.6810667766892585e-06, "loss": 0.1733, "step": 3027 }, { "epoch": 1.48, "learning_rate": 2.679752209715421e-06, "loss": 0.1945, "step": 3028 }, { "epoch": 1.48, "learning_rate": 2.6784375927810356e-06, "loss": 0.1963, "step": 3029 }, { "epoch": 1.48, "learning_rate": 2.6771229262514863e-06, "loss": 0.178, "step": 3030 }, { "epoch": 1.48, "learning_rate": 2.675808210492175e-06, "loss": 0.1695, "step": 3031 }, { "epoch": 1.48, "learning_rate": 2.674493445868515e-06, "loss": 0.1853, "step": 3032 }, { "epoch": 1.48, "learning_rate": 2.673178632745932e-06, "loss": 0.1598, "step": 3033 }, { "epoch": 1.48, "learning_rate": 2.6718637714898694e-06, "loss": 0.1665, "step": 3034 }, { "epoch": 1.48, "learning_rate": 2.670548862465779e-06, "loss": 0.1681, "step": 3035 }, { "epoch": 1.48, "learning_rate": 2.6692339060391293e-06, "loss": 0.1821, "step": 3036 }, { "epoch": 1.48, "learning_rate": 2.667918902575401e-06, "loss": 0.2283, "step": 3037 }, { "epoch": 1.48, "learning_rate": 2.6666038524400874e-06, "loss": 0.1616, "step": 3038 }, { "epoch": 1.48, "learning_rate": 2.6652887559986947e-06, "loss": 0.1571, "step": 3039 }, { "epoch": 1.48, "learning_rate": 2.663973613616743e-06, "loss": 0.1952, "step": 3040 }, { "epoch": 1.48, "learning_rate": 2.662658425659766e-06, "loss": 0.2178, "step": 3041 }, { "epoch": 1.49, "learning_rate": 2.661343192493306e-06, "loss": 0.1785, "step": 3042 }, { "epoch": 1.49, "learning_rate": 2.6600279144829216e-06, "loss": 0.179, "step": 3043 }, { "epoch": 1.49, "learning_rate": 2.658712591994184e-06, "loss": 0.1764, "step": 3044 }, { "epoch": 1.49, "learning_rate": 2.6573972253926728e-06, "loss": 0.1989, "step": 3045 }, { "epoch": 1.49, "learning_rate": 2.6560818150439856e-06, "loss": 0.1542, "step": 3046 }, { "epoch": 1.49, "learning_rate": 2.654766361313727e-06, "loss": 0.2182, "step": 3047 }, { "epoch": 1.49, "learning_rate": 2.6534508645675165e-06, "loss": 0.1882, "step": 3048 }, { "epoch": 1.49, "learning_rate": 2.6521353251709864e-06, "loss": 0.1972, "step": 3049 }, { "epoch": 1.49, "learning_rate": 2.6508197434897763e-06, "loss": 0.1844, "step": 3050 }, { "epoch": 1.49, "learning_rate": 2.6495041198895434e-06, "loss": 0.1922, "step": 3051 }, { "epoch": 1.49, "learning_rate": 2.6481884547359515e-06, "loss": 0.2042, "step": 3052 }, { "epoch": 1.49, "learning_rate": 2.6468727483946803e-06, "loss": 0.1622, "step": 3053 }, { "epoch": 1.49, "learning_rate": 2.645557001231417e-06, "loss": 0.1512, "step": 3054 }, { "epoch": 1.49, "learning_rate": 2.6442412136118626e-06, "loss": 0.1666, "step": 3055 }, { "epoch": 1.49, "learning_rate": 2.64292538590173e-06, "loss": 0.1795, "step": 3056 }, { "epoch": 1.49, "learning_rate": 2.64160951846674e-06, "loss": 0.1957, "step": 3057 }, { "epoch": 1.49, "learning_rate": 2.640293611672627e-06, "loss": 0.1864, "step": 3058 }, { "epoch": 1.49, "learning_rate": 2.638977665885137e-06, "loss": 0.1815, "step": 3059 }, { "epoch": 1.49, "learning_rate": 2.637661681470023e-06, "loss": 0.1894, "step": 3060 }, { "epoch": 1.49, "learning_rate": 2.6363456587930535e-06, "loss": 0.1758, "step": 3061 }, { "epoch": 1.5, "learning_rate": 2.635029598220005e-06, "loss": 0.2057, "step": 3062 }, { "epoch": 1.5, "learning_rate": 2.6337135001166645e-06, "loss": 0.1672, "step": 3063 }, { "epoch": 1.5, "learning_rate": 2.632397364848831e-06, "loss": 0.1945, "step": 3064 }, { "epoch": 1.5, "learning_rate": 2.6310811927823116e-06, "loss": 0.1714, "step": 3065 }, { "epoch": 1.5, "learning_rate": 2.629764984282925e-06, "loss": 0.1636, "step": 3066 }, { "epoch": 1.5, "learning_rate": 2.6284487397164992e-06, "loss": 0.2163, "step": 3067 }, { "epoch": 1.5, "learning_rate": 2.6271324594488753e-06, "loss": 0.2166, "step": 3068 }, { "epoch": 1.5, "learning_rate": 2.6258161438458985e-06, "loss": 0.1538, "step": 3069 }, { "epoch": 1.5, "learning_rate": 2.6244997932734295e-06, "loss": 0.208, "step": 3070 }, { "epoch": 1.5, "learning_rate": 2.623183408097336e-06, "loss": 0.1727, "step": 3071 }, { "epoch": 1.5, "learning_rate": 2.6218669886834945e-06, "loss": 0.1818, "step": 3072 }, { "epoch": 1.5, "learning_rate": 2.6205505353977935e-06, "loss": 0.1563, "step": 3073 }, { "epoch": 1.5, "learning_rate": 2.6192340486061297e-06, "loss": 0.1492, "step": 3074 }, { "epoch": 1.5, "learning_rate": 2.617917528674408e-06, "loss": 0.1852, "step": 3075 }, { "epoch": 1.5, "learning_rate": 2.616600975968544e-06, "loss": 0.2222, "step": 3076 }, { "epoch": 1.5, "learning_rate": 2.6152843908544624e-06, "loss": 0.2122, "step": 3077 }, { "epoch": 1.5, "learning_rate": 2.6139677736980964e-06, "loss": 0.1679, "step": 3078 }, { "epoch": 1.5, "learning_rate": 2.612651124865388e-06, "loss": 0.1877, "step": 3079 }, { "epoch": 1.5, "learning_rate": 2.6113344447222878e-06, "loss": 0.1728, "step": 3080 }, { "epoch": 1.5, "learning_rate": 2.610017733634756e-06, "loss": 0.1919, "step": 3081 }, { "epoch": 1.5, "learning_rate": 2.6087009919687605e-06, "loss": 0.1952, "step": 3082 }, { "epoch": 1.51, "learning_rate": 2.6073842200902786e-06, "loss": 0.1578, "step": 3083 }, { "epoch": 1.51, "learning_rate": 2.606067418365295e-06, "loss": 0.1788, "step": 3084 }, { "epoch": 1.51, "learning_rate": 2.604750587159804e-06, "loss": 0.1585, "step": 3085 }, { "epoch": 1.51, "learning_rate": 2.603433726839806e-06, "loss": 0.1837, "step": 3086 }, { "epoch": 1.51, "learning_rate": 2.602116837771312e-06, "loss": 0.1738, "step": 3087 }, { "epoch": 1.51, "learning_rate": 2.600799920320339e-06, "loss": 0.2215, "step": 3088 }, { "epoch": 1.51, "learning_rate": 2.5994829748529128e-06, "loss": 0.1669, "step": 3089 }, { "epoch": 1.51, "learning_rate": 2.598166001735067e-06, "loss": 0.164, "step": 3090 }, { "epoch": 1.51, "learning_rate": 2.5968490013328416e-06, "loss": 0.1826, "step": 3091 }, { "epoch": 1.51, "learning_rate": 2.5955319740122868e-06, "loss": 0.1987, "step": 3092 }, { "epoch": 1.51, "learning_rate": 2.5942149201394597e-06, "loss": 0.1463, "step": 3093 }, { "epoch": 1.51, "learning_rate": 2.5928978400804207e-06, "loss": 0.1822, "step": 3094 }, { "epoch": 1.51, "learning_rate": 2.5915807342012433e-06, "loss": 0.1778, "step": 3095 }, { "epoch": 1.51, "learning_rate": 2.5902636028680045e-06, "loss": 0.1573, "step": 3096 }, { "epoch": 1.51, "learning_rate": 2.5889464464467887e-06, "loss": 0.22, "step": 3097 }, { "epoch": 1.51, "learning_rate": 2.58762926530369e-06, "loss": 0.1726, "step": 3098 }, { "epoch": 1.51, "learning_rate": 2.5863120598048053e-06, "loss": 0.1545, "step": 3099 }, { "epoch": 1.51, "learning_rate": 2.5849948303162414e-06, "loss": 0.1975, "step": 3100 }, { "epoch": 1.51, "learning_rate": 2.5836775772041106e-06, "loss": 0.184, "step": 3101 }, { "epoch": 1.51, "learning_rate": 2.5823603008345316e-06, "loss": 0.1972, "step": 3102 }, { "epoch": 1.52, "learning_rate": 2.5810430015736293e-06, "loss": 0.19, "step": 3103 }, { "epoch": 1.52, "learning_rate": 2.5797256797875362e-06, "loss": 0.1772, "step": 3104 }, { "epoch": 1.52, "learning_rate": 2.578408335842391e-06, "loss": 0.214, "step": 3105 }, { "epoch": 1.52, "learning_rate": 2.577090970104336e-06, "loss": 0.1798, "step": 3106 }, { "epoch": 1.52, "learning_rate": 2.5757735829395227e-06, "loss": 0.183, "step": 3107 }, { "epoch": 1.52, "learning_rate": 2.5744561747141078e-06, "loss": 0.22, "step": 3108 }, { "epoch": 1.52, "learning_rate": 2.573138745794251e-06, "loss": 0.1901, "step": 3109 }, { "epoch": 1.52, "learning_rate": 2.571821296546123e-06, "loss": 0.1442, "step": 3110 }, { "epoch": 1.52, "learning_rate": 2.5705038273358962e-06, "loss": 0.1825, "step": 3111 }, { "epoch": 1.52, "learning_rate": 2.5691863385297485e-06, "loss": 0.1736, "step": 3112 }, { "epoch": 1.52, "learning_rate": 2.5678688304938658e-06, "loss": 0.1838, "step": 3113 }, { "epoch": 1.52, "learning_rate": 2.5665513035944373e-06, "loss": 0.2089, "step": 3114 }, { "epoch": 1.52, "learning_rate": 2.5652337581976575e-06, "loss": 0.2127, "step": 3115 }, { "epoch": 1.52, "learning_rate": 2.5639161946697284e-06, "loss": 0.1819, "step": 3116 }, { "epoch": 1.52, "learning_rate": 2.562598613376853e-06, "loss": 0.1787, "step": 3117 }, { "epoch": 1.52, "learning_rate": 2.5612810146852423e-06, "loss": 0.1817, "step": 3118 }, { "epoch": 1.52, "learning_rate": 2.559963398961112e-06, "loss": 0.173, "step": 3119 }, { "epoch": 1.52, "learning_rate": 2.558645766570681e-06, "loss": 0.2032, "step": 3120 }, { "epoch": 1.52, "learning_rate": 2.557328117880174e-06, "loss": 0.183, "step": 3121 }, { "epoch": 1.52, "learning_rate": 2.5560104532558195e-06, "loss": 0.1799, "step": 3122 }, { "epoch": 1.52, "learning_rate": 2.5546927730638517e-06, "loss": 0.2009, "step": 3123 }, { "epoch": 1.53, "learning_rate": 2.5533750776705063e-06, "loss": 0.1487, "step": 3124 }, { "epoch": 1.53, "learning_rate": 2.552057367442028e-06, "loss": 0.1981, "step": 3125 }, { "epoch": 1.53, "learning_rate": 2.5507396427446614e-06, "loss": 0.1834, "step": 3126 }, { "epoch": 1.53, "learning_rate": 2.549421903944655e-06, "loss": 0.1798, "step": 3127 }, { "epoch": 1.53, "learning_rate": 2.5481041514082654e-06, "loss": 0.1724, "step": 3128 }, { "epoch": 1.53, "learning_rate": 2.5467863855017484e-06, "loss": 0.2236, "step": 3129 }, { "epoch": 1.53, "learning_rate": 2.5454686065913665e-06, "loss": 0.1845, "step": 3130 }, { "epoch": 1.53, "learning_rate": 2.5441508150433847e-06, "loss": 0.1729, "step": 3131 }, { "epoch": 1.53, "learning_rate": 2.542833011224071e-06, "loss": 0.1784, "step": 3132 }, { "epoch": 1.53, "learning_rate": 2.541515195499698e-06, "loss": 0.161, "step": 3133 }, { "epoch": 1.53, "learning_rate": 2.5401973682365403e-06, "loss": 0.1832, "step": 3134 }, { "epoch": 1.53, "learning_rate": 2.538879529800878e-06, "loss": 0.1809, "step": 3135 }, { "epoch": 1.53, "learning_rate": 2.5375616805589905e-06, "loss": 0.1736, "step": 3136 }, { "epoch": 1.53, "learning_rate": 2.536243820877164e-06, "loss": 0.2039, "step": 3137 }, { "epoch": 1.53, "learning_rate": 2.5349259511216855e-06, "loss": 0.1655, "step": 3138 }, { "epoch": 1.53, "learning_rate": 2.533608071658845e-06, "loss": 0.1659, "step": 3139 }, { "epoch": 1.53, "learning_rate": 2.5322901828549367e-06, "loss": 0.2138, "step": 3140 }, { "epoch": 1.53, "learning_rate": 2.530972285076255e-06, "loss": 0.1842, "step": 3141 }, { "epoch": 1.53, "learning_rate": 2.5296543786890986e-06, "loss": 0.1861, "step": 3142 }, { "epoch": 1.53, "learning_rate": 2.5283364640597685e-06, "loss": 0.1905, "step": 3143 }, { "epoch": 1.54, "learning_rate": 2.527018541554566e-06, "loss": 0.1983, "step": 3144 }, { "epoch": 1.54, "learning_rate": 2.5257006115397976e-06, "loss": 0.1807, "step": 3145 }, { "epoch": 1.54, "learning_rate": 2.5243826743817696e-06, "loss": 0.1674, "step": 3146 }, { "epoch": 1.54, "learning_rate": 2.523064730446792e-06, "loss": 0.1715, "step": 3147 }, { "epoch": 1.54, "learning_rate": 2.521746780101174e-06, "loss": 0.1949, "step": 3148 }, { "epoch": 1.54, "learning_rate": 2.5204288237112295e-06, "loss": 0.1629, "step": 3149 }, { "epoch": 1.54, "learning_rate": 2.519110861643273e-06, "loss": 0.1595, "step": 3150 }, { "epoch": 1.54, "learning_rate": 2.5177928942636205e-06, "loss": 0.1985, "step": 3151 }, { "epoch": 1.54, "learning_rate": 2.5164749219385887e-06, "loss": 0.1517, "step": 3152 }, { "epoch": 1.54, "learning_rate": 2.515156945034497e-06, "loss": 0.2245, "step": 3153 }, { "epoch": 1.54, "learning_rate": 2.5138389639176657e-06, "loss": 0.2137, "step": 3154 }, { "epoch": 1.54, "learning_rate": 2.5125209789544153e-06, "loss": 0.1822, "step": 3155 }, { "epoch": 1.54, "learning_rate": 2.511202990511069e-06, "loss": 0.1972, "step": 3156 }, { "epoch": 1.54, "learning_rate": 2.5098849989539496e-06, "loss": 0.167, "step": 3157 }, { "epoch": 1.54, "learning_rate": 2.5085670046493826e-06, "loss": 0.1772, "step": 3158 }, { "epoch": 1.54, "learning_rate": 2.507249007963691e-06, "loss": 0.1921, "step": 3159 }, { "epoch": 1.54, "learning_rate": 2.5059310092632012e-06, "loss": 0.1834, "step": 3160 }, { "epoch": 1.54, "learning_rate": 2.504613008914239e-06, "loss": 0.1623, "step": 3161 }, { "epoch": 1.54, "learning_rate": 2.5032950072831326e-06, "loss": 0.1482, "step": 3162 }, { "epoch": 1.54, "learning_rate": 2.5019770047362067e-06, "loss": 0.2158, "step": 3163 }, { "epoch": 1.54, "learning_rate": 2.50065900163979e-06, "loss": 0.1993, "step": 3164 }, { "epoch": 1.55, "learning_rate": 2.4993409983602103e-06, "loss": 0.1553, "step": 3165 }, { "epoch": 1.55, "learning_rate": 2.4980229952637937e-06, "loss": 0.159, "step": 3166 }, { "epoch": 1.55, "learning_rate": 2.4967049927168678e-06, "loss": 0.1394, "step": 3167 }, { "epoch": 1.55, "learning_rate": 2.495386991085761e-06, "loss": 0.2098, "step": 3168 }, { "epoch": 1.55, "learning_rate": 2.4940689907368e-06, "loss": 0.2084, "step": 3169 }, { "epoch": 1.55, "learning_rate": 2.49275099203631e-06, "loss": 0.1656, "step": 3170 }, { "epoch": 1.55, "learning_rate": 2.491432995350619e-06, "loss": 0.1704, "step": 3171 }, { "epoch": 1.55, "learning_rate": 2.4901150010460504e-06, "loss": 0.2077, "step": 3172 }, { "epoch": 1.55, "learning_rate": 2.488797009488931e-06, "loss": 0.1612, "step": 3173 }, { "epoch": 1.55, "learning_rate": 2.4874790210455855e-06, "loss": 0.1687, "step": 3174 }, { "epoch": 1.55, "learning_rate": 2.486161036082335e-06, "loss": 0.1669, "step": 3175 }, { "epoch": 1.55, "learning_rate": 2.484843054965504e-06, "loss": 0.1951, "step": 3176 }, { "epoch": 1.55, "learning_rate": 2.4835250780614117e-06, "loss": 0.1981, "step": 3177 }, { "epoch": 1.55, "learning_rate": 2.4822071057363808e-06, "loss": 0.1729, "step": 3178 }, { "epoch": 1.55, "learning_rate": 2.4808891383567275e-06, "loss": 0.1611, "step": 3179 }, { "epoch": 1.55, "learning_rate": 2.4795711762887713e-06, "loss": 0.1815, "step": 3180 }, { "epoch": 1.55, "learning_rate": 2.4782532198988268e-06, "loss": 0.1762, "step": 3181 }, { "epoch": 1.55, "learning_rate": 2.4769352695532085e-06, "loss": 0.1914, "step": 3182 }, { "epoch": 1.55, "learning_rate": 2.4756173256182304e-06, "loss": 0.2193, "step": 3183 }, { "epoch": 1.55, "learning_rate": 2.474299388460203e-06, "loss": 0.2085, "step": 3184 }, { "epoch": 1.56, "learning_rate": 2.4729814584454345e-06, "loss": 0.1736, "step": 3185 }, { "epoch": 1.56, "learning_rate": 2.4716635359402328e-06, "loss": 0.1977, "step": 3186 }, { "epoch": 1.56, "learning_rate": 2.4703456213109014e-06, "loss": 0.1816, "step": 3187 }, { "epoch": 1.56, "learning_rate": 2.469027714923745e-06, "loss": 0.1833, "step": 3188 }, { "epoch": 1.56, "learning_rate": 2.4677098171450637e-06, "loss": 0.1434, "step": 3189 }, { "epoch": 1.56, "learning_rate": 2.4663919283411554e-06, "loss": 0.2002, "step": 3190 }, { "epoch": 1.56, "learning_rate": 2.4650740488783158e-06, "loss": 0.1664, "step": 3191 }, { "epoch": 1.56, "learning_rate": 2.4637561791228363e-06, "loss": 0.1859, "step": 3192 }, { "epoch": 1.56, "learning_rate": 2.46243831944101e-06, "loss": 0.1691, "step": 3193 }, { "epoch": 1.56, "learning_rate": 2.461120470199123e-06, "loss": 0.1967, "step": 3194 }, { "epoch": 1.56, "learning_rate": 2.4598026317634605e-06, "loss": 0.2357, "step": 3195 }, { "epoch": 1.56, "learning_rate": 2.4584848045003027e-06, "loss": 0.1632, "step": 3196 }, { "epoch": 1.56, "learning_rate": 2.4571669887759292e-06, "loss": 0.1639, "step": 3197 }, { "epoch": 1.56, "learning_rate": 2.455849184956616e-06, "loss": 0.2168, "step": 3198 }, { "epoch": 1.56, "learning_rate": 2.4545313934086343e-06, "loss": 0.1576, "step": 3199 }, { "epoch": 1.56, "learning_rate": 2.453213614498252e-06, "loss": 0.1692, "step": 3200 }, { "epoch": 1.56, "learning_rate": 2.451895848591736e-06, "loss": 0.1989, "step": 3201 }, { "epoch": 1.56, "learning_rate": 2.450578096055345e-06, "loss": 0.1829, "step": 3202 }, { "epoch": 1.56, "learning_rate": 2.4492603572553394e-06, "loss": 0.2251, "step": 3203 }, { "epoch": 1.56, "learning_rate": 2.4479426325579727e-06, "loss": 0.1869, "step": 3204 }, { "epoch": 1.56, "learning_rate": 2.446624922329494e-06, "loss": 0.1678, "step": 3205 }, { "epoch": 1.57, "learning_rate": 2.4453072269361496e-06, "loss": 0.1871, "step": 3206 }, { "epoch": 1.57, "learning_rate": 2.443989546744181e-06, "loss": 0.1604, "step": 3207 }, { "epoch": 1.57, "learning_rate": 2.442671882119827e-06, "loss": 0.1726, "step": 3208 }, { "epoch": 1.57, "learning_rate": 2.44135423342932e-06, "loss": 0.1807, "step": 3209 }, { "epoch": 1.57, "learning_rate": 2.440036601038889e-06, "loss": 0.2024, "step": 3210 }, { "epoch": 1.57, "learning_rate": 2.438718985314759e-06, "loss": 0.1841, "step": 3211 }, { "epoch": 1.57, "learning_rate": 2.4374013866231476e-06, "loss": 0.196, "step": 3212 }, { "epoch": 1.57, "learning_rate": 2.4360838053302724e-06, "loss": 0.1769, "step": 3213 }, { "epoch": 1.57, "learning_rate": 2.434766241802343e-06, "loss": 0.1976, "step": 3214 }, { "epoch": 1.57, "learning_rate": 2.4334486964055635e-06, "loss": 0.1867, "step": 3215 }, { "epoch": 1.57, "learning_rate": 2.4321311695061355e-06, "loss": 0.2009, "step": 3216 }, { "epoch": 1.57, "learning_rate": 2.430813661470252e-06, "loss": 0.1509, "step": 3217 }, { "epoch": 1.57, "learning_rate": 2.429496172664104e-06, "loss": 0.1761, "step": 3218 }, { "epoch": 1.57, "learning_rate": 2.4281787034538778e-06, "loss": 0.1719, "step": 3219 }, { "epoch": 1.57, "learning_rate": 2.4268612542057497e-06, "loss": 0.1654, "step": 3220 }, { "epoch": 1.57, "learning_rate": 2.425543825285894e-06, "loss": 0.1904, "step": 3221 }, { "epoch": 1.57, "learning_rate": 2.4242264170604773e-06, "loss": 0.1545, "step": 3222 }, { "epoch": 1.57, "learning_rate": 2.4229090298956647e-06, "loss": 0.2153, "step": 3223 }, { "epoch": 1.57, "learning_rate": 2.4215916641576096e-06, "loss": 0.2109, "step": 3224 }, { "epoch": 1.57, "learning_rate": 2.4202743202124646e-06, "loss": 0.1879, "step": 3225 }, { "epoch": 1.58, "learning_rate": 2.418956998426372e-06, "loss": 0.184, "step": 3226 }, { "epoch": 1.58, "learning_rate": 2.4176396991654692e-06, "loss": 0.1861, "step": 3227 }, { "epoch": 1.58, "learning_rate": 2.4163224227958902e-06, "loss": 0.1516, "step": 3228 }, { "epoch": 1.58, "learning_rate": 2.415005169683759e-06, "loss": 0.1732, "step": 3229 }, { "epoch": 1.58, "learning_rate": 2.4136879401951955e-06, "loss": 0.2001, "step": 3230 }, { "epoch": 1.58, "learning_rate": 2.4123707346963114e-06, "loss": 0.2167, "step": 3231 }, { "epoch": 1.58, "learning_rate": 2.4110535535532117e-06, "loss": 0.1642, "step": 3232 }, { "epoch": 1.58, "learning_rate": 2.409736397131996e-06, "loss": 0.2034, "step": 3233 }, { "epoch": 1.58, "learning_rate": 2.4084192657987576e-06, "loss": 0.1582, "step": 3234 }, { "epoch": 1.58, "learning_rate": 2.4071021599195806e-06, "loss": 0.1588, "step": 3235 }, { "epoch": 1.58, "learning_rate": 2.4057850798605415e-06, "loss": 0.1915, "step": 3236 }, { "epoch": 1.58, "learning_rate": 2.404468025987713e-06, "loss": 0.1547, "step": 3237 }, { "epoch": 1.58, "learning_rate": 2.4031509986671588e-06, "loss": 0.1909, "step": 3238 }, { "epoch": 1.58, "learning_rate": 2.401833998264934e-06, "loss": 0.1684, "step": 3239 }, { "epoch": 1.58, "learning_rate": 2.4005170251470885e-06, "loss": 0.1966, "step": 3240 }, { "epoch": 1.58, "learning_rate": 2.399200079679663e-06, "loss": 0.1747, "step": 3241 }, { "epoch": 1.58, "learning_rate": 2.397883162228689e-06, "loss": 0.1933, "step": 3242 }, { "epoch": 1.58, "learning_rate": 2.3965662731601943e-06, "loss": 0.1585, "step": 3243 }, { "epoch": 1.58, "learning_rate": 2.395249412840197e-06, "loss": 0.1921, "step": 3244 }, { "epoch": 1.58, "learning_rate": 2.3939325816347055e-06, "loss": 0.1613, "step": 3245 }, { "epoch": 1.58, "learning_rate": 2.3926157799097226e-06, "loss": 0.2206, "step": 3246 }, { "epoch": 1.59, "learning_rate": 2.39129900803124e-06, "loss": 0.1971, "step": 3247 }, { "epoch": 1.59, "learning_rate": 2.389982266365245e-06, "loss": 0.203, "step": 3248 }, { "epoch": 1.59, "learning_rate": 2.388665555277713e-06, "loss": 0.1973, "step": 3249 }, { "epoch": 1.59, "learning_rate": 2.3873488751346134e-06, "loss": 0.1611, "step": 3250 }, { "epoch": 1.59, "learning_rate": 2.3860322263019045e-06, "loss": 0.1943, "step": 3251 }, { "epoch": 1.59, "learning_rate": 2.3847156091455376e-06, "loss": 0.1899, "step": 3252 }, { "epoch": 1.59, "learning_rate": 2.3833990240314564e-06, "loss": 0.1702, "step": 3253 }, { "epoch": 1.59, "learning_rate": 2.3820824713255928e-06, "loss": 0.1939, "step": 3254 }, { "epoch": 1.59, "learning_rate": 2.3807659513938716e-06, "loss": 0.1617, "step": 3255 }, { "epoch": 1.59, "learning_rate": 2.3794494646022077e-06, "loss": 0.189, "step": 3256 }, { "epoch": 1.59, "learning_rate": 2.378133011316506e-06, "loss": 0.1993, "step": 3257 }, { "epoch": 1.59, "learning_rate": 2.376816591902665e-06, "loss": 0.1886, "step": 3258 }, { "epoch": 1.59, "learning_rate": 2.3755002067265714e-06, "loss": 0.2124, "step": 3259 }, { "epoch": 1.59, "learning_rate": 2.374183856154102e-06, "loss": 0.1756, "step": 3260 }, { "epoch": 1.59, "learning_rate": 2.372867540551126e-06, "loss": 0.164, "step": 3261 }, { "epoch": 1.59, "learning_rate": 2.3715512602835008e-06, "loss": 0.2106, "step": 3262 }, { "epoch": 1.59, "learning_rate": 2.370235015717076e-06, "loss": 0.1891, "step": 3263 }, { "epoch": 1.59, "learning_rate": 2.368918807217689e-06, "loss": 0.1931, "step": 3264 }, { "epoch": 1.59, "learning_rate": 2.36760263515117e-06, "loss": 0.2013, "step": 3265 }, { "epoch": 1.59, "learning_rate": 2.3662864998833355e-06, "loss": 0.1934, "step": 3266 }, { "epoch": 1.6, "learning_rate": 2.364970401779995e-06, "loss": 0.1913, "step": 3267 }, { "epoch": 1.6, "learning_rate": 2.363654341206947e-06, "loss": 0.1761, "step": 3268 }, { "epoch": 1.6, "learning_rate": 2.3623383185299776e-06, "loss": 0.1731, "step": 3269 }, { "epoch": 1.6, "learning_rate": 2.3610223341148643e-06, "loss": 0.1974, "step": 3270 }, { "epoch": 1.6, "learning_rate": 2.359706388327373e-06, "loss": 0.2115, "step": 3271 }, { "epoch": 1.6, "learning_rate": 2.358390481533261e-06, "loss": 0.1866, "step": 3272 }, { "epoch": 1.6, "learning_rate": 2.3570746140982705e-06, "loss": 0.1849, "step": 3273 }, { "epoch": 1.6, "learning_rate": 2.3557587863881378e-06, "loss": 0.193, "step": 3274 }, { "epoch": 1.6, "learning_rate": 2.3544429987685835e-06, "loss": 0.198, "step": 3275 }, { "epoch": 1.6, "learning_rate": 2.3531272516053196e-06, "loss": 0.1784, "step": 3276 }, { "epoch": 1.6, "learning_rate": 2.3518115452640485e-06, "loss": 0.1849, "step": 3277 }, { "epoch": 1.6, "learning_rate": 2.3504958801104574e-06, "loss": 0.1981, "step": 3278 }, { "epoch": 1.6, "learning_rate": 2.349180256510224e-06, "loss": 0.1953, "step": 3279 }, { "epoch": 1.6, "learning_rate": 2.3478646748290153e-06, "loss": 0.1791, "step": 3280 }, { "epoch": 1.6, "learning_rate": 2.346549135432483e-06, "loss": 0.179, "step": 3281 }, { "epoch": 1.6, "learning_rate": 2.345233638686273e-06, "loss": 0.1955, "step": 3282 }, { "epoch": 1.6, "learning_rate": 2.343918184956015e-06, "loss": 0.1888, "step": 3283 }, { "epoch": 1.6, "learning_rate": 2.342602774607328e-06, "loss": 0.1645, "step": 3284 }, { "epoch": 1.6, "learning_rate": 2.3412874080058174e-06, "loss": 0.1764, "step": 3285 }, { "epoch": 1.6, "learning_rate": 2.3399720855170784e-06, "loss": 0.1615, "step": 3286 }, { "epoch": 1.6, "learning_rate": 2.338656807506695e-06, "loss": 0.1825, "step": 3287 }, { "epoch": 1.61, "learning_rate": 2.337341574340235e-06, "loss": 0.2007, "step": 3288 }, { "epoch": 1.61, "learning_rate": 2.3360263863832574e-06, "loss": 0.1664, "step": 3289 }, { "epoch": 1.61, "learning_rate": 2.3347112440013057e-06, "loss": 0.1691, "step": 3290 }, { "epoch": 1.61, "learning_rate": 2.333396147559913e-06, "loss": 0.1833, "step": 3291 }, { "epoch": 1.61, "learning_rate": 2.3320810974246e-06, "loss": 0.1612, "step": 3292 }, { "epoch": 1.61, "learning_rate": 2.3307660939608715e-06, "loss": 0.2103, "step": 3293 }, { "epoch": 1.61, "learning_rate": 2.329451137534222e-06, "loss": 0.1579, "step": 3294 }, { "epoch": 1.61, "learning_rate": 2.3281362285101323e-06, "loss": 0.1711, "step": 3295 }, { "epoch": 1.61, "learning_rate": 2.326821367254068e-06, "loss": 0.1643, "step": 3296 }, { "epoch": 1.61, "learning_rate": 2.3255065541314856e-06, "loss": 0.1554, "step": 3297 }, { "epoch": 1.61, "learning_rate": 2.3241917895078258e-06, "loss": 0.1795, "step": 3298 }, { "epoch": 1.61, "learning_rate": 2.3228770737485145e-06, "loss": 0.1835, "step": 3299 }, { "epoch": 1.61, "learning_rate": 2.3215624072189657e-06, "loss": 0.2265, "step": 3300 }, { "epoch": 1.61, "learning_rate": 2.320247790284579e-06, "loss": 0.1558, "step": 3301 }, { "epoch": 1.61, "learning_rate": 2.3189332233107424e-06, "loss": 0.2453, "step": 3302 }, { "epoch": 1.61, "learning_rate": 2.317618706662827e-06, "loss": 0.1911, "step": 3303 }, { "epoch": 1.61, "learning_rate": 2.3163042407061916e-06, "loss": 0.1912, "step": 3304 }, { "epoch": 1.61, "learning_rate": 2.3149898258061802e-06, "loss": 0.1726, "step": 3305 }, { "epoch": 1.61, "learning_rate": 2.313675462328122e-06, "loss": 0.1926, "step": 3306 }, { "epoch": 1.61, "learning_rate": 2.3123611506373347e-06, "loss": 0.1584, "step": 3307 }, { "epoch": 1.62, "learning_rate": 2.3110468910991185e-06, "loss": 0.1536, "step": 3308 }, { "epoch": 1.62, "learning_rate": 2.3097326840787606e-06, "loss": 0.1607, "step": 3309 }, { "epoch": 1.62, "learning_rate": 2.308418529941533e-06, "loss": 0.2046, "step": 3310 }, { "epoch": 1.62, "learning_rate": 2.3071044290526933e-06, "loss": 0.1735, "step": 3311 }, { "epoch": 1.62, "learning_rate": 2.3057903817774845e-06, "loss": 0.2107, "step": 3312 }, { "epoch": 1.62, "learning_rate": 2.304476388481135e-06, "loss": 0.1901, "step": 3313 }, { "epoch": 1.62, "learning_rate": 2.3031624495288576e-06, "loss": 0.1764, "step": 3314 }, { "epoch": 1.62, "learning_rate": 2.301848565285849e-06, "loss": 0.169, "step": 3315 }, { "epoch": 1.62, "learning_rate": 2.300534736117292e-06, "loss": 0.211, "step": 3316 }, { "epoch": 1.62, "learning_rate": 2.2992209623883557e-06, "loss": 0.1889, "step": 3317 }, { "epoch": 1.62, "learning_rate": 2.2979072444641902e-06, "loss": 0.2003, "step": 3318 }, { "epoch": 1.62, "learning_rate": 2.296593582709933e-06, "loss": 0.1588, "step": 3319 }, { "epoch": 1.62, "learning_rate": 2.2952799774907035e-06, "loss": 0.1564, "step": 3320 }, { "epoch": 1.62, "learning_rate": 2.2939664291716072e-06, "loss": 0.2073, "step": 3321 }, { "epoch": 1.62, "learning_rate": 2.2926529381177336e-06, "loss": 0.213, "step": 3322 }, { "epoch": 1.62, "learning_rate": 2.2913395046941568e-06, "loss": 0.1864, "step": 3323 }, { "epoch": 1.62, "learning_rate": 2.2900261292659327e-06, "loss": 0.1569, "step": 3324 }, { "epoch": 1.62, "learning_rate": 2.2887128121981037e-06, "loss": 0.1944, "step": 3325 }, { "epoch": 1.62, "learning_rate": 2.2873995538556933e-06, "loss": 0.1772, "step": 3326 }, { "epoch": 1.62, "learning_rate": 2.2860863546037116e-06, "loss": 0.2027, "step": 3327 }, { "epoch": 1.62, "learning_rate": 2.2847732148071497e-06, "loss": 0.1738, "step": 3328 }, { "epoch": 1.63, "learning_rate": 2.2834601348309845e-06, "loss": 0.1508, "step": 3329 }, { "epoch": 1.63, "learning_rate": 2.2821471150401736e-06, "loss": 0.1739, "step": 3330 }, { "epoch": 1.63, "learning_rate": 2.2808341557996594e-06, "loss": 0.2074, "step": 3331 }, { "epoch": 1.63, "learning_rate": 2.2795212574743692e-06, "loss": 0.1726, "step": 3332 }, { "epoch": 1.63, "learning_rate": 2.27820842042921e-06, "loss": 0.1765, "step": 3333 }, { "epoch": 1.63, "learning_rate": 2.276895645029074e-06, "loss": 0.256, "step": 3334 }, { "epoch": 1.63, "learning_rate": 2.2755829316388355e-06, "loss": 0.1871, "step": 3335 }, { "epoch": 1.63, "learning_rate": 2.2742702806233503e-06, "loss": 0.168, "step": 3336 }, { "epoch": 1.63, "learning_rate": 2.2729576923474596e-06, "loss": 0.1776, "step": 3337 }, { "epoch": 1.63, "learning_rate": 2.2716451671759864e-06, "loss": 0.1939, "step": 3338 }, { "epoch": 1.63, "learning_rate": 2.2703327054737346e-06, "loss": 0.1902, "step": 3339 }, { "epoch": 1.63, "learning_rate": 2.2690203076054916e-06, "loss": 0.2123, "step": 3340 }, { "epoch": 1.63, "learning_rate": 2.267707973936026e-06, "loss": 0.2058, "step": 3341 }, { "epoch": 1.63, "learning_rate": 2.2663957048300915e-06, "loss": 0.195, "step": 3342 }, { "epoch": 1.63, "learning_rate": 2.26508350065242e-06, "loss": 0.1632, "step": 3343 }, { "epoch": 1.63, "learning_rate": 2.263771361767728e-06, "loss": 0.2056, "step": 3344 }, { "epoch": 1.63, "learning_rate": 2.262459288540712e-06, "loss": 0.2095, "step": 3345 }, { "epoch": 1.63, "learning_rate": 2.2611472813360516e-06, "loss": 0.1676, "step": 3346 }, { "epoch": 1.63, "learning_rate": 2.2598353405184094e-06, "loss": 0.1823, "step": 3347 }, { "epoch": 1.63, "learning_rate": 2.258523466452425e-06, "loss": 0.2159, "step": 3348 }, { "epoch": 1.64, "learning_rate": 2.257211659502725e-06, "loss": 0.1719, "step": 3349 }, { "epoch": 1.64, "learning_rate": 2.255899920033913e-06, "loss": 0.2008, "step": 3350 }, { "epoch": 1.64, "learning_rate": 2.2545882484105748e-06, "loss": 0.1801, "step": 3351 }, { "epoch": 1.64, "learning_rate": 2.2532766449972797e-06, "loss": 0.2037, "step": 3352 }, { "epoch": 1.64, "learning_rate": 2.251965110158576e-06, "loss": 0.1591, "step": 3353 }, { "epoch": 1.64, "learning_rate": 2.2506536442589925e-06, "loss": 0.1866, "step": 3354 }, { "epoch": 1.64, "learning_rate": 2.2493422476630407e-06, "loss": 0.1759, "step": 3355 }, { "epoch": 1.64, "learning_rate": 2.24803092073521e-06, "loss": 0.1622, "step": 3356 }, { "epoch": 1.64, "learning_rate": 2.2467196638399745e-06, "loss": 0.1918, "step": 3357 }, { "epoch": 1.64, "learning_rate": 2.2454084773417853e-06, "loss": 0.1596, "step": 3358 }, { "epoch": 1.64, "learning_rate": 2.2440973616050753e-06, "loss": 0.1833, "step": 3359 }, { "epoch": 1.64, "learning_rate": 2.2427863169942573e-06, "loss": 0.1974, "step": 3360 }, { "epoch": 1.64, "learning_rate": 2.2414753438737247e-06, "loss": 0.204, "step": 3361 }, { "epoch": 1.64, "learning_rate": 2.240164442607852e-06, "loss": 0.1588, "step": 3362 }, { "epoch": 1.64, "learning_rate": 2.238853613560992e-06, "loss": 0.2067, "step": 3363 }, { "epoch": 1.64, "learning_rate": 2.237542857097478e-06, "loss": 0.2029, "step": 3364 }, { "epoch": 1.64, "learning_rate": 2.2362321735816224e-06, "loss": 0.182, "step": 3365 }, { "epoch": 1.64, "learning_rate": 2.23492156337772e-06, "loss": 0.2133, "step": 3366 }, { "epoch": 1.64, "learning_rate": 2.233611026850042e-06, "loss": 0.1806, "step": 3367 }, { "epoch": 1.64, "learning_rate": 2.232300564362842e-06, "loss": 0.1752, "step": 3368 }, { "epoch": 1.65, "learning_rate": 2.2309901762803495e-06, "loss": 0.1686, "step": 3369 }, { "epoch": 1.65, "learning_rate": 2.2296798629667763e-06, "loss": 0.1964, "step": 3370 }, { "epoch": 1.65, "learning_rate": 2.228369624786314e-06, "loss": 0.1667, "step": 3371 }, { "epoch": 1.65, "learning_rate": 2.22705946210313e-06, "loss": 0.1815, "step": 3372 }, { "epoch": 1.65, "learning_rate": 2.2257493752813723e-06, "loss": 0.1899, "step": 3373 }, { "epoch": 1.65, "learning_rate": 2.2244393646851695e-06, "loss": 0.1735, "step": 3374 }, { "epoch": 1.65, "learning_rate": 2.2231294306786256e-06, "loss": 0.1564, "step": 3375 }, { "epoch": 1.65, "learning_rate": 2.2218195736258267e-06, "loss": 0.1724, "step": 3376 }, { "epoch": 1.65, "learning_rate": 2.220509793890837e-06, "loss": 0.1914, "step": 3377 }, { "epoch": 1.65, "learning_rate": 2.2192000918376964e-06, "loss": 0.1703, "step": 3378 }, { "epoch": 1.65, "learning_rate": 2.2178904678304254e-06, "loss": 0.1869, "step": 3379 }, { "epoch": 1.65, "learning_rate": 2.2165809222330223e-06, "loss": 0.1726, "step": 3380 }, { "epoch": 1.65, "learning_rate": 2.2152714554094656e-06, "loss": 0.1825, "step": 3381 }, { "epoch": 1.65, "learning_rate": 2.213962067723708e-06, "loss": 0.2255, "step": 3382 }, { "epoch": 1.65, "learning_rate": 2.212652759539684e-06, "loss": 0.1668, "step": 3383 }, { "epoch": 1.65, "learning_rate": 2.2113435312213034e-06, "loss": 0.2152, "step": 3384 }, { "epoch": 1.65, "learning_rate": 2.210034383132454e-06, "loss": 0.2011, "step": 3385 }, { "epoch": 1.65, "learning_rate": 2.2087253156370032e-06, "loss": 0.1858, "step": 3386 }, { "epoch": 1.65, "learning_rate": 2.207416329098795e-06, "loss": 0.1648, "step": 3387 }, { "epoch": 1.65, "learning_rate": 2.2061074238816494e-06, "loss": 0.2096, "step": 3388 }, { "epoch": 1.65, "learning_rate": 2.2047986003493664e-06, "loss": 0.1714, "step": 3389 }, { "epoch": 1.66, "learning_rate": 2.2034898588657206e-06, "loss": 0.1472, "step": 3390 }, { "epoch": 1.66, "learning_rate": 2.202181199794467e-06, "loss": 0.1829, "step": 3391 }, { "epoch": 1.66, "learning_rate": 2.200872623499335e-06, "loss": 0.1306, "step": 3392 }, { "epoch": 1.66, "learning_rate": 2.199564130344032e-06, "loss": 0.1703, "step": 3393 }, { "epoch": 1.66, "learning_rate": 2.198255720692242e-06, "loss": 0.1993, "step": 3394 }, { "epoch": 1.66, "learning_rate": 2.1969473949076253e-06, "loss": 0.165, "step": 3395 }, { "epoch": 1.66, "learning_rate": 2.195639153353821e-06, "loss": 0.1906, "step": 3396 }, { "epoch": 1.66, "learning_rate": 2.194330996394443e-06, "loss": 0.1806, "step": 3397 }, { "epoch": 1.66, "learning_rate": 2.193022924393082e-06, "loss": 0.1905, "step": 3398 }, { "epoch": 1.66, "learning_rate": 2.1917149377133046e-06, "loss": 0.2225, "step": 3399 }, { "epoch": 1.66, "learning_rate": 2.1904070367186536e-06, "loss": 0.1437, "step": 3400 }, { "epoch": 1.66, "learning_rate": 2.18909922177265e-06, "loss": 0.1929, "step": 3401 }, { "epoch": 1.66, "learning_rate": 2.1877914932387896e-06, "loss": 0.1741, "step": 3402 }, { "epoch": 1.66, "learning_rate": 2.1864838514805427e-06, "loss": 0.173, "step": 3403 }, { "epoch": 1.66, "learning_rate": 2.1851762968613582e-06, "loss": 0.1809, "step": 3404 }, { "epoch": 1.66, "learning_rate": 2.1838688297446583e-06, "loss": 0.1491, "step": 3405 }, { "epoch": 1.66, "learning_rate": 2.1825614504938424e-06, "loss": 0.1647, "step": 3406 }, { "epoch": 1.66, "learning_rate": 2.181254159472286e-06, "loss": 0.1836, "step": 3407 }, { "epoch": 1.66, "learning_rate": 2.1799469570433385e-06, "loss": 0.1728, "step": 3408 }, { "epoch": 1.66, "learning_rate": 2.178639843570325e-06, "loss": 0.1736, "step": 3409 }, { "epoch": 1.67, "learning_rate": 2.1773328194165454e-06, "loss": 0.1872, "step": 3410 }, { "epoch": 1.67, "learning_rate": 2.176025884945278e-06, "loss": 0.1777, "step": 3411 }, { "epoch": 1.67, "learning_rate": 2.1747190405197718e-06, "loss": 0.1614, "step": 3412 }, { "epoch": 1.67, "learning_rate": 2.1734122865032537e-06, "loss": 0.1406, "step": 3413 }, { "epoch": 1.67, "learning_rate": 2.1721056232589245e-06, "loss": 0.2111, "step": 3414 }, { "epoch": 1.67, "learning_rate": 2.1707990511499584e-06, "loss": 0.2011, "step": 3415 }, { "epoch": 1.67, "learning_rate": 2.169492570539507e-06, "loss": 0.177, "step": 3416 }, { "epoch": 1.67, "learning_rate": 2.1681861817906955e-06, "loss": 0.1839, "step": 3417 }, { "epoch": 1.67, "learning_rate": 2.166879885266622e-06, "loss": 0.1783, "step": 3418 }, { "epoch": 1.67, "learning_rate": 2.1655736813303613e-06, "loss": 0.1603, "step": 3419 }, { "epoch": 1.67, "learning_rate": 2.1642675703449593e-06, "loss": 0.1582, "step": 3420 }, { "epoch": 1.67, "learning_rate": 2.162961552673441e-06, "loss": 0.1853, "step": 3421 }, { "epoch": 1.67, "learning_rate": 2.1616556286788e-06, "loss": 0.1893, "step": 3422 }, { "epoch": 1.67, "learning_rate": 2.160349798724008e-06, "loss": 0.195, "step": 3423 }, { "epoch": 1.67, "learning_rate": 2.1590440631720077e-06, "loss": 0.1608, "step": 3424 }, { "epoch": 1.67, "learning_rate": 2.1577384223857176e-06, "loss": 0.1595, "step": 3425 }, { "epoch": 1.67, "learning_rate": 2.1564328767280295e-06, "loss": 0.172, "step": 3426 }, { "epoch": 1.67, "learning_rate": 2.155127426561808e-06, "loss": 0.1865, "step": 3427 }, { "epoch": 1.67, "learning_rate": 2.1538220722498913e-06, "loss": 0.1436, "step": 3428 }, { "epoch": 1.67, "learning_rate": 2.1525168141550917e-06, "loss": 0.2012, "step": 3429 }, { "epoch": 1.67, "learning_rate": 2.1512116526401928e-06, "loss": 0.1718, "step": 3430 }, { "epoch": 1.68, "learning_rate": 2.149906588067954e-06, "loss": 0.1976, "step": 3431 }, { "epoch": 1.68, "learning_rate": 2.1486016208011073e-06, "loss": 0.1749, "step": 3432 }, { "epoch": 1.68, "learning_rate": 2.147296751202355e-06, "loss": 0.1532, "step": 3433 }, { "epoch": 1.68, "learning_rate": 2.1459919796343757e-06, "loss": 0.1883, "step": 3434 }, { "epoch": 1.68, "learning_rate": 2.1446873064598176e-06, "loss": 0.1578, "step": 3435 }, { "epoch": 1.68, "learning_rate": 2.1433827320413053e-06, "loss": 0.2185, "step": 3436 }, { "epoch": 1.68, "learning_rate": 2.142078256741432e-06, "loss": 0.1839, "step": 3437 }, { "epoch": 1.68, "learning_rate": 2.1407738809227666e-06, "loss": 0.2256, "step": 3438 }, { "epoch": 1.68, "learning_rate": 2.1394696049478475e-06, "loss": 0.1729, "step": 3439 }, { "epoch": 1.68, "learning_rate": 2.138165429179187e-06, "loss": 0.1839, "step": 3440 }, { "epoch": 1.68, "learning_rate": 2.1368613539792706e-06, "loss": 0.1726, "step": 3441 }, { "epoch": 1.68, "learning_rate": 2.1355573797105535e-06, "loss": 0.1811, "step": 3442 }, { "epoch": 1.68, "learning_rate": 2.1342535067354636e-06, "loss": 0.1807, "step": 3443 }, { "epoch": 1.68, "learning_rate": 2.132949735416402e-06, "loss": 0.2023, "step": 3444 }, { "epoch": 1.68, "learning_rate": 2.131646066115739e-06, "loss": 0.1973, "step": 3445 }, { "epoch": 1.68, "learning_rate": 2.1303424991958195e-06, "loss": 0.1701, "step": 3446 }, { "epoch": 1.68, "learning_rate": 2.1290390350189584e-06, "loss": 0.1857, "step": 3447 }, { "epoch": 1.68, "learning_rate": 2.1277356739474408e-06, "loss": 0.188, "step": 3448 }, { "epoch": 1.68, "learning_rate": 2.1264324163435264e-06, "loss": 0.1887, "step": 3449 }, { "epoch": 1.68, "learning_rate": 2.1251292625694415e-06, "loss": 0.2011, "step": 3450 }, { "epoch": 1.69, "learning_rate": 2.1238262129873894e-06, "loss": 0.1599, "step": 3451 }, { "epoch": 1.69, "learning_rate": 2.122523267959539e-06, "loss": 0.1773, "step": 3452 }, { "epoch": 1.69, "learning_rate": 2.1212204278480344e-06, "loss": 0.2091, "step": 3453 }, { "epoch": 1.69, "learning_rate": 2.119917693014986e-06, "loss": 0.1697, "step": 3454 }, { "epoch": 1.69, "learning_rate": 2.1186150638224788e-06, "loss": 0.1536, "step": 3455 }, { "epoch": 1.69, "learning_rate": 2.1173125406325682e-06, "loss": 0.1724, "step": 3456 }, { "epoch": 1.69, "learning_rate": 2.1160101238072784e-06, "loss": 0.1834, "step": 3457 }, { "epoch": 1.69, "learning_rate": 2.1147078137086038e-06, "loss": 0.1789, "step": 3458 }, { "epoch": 1.69, "learning_rate": 2.1134056106985106e-06, "loss": 0.1636, "step": 3459 }, { "epoch": 1.69, "learning_rate": 2.1121035151389352e-06, "loss": 0.1568, "step": 3460 }, { "epoch": 1.69, "learning_rate": 2.110801527391783e-06, "loss": 0.1645, "step": 3461 }, { "epoch": 1.69, "learning_rate": 2.1094996478189313e-06, "loss": 0.197, "step": 3462 }, { "epoch": 1.69, "learning_rate": 2.1081978767822242e-06, "loss": 0.1647, "step": 3463 }, { "epoch": 1.69, "learning_rate": 2.106896214643478e-06, "loss": 0.1828, "step": 3464 }, { "epoch": 1.69, "learning_rate": 2.1055946617644797e-06, "loss": 0.2091, "step": 3465 }, { "epoch": 1.69, "learning_rate": 2.104293218506984e-06, "loss": 0.1641, "step": 3466 }, { "epoch": 1.69, "learning_rate": 2.102991885232714e-06, "loss": 0.2135, "step": 3467 }, { "epoch": 1.69, "learning_rate": 2.101690662303366e-06, "loss": 0.1752, "step": 3468 }, { "epoch": 1.69, "learning_rate": 2.1003895500806014e-06, "loss": 0.2066, "step": 3469 }, { "epoch": 1.69, "learning_rate": 2.0990885489260547e-06, "loss": 0.1836, "step": 3470 }, { "epoch": 1.69, "learning_rate": 2.0977876592013273e-06, "loss": 0.182, "step": 3471 }, { "epoch": 1.7, "learning_rate": 2.09648688126799e-06, "loss": 0.1815, "step": 3472 }, { "epoch": 1.7, "learning_rate": 2.0951862154875826e-06, "loss": 0.219, "step": 3473 }, { "epoch": 1.7, "learning_rate": 2.093885662221612e-06, "loss": 0.1681, "step": 3474 }, { "epoch": 1.7, "learning_rate": 2.092585221831559e-06, "loss": 0.2078, "step": 3475 }, { "epoch": 1.7, "learning_rate": 2.091284894678867e-06, "loss": 0.2238, "step": 3476 }, { "epoch": 1.7, "learning_rate": 2.089984681124952e-06, "loss": 0.182, "step": 3477 }, { "epoch": 1.7, "learning_rate": 2.0886845815311963e-06, "loss": 0.1646, "step": 3478 }, { "epoch": 1.7, "learning_rate": 2.08738459625895e-06, "loss": 0.1727, "step": 3479 }, { "epoch": 1.7, "learning_rate": 2.086084725669534e-06, "loss": 0.2109, "step": 3480 }, { "epoch": 1.7, "learning_rate": 2.0847849701242363e-06, "loss": 0.1625, "step": 3481 }, { "epoch": 1.7, "learning_rate": 2.0834853299843117e-06, "loss": 0.1695, "step": 3482 }, { "epoch": 1.7, "learning_rate": 2.082185805610984e-06, "loss": 0.2269, "step": 3483 }, { "epoch": 1.7, "learning_rate": 2.080886397365444e-06, "loss": 0.1535, "step": 3484 }, { "epoch": 1.7, "learning_rate": 2.0795871056088516e-06, "loss": 0.2138, "step": 3485 }, { "epoch": 1.7, "learning_rate": 2.0782879307023336e-06, "loss": 0.1779, "step": 3486 }, { "epoch": 1.7, "learning_rate": 2.0769888730069846e-06, "loss": 0.1942, "step": 3487 }, { "epoch": 1.7, "learning_rate": 2.0756899328838647e-06, "loss": 0.1949, "step": 3488 }, { "epoch": 1.7, "learning_rate": 2.0743911106940035e-06, "loss": 0.1991, "step": 3489 }, { "epoch": 1.7, "learning_rate": 2.0730924067983983e-06, "loss": 0.1574, "step": 3490 }, { "epoch": 1.7, "learning_rate": 2.0717938215580115e-06, "loss": 0.1705, "step": 3491 }, { "epoch": 1.71, "learning_rate": 2.0704953553337743e-06, "loss": 0.2331, "step": 3492 }, { "epoch": 1.71, "learning_rate": 2.0691970084865838e-06, "loss": 0.1787, "step": 3493 }, { "epoch": 1.71, "learning_rate": 2.0678987813773018e-06, "loss": 0.1699, "step": 3494 }, { "epoch": 1.71, "learning_rate": 2.0666006743667622e-06, "loss": 0.1523, "step": 3495 }, { "epoch": 1.71, "learning_rate": 2.0653026878157614e-06, "loss": 0.1797, "step": 3496 }, { "epoch": 1.71, "learning_rate": 2.0640048220850633e-06, "loss": 0.1611, "step": 3497 }, { "epoch": 1.71, "learning_rate": 2.0627070775353984e-06, "loss": 0.1841, "step": 3498 }, { "epoch": 1.71, "learning_rate": 2.0614094545274623e-06, "loss": 0.2271, "step": 3499 }, { "epoch": 1.71, "learning_rate": 2.06011195342192e-06, "loss": 0.189, "step": 3500 }, { "epoch": 1.71, "learning_rate": 2.0588145745793987e-06, "loss": 0.1729, "step": 3501 }, { "epoch": 1.71, "learning_rate": 2.057517318360495e-06, "loss": 0.1764, "step": 3502 }, { "epoch": 1.71, "learning_rate": 2.056220185125768e-06, "loss": 0.1999, "step": 3503 }, { "epoch": 1.71, "learning_rate": 2.0549231752357453e-06, "loss": 0.1519, "step": 3504 }, { "epoch": 1.71, "learning_rate": 2.053626289050921e-06, "loss": 0.1874, "step": 3505 }, { "epoch": 1.71, "learning_rate": 2.0523295269317515e-06, "loss": 0.1584, "step": 3506 }, { "epoch": 1.71, "learning_rate": 2.051032889238661e-06, "loss": 0.229, "step": 3507 }, { "epoch": 1.71, "learning_rate": 2.049736376332039e-06, "loss": 0.1644, "step": 3508 }, { "epoch": 1.71, "learning_rate": 2.0484399885722376e-06, "loss": 0.1871, "step": 3509 }, { "epoch": 1.71, "learning_rate": 2.047143726319579e-06, "loss": 0.1865, "step": 3510 }, { "epoch": 1.71, "learning_rate": 2.0458475899343473e-06, "loss": 0.1983, "step": 3511 }, { "epoch": 1.71, "learning_rate": 2.0445515797767916e-06, "loss": 0.1733, "step": 3512 }, { "epoch": 1.72, "learning_rate": 2.0432556962071277e-06, "loss": 0.1911, "step": 3513 }, { "epoch": 1.72, "learning_rate": 2.041959939585533e-06, "loss": 0.1515, "step": 3514 }, { "epoch": 1.72, "learning_rate": 2.040664310272154e-06, "loss": 0.2037, "step": 3515 }, { "epoch": 1.72, "learning_rate": 2.039368808627098e-06, "loss": 0.1895, "step": 3516 }, { "epoch": 1.72, "learning_rate": 2.0380734350104396e-06, "loss": 0.2084, "step": 3517 }, { "epoch": 1.72, "learning_rate": 2.0367781897822147e-06, "loss": 0.1971, "step": 3518 }, { "epoch": 1.72, "learning_rate": 2.035483073302426e-06, "loss": 0.1895, "step": 3519 }, { "epoch": 1.72, "learning_rate": 2.034188085931041e-06, "loss": 0.1596, "step": 3520 }, { "epoch": 1.72, "learning_rate": 2.032893228027989e-06, "loss": 0.1859, "step": 3521 }, { "epoch": 1.72, "learning_rate": 2.031598499953165e-06, "loss": 0.1771, "step": 3522 }, { "epoch": 1.72, "learning_rate": 2.0303039020664267e-06, "loss": 0.1642, "step": 3523 }, { "epoch": 1.72, "learning_rate": 2.029009434727595e-06, "loss": 0.2063, "step": 3524 }, { "epoch": 1.72, "learning_rate": 2.027715098296458e-06, "loss": 0.1999, "step": 3525 }, { "epoch": 1.72, "learning_rate": 2.026420893132765e-06, "loss": 0.1978, "step": 3526 }, { "epoch": 1.72, "learning_rate": 2.0251268195962276e-06, "loss": 0.1739, "step": 3527 }, { "epoch": 1.72, "learning_rate": 2.0238328780465233e-06, "loss": 0.1837, "step": 3528 }, { "epoch": 1.72, "learning_rate": 2.0225390688432907e-06, "loss": 0.1888, "step": 3529 }, { "epoch": 1.72, "learning_rate": 2.021245392346134e-06, "loss": 0.1454, "step": 3530 }, { "epoch": 1.72, "learning_rate": 2.0199518489146177e-06, "loss": 0.1839, "step": 3531 }, { "epoch": 1.72, "learning_rate": 2.0186584389082726e-06, "loss": 0.1716, "step": 3532 }, { "epoch": 1.73, "learning_rate": 2.0173651626865892e-06, "loss": 0.1638, "step": 3533 }, { "epoch": 1.73, "learning_rate": 2.0160720206090224e-06, "loss": 0.1678, "step": 3534 }, { "epoch": 1.73, "learning_rate": 2.014779013034991e-06, "loss": 0.2132, "step": 3535 }, { "epoch": 1.73, "learning_rate": 2.013486140323874e-06, "loss": 0.1855, "step": 3536 }, { "epoch": 1.73, "learning_rate": 2.0121934028350144e-06, "loss": 0.1665, "step": 3537 }, { "epoch": 1.73, "learning_rate": 2.010900800927717e-06, "loss": 0.1861, "step": 3538 }, { "epoch": 1.73, "learning_rate": 2.0096083349612487e-06, "loss": 0.1706, "step": 3539 }, { "epoch": 1.73, "learning_rate": 2.0083160052948397e-06, "loss": 0.1886, "step": 3540 }, { "epoch": 1.73, "learning_rate": 2.0070238122876825e-06, "loss": 0.1825, "step": 3541 }, { "epoch": 1.73, "learning_rate": 2.005731756298929e-06, "loss": 0.151, "step": 3542 }, { "epoch": 1.73, "learning_rate": 2.0044398376876965e-06, "loss": 0.1544, "step": 3543 }, { "epoch": 1.73, "learning_rate": 2.0031480568130608e-06, "loss": 0.1997, "step": 3544 }, { "epoch": 1.73, "learning_rate": 2.001856414034063e-06, "loss": 0.1602, "step": 3545 }, { "epoch": 1.73, "learning_rate": 2.0005649097097023e-06, "loss": 0.1604, "step": 3546 }, { "epoch": 1.73, "learning_rate": 1.9992735441989416e-06, "loss": 0.1962, "step": 3547 }, { "epoch": 1.73, "learning_rate": 1.9979823178607044e-06, "loss": 0.1789, "step": 3548 }, { "epoch": 1.73, "learning_rate": 1.996691231053875e-06, "loss": 0.2026, "step": 3549 }, { "epoch": 1.73, "learning_rate": 1.9954002841373017e-06, "loss": 0.1995, "step": 3550 }, { "epoch": 1.73, "learning_rate": 1.994109477469791e-06, "loss": 0.1679, "step": 3551 }, { "epoch": 1.73, "learning_rate": 1.99281881141011e-06, "loss": 0.1966, "step": 3552 }, { "epoch": 1.73, "learning_rate": 1.991528286316989e-06, "loss": 0.1749, "step": 3553 }, { "epoch": 1.74, "learning_rate": 1.990237902549118e-06, "loss": 0.1425, "step": 3554 }, { "epoch": 1.74, "learning_rate": 1.988947660465148e-06, "loss": 0.1824, "step": 3555 }, { "epoch": 1.74, "learning_rate": 1.987657560423691e-06, "loss": 0.1802, "step": 3556 }, { "epoch": 1.74, "learning_rate": 1.986367602783318e-06, "loss": 0.2016, "step": 3557 }, { "epoch": 1.74, "learning_rate": 1.985077787902561e-06, "loss": 0.191, "step": 3558 }, { "epoch": 1.74, "learning_rate": 1.983788116139914e-06, "loss": 0.1826, "step": 3559 }, { "epoch": 1.74, "learning_rate": 1.9824985878538297e-06, "loss": 0.1953, "step": 3560 }, { "epoch": 1.74, "learning_rate": 1.9812092034027203e-06, "loss": 0.1806, "step": 3561 }, { "epoch": 1.74, "learning_rate": 1.9799199631449608e-06, "loss": 0.1865, "step": 3562 }, { "epoch": 1.74, "learning_rate": 1.978630867438881e-06, "loss": 0.1629, "step": 3563 }, { "epoch": 1.74, "learning_rate": 1.9773419166427766e-06, "loss": 0.2139, "step": 3564 }, { "epoch": 1.74, "learning_rate": 1.9760531111148995e-06, "loss": 0.1828, "step": 3565 }, { "epoch": 1.74, "learning_rate": 1.9747644512134617e-06, "loss": 0.1748, "step": 3566 }, { "epoch": 1.74, "learning_rate": 1.9734759372966344e-06, "loss": 0.2268, "step": 3567 }, { "epoch": 1.74, "learning_rate": 1.9721875697225484e-06, "loss": 0.1743, "step": 3568 }, { "epoch": 1.74, "learning_rate": 1.970899348849296e-06, "loss": 0.1805, "step": 3569 }, { "epoch": 1.74, "learning_rate": 1.9696112750349254e-06, "loss": 0.1771, "step": 3570 }, { "epoch": 1.74, "learning_rate": 1.9683233486374464e-06, "loss": 0.194, "step": 3571 }, { "epoch": 1.74, "learning_rate": 1.9670355700148257e-06, "loss": 0.183, "step": 3572 }, { "epoch": 1.74, "learning_rate": 1.96574793952499e-06, "loss": 0.1562, "step": 3573 }, { "epoch": 1.75, "learning_rate": 1.964460457525826e-06, "loss": 0.1663, "step": 3574 }, { "epoch": 1.75, "learning_rate": 1.963173124375178e-06, "loss": 0.1948, "step": 3575 }, { "epoch": 1.75, "learning_rate": 1.9618859404308476e-06, "loss": 0.1741, "step": 3576 }, { "epoch": 1.75, "learning_rate": 1.960598906050598e-06, "loss": 0.2206, "step": 3577 }, { "epoch": 1.75, "learning_rate": 1.9593120215921467e-06, "loss": 0.1816, "step": 3578 }, { "epoch": 1.75, "learning_rate": 1.9580252874131747e-06, "loss": 0.2115, "step": 3579 }, { "epoch": 1.75, "learning_rate": 1.9567387038713175e-06, "loss": 0.1664, "step": 3580 }, { "epoch": 1.75, "learning_rate": 1.9554522713241696e-06, "loss": 0.183, "step": 3581 }, { "epoch": 1.75, "learning_rate": 1.9541659901292826e-06, "loss": 0.1786, "step": 3582 }, { "epoch": 1.75, "learning_rate": 1.9528798606441675e-06, "loss": 0.1628, "step": 3583 }, { "epoch": 1.75, "learning_rate": 1.9515938832262946e-06, "loss": 0.1745, "step": 3584 }, { "epoch": 1.75, "learning_rate": 1.950308058233088e-06, "loss": 0.1751, "step": 3585 }, { "epoch": 1.75, "learning_rate": 1.9490223860219322e-06, "loss": 0.1787, "step": 3586 }, { "epoch": 1.75, "learning_rate": 1.9477368669501686e-06, "loss": 0.1842, "step": 3587 }, { "epoch": 1.75, "learning_rate": 1.9464515013750946e-06, "loss": 0.1799, "step": 3588 }, { "epoch": 1.75, "learning_rate": 1.9451662896539683e-06, "loss": 0.2015, "step": 3589 }, { "epoch": 1.75, "learning_rate": 1.943881232144002e-06, "loss": 0.1658, "step": 3590 }, { "epoch": 1.75, "learning_rate": 1.9425963292023663e-06, "loss": 0.1732, "step": 3591 }, { "epoch": 1.75, "learning_rate": 1.941311581186189e-06, "loss": 0.197, "step": 3592 }, { "epoch": 1.75, "learning_rate": 1.9400269884525537e-06, "loss": 0.1616, "step": 3593 }, { "epoch": 1.75, "learning_rate": 1.9387425513585025e-06, "loss": 0.1785, "step": 3594 }, { "epoch": 1.76, "learning_rate": 1.937458270261033e-06, "loss": 0.15, "step": 3595 }, { "epoch": 1.76, "learning_rate": 1.9361741455171e-06, "loss": 0.1942, "step": 3596 }, { "epoch": 1.76, "learning_rate": 1.934890177483615e-06, "loss": 0.1923, "step": 3597 }, { "epoch": 1.76, "learning_rate": 1.933606366517444e-06, "loss": 0.1989, "step": 3598 }, { "epoch": 1.76, "learning_rate": 1.9323227129754134e-06, "loss": 0.185, "step": 3599 }, { "epoch": 1.76, "learning_rate": 1.9310392172143024e-06, "loss": 0.1801, "step": 3600 }, { "epoch": 1.76, "learning_rate": 1.9297558795908473e-06, "loss": 0.1932, "step": 3601 }, { "epoch": 1.76, "learning_rate": 1.928472700461741e-06, "loss": 0.1812, "step": 3602 }, { "epoch": 1.76, "learning_rate": 1.9271896801836297e-06, "loss": 0.2122, "step": 3603 }, { "epoch": 1.76, "learning_rate": 1.9259068191131203e-06, "loss": 0.1732, "step": 3604 }, { "epoch": 1.76, "learning_rate": 1.9246241176067724e-06, "loss": 0.16, "step": 3605 }, { "epoch": 1.76, "learning_rate": 1.9233415760211003e-06, "loss": 0.1836, "step": 3606 }, { "epoch": 1.76, "learning_rate": 1.922059194712577e-06, "loss": 0.1774, "step": 3607 }, { "epoch": 1.76, "learning_rate": 1.920776974037626e-06, "loss": 0.1502, "step": 3608 }, { "epoch": 1.76, "learning_rate": 1.919494914352633e-06, "loss": 0.1636, "step": 3609 }, { "epoch": 1.76, "learning_rate": 1.918213016013933e-06, "loss": 0.1432, "step": 3610 }, { "epoch": 1.76, "learning_rate": 1.9169312793778198e-06, "loss": 0.1959, "step": 3611 }, { "epoch": 1.76, "learning_rate": 1.915649704800539e-06, "loss": 0.2018, "step": 3612 }, { "epoch": 1.76, "learning_rate": 1.914368292638294e-06, "loss": 0.1706, "step": 3613 }, { "epoch": 1.76, "learning_rate": 1.9130870432472426e-06, "loss": 0.1613, "step": 3614 }, { "epoch": 1.77, "learning_rate": 1.911805956983497e-06, "loss": 0.1846, "step": 3615 }, { "epoch": 1.77, "learning_rate": 1.9105250342031216e-06, "loss": 0.1821, "step": 3616 }, { "epoch": 1.77, "learning_rate": 1.9092442752621407e-06, "loss": 0.1819, "step": 3617 }, { "epoch": 1.77, "learning_rate": 1.907963680516527e-06, "loss": 0.2126, "step": 3618 }, { "epoch": 1.77, "learning_rate": 1.906683250322213e-06, "loss": 0.1743, "step": 3619 }, { "epoch": 1.77, "learning_rate": 1.9054029850350825e-06, "loss": 0.1521, "step": 3620 }, { "epoch": 1.77, "learning_rate": 1.9041228850109731e-06, "loss": 0.1926, "step": 3621 }, { "epoch": 1.77, "learning_rate": 1.9028429506056783e-06, "loss": 0.1554, "step": 3622 }, { "epoch": 1.77, "learning_rate": 1.9015631821749436e-06, "loss": 0.1735, "step": 3623 }, { "epoch": 1.77, "learning_rate": 1.900283580074471e-06, "loss": 0.171, "step": 3624 }, { "epoch": 1.77, "learning_rate": 1.8990041446599127e-06, "loss": 0.1656, "step": 3625 }, { "epoch": 1.77, "learning_rate": 1.897724876286879e-06, "loss": 0.1905, "step": 3626 }, { "epoch": 1.77, "learning_rate": 1.8964457753109285e-06, "loss": 0.1453, "step": 3627 }, { "epoch": 1.77, "learning_rate": 1.8951668420875774e-06, "loss": 0.1656, "step": 3628 }, { "epoch": 1.77, "learning_rate": 1.8938880769722949e-06, "loss": 0.1757, "step": 3629 }, { "epoch": 1.77, "learning_rate": 1.8926094803205018e-06, "loss": 0.1745, "step": 3630 }, { "epoch": 1.77, "learning_rate": 1.8913310524875723e-06, "loss": 0.1861, "step": 3631 }, { "epoch": 1.77, "learning_rate": 1.8900527938288345e-06, "loss": 0.1868, "step": 3632 }, { "epoch": 1.77, "learning_rate": 1.888774704699569e-06, "loss": 0.205, "step": 3633 }, { "epoch": 1.77, "learning_rate": 1.8874967854550096e-06, "loss": 0.2141, "step": 3634 }, { "epoch": 1.77, "learning_rate": 1.8862190364503438e-06, "loss": 0.1598, "step": 3635 }, { "epoch": 1.78, "learning_rate": 1.8849414580407094e-06, "loss": 0.1638, "step": 3636 }, { "epoch": 1.78, "learning_rate": 1.8836640505811987e-06, "loss": 0.1484, "step": 3637 }, { "epoch": 1.78, "learning_rate": 1.8823868144268544e-06, "loss": 0.1903, "step": 3638 }, { "epoch": 1.78, "learning_rate": 1.8811097499326758e-06, "loss": 0.1788, "step": 3639 }, { "epoch": 1.78, "learning_rate": 1.87983285745361e-06, "loss": 0.2025, "step": 3640 }, { "epoch": 1.78, "learning_rate": 1.8785561373445585e-06, "loss": 0.1564, "step": 3641 }, { "epoch": 1.78, "learning_rate": 1.877279589960374e-06, "loss": 0.1667, "step": 3642 }, { "epoch": 1.78, "learning_rate": 1.8760032156558615e-06, "loss": 0.2019, "step": 3643 }, { "epoch": 1.78, "learning_rate": 1.8747270147857793e-06, "loss": 0.1843, "step": 3644 }, { "epoch": 1.78, "learning_rate": 1.8734509877048357e-06, "loss": 0.1802, "step": 3645 }, { "epoch": 1.78, "learning_rate": 1.8721751347676904e-06, "loss": 0.1917, "step": 3646 }, { "epoch": 1.78, "learning_rate": 1.8708994563289561e-06, "loss": 0.1849, "step": 3647 }, { "epoch": 1.78, "learning_rate": 1.8696239527431962e-06, "loss": 0.1863, "step": 3648 }, { "epoch": 1.78, "learning_rate": 1.8683486243649259e-06, "loss": 0.2029, "step": 3649 }, { "epoch": 1.78, "learning_rate": 1.8670734715486125e-06, "loss": 0.1573, "step": 3650 }, { "epoch": 1.78, "learning_rate": 1.8657984946486723e-06, "loss": 0.1708, "step": 3651 }, { "epoch": 1.78, "learning_rate": 1.8645236940194738e-06, "loss": 0.1718, "step": 3652 }, { "epoch": 1.78, "learning_rate": 1.8632490700153365e-06, "loss": 0.1541, "step": 3653 }, { "epoch": 1.78, "learning_rate": 1.8619746229905328e-06, "loss": 0.2028, "step": 3654 }, { "epoch": 1.78, "learning_rate": 1.8607003532992823e-06, "loss": 0.1958, "step": 3655 }, { "epoch": 1.79, "learning_rate": 1.859426261295758e-06, "loss": 0.1498, "step": 3656 }, { "epoch": 1.79, "learning_rate": 1.8581523473340813e-06, "loss": 0.1937, "step": 3657 }, { "epoch": 1.79, "learning_rate": 1.8568786117683268e-06, "loss": 0.1692, "step": 3658 }, { "epoch": 1.79, "learning_rate": 1.8556050549525178e-06, "loss": 0.1828, "step": 3659 }, { "epoch": 1.79, "learning_rate": 1.8543316772406283e-06, "loss": 0.1885, "step": 3660 }, { "epoch": 1.79, "learning_rate": 1.8530584789865819e-06, "loss": 0.1391, "step": 3661 }, { "epoch": 1.79, "learning_rate": 1.8517854605442523e-06, "loss": 0.1816, "step": 3662 }, { "epoch": 1.79, "learning_rate": 1.8505126222674658e-06, "loss": 0.2097, "step": 3663 }, { "epoch": 1.79, "learning_rate": 1.849239964509995e-06, "loss": 0.2022, "step": 3664 }, { "epoch": 1.79, "learning_rate": 1.8479674876255648e-06, "loss": 0.2067, "step": 3665 }, { "epoch": 1.79, "learning_rate": 1.8466951919678491e-06, "loss": 0.1903, "step": 3666 }, { "epoch": 1.79, "learning_rate": 1.8454230778904698e-06, "loss": 0.1985, "step": 3667 }, { "epoch": 1.79, "learning_rate": 1.8441511457470013e-06, "loss": 0.1902, "step": 3668 }, { "epoch": 1.79, "learning_rate": 1.8428793958909657e-06, "loss": 0.1943, "step": 3669 }, { "epoch": 1.79, "learning_rate": 1.8416078286758346e-06, "loss": 0.2339, "step": 3670 }, { "epoch": 1.79, "learning_rate": 1.8403364444550293e-06, "loss": 0.1727, "step": 3671 }, { "epoch": 1.79, "learning_rate": 1.8390652435819183e-06, "loss": 0.1843, "step": 3672 }, { "epoch": 1.79, "learning_rate": 1.8377942264098234e-06, "loss": 0.1754, "step": 3673 }, { "epoch": 1.79, "learning_rate": 1.8365233932920103e-06, "loss": 0.178, "step": 3674 }, { "epoch": 1.79, "learning_rate": 1.8352527445816975e-06, "loss": 0.1851, "step": 3675 }, { "epoch": 1.79, "learning_rate": 1.8339822806320494e-06, "loss": 0.1583, "step": 3676 }, { "epoch": 1.8, "learning_rate": 1.8327120017961808e-06, "loss": 0.1753, "step": 3677 }, { "epoch": 1.8, "learning_rate": 1.831441908427155e-06, "loss": 0.161, "step": 3678 }, { "epoch": 1.8, "learning_rate": 1.830172000877983e-06, "loss": 0.1669, "step": 3679 }, { "epoch": 1.8, "learning_rate": 1.8289022795016249e-06, "loss": 0.1922, "step": 3680 }, { "epoch": 1.8, "learning_rate": 1.8276327446509879e-06, "loss": 0.1652, "step": 3681 }, { "epoch": 1.8, "learning_rate": 1.8263633966789272e-06, "loss": 0.2014, "step": 3682 }, { "epoch": 1.8, "learning_rate": 1.8250942359382485e-06, "loss": 0.183, "step": 3683 }, { "epoch": 1.8, "learning_rate": 1.823825262781704e-06, "loss": 0.1662, "step": 3684 }, { "epoch": 1.8, "learning_rate": 1.8225564775619925e-06, "loss": 0.1751, "step": 3685 }, { "epoch": 1.8, "learning_rate": 1.821287880631763e-06, "loss": 0.1856, "step": 3686 }, { "epoch": 1.8, "learning_rate": 1.8200194723436091e-06, "loss": 0.1814, "step": 3687 }, { "epoch": 1.8, "learning_rate": 1.8187512530500757e-06, "loss": 0.176, "step": 3688 }, { "epoch": 1.8, "learning_rate": 1.8174832231036516e-06, "loss": 0.1807, "step": 3689 }, { "epoch": 1.8, "learning_rate": 1.8162153828567763e-06, "loss": 0.1759, "step": 3690 }, { "epoch": 1.8, "learning_rate": 1.8149477326618331e-06, "loss": 0.1662, "step": 3691 }, { "epoch": 1.8, "learning_rate": 1.8136802728711544e-06, "loss": 0.1663, "step": 3692 }, { "epoch": 1.8, "learning_rate": 1.8124130038370213e-06, "loss": 0.1839, "step": 3693 }, { "epoch": 1.8, "learning_rate": 1.8111459259116588e-06, "loss": 0.2045, "step": 3694 }, { "epoch": 1.8, "learning_rate": 1.8098790394472405e-06, "loss": 0.1763, "step": 3695 }, { "epoch": 1.8, "learning_rate": 1.8086123447958862e-06, "loss": 0.1972, "step": 3696 }, { "epoch": 1.81, "learning_rate": 1.8073458423096612e-06, "loss": 0.1874, "step": 3697 }, { "epoch": 1.81, "learning_rate": 1.8060795323405808e-06, "loss": 0.2252, "step": 3698 }, { "epoch": 1.81, "learning_rate": 1.8048134152406043e-06, "loss": 0.1914, "step": 3699 }, { "epoch": 1.81, "learning_rate": 1.8035474913616375e-06, "loss": 0.1466, "step": 3700 }, { "epoch": 1.81, "learning_rate": 1.8022817610555329e-06, "loss": 0.2007, "step": 3701 }, { "epoch": 1.81, "learning_rate": 1.801016224674088e-06, "loss": 0.1649, "step": 3702 }, { "epoch": 1.81, "learning_rate": 1.7997508825690499e-06, "loss": 0.2154, "step": 3703 }, { "epoch": 1.81, "learning_rate": 1.7984857350921075e-06, "loss": 0.1674, "step": 3704 }, { "epoch": 1.81, "learning_rate": 1.7972207825948985e-06, "loss": 0.1687, "step": 3705 }, { "epoch": 1.81, "learning_rate": 1.7959560254290043e-06, "loss": 0.2099, "step": 3706 }, { "epoch": 1.81, "learning_rate": 1.7946914639459528e-06, "loss": 0.1903, "step": 3707 }, { "epoch": 1.81, "learning_rate": 1.7934270984972197e-06, "loss": 0.1596, "step": 3708 }, { "epoch": 1.81, "learning_rate": 1.7921629294342232e-06, "loss": 0.1733, "step": 3709 }, { "epoch": 1.81, "learning_rate": 1.7908989571083275e-06, "loss": 0.1858, "step": 3710 }, { "epoch": 1.81, "learning_rate": 1.7896351818708435e-06, "loss": 0.1907, "step": 3711 }, { "epoch": 1.81, "learning_rate": 1.7883716040730248e-06, "loss": 0.2358, "step": 3712 }, { "epoch": 1.81, "learning_rate": 1.7871082240660736e-06, "loss": 0.1899, "step": 3713 }, { "epoch": 1.81, "learning_rate": 1.7858450422011348e-06, "loss": 0.1735, "step": 3714 }, { "epoch": 1.81, "learning_rate": 1.7845820588292984e-06, "loss": 0.2463, "step": 3715 }, { "epoch": 1.81, "learning_rate": 1.7833192743015998e-06, "loss": 0.1713, "step": 3716 }, { "epoch": 1.81, "learning_rate": 1.7820566889690183e-06, "loss": 0.2049, "step": 3717 }, { "epoch": 1.82, "learning_rate": 1.7807943031824792e-06, "loss": 0.2029, "step": 3718 }, { "epoch": 1.82, "learning_rate": 1.7795321172928513e-06, "loss": 0.1749, "step": 3719 }, { "epoch": 1.82, "learning_rate": 1.7782701316509482e-06, "loss": 0.1887, "step": 3720 }, { "epoch": 1.82, "learning_rate": 1.7770083466075268e-06, "loss": 0.1939, "step": 3721 }, { "epoch": 1.82, "learning_rate": 1.7757467625132897e-06, "loss": 0.189, "step": 3722 }, { "epoch": 1.82, "learning_rate": 1.7744853797188844e-06, "loss": 0.1804, "step": 3723 }, { "epoch": 1.82, "learning_rate": 1.7732241985748998e-06, "loss": 0.1991, "step": 3724 }, { "epoch": 1.82, "learning_rate": 1.7719632194318704e-06, "loss": 0.2002, "step": 3725 }, { "epoch": 1.82, "learning_rate": 1.7707024426402743e-06, "loss": 0.1845, "step": 3726 }, { "epoch": 1.82, "learning_rate": 1.7694418685505327e-06, "loss": 0.1976, "step": 3727 }, { "epoch": 1.82, "learning_rate": 1.7681814975130125e-06, "loss": 0.1734, "step": 3728 }, { "epoch": 1.82, "learning_rate": 1.7669213298780218e-06, "loss": 0.2034, "step": 3729 }, { "epoch": 1.82, "learning_rate": 1.7656613659958139e-06, "loss": 0.2045, "step": 3730 }, { "epoch": 1.82, "learning_rate": 1.7644016062165837e-06, "loss": 0.1668, "step": 3731 }, { "epoch": 1.82, "learning_rate": 1.76314205089047e-06, "loss": 0.1598, "step": 3732 }, { "epoch": 1.82, "learning_rate": 1.7618827003675574e-06, "loss": 0.1563, "step": 3733 }, { "epoch": 1.82, "learning_rate": 1.7606235549978692e-06, "loss": 0.1555, "step": 3734 }, { "epoch": 1.82, "learning_rate": 1.7593646151313751e-06, "loss": 0.1736, "step": 3735 }, { "epoch": 1.82, "learning_rate": 1.7581058811179851e-06, "loss": 0.1802, "step": 3736 }, { "epoch": 1.82, "learning_rate": 1.7568473533075538e-06, "loss": 0.1801, "step": 3737 }, { "epoch": 1.83, "learning_rate": 1.7555890320498792e-06, "loss": 0.1615, "step": 3738 }, { "epoch": 1.83, "learning_rate": 1.7543309176946993e-06, "loss": 0.1953, "step": 3739 }, { "epoch": 1.83, "learning_rate": 1.753073010591696e-06, "loss": 0.1741, "step": 3740 }, { "epoch": 1.83, "learning_rate": 1.7518153110904939e-06, "loss": 0.1806, "step": 3741 }, { "epoch": 1.83, "learning_rate": 1.7505578195406586e-06, "loss": 0.1654, "step": 3742 }, { "epoch": 1.83, "learning_rate": 1.7493005362917003e-06, "loss": 0.1697, "step": 3743 }, { "epoch": 1.83, "learning_rate": 1.7480434616930697e-06, "loss": 0.1667, "step": 3744 }, { "epoch": 1.83, "learning_rate": 1.7467865960941588e-06, "loss": 0.204, "step": 3745 }, { "epoch": 1.83, "learning_rate": 1.7455299398443027e-06, "loss": 0.2167, "step": 3746 }, { "epoch": 1.83, "learning_rate": 1.7442734932927772e-06, "loss": 0.1915, "step": 3747 }, { "epoch": 1.83, "learning_rate": 1.7430172567888024e-06, "loss": 0.2018, "step": 3748 }, { "epoch": 1.83, "learning_rate": 1.7417612306815369e-06, "loss": 0.1677, "step": 3749 }, { "epoch": 1.83, "learning_rate": 1.7405054153200823e-06, "loss": 0.1682, "step": 3750 }, { "epoch": 1.83, "learning_rate": 1.7392498110534812e-06, "loss": 0.1974, "step": 3751 }, { "epoch": 1.83, "learning_rate": 1.7379944182307184e-06, "loss": 0.1938, "step": 3752 }, { "epoch": 1.83, "learning_rate": 1.7367392372007197e-06, "loss": 0.2115, "step": 3753 }, { "epoch": 1.83, "learning_rate": 1.7354842683123506e-06, "loss": 0.1935, "step": 3754 }, { "epoch": 1.83, "learning_rate": 1.7342295119144191e-06, "loss": 0.1613, "step": 3755 }, { "epoch": 1.83, "learning_rate": 1.7329749683556734e-06, "loss": 0.1608, "step": 3756 }, { "epoch": 1.83, "learning_rate": 1.731720637984804e-06, "loss": 0.1367, "step": 3757 }, { "epoch": 1.83, "learning_rate": 1.7304665211504396e-06, "loss": 0.2202, "step": 3758 }, { "epoch": 1.84, "learning_rate": 1.729212618201152e-06, "loss": 0.1539, "step": 3759 }, { "epoch": 1.84, "learning_rate": 1.7279589294854526e-06, "loss": 0.1803, "step": 3760 }, { "epoch": 1.84, "learning_rate": 1.7267054553517915e-06, "loss": 0.204, "step": 3761 }, { "epoch": 1.84, "learning_rate": 1.725452196148562e-06, "loss": 0.1753, "step": 3762 }, { "epoch": 1.84, "learning_rate": 1.7241991522240974e-06, "loss": 0.1893, "step": 3763 }, { "epoch": 1.84, "learning_rate": 1.7229463239266691e-06, "loss": 0.1846, "step": 3764 }, { "epoch": 1.84, "learning_rate": 1.7216937116044907e-06, "loss": 0.1779, "step": 3765 }, { "epoch": 1.84, "learning_rate": 1.7204413156057125e-06, "loss": 0.1848, "step": 3766 }, { "epoch": 1.84, "learning_rate": 1.7191891362784303e-06, "loss": 0.1918, "step": 3767 }, { "epoch": 1.84, "learning_rate": 1.7179371739706735e-06, "loss": 0.2086, "step": 3768 }, { "epoch": 1.84, "learning_rate": 1.7166854290304159e-06, "loss": 0.2074, "step": 3769 }, { "epoch": 1.84, "learning_rate": 1.7154339018055677e-06, "loss": 0.1797, "step": 3770 }, { "epoch": 1.84, "learning_rate": 1.7141825926439803e-06, "loss": 0.1555, "step": 3771 }, { "epoch": 1.84, "learning_rate": 1.712931501893445e-06, "loss": 0.1975, "step": 3772 }, { "epoch": 1.84, "learning_rate": 1.7116806299016905e-06, "loss": 0.1866, "step": 3773 }, { "epoch": 1.84, "learning_rate": 1.7104299770163865e-06, "loss": 0.1778, "step": 3774 }, { "epoch": 1.84, "learning_rate": 1.7091795435851404e-06, "loss": 0.1907, "step": 3775 }, { "epoch": 1.84, "learning_rate": 1.7079293299554982e-06, "loss": 0.1949, "step": 3776 }, { "epoch": 1.84, "learning_rate": 1.7066793364749476e-06, "loss": 0.1621, "step": 3777 }, { "epoch": 1.84, "learning_rate": 1.705429563490913e-06, "loss": 0.1663, "step": 3778 }, { "epoch": 1.85, "learning_rate": 1.704180011350757e-06, "loss": 0.2222, "step": 3779 }, { "epoch": 1.85, "learning_rate": 1.7029306804017826e-06, "loss": 0.1689, "step": 3780 }, { "epoch": 1.85, "learning_rate": 1.7016815709912287e-06, "loss": 0.1984, "step": 3781 }, { "epoch": 1.85, "learning_rate": 1.7004326834662765e-06, "loss": 0.1435, "step": 3782 }, { "epoch": 1.85, "learning_rate": 1.6991840181740422e-06, "loss": 0.1856, "step": 3783 }, { "epoch": 1.85, "learning_rate": 1.6979355754615814e-06, "loss": 0.1712, "step": 3784 }, { "epoch": 1.85, "learning_rate": 1.6966873556758876e-06, "loss": 0.1726, "step": 3785 }, { "epoch": 1.85, "learning_rate": 1.6954393591638925e-06, "loss": 0.2371, "step": 3786 }, { "epoch": 1.85, "learning_rate": 1.6941915862724663e-06, "loss": 0.1904, "step": 3787 }, { "epoch": 1.85, "learning_rate": 1.6929440373484167e-06, "loss": 0.1872, "step": 3788 }, { "epoch": 1.85, "learning_rate": 1.6916967127384882e-06, "loss": 0.1965, "step": 3789 }, { "epoch": 1.85, "learning_rate": 1.6904496127893647e-06, "loss": 0.1686, "step": 3790 }, { "epoch": 1.85, "learning_rate": 1.6892027378476648e-06, "loss": 0.1983, "step": 3791 }, { "epoch": 1.85, "learning_rate": 1.6879560882599482e-06, "loss": 0.1916, "step": 3792 }, { "epoch": 1.85, "learning_rate": 1.6867096643727102e-06, "loss": 0.1824, "step": 3793 }, { "epoch": 1.85, "learning_rate": 1.6854634665323825e-06, "loss": 0.1759, "step": 3794 }, { "epoch": 1.85, "learning_rate": 1.6842174950853358e-06, "loss": 0.2134, "step": 3795 }, { "epoch": 1.85, "learning_rate": 1.6829717503778752e-06, "loss": 0.1533, "step": 3796 }, { "epoch": 1.85, "learning_rate": 1.6817262327562467e-06, "loss": 0.1724, "step": 3797 }, { "epoch": 1.85, "learning_rate": 1.6804809425666296e-06, "loss": 0.1604, "step": 3798 }, { "epoch": 1.85, "learning_rate": 1.6792358801551422e-06, "loss": 0.1859, "step": 3799 }, { "epoch": 1.86, "learning_rate": 1.6779910458678379e-06, "loss": 0.1758, "step": 3800 }, { "epoch": 1.86, "learning_rate": 1.6767464400507069e-06, "loss": 0.1622, "step": 3801 }, { "epoch": 1.86, "learning_rate": 1.6755020630496788e-06, "loss": 0.1939, "step": 3802 }, { "epoch": 1.86, "learning_rate": 1.6742579152106153e-06, "loss": 0.2114, "step": 3803 }, { "epoch": 1.86, "learning_rate": 1.6730139968793168e-06, "loss": 0.2389, "step": 3804 }, { "epoch": 1.86, "learning_rate": 1.67177030840152e-06, "loss": 0.1746, "step": 3805 }, { "epoch": 1.86, "learning_rate": 1.6705268501228955e-06, "loss": 0.1831, "step": 3806 }, { "epoch": 1.86, "learning_rate": 1.6692836223890538e-06, "loss": 0.178, "step": 3807 }, { "epoch": 1.86, "learning_rate": 1.6680406255455384e-06, "loss": 0.1817, "step": 3808 }, { "epoch": 1.86, "learning_rate": 1.666797859937829e-06, "loss": 0.1841, "step": 3809 }, { "epoch": 1.86, "learning_rate": 1.6655553259113418e-06, "loss": 0.1559, "step": 3810 }, { "epoch": 1.86, "learning_rate": 1.664313023811427e-06, "loss": 0.1978, "step": 3811 }, { "epoch": 1.86, "learning_rate": 1.663070953983374e-06, "loss": 0.1612, "step": 3812 }, { "epoch": 1.86, "learning_rate": 1.6618291167724026e-06, "loss": 0.1663, "step": 3813 }, { "epoch": 1.86, "learning_rate": 1.6605875125236729e-06, "loss": 0.1888, "step": 3814 }, { "epoch": 1.86, "learning_rate": 1.659346141582276e-06, "loss": 0.1906, "step": 3815 }, { "epoch": 1.86, "learning_rate": 1.65810500429324e-06, "loss": 0.173, "step": 3816 }, { "epoch": 1.86, "learning_rate": 1.6568641010015302e-06, "loss": 0.1811, "step": 3817 }, { "epoch": 1.86, "learning_rate": 1.6556234320520438e-06, "loss": 0.201, "step": 3818 }, { "epoch": 1.86, "learning_rate": 1.6543829977896126e-06, "loss": 0.1553, "step": 3819 }, { "epoch": 1.87, "learning_rate": 1.6531427985590057e-06, "loss": 0.1659, "step": 3820 }, { "epoch": 1.87, "learning_rate": 1.6519028347049242e-06, "loss": 0.1605, "step": 3821 }, { "epoch": 1.87, "learning_rate": 1.6506631065720072e-06, "loss": 0.2046, "step": 3822 }, { "epoch": 1.87, "learning_rate": 1.6494236145048254e-06, "loss": 0.162, "step": 3823 }, { "epoch": 1.87, "learning_rate": 1.6481843588478848e-06, "loss": 0.192, "step": 3824 }, { "epoch": 1.87, "learning_rate": 1.6469453399456247e-06, "loss": 0.2184, "step": 3825 }, { "epoch": 1.87, "learning_rate": 1.6457065581424204e-06, "loss": 0.1706, "step": 3826 }, { "epoch": 1.87, "learning_rate": 1.6444680137825807e-06, "loss": 0.1817, "step": 3827 }, { "epoch": 1.87, "learning_rate": 1.6432297072103474e-06, "loss": 0.1785, "step": 3828 }, { "epoch": 1.87, "learning_rate": 1.6419916387698975e-06, "loss": 0.2036, "step": 3829 }, { "epoch": 1.87, "learning_rate": 1.640753808805341e-06, "loss": 0.1609, "step": 3830 }, { "epoch": 1.87, "learning_rate": 1.6395162176607213e-06, "loss": 0.2106, "step": 3831 }, { "epoch": 1.87, "learning_rate": 1.6382788656800175e-06, "loss": 0.1847, "step": 3832 }, { "epoch": 1.87, "learning_rate": 1.6370417532071397e-06, "loss": 0.1688, "step": 3833 }, { "epoch": 1.87, "learning_rate": 1.6358048805859323e-06, "loss": 0.1849, "step": 3834 }, { "epoch": 1.87, "learning_rate": 1.6345682481601737e-06, "loss": 0.1686, "step": 3835 }, { "epoch": 1.87, "learning_rate": 1.6333318562735743e-06, "loss": 0.1909, "step": 3836 }, { "epoch": 1.87, "learning_rate": 1.632095705269779e-06, "loss": 0.1766, "step": 3837 }, { "epoch": 1.87, "learning_rate": 1.6308597954923655e-06, "loss": 0.1704, "step": 3838 }, { "epoch": 1.87, "learning_rate": 1.6296241272848436e-06, "loss": 0.164, "step": 3839 }, { "epoch": 1.88, "learning_rate": 1.6283887009906559e-06, "loss": 0.2204, "step": 3840 }, { "epoch": 1.88, "learning_rate": 1.6271535169531783e-06, "loss": 0.1439, "step": 3841 }, { "epoch": 1.88, "learning_rate": 1.6259185755157208e-06, "loss": 0.1401, "step": 3842 }, { "epoch": 1.88, "learning_rate": 1.6246838770215235e-06, "loss": 0.1783, "step": 3843 }, { "epoch": 1.88, "learning_rate": 1.6234494218137602e-06, "loss": 0.1668, "step": 3844 }, { "epoch": 1.88, "learning_rate": 1.622215210235537e-06, "loss": 0.1591, "step": 3845 }, { "epoch": 1.88, "learning_rate": 1.620981242629891e-06, "loss": 0.165, "step": 3846 }, { "epoch": 1.88, "learning_rate": 1.6197475193397939e-06, "loss": 0.1632, "step": 3847 }, { "epoch": 1.88, "learning_rate": 1.6185140407081485e-06, "loss": 0.1506, "step": 3848 }, { "epoch": 1.88, "learning_rate": 1.6172808070777884e-06, "loss": 0.1718, "step": 3849 }, { "epoch": 1.88, "learning_rate": 1.6160478187914802e-06, "loss": 0.1979, "step": 3850 }, { "epoch": 1.88, "learning_rate": 1.6148150761919235e-06, "loss": 0.1676, "step": 3851 }, { "epoch": 1.88, "learning_rate": 1.6135825796217464e-06, "loss": 0.1771, "step": 3852 }, { "epoch": 1.88, "learning_rate": 1.6123503294235122e-06, "loss": 0.2016, "step": 3853 }, { "epoch": 1.88, "learning_rate": 1.6111183259397132e-06, "loss": 0.2704, "step": 3854 }, { "epoch": 1.88, "learning_rate": 1.609886569512773e-06, "loss": 0.1695, "step": 3855 }, { "epoch": 1.88, "learning_rate": 1.6086550604850487e-06, "loss": 0.1884, "step": 3856 }, { "epoch": 1.88, "learning_rate": 1.607423799198828e-06, "loss": 0.1714, "step": 3857 }, { "epoch": 1.88, "learning_rate": 1.6061927859963275e-06, "loss": 0.1744, "step": 3858 }, { "epoch": 1.88, "learning_rate": 1.6049620212196982e-06, "loss": 0.1677, "step": 3859 }, { "epoch": 1.88, "learning_rate": 1.6037315052110184e-06, "loss": 0.2013, "step": 3860 }, { "epoch": 1.89, "learning_rate": 1.6025012383123012e-06, "loss": 0.1417, "step": 3861 }, { "epoch": 1.89, "learning_rate": 1.6012712208654873e-06, "loss": 0.1745, "step": 3862 }, { "epoch": 1.89, "learning_rate": 1.6000414532124503e-06, "loss": 0.1858, "step": 3863 }, { "epoch": 1.89, "learning_rate": 1.5988119356949916e-06, "loss": 0.1723, "step": 3864 }, { "epoch": 1.89, "learning_rate": 1.5975826686548456e-06, "loss": 0.2237, "step": 3865 }, { "epoch": 1.89, "learning_rate": 1.5963536524336776e-06, "loss": 0.1645, "step": 3866 }, { "epoch": 1.89, "learning_rate": 1.59512488737308e-06, "loss": 0.1961, "step": 3867 }, { "epoch": 1.89, "learning_rate": 1.593896373814579e-06, "loss": 0.2793, "step": 3868 }, { "epoch": 1.89, "learning_rate": 1.592668112099628e-06, "loss": 0.2054, "step": 3869 }, { "epoch": 1.89, "learning_rate": 1.5914401025696107e-06, "loss": 0.1876, "step": 3870 }, { "epoch": 1.89, "learning_rate": 1.590212345565843e-06, "loss": 0.1749, "step": 3871 }, { "epoch": 1.89, "learning_rate": 1.5889848414295697e-06, "loss": 0.1619, "step": 3872 }, { "epoch": 1.89, "learning_rate": 1.5877575905019631e-06, "loss": 0.2139, "step": 3873 }, { "epoch": 1.89, "learning_rate": 1.5865305931241285e-06, "loss": 0.1968, "step": 3874 }, { "epoch": 1.89, "learning_rate": 1.5853038496370969e-06, "loss": 0.1819, "step": 3875 }, { "epoch": 1.89, "learning_rate": 1.5840773603818332e-06, "loss": 0.1881, "step": 3876 }, { "epoch": 1.89, "learning_rate": 1.582851125699228e-06, "loss": 0.1854, "step": 3877 }, { "epoch": 1.89, "learning_rate": 1.581625145930103e-06, "loss": 0.1892, "step": 3878 }, { "epoch": 1.89, "learning_rate": 1.580399421415208e-06, "loss": 0.2173, "step": 3879 }, { "epoch": 1.89, "learning_rate": 1.579173952495222e-06, "loss": 0.2102, "step": 3880 }, { "epoch": 1.9, "learning_rate": 1.5779487395107552e-06, "loss": 0.1747, "step": 3881 }, { "epoch": 1.9, "learning_rate": 1.5767237828023434e-06, "loss": 0.1744, "step": 3882 }, { "epoch": 1.9, "learning_rate": 1.5754990827104521e-06, "loss": 0.1818, "step": 3883 }, { "epoch": 1.9, "learning_rate": 1.5742746395754777e-06, "loss": 0.193, "step": 3884 }, { "epoch": 1.9, "learning_rate": 1.5730504537377409e-06, "loss": 0.176, "step": 3885 }, { "epoch": 1.9, "learning_rate": 1.5718265255374955e-06, "loss": 0.2085, "step": 3886 }, { "epoch": 1.9, "learning_rate": 1.570602855314921e-06, "loss": 0.1884, "step": 3887 }, { "epoch": 1.9, "learning_rate": 1.5693794434101256e-06, "loss": 0.2176, "step": 3888 }, { "epoch": 1.9, "learning_rate": 1.5681562901631467e-06, "loss": 0.1693, "step": 3889 }, { "epoch": 1.9, "learning_rate": 1.5669333959139472e-06, "loss": 0.2062, "step": 3890 }, { "epoch": 1.9, "learning_rate": 1.565710761002422e-06, "loss": 0.1991, "step": 3891 }, { "epoch": 1.9, "learning_rate": 1.5644883857683899e-06, "loss": 0.1552, "step": 3892 }, { "epoch": 1.9, "learning_rate": 1.5632662705516007e-06, "loss": 0.1785, "step": 3893 }, { "epoch": 1.9, "learning_rate": 1.5620444156917298e-06, "loss": 0.2046, "step": 3894 }, { "epoch": 1.9, "learning_rate": 1.5608228215283807e-06, "loss": 0.2141, "step": 3895 }, { "epoch": 1.9, "learning_rate": 1.5596014884010866e-06, "loss": 0.1963, "step": 3896 }, { "epoch": 1.9, "learning_rate": 1.558380416649305e-06, "loss": 0.1857, "step": 3897 }, { "epoch": 1.9, "learning_rate": 1.5571596066124217e-06, "loss": 0.1679, "step": 3898 }, { "epoch": 1.9, "learning_rate": 1.5559390586297508e-06, "loss": 0.1665, "step": 3899 }, { "epoch": 1.9, "learning_rate": 1.5547187730405322e-06, "loss": 0.1727, "step": 3900 }, { "epoch": 1.9, "learning_rate": 1.5534987501839344e-06, "loss": 0.1981, "step": 3901 }, { "epoch": 1.91, "learning_rate": 1.552278990399052e-06, "loss": 0.1867, "step": 3902 }, { "epoch": 1.91, "learning_rate": 1.5510594940249062e-06, "loss": 0.1725, "step": 3903 }, { "epoch": 1.91, "learning_rate": 1.5498402614004454e-06, "loss": 0.1521, "step": 3904 }, { "epoch": 1.91, "learning_rate": 1.548621292864544e-06, "loss": 0.1888, "step": 3905 }, { "epoch": 1.91, "learning_rate": 1.5474025887560045e-06, "loss": 0.1919, "step": 3906 }, { "epoch": 1.91, "learning_rate": 1.5461841494135548e-06, "loss": 0.147, "step": 3907 }, { "epoch": 1.91, "learning_rate": 1.5449659751758497e-06, "loss": 0.1886, "step": 3908 }, { "epoch": 1.91, "learning_rate": 1.5437480663814691e-06, "loss": 0.2118, "step": 3909 }, { "epoch": 1.91, "learning_rate": 1.5425304233689203e-06, "loss": 0.1641, "step": 3910 }, { "epoch": 1.91, "learning_rate": 1.5413130464766376e-06, "loss": 0.1819, "step": 3911 }, { "epoch": 1.91, "learning_rate": 1.5400959360429797e-06, "loss": 0.1788, "step": 3912 }, { "epoch": 1.91, "learning_rate": 1.5388790924062308e-06, "loss": 0.1931, "step": 3913 }, { "epoch": 1.91, "learning_rate": 1.537662515904603e-06, "loss": 0.1745, "step": 3914 }, { "epoch": 1.91, "learning_rate": 1.5364462068762316e-06, "loss": 0.2242, "step": 3915 }, { "epoch": 1.91, "learning_rate": 1.5352301656591805e-06, "loss": 0.1605, "step": 3916 }, { "epoch": 1.91, "learning_rate": 1.534014392591438e-06, "loss": 0.2246, "step": 3917 }, { "epoch": 1.91, "learning_rate": 1.5327988880109167e-06, "loss": 0.191, "step": 3918 }, { "epoch": 1.91, "learning_rate": 1.5315836522554557e-06, "loss": 0.1639, "step": 3919 }, { "epoch": 1.91, "learning_rate": 1.5303686856628175e-06, "loss": 0.1772, "step": 3920 }, { "epoch": 1.91, "learning_rate": 1.5291539885706944e-06, "loss": 0.1942, "step": 3921 }, { "epoch": 1.92, "learning_rate": 1.5279395613166987e-06, "loss": 0.214, "step": 3922 }, { "epoch": 1.92, "learning_rate": 1.5267254042383705e-06, "loss": 0.1853, "step": 3923 }, { "epoch": 1.92, "learning_rate": 1.525511517673174e-06, "loss": 0.2157, "step": 3924 }, { "epoch": 1.92, "learning_rate": 1.5242979019584975e-06, "loss": 0.1965, "step": 3925 }, { "epoch": 1.92, "learning_rate": 1.5230845574316563e-06, "loss": 0.1995, "step": 3926 }, { "epoch": 1.92, "learning_rate": 1.5218714844298884e-06, "loss": 0.1828, "step": 3927 }, { "epoch": 1.92, "learning_rate": 1.5206586832903564e-06, "loss": 0.1583, "step": 3928 }, { "epoch": 1.92, "learning_rate": 1.519446154350148e-06, "loss": 0.2068, "step": 3929 }, { "epoch": 1.92, "learning_rate": 1.518233897946274e-06, "loss": 0.1972, "step": 3930 }, { "epoch": 1.92, "learning_rate": 1.5170219144156718e-06, "loss": 0.1806, "step": 3931 }, { "epoch": 1.92, "learning_rate": 1.5158102040952013e-06, "loss": 0.2031, "step": 3932 }, { "epoch": 1.92, "learning_rate": 1.514598767321647e-06, "loss": 0.2234, "step": 3933 }, { "epoch": 1.92, "learning_rate": 1.5133876044317159e-06, "loss": 0.1668, "step": 3934 }, { "epoch": 1.92, "learning_rate": 1.5121767157620404e-06, "loss": 0.1647, "step": 3935 }, { "epoch": 1.92, "learning_rate": 1.5109661016491779e-06, "loss": 0.161, "step": 3936 }, { "epoch": 1.92, "learning_rate": 1.5097557624296062e-06, "loss": 0.233, "step": 3937 }, { "epoch": 1.92, "learning_rate": 1.5085456984397295e-06, "loss": 0.1868, "step": 3938 }, { "epoch": 1.92, "learning_rate": 1.5073359100158747e-06, "loss": 0.195, "step": 3939 }, { "epoch": 1.92, "learning_rate": 1.5061263974942897e-06, "loss": 0.1661, "step": 3940 }, { "epoch": 1.92, "learning_rate": 1.5049171612111502e-06, "loss": 0.2006, "step": 3941 }, { "epoch": 1.92, "learning_rate": 1.5037082015025528e-06, "loss": 0.1962, "step": 3942 }, { "epoch": 1.93, "learning_rate": 1.502499518704516e-06, "loss": 0.1758, "step": 3943 }, { "epoch": 1.93, "learning_rate": 1.5012911131529834e-06, "loss": 0.1794, "step": 3944 }, { "epoch": 1.93, "learning_rate": 1.5000829851838195e-06, "loss": 0.1919, "step": 3945 }, { "epoch": 1.93, "learning_rate": 1.4988751351328142e-06, "loss": 0.1877, "step": 3946 }, { "epoch": 1.93, "learning_rate": 1.497667563335679e-06, "loss": 0.197, "step": 3947 }, { "epoch": 1.93, "learning_rate": 1.4964602701280473e-06, "loss": 0.1788, "step": 3948 }, { "epoch": 1.93, "learning_rate": 1.4952532558454747e-06, "loss": 0.2007, "step": 3949 }, { "epoch": 1.93, "learning_rate": 1.4940465208234418e-06, "loss": 0.2025, "step": 3950 }, { "epoch": 1.93, "learning_rate": 1.4928400653973496e-06, "loss": 0.2022, "step": 3951 }, { "epoch": 1.93, "learning_rate": 1.4916338899025213e-06, "loss": 0.1503, "step": 3952 }, { "epoch": 1.93, "learning_rate": 1.4904279946742034e-06, "loss": 0.1762, "step": 3953 }, { "epoch": 1.93, "learning_rate": 1.4892223800475627e-06, "loss": 0.1575, "step": 3954 }, { "epoch": 1.93, "learning_rate": 1.488017046357691e-06, "loss": 0.1804, "step": 3955 }, { "epoch": 1.93, "learning_rate": 1.486811993939599e-06, "loss": 0.1697, "step": 3956 }, { "epoch": 1.93, "learning_rate": 1.485607223128221e-06, "loss": 0.1595, "step": 3957 }, { "epoch": 1.93, "learning_rate": 1.4844027342584116e-06, "loss": 0.1519, "step": 3958 }, { "epoch": 1.93, "learning_rate": 1.4831985276649483e-06, "loss": 0.205, "step": 3959 }, { "epoch": 1.93, "learning_rate": 1.4819946036825309e-06, "loss": 0.1827, "step": 3960 }, { "epoch": 1.93, "learning_rate": 1.4807909626457785e-06, "loss": 0.1938, "step": 3961 }, { "epoch": 1.93, "learning_rate": 1.479587604889232e-06, "loss": 0.2461, "step": 3962 }, { "epoch": 1.94, "learning_rate": 1.4783845307473555e-06, "loss": 0.1714, "step": 3963 }, { "epoch": 1.94, "learning_rate": 1.4771817405545311e-06, "loss": 0.2, "step": 3964 }, { "epoch": 1.94, "learning_rate": 1.4759792346450651e-06, "loss": 0.1927, "step": 3965 }, { "epoch": 1.94, "learning_rate": 1.4747770133531841e-06, "loss": 0.1755, "step": 3966 }, { "epoch": 1.94, "learning_rate": 1.4735750770130334e-06, "loss": 0.1994, "step": 3967 }, { "epoch": 1.94, "learning_rate": 1.4723734259586816e-06, "loss": 0.1824, "step": 3968 }, { "epoch": 1.94, "learning_rate": 1.471172060524116e-06, "loss": 0.1792, "step": 3969 }, { "epoch": 1.94, "learning_rate": 1.4699709810432472e-06, "loss": 0.1906, "step": 3970 }, { "epoch": 1.94, "learning_rate": 1.4687701878499035e-06, "loss": 0.1938, "step": 3971 }, { "epoch": 1.94, "learning_rate": 1.4675696812778359e-06, "loss": 0.1906, "step": 3972 }, { "epoch": 1.94, "learning_rate": 1.4663694616607134e-06, "loss": 0.1798, "step": 3973 }, { "epoch": 1.94, "learning_rate": 1.4651695293321261e-06, "loss": 0.1729, "step": 3974 }, { "epoch": 1.94, "learning_rate": 1.4639698846255867e-06, "loss": 0.1778, "step": 3975 }, { "epoch": 1.94, "learning_rate": 1.4627705278745252e-06, "loss": 0.1611, "step": 3976 }, { "epoch": 1.94, "learning_rate": 1.4615714594122914e-06, "loss": 0.2067, "step": 3977 }, { "epoch": 1.94, "learning_rate": 1.4603726795721567e-06, "loss": 0.1883, "step": 3978 }, { "epoch": 1.94, "learning_rate": 1.4591741886873102e-06, "loss": 0.188, "step": 3979 }, { "epoch": 1.94, "learning_rate": 1.4579759870908639e-06, "loss": 0.1778, "step": 3980 }, { "epoch": 1.94, "learning_rate": 1.456778075115845e-06, "loss": 0.1332, "step": 3981 }, { "epoch": 1.94, "learning_rate": 1.455580453095205e-06, "loss": 0.235, "step": 3982 }, { "epoch": 1.94, "learning_rate": 1.4543831213618117e-06, "loss": 0.1719, "step": 3983 }, { "epoch": 1.95, "learning_rate": 1.4531860802484515e-06, "loss": 0.1505, "step": 3984 }, { "epoch": 1.95, "learning_rate": 1.4519893300878338e-06, "loss": 0.2043, "step": 3985 }, { "epoch": 1.95, "learning_rate": 1.4507928712125835e-06, "loss": 0.1523, "step": 3986 }, { "epoch": 1.95, "learning_rate": 1.449596703955245e-06, "loss": 0.1714, "step": 3987 }, { "epoch": 1.95, "learning_rate": 1.4484008286482843e-06, "loss": 0.1854, "step": 3988 }, { "epoch": 1.95, "learning_rate": 1.4472052456240826e-06, "loss": 0.1902, "step": 3989 }, { "epoch": 1.95, "learning_rate": 1.4460099552149437e-06, "loss": 0.2117, "step": 3990 }, { "epoch": 1.95, "learning_rate": 1.4448149577530873e-06, "loss": 0.1995, "step": 3991 }, { "epoch": 1.95, "learning_rate": 1.443620253570652e-06, "loss": 0.1851, "step": 3992 }, { "epoch": 1.95, "learning_rate": 1.4424258429996946e-06, "loss": 0.1556, "step": 3993 }, { "epoch": 1.95, "learning_rate": 1.4412317263721924e-06, "loss": 0.1603, "step": 3994 }, { "epoch": 1.95, "learning_rate": 1.4400379040200396e-06, "loss": 0.1587, "step": 3995 }, { "epoch": 1.95, "learning_rate": 1.4388443762750487e-06, "loss": 0.1678, "step": 3996 }, { "epoch": 1.95, "learning_rate": 1.43765114346895e-06, "loss": 0.1799, "step": 3997 }, { "epoch": 1.95, "learning_rate": 1.4364582059333918e-06, "loss": 0.1922, "step": 3998 }, { "epoch": 1.95, "learning_rate": 1.4352655639999397e-06, "loss": 0.1646, "step": 3999 }, { "epoch": 1.95, "learning_rate": 1.4340732180000794e-06, "loss": 0.2063, "step": 4000 }, { "epoch": 1.95, "learning_rate": 1.4328811682652134e-06, "loss": 0.1607, "step": 4001 }, { "epoch": 1.95, "learning_rate": 1.4316894151266609e-06, "loss": 0.1821, "step": 4002 }, { "epoch": 1.95, "learning_rate": 1.430497958915659e-06, "loss": 0.192, "step": 4003 }, { "epoch": 1.96, "learning_rate": 1.4293067999633617e-06, "loss": 0.1815, "step": 4004 }, { "epoch": 1.96, "learning_rate": 1.4281159386008432e-06, "loss": 0.2035, "step": 4005 }, { "epoch": 1.96, "learning_rate": 1.4269253751590907e-06, "loss": 0.1674, "step": 4006 }, { "epoch": 1.96, "learning_rate": 1.425735109969013e-06, "loss": 0.2033, "step": 4007 }, { "epoch": 1.96, "learning_rate": 1.424545143361433e-06, "loss": 0.1662, "step": 4008 }, { "epoch": 1.96, "learning_rate": 1.4233554756670903e-06, "loss": 0.2145, "step": 4009 }, { "epoch": 1.96, "learning_rate": 1.4221661072166447e-06, "loss": 0.1679, "step": 4010 }, { "epoch": 1.96, "learning_rate": 1.4209770383406686e-06, "loss": 0.1743, "step": 4011 }, { "epoch": 1.96, "learning_rate": 1.4197882693696552e-06, "loss": 0.1618, "step": 4012 }, { "epoch": 1.96, "learning_rate": 1.4185998006340115e-06, "loss": 0.1974, "step": 4013 }, { "epoch": 1.96, "learning_rate": 1.4174116324640614e-06, "loss": 0.1911, "step": 4014 }, { "epoch": 1.96, "learning_rate": 1.4162237651900469e-06, "loss": 0.1602, "step": 4015 }, { "epoch": 1.96, "learning_rate": 1.4150361991421248e-06, "loss": 0.1762, "step": 4016 }, { "epoch": 1.96, "learning_rate": 1.4138489346503682e-06, "loss": 0.2349, "step": 4017 }, { "epoch": 1.96, "learning_rate": 1.4126619720447675e-06, "loss": 0.1838, "step": 4018 }, { "epoch": 1.96, "learning_rate": 1.4114753116552277e-06, "loss": 0.1777, "step": 4019 }, { "epoch": 1.96, "learning_rate": 1.4102889538115725e-06, "loss": 0.1895, "step": 4020 }, { "epoch": 1.96, "learning_rate": 1.409102898843538e-06, "loss": 0.2438, "step": 4021 }, { "epoch": 1.96, "learning_rate": 1.4079171470807785e-06, "loss": 0.1724, "step": 4022 }, { "epoch": 1.96, "learning_rate": 1.4067316988528618e-06, "loss": 0.198, "step": 4023 }, { "epoch": 1.96, "learning_rate": 1.4055465544892738e-06, "loss": 0.1949, "step": 4024 }, { "epoch": 1.97, "learning_rate": 1.4043617143194166e-06, "loss": 0.1723, "step": 4025 }, { "epoch": 1.97, "learning_rate": 1.4031771786726045e-06, "loss": 0.1931, "step": 4026 }, { "epoch": 1.97, "learning_rate": 1.401992947878069e-06, "loss": 0.2257, "step": 4027 }, { "epoch": 1.97, "learning_rate": 1.4008090222649567e-06, "loss": 0.1731, "step": 4028 }, { "epoch": 1.97, "learning_rate": 1.399625402162328e-06, "loss": 0.1795, "step": 4029 }, { "epoch": 1.97, "learning_rate": 1.3984420878991614e-06, "loss": 0.1747, "step": 4030 }, { "epoch": 1.97, "learning_rate": 1.3972590798043493e-06, "loss": 0.1615, "step": 4031 }, { "epoch": 1.97, "learning_rate": 1.3960763782066971e-06, "loss": 0.2141, "step": 4032 }, { "epoch": 1.97, "learning_rate": 1.3948939834349267e-06, "loss": 0.2247, "step": 4033 }, { "epoch": 1.97, "learning_rate": 1.3937118958176737e-06, "loss": 0.186, "step": 4034 }, { "epoch": 1.97, "learning_rate": 1.3925301156834902e-06, "loss": 0.1719, "step": 4035 }, { "epoch": 1.97, "learning_rate": 1.3913486433608404e-06, "loss": 0.1684, "step": 4036 }, { "epoch": 1.97, "learning_rate": 1.3901674791781053e-06, "loss": 0.1705, "step": 4037 }, { "epoch": 1.97, "learning_rate": 1.3889866234635788e-06, "loss": 0.2146, "step": 4038 }, { "epoch": 1.97, "learning_rate": 1.3878060765454681e-06, "loss": 0.1748, "step": 4039 }, { "epoch": 1.97, "learning_rate": 1.3866258387518977e-06, "loss": 0.1872, "step": 4040 }, { "epoch": 1.97, "learning_rate": 1.3854459104109041e-06, "loss": 0.2206, "step": 4041 }, { "epoch": 1.97, "learning_rate": 1.3842662918504364e-06, "loss": 0.1985, "step": 4042 }, { "epoch": 1.97, "learning_rate": 1.3830869833983607e-06, "loss": 0.1903, "step": 4043 }, { "epoch": 1.97, "learning_rate": 1.3819079853824546e-06, "loss": 0.2043, "step": 4044 }, { "epoch": 1.98, "learning_rate": 1.3807292981304115e-06, "loss": 0.1748, "step": 4045 }, { "epoch": 1.98, "learning_rate": 1.379550921969836e-06, "loss": 0.2211, "step": 4046 }, { "epoch": 1.98, "learning_rate": 1.3783728572282484e-06, "loss": 0.1599, "step": 4047 }, { "epoch": 1.98, "learning_rate": 1.3771951042330795e-06, "loss": 0.1887, "step": 4048 }, { "epoch": 1.98, "learning_rate": 1.376017663311677e-06, "loss": 0.2139, "step": 4049 }, { "epoch": 1.98, "learning_rate": 1.3748405347913007e-06, "loss": 0.1821, "step": 4050 }, { "epoch": 1.98, "learning_rate": 1.3736637189991226e-06, "loss": 0.2056, "step": 4051 }, { "epoch": 1.98, "learning_rate": 1.3724872162622283e-06, "loss": 0.1877, "step": 4052 }, { "epoch": 1.98, "learning_rate": 1.3713110269076152e-06, "loss": 0.1859, "step": 4053 }, { "epoch": 1.98, "learning_rate": 1.3701351512621958e-06, "loss": 0.1956, "step": 4054 }, { "epoch": 1.98, "learning_rate": 1.3689595896527958e-06, "loss": 0.1791, "step": 4055 }, { "epoch": 1.98, "learning_rate": 1.3677843424061504e-06, "loss": 0.2096, "step": 4056 }, { "epoch": 1.98, "learning_rate": 1.3666094098489103e-06, "loss": 0.1828, "step": 4057 }, { "epoch": 1.98, "learning_rate": 1.3654347923076359e-06, "loss": 0.1868, "step": 4058 }, { "epoch": 1.98, "learning_rate": 1.3642604901088042e-06, "loss": 0.1831, "step": 4059 }, { "epoch": 1.98, "learning_rate": 1.3630865035787998e-06, "loss": 0.1754, "step": 4060 }, { "epoch": 1.98, "learning_rate": 1.3619128330439247e-06, "loss": 0.2301, "step": 4061 }, { "epoch": 1.98, "learning_rate": 1.360739478830389e-06, "loss": 0.195, "step": 4062 }, { "epoch": 1.98, "learning_rate": 1.3595664412643147e-06, "loss": 0.1717, "step": 4063 }, { "epoch": 1.98, "learning_rate": 1.3583937206717398e-06, "loss": 0.2012, "step": 4064 }, { "epoch": 1.98, "learning_rate": 1.3572213173786108e-06, "loss": 0.1876, "step": 4065 }, { "epoch": 1.99, "learning_rate": 1.3560492317107854e-06, "loss": 0.1934, "step": 4066 }, { "epoch": 1.99, "learning_rate": 1.354877463994037e-06, "loss": 0.199, "step": 4067 }, { "epoch": 1.99, "learning_rate": 1.353706014554046e-06, "loss": 0.1783, "step": 4068 }, { "epoch": 1.99, "learning_rate": 1.3525348837164082e-06, "loss": 0.1748, "step": 4069 }, { "epoch": 1.99, "learning_rate": 1.3513640718066285e-06, "loss": 0.1982, "step": 4070 }, { "epoch": 1.99, "learning_rate": 1.3501935791501238e-06, "loss": 0.194, "step": 4071 }, { "epoch": 1.99, "learning_rate": 1.3490234060722213e-06, "loss": 0.1649, "step": 4072 }, { "epoch": 1.99, "learning_rate": 1.347853552898161e-06, "loss": 0.2035, "step": 4073 }, { "epoch": 1.99, "learning_rate": 1.3466840199530945e-06, "loss": 0.1661, "step": 4074 }, { "epoch": 1.99, "learning_rate": 1.3455148075620823e-06, "loss": 0.2014, "step": 4075 }, { "epoch": 1.99, "learning_rate": 1.3443459160500966e-06, "loss": 0.2049, "step": 4076 }, { "epoch": 1.99, "learning_rate": 1.3431773457420205e-06, "loss": 0.2074, "step": 4077 }, { "epoch": 1.99, "learning_rate": 1.342009096962647e-06, "loss": 0.1842, "step": 4078 }, { "epoch": 1.99, "learning_rate": 1.3408411700366814e-06, "loss": 0.2127, "step": 4079 }, { "epoch": 1.99, "learning_rate": 1.33967356528874e-06, "loss": 0.1595, "step": 4080 }, { "epoch": 1.99, "learning_rate": 1.3385062830433473e-06, "loss": 0.1563, "step": 4081 }, { "epoch": 1.99, "learning_rate": 1.3373393236249387e-06, "loss": 0.1943, "step": 4082 }, { "epoch": 1.99, "learning_rate": 1.3361726873578601e-06, "loss": 0.1814, "step": 4083 }, { "epoch": 1.99, "learning_rate": 1.3350063745663694e-06, "loss": 0.2042, "step": 4084 }, { "epoch": 1.99, "learning_rate": 1.3338403855746307e-06, "loss": 0.1783, "step": 4085 }, { "epoch": 2.0, "learning_rate": 1.3326747207067225e-06, "loss": 0.218, "step": 4086 }, { "epoch": 2.0, "learning_rate": 1.3315093802866305e-06, "loss": 0.1956, "step": 4087 }, { "epoch": 2.0, "learning_rate": 1.3303443646382497e-06, "loss": 0.1574, "step": 4088 }, { "epoch": 2.0, "learning_rate": 1.329179674085388e-06, "loss": 0.1985, "step": 4089 }, { "epoch": 2.0, "learning_rate": 1.3280153089517583e-06, "loss": 0.1834, "step": 4090 }, { "epoch": 2.0, "learning_rate": 1.3268512695609885e-06, "loss": 0.2091, "step": 4091 }, { "epoch": 2.0, "learning_rate": 1.3256875562366123e-06, "loss": 0.1872, "step": 4092 }, { "epoch": 2.0, "learning_rate": 1.3245241693020715e-06, "loss": 0.1688, "step": 4093 }, { "epoch": 2.0, "learning_rate": 1.3233611090807224e-06, "loss": 0.176, "step": 4094 }, { "epoch": 2.0, "learning_rate": 1.3221983758958256e-06, "loss": 0.1882, "step": 4095 }, { "epoch": 2.0, "learning_rate": 1.321035970070552e-06, "loss": 0.1363, "step": 4096 }, { "epoch": 2.0, "learning_rate": 1.319873891927984e-06, "loss": 0.1469, "step": 4097 }, { "epoch": 2.0, "learning_rate": 1.318712141791109e-06, "loss": 0.1882, "step": 4098 }, { "epoch": 2.0, "learning_rate": 1.3175507199828269e-06, "loss": 0.2349, "step": 4099 }, { "epoch": 2.0, "learning_rate": 1.3163896268259446e-06, "loss": 0.1716, "step": 4100 }, { "epoch": 2.0, "learning_rate": 1.315228862643177e-06, "loss": 0.153, "step": 4101 }, { "epoch": 2.0, "learning_rate": 1.3140684277571475e-06, "loss": 0.2055, "step": 4102 }, { "epoch": 2.0, "learning_rate": 1.31290832249039e-06, "loss": 0.2066, "step": 4103 }, { "epoch": 2.0, "learning_rate": 1.311748547165346e-06, "loss": 0.2065, "step": 4104 }, { "epoch": 2.0, "learning_rate": 1.3105891021043643e-06, "loss": 0.2231, "step": 4105 }, { "epoch": 2.0, "learning_rate": 1.3094299876297029e-06, "loss": 0.2143, "step": 4106 }, { "epoch": 2.01, "learning_rate": 1.3082712040635264e-06, "loss": 0.1577, "step": 4107 }, { "epoch": 2.01, "learning_rate": 1.3071127517279087e-06, "loss": 0.2182, "step": 4108 }, { "epoch": 2.01, "learning_rate": 1.3059546309448318e-06, "loss": 0.1851, "step": 4109 }, { "epoch": 2.01, "learning_rate": 1.3047968420361863e-06, "loss": 0.1814, "step": 4110 }, { "epoch": 2.01, "learning_rate": 1.3036393853237683e-06, "loss": 0.1866, "step": 4111 }, { "epoch": 2.01, "learning_rate": 1.302482261129283e-06, "loss": 0.1943, "step": 4112 }, { "epoch": 2.01, "learning_rate": 1.3013254697743422e-06, "loss": 0.1712, "step": 4113 }, { "epoch": 2.01, "learning_rate": 1.3001690115804672e-06, "loss": 0.1652, "step": 4114 }, { "epoch": 2.01, "learning_rate": 1.2990128868690837e-06, "loss": 0.1795, "step": 4115 }, { "epoch": 2.01, "learning_rate": 1.297857095961529e-06, "loss": 0.1846, "step": 4116 }, { "epoch": 2.01, "learning_rate": 1.296701639179043e-06, "loss": 0.1664, "step": 4117 }, { "epoch": 2.01, "learning_rate": 1.2955465168427744e-06, "loss": 0.1684, "step": 4118 }, { "epoch": 2.01, "learning_rate": 1.2943917292737812e-06, "loss": 0.2354, "step": 4119 }, { "epoch": 2.01, "learning_rate": 1.2932372767930254e-06, "loss": 0.1757, "step": 4120 }, { "epoch": 2.01, "learning_rate": 1.2920831597213761e-06, "loss": 0.2083, "step": 4121 }, { "epoch": 2.01, "learning_rate": 1.2909293783796118e-06, "loss": 0.1865, "step": 4122 }, { "epoch": 2.01, "learning_rate": 1.2897759330884141e-06, "loss": 0.1968, "step": 4123 }, { "epoch": 2.01, "learning_rate": 1.288622824168375e-06, "loss": 0.1963, "step": 4124 }, { "epoch": 2.01, "learning_rate": 1.2874700519399896e-06, "loss": 0.1836, "step": 4125 }, { "epoch": 2.01, "learning_rate": 1.28631761672366e-06, "loss": 0.1921, "step": 4126 }, { "epoch": 2.02, "learning_rate": 1.2851655188396983e-06, "loss": 0.2018, "step": 4127 }, { "epoch": 2.02, "learning_rate": 1.2840137586083169e-06, "loss": 0.2178, "step": 4128 }, { "epoch": 2.02, "learning_rate": 1.2828623363496395e-06, "loss": 0.1834, "step": 4129 }, { "epoch": 2.02, "learning_rate": 1.2817112523836933e-06, "loss": 0.1703, "step": 4130 }, { "epoch": 2.02, "learning_rate": 1.2805605070304121e-06, "loss": 0.1636, "step": 4131 }, { "epoch": 2.02, "learning_rate": 1.279410100609634e-06, "loss": 0.183, "step": 4132 }, { "epoch": 2.02, "learning_rate": 1.2782600334411054e-06, "loss": 0.1983, "step": 4133 }, { "epoch": 2.02, "learning_rate": 1.277110305844479e-06, "loss": 0.2047, "step": 4134 }, { "epoch": 2.02, "learning_rate": 1.2759609181393096e-06, "loss": 0.1862, "step": 4135 }, { "epoch": 2.02, "learning_rate": 1.2748118706450602e-06, "loss": 0.1905, "step": 4136 }, { "epoch": 2.02, "learning_rate": 1.2736631636810978e-06, "loss": 0.1992, "step": 4137 }, { "epoch": 2.02, "learning_rate": 1.2725147975666948e-06, "loss": 0.1815, "step": 4138 }, { "epoch": 2.02, "learning_rate": 1.2713667726210306e-06, "loss": 0.1834, "step": 4139 }, { "epoch": 2.02, "learning_rate": 1.2702190891631894e-06, "loss": 0.1912, "step": 4140 }, { "epoch": 2.02, "learning_rate": 1.2690717475121586e-06, "loss": 0.1834, "step": 4141 }, { "epoch": 2.02, "learning_rate": 1.2679247479868311e-06, "loss": 0.1959, "step": 4142 }, { "epoch": 2.02, "learning_rate": 1.2667780909060074e-06, "loss": 0.2075, "step": 4143 }, { "epoch": 2.02, "learning_rate": 1.2656317765883894e-06, "loss": 0.1579, "step": 4144 }, { "epoch": 2.02, "learning_rate": 1.2644858053525844e-06, "loss": 0.1809, "step": 4145 }, { "epoch": 2.02, "learning_rate": 1.2633401775171066e-06, "loss": 0.1846, "step": 4146 }, { "epoch": 2.02, "learning_rate": 1.262194893400372e-06, "loss": 0.1861, "step": 4147 }, { "epoch": 2.03, "learning_rate": 1.2610499533207037e-06, "loss": 0.1718, "step": 4148 }, { "epoch": 2.03, "learning_rate": 1.2599053575963265e-06, "loss": 0.1726, "step": 4149 }, { "epoch": 2.03, "learning_rate": 1.2587611065453713e-06, "loss": 0.232, "step": 4150 }, { "epoch": 2.03, "learning_rate": 1.2576172004858716e-06, "loss": 0.1713, "step": 4151 }, { "epoch": 2.03, "learning_rate": 1.2564736397357669e-06, "loss": 0.1825, "step": 4152 }, { "epoch": 2.03, "learning_rate": 1.2553304246129009e-06, "loss": 0.1999, "step": 4153 }, { "epoch": 2.03, "learning_rate": 1.2541875554350188e-06, "loss": 0.1561, "step": 4154 }, { "epoch": 2.03, "learning_rate": 1.2530450325197718e-06, "loss": 0.1657, "step": 4155 }, { "epoch": 2.03, "learning_rate": 1.2519028561847136e-06, "loss": 0.1831, "step": 4156 }, { "epoch": 2.03, "learning_rate": 1.2507610267473008e-06, "loss": 0.1845, "step": 4157 }, { "epoch": 2.03, "learning_rate": 1.2496195445248968e-06, "loss": 0.1662, "step": 4158 }, { "epoch": 2.03, "learning_rate": 1.2484784098347668e-06, "loss": 0.1626, "step": 4159 }, { "epoch": 2.03, "learning_rate": 1.2473376229940787e-06, "loss": 0.2097, "step": 4160 }, { "epoch": 2.03, "learning_rate": 1.2461971843199036e-06, "loss": 0.1904, "step": 4161 }, { "epoch": 2.03, "learning_rate": 1.2450570941292159e-06, "loss": 0.1323, "step": 4162 }, { "epoch": 2.03, "learning_rate": 1.2439173527388955e-06, "loss": 0.1896, "step": 4163 }, { "epoch": 2.03, "learning_rate": 1.2427779604657217e-06, "loss": 0.156, "step": 4164 }, { "epoch": 2.03, "learning_rate": 1.2416389176263802e-06, "loss": 0.2263, "step": 4165 }, { "epoch": 2.03, "learning_rate": 1.240500224537457e-06, "loss": 0.1578, "step": 4166 }, { "epoch": 2.03, "learning_rate": 1.2393618815154412e-06, "loss": 0.1905, "step": 4167 }, { "epoch": 2.04, "learning_rate": 1.238223888876727e-06, "loss": 0.1866, "step": 4168 }, { "epoch": 2.04, "learning_rate": 1.2370862469376077e-06, "loss": 0.1625, "step": 4169 }, { "epoch": 2.04, "learning_rate": 1.2359489560142827e-06, "loss": 0.1951, "step": 4170 }, { "epoch": 2.04, "learning_rate": 1.234812016422851e-06, "loss": 0.1828, "step": 4171 }, { "epoch": 2.04, "learning_rate": 1.2336754284793137e-06, "loss": 0.2039, "step": 4172 }, { "epoch": 2.04, "learning_rate": 1.232539192499578e-06, "loss": 0.1747, "step": 4173 }, { "epoch": 2.04, "learning_rate": 1.2314033087994495e-06, "loss": 0.1759, "step": 4174 }, { "epoch": 2.04, "learning_rate": 1.2302677776946363e-06, "loss": 0.1792, "step": 4175 }, { "epoch": 2.04, "learning_rate": 1.2291325995007508e-06, "loss": 0.1893, "step": 4176 }, { "epoch": 2.04, "learning_rate": 1.2279977745333043e-06, "loss": 0.1646, "step": 4177 }, { "epoch": 2.04, "learning_rate": 1.2268633031077132e-06, "loss": 0.163, "step": 4178 }, { "epoch": 2.04, "learning_rate": 1.2257291855392928e-06, "loss": 0.1682, "step": 4179 }, { "epoch": 2.04, "learning_rate": 1.2245954221432614e-06, "loss": 0.1914, "step": 4180 }, { "epoch": 2.04, "learning_rate": 1.2234620132347374e-06, "loss": 0.1664, "step": 4181 }, { "epoch": 2.04, "learning_rate": 1.222328959128743e-06, "loss": 0.1719, "step": 4182 }, { "epoch": 2.04, "learning_rate": 1.2211962601402014e-06, "loss": 0.1857, "step": 4183 }, { "epoch": 2.04, "learning_rate": 1.2200639165839356e-06, "loss": 0.1662, "step": 4184 }, { "epoch": 2.04, "learning_rate": 1.2189319287746704e-06, "loss": 0.1797, "step": 4185 }, { "epoch": 2.04, "learning_rate": 1.217800297027032e-06, "loss": 0.1557, "step": 4186 }, { "epoch": 2.04, "learning_rate": 1.2166690216555462e-06, "loss": 0.1747, "step": 4187 }, { "epoch": 2.04, "learning_rate": 1.2155381029746425e-06, "loss": 0.1637, "step": 4188 }, { "epoch": 2.05, "learning_rate": 1.2144075412986506e-06, "loss": 0.1765, "step": 4189 }, { "epoch": 2.05, "learning_rate": 1.213277336941799e-06, "loss": 0.1759, "step": 4190 }, { "epoch": 2.05, "learning_rate": 1.2121474902182185e-06, "loss": 0.144, "step": 4191 }, { "epoch": 2.05, "learning_rate": 1.2110180014419387e-06, "loss": 0.1644, "step": 4192 }, { "epoch": 2.05, "learning_rate": 1.2098888709268932e-06, "loss": 0.1801, "step": 4193 }, { "epoch": 2.05, "learning_rate": 1.2087600989869125e-06, "loss": 0.171, "step": 4194 }, { "epoch": 2.05, "learning_rate": 1.2076316859357297e-06, "loss": 0.1954, "step": 4195 }, { "epoch": 2.05, "learning_rate": 1.2065036320869772e-06, "loss": 0.1711, "step": 4196 }, { "epoch": 2.05, "learning_rate": 1.2053759377541865e-06, "loss": 0.1687, "step": 4197 }, { "epoch": 2.05, "learning_rate": 1.2042486032507921e-06, "loss": 0.151, "step": 4198 }, { "epoch": 2.05, "learning_rate": 1.203121628890126e-06, "loss": 0.1734, "step": 4199 }, { "epoch": 2.05, "learning_rate": 1.2019950149854195e-06, "loss": 0.1734, "step": 4200 }, { "epoch": 2.05, "learning_rate": 1.2008687618498075e-06, "loss": 0.1591, "step": 4201 }, { "epoch": 2.05, "learning_rate": 1.1997428697963195e-06, "loss": 0.1968, "step": 4202 }, { "epoch": 2.05, "learning_rate": 1.1986173391378904e-06, "loss": 0.184, "step": 4203 }, { "epoch": 2.05, "learning_rate": 1.1974921701873492e-06, "loss": 0.1577, "step": 4204 }, { "epoch": 2.05, "learning_rate": 1.1963673632574268e-06, "loss": 0.1943, "step": 4205 }, { "epoch": 2.05, "learning_rate": 1.195242918660755e-06, "loss": 0.2008, "step": 4206 }, { "epoch": 2.05, "learning_rate": 1.1941188367098615e-06, "loss": 0.1711, "step": 4207 }, { "epoch": 2.05, "learning_rate": 1.1929951177171767e-06, "loss": 0.1638, "step": 4208 }, { "epoch": 2.06, "learning_rate": 1.1918717619950277e-06, "loss": 0.1785, "step": 4209 }, { "epoch": 2.06, "learning_rate": 1.1907487698556414e-06, "loss": 0.1683, "step": 4210 }, { "epoch": 2.06, "learning_rate": 1.1896261416111422e-06, "loss": 0.1655, "step": 4211 }, { "epoch": 2.06, "learning_rate": 1.1885038775735564e-06, "loss": 0.1745, "step": 4212 }, { "epoch": 2.06, "learning_rate": 1.1873819780548076e-06, "loss": 0.1767, "step": 4213 }, { "epoch": 2.06, "learning_rate": 1.1862604433667178e-06, "loss": 0.1662, "step": 4214 }, { "epoch": 2.06, "learning_rate": 1.1851392738210072e-06, "loss": 0.1585, "step": 4215 }, { "epoch": 2.06, "learning_rate": 1.184018469729295e-06, "loss": 0.1477, "step": 4216 }, { "epoch": 2.06, "learning_rate": 1.1828980314030982e-06, "loss": 0.2252, "step": 4217 }, { "epoch": 2.06, "learning_rate": 1.1817779591538337e-06, "loss": 0.1548, "step": 4218 }, { "epoch": 2.06, "learning_rate": 1.1806582532928166e-06, "loss": 0.149, "step": 4219 }, { "epoch": 2.06, "learning_rate": 1.1795389141312584e-06, "loss": 0.1825, "step": 4220 }, { "epoch": 2.06, "learning_rate": 1.1784199419802694e-06, "loss": 0.152, "step": 4221 }, { "epoch": 2.06, "learning_rate": 1.1773013371508576e-06, "loss": 0.1721, "step": 4222 }, { "epoch": 2.06, "learning_rate": 1.1761830999539309e-06, "loss": 0.1552, "step": 4223 }, { "epoch": 2.06, "learning_rate": 1.175065230700292e-06, "loss": 0.1559, "step": 4224 }, { "epoch": 2.06, "learning_rate": 1.1739477297006442e-06, "loss": 0.1734, "step": 4225 }, { "epoch": 2.06, "learning_rate": 1.1728305972655863e-06, "loss": 0.183, "step": 4226 }, { "epoch": 2.06, "learning_rate": 1.171713833705615e-06, "loss": 0.1556, "step": 4227 }, { "epoch": 2.06, "learning_rate": 1.1705974393311261e-06, "loss": 0.1677, "step": 4228 }, { "epoch": 2.06, "learning_rate": 1.1694814144524114e-06, "loss": 0.2052, "step": 4229 }, { "epoch": 2.07, "learning_rate": 1.1683657593796585e-06, "loss": 0.1592, "step": 4230 }, { "epoch": 2.07, "learning_rate": 1.1672504744229565e-06, "loss": 0.1657, "step": 4231 }, { "epoch": 2.07, "learning_rate": 1.1661355598922868e-06, "loss": 0.1445, "step": 4232 }, { "epoch": 2.07, "learning_rate": 1.1650210160975323e-06, "loss": 0.1665, "step": 4233 }, { "epoch": 2.07, "learning_rate": 1.1639068433484693e-06, "loss": 0.1909, "step": 4234 }, { "epoch": 2.07, "learning_rate": 1.1627930419547728e-06, "loss": 0.1358, "step": 4235 }, { "epoch": 2.07, "learning_rate": 1.1616796122260127e-06, "loss": 0.157, "step": 4236 }, { "epoch": 2.07, "learning_rate": 1.1605665544716584e-06, "loss": 0.1786, "step": 4237 }, { "epoch": 2.07, "learning_rate": 1.159453869001075e-06, "loss": 0.1318, "step": 4238 }, { "epoch": 2.07, "learning_rate": 1.1583415561235234e-06, "loss": 0.1757, "step": 4239 }, { "epoch": 2.07, "learning_rate": 1.1572296161481604e-06, "loss": 0.1432, "step": 4240 }, { "epoch": 2.07, "learning_rate": 1.1561180493840398e-06, "loss": 0.1453, "step": 4241 }, { "epoch": 2.07, "learning_rate": 1.1550068561401124e-06, "loss": 0.1442, "step": 4242 }, { "epoch": 2.07, "learning_rate": 1.1538960367252253e-06, "loss": 0.1515, "step": 4243 }, { "epoch": 2.07, "learning_rate": 1.1527855914481204e-06, "loss": 0.1522, "step": 4244 }, { "epoch": 2.07, "learning_rate": 1.151675520617436e-06, "loss": 0.1606, "step": 4245 }, { "epoch": 2.07, "learning_rate": 1.150565824541706e-06, "loss": 0.1676, "step": 4246 }, { "epoch": 2.07, "learning_rate": 1.149456503529362e-06, "loss": 0.1326, "step": 4247 }, { "epoch": 2.07, "learning_rate": 1.1483475578887285e-06, "loss": 0.1196, "step": 4248 }, { "epoch": 2.07, "learning_rate": 1.147238987928029e-06, "loss": 0.1871, "step": 4249 }, { "epoch": 2.08, "learning_rate": 1.14613079395538e-06, "loss": 0.1395, "step": 4250 }, { "epoch": 2.08, "learning_rate": 1.1450229762787932e-06, "loss": 0.1818, "step": 4251 }, { "epoch": 2.08, "learning_rate": 1.1439155352061787e-06, "loss": 0.1585, "step": 4252 }, { "epoch": 2.08, "learning_rate": 1.1428084710453391e-06, "loss": 0.1451, "step": 4253 }, { "epoch": 2.08, "learning_rate": 1.1417017841039724e-06, "loss": 0.1345, "step": 4254 }, { "epoch": 2.08, "learning_rate": 1.1405954746896742e-06, "loss": 0.166, "step": 4255 }, { "epoch": 2.08, "learning_rate": 1.1394895431099315e-06, "loss": 0.1395, "step": 4256 }, { "epoch": 2.08, "learning_rate": 1.1383839896721302e-06, "loss": 0.1528, "step": 4257 }, { "epoch": 2.08, "learning_rate": 1.1372788146835487e-06, "loss": 0.1497, "step": 4258 }, { "epoch": 2.08, "learning_rate": 1.1361740184513601e-06, "loss": 0.1734, "step": 4259 }, { "epoch": 2.08, "learning_rate": 1.1350696012826321e-06, "loss": 0.1176, "step": 4260 }, { "epoch": 2.08, "learning_rate": 1.1339655634843285e-06, "loss": 0.1463, "step": 4261 }, { "epoch": 2.08, "learning_rate": 1.1328619053633083e-06, "loss": 0.1548, "step": 4262 }, { "epoch": 2.08, "learning_rate": 1.1317586272263222e-06, "loss": 0.1459, "step": 4263 }, { "epoch": 2.08, "learning_rate": 1.1306557293800172e-06, "loss": 0.1471, "step": 4264 }, { "epoch": 2.08, "learning_rate": 1.1295532121309336e-06, "loss": 0.1532, "step": 4265 }, { "epoch": 2.08, "learning_rate": 1.1284510757855055e-06, "loss": 0.1262, "step": 4266 }, { "epoch": 2.08, "learning_rate": 1.1273493206500632e-06, "loss": 0.1394, "step": 4267 }, { "epoch": 2.08, "learning_rate": 1.1262479470308308e-06, "loss": 0.1171, "step": 4268 }, { "epoch": 2.08, "learning_rate": 1.1251469552339242e-06, "loss": 0.157, "step": 4269 }, { "epoch": 2.08, "learning_rate": 1.1240463455653547e-06, "loss": 0.1357, "step": 4270 }, { "epoch": 2.09, "learning_rate": 1.1229461183310259e-06, "loss": 0.1228, "step": 4271 }, { "epoch": 2.09, "learning_rate": 1.1218462738367384e-06, "loss": 0.1521, "step": 4272 }, { "epoch": 2.09, "learning_rate": 1.1207468123881824e-06, "loss": 0.1683, "step": 4273 }, { "epoch": 2.09, "learning_rate": 1.119647734290945e-06, "loss": 0.1546, "step": 4274 }, { "epoch": 2.09, "learning_rate": 1.1185490398505052e-06, "loss": 0.1363, "step": 4275 }, { "epoch": 2.09, "learning_rate": 1.1174507293722341e-06, "loss": 0.1734, "step": 4276 }, { "epoch": 2.09, "learning_rate": 1.1163528031613994e-06, "loss": 0.1227, "step": 4277 }, { "epoch": 2.09, "learning_rate": 1.1152552615231588e-06, "loss": 0.1348, "step": 4278 }, { "epoch": 2.09, "learning_rate": 1.1141581047625638e-06, "loss": 0.1224, "step": 4279 }, { "epoch": 2.09, "learning_rate": 1.1130613331845613e-06, "loss": 0.1202, "step": 4280 }, { "epoch": 2.09, "learning_rate": 1.1119649470939875e-06, "loss": 0.1413, "step": 4281 }, { "epoch": 2.09, "learning_rate": 1.1108689467955749e-06, "loss": 0.135, "step": 4282 }, { "epoch": 2.09, "learning_rate": 1.1097733325939465e-06, "loss": 0.1354, "step": 4283 }, { "epoch": 2.09, "learning_rate": 1.1086781047936176e-06, "loss": 0.1585, "step": 4284 }, { "epoch": 2.09, "learning_rate": 1.107583263698999e-06, "loss": 0.1695, "step": 4285 }, { "epoch": 2.09, "learning_rate": 1.1064888096143906e-06, "loss": 0.1213, "step": 4286 }, { "epoch": 2.09, "learning_rate": 1.1053947428439877e-06, "loss": 0.1177, "step": 4287 }, { "epoch": 2.09, "learning_rate": 1.1043010636918759e-06, "loss": 0.1485, "step": 4288 }, { "epoch": 2.09, "learning_rate": 1.1032077724620335e-06, "loss": 0.1631, "step": 4289 }, { "epoch": 2.09, "learning_rate": 1.1021148694583306e-06, "loss": 0.1261, "step": 4290 }, { "epoch": 2.1, "learning_rate": 1.1010223549845309e-06, "loss": 0.1122, "step": 4291 }, { "epoch": 2.1, "learning_rate": 1.0999302293442898e-06, "loss": 0.1208, "step": 4292 }, { "epoch": 2.1, "learning_rate": 1.0988384928411535e-06, "loss": 0.1219, "step": 4293 }, { "epoch": 2.1, "learning_rate": 1.0977471457785602e-06, "loss": 0.1209, "step": 4294 }, { "epoch": 2.1, "learning_rate": 1.0966561884598405e-06, "loss": 0.1361, "step": 4295 }, { "epoch": 2.1, "learning_rate": 1.0955656211882157e-06, "loss": 0.1443, "step": 4296 }, { "epoch": 2.1, "learning_rate": 1.0944754442667996e-06, "loss": 0.1456, "step": 4297 }, { "epoch": 2.1, "learning_rate": 1.093385657998599e-06, "loss": 0.1592, "step": 4298 }, { "epoch": 2.1, "learning_rate": 1.092296262686509e-06, "loss": 0.1624, "step": 4299 }, { "epoch": 2.1, "learning_rate": 1.0912072586333177e-06, "loss": 0.1467, "step": 4300 }, { "epoch": 2.1, "learning_rate": 1.0901186461417036e-06, "loss": 0.1543, "step": 4301 }, { "epoch": 2.1, "learning_rate": 1.0890304255142377e-06, "loss": 0.1291, "step": 4302 }, { "epoch": 2.1, "learning_rate": 1.0879425970533811e-06, "loss": 0.1101, "step": 4303 }, { "epoch": 2.1, "learning_rate": 1.0868551610614865e-06, "loss": 0.1584, "step": 4304 }, { "epoch": 2.1, "learning_rate": 1.0857681178407974e-06, "loss": 0.1503, "step": 4305 }, { "epoch": 2.1, "learning_rate": 1.0846814676934466e-06, "loss": 0.1142, "step": 4306 }, { "epoch": 2.1, "learning_rate": 1.083595210921461e-06, "loss": 0.1498, "step": 4307 }, { "epoch": 2.1, "learning_rate": 1.0825093478267545e-06, "loss": 0.1541, "step": 4308 }, { "epoch": 2.1, "learning_rate": 1.0814238787111334e-06, "loss": 0.13, "step": 4309 }, { "epoch": 2.1, "learning_rate": 1.0803388038762953e-06, "loss": 0.1638, "step": 4310 }, { "epoch": 2.1, "learning_rate": 1.0792541236238255e-06, "loss": 0.1427, "step": 4311 }, { "epoch": 2.11, "learning_rate": 1.0781698382552034e-06, "loss": 0.1206, "step": 4312 }, { "epoch": 2.11, "learning_rate": 1.0770859480717957e-06, "loss": 0.1567, "step": 4313 }, { "epoch": 2.11, "learning_rate": 1.07600245337486e-06, "loss": 0.1354, "step": 4314 }, { "epoch": 2.11, "learning_rate": 1.0749193544655435e-06, "loss": 0.1471, "step": 4315 }, { "epoch": 2.11, "learning_rate": 1.0738366516448848e-06, "loss": 0.1464, "step": 4316 }, { "epoch": 2.11, "learning_rate": 1.0727543452138123e-06, "loss": 0.1315, "step": 4317 }, { "epoch": 2.11, "learning_rate": 1.0716724354731432e-06, "loss": 0.128, "step": 4318 }, { "epoch": 2.11, "learning_rate": 1.070590922723584e-06, "loss": 0.1534, "step": 4319 }, { "epoch": 2.11, "learning_rate": 1.0695098072657318e-06, "loss": 0.1483, "step": 4320 }, { "epoch": 2.11, "learning_rate": 1.0684290894000735e-06, "loss": 0.1627, "step": 4321 }, { "epoch": 2.11, "learning_rate": 1.067348769426986e-06, "loss": 0.1329, "step": 4322 }, { "epoch": 2.11, "learning_rate": 1.0662688476467336e-06, "loss": 0.1525, "step": 4323 }, { "epoch": 2.11, "learning_rate": 1.065189324359472e-06, "loss": 0.1073, "step": 4324 }, { "epoch": 2.11, "learning_rate": 1.0641101998652443e-06, "loss": 0.1232, "step": 4325 }, { "epoch": 2.11, "learning_rate": 1.0630314744639831e-06, "loss": 0.1275, "step": 4326 }, { "epoch": 2.11, "learning_rate": 1.0619531484555117e-06, "loss": 0.1355, "step": 4327 }, { "epoch": 2.11, "learning_rate": 1.060875222139542e-06, "loss": 0.1128, "step": 4328 }, { "epoch": 2.11, "learning_rate": 1.0597976958156737e-06, "loss": 0.1457, "step": 4329 }, { "epoch": 2.11, "learning_rate": 1.0587205697833952e-06, "loss": 0.1426, "step": 4330 }, { "epoch": 2.11, "learning_rate": 1.0576438443420836e-06, "loss": 0.1299, "step": 4331 }, { "epoch": 2.12, "learning_rate": 1.0565675197910072e-06, "loss": 0.1562, "step": 4332 }, { "epoch": 2.12, "learning_rate": 1.0554915964293192e-06, "loss": 0.1267, "step": 4333 }, { "epoch": 2.12, "learning_rate": 1.0544160745560645e-06, "loss": 0.1434, "step": 4334 }, { "epoch": 2.12, "learning_rate": 1.0533409544701743e-06, "loss": 0.16, "step": 4335 }, { "epoch": 2.12, "learning_rate": 1.0522662364704677e-06, "loss": 0.1541, "step": 4336 }, { "epoch": 2.12, "learning_rate": 1.0511919208556554e-06, "loss": 0.1404, "step": 4337 }, { "epoch": 2.12, "learning_rate": 1.0501180079243327e-06, "loss": 0.1364, "step": 4338 }, { "epoch": 2.12, "learning_rate": 1.0490444979749834e-06, "loss": 0.1458, "step": 4339 }, { "epoch": 2.12, "learning_rate": 1.0479713913059812e-06, "loss": 0.1279, "step": 4340 }, { "epoch": 2.12, "learning_rate": 1.0468986882155875e-06, "loss": 0.1077, "step": 4341 }, { "epoch": 2.12, "learning_rate": 1.0458263890019496e-06, "loss": 0.1362, "step": 4342 }, { "epoch": 2.12, "learning_rate": 1.0447544939631041e-06, "loss": 0.16, "step": 4343 }, { "epoch": 2.12, "learning_rate": 1.0436830033969746e-06, "loss": 0.1524, "step": 4344 }, { "epoch": 2.12, "learning_rate": 1.0426119176013717e-06, "loss": 0.1657, "step": 4345 }, { "epoch": 2.12, "learning_rate": 1.041541236873995e-06, "loss": 0.1677, "step": 4346 }, { "epoch": 2.12, "learning_rate": 1.0404709615124317e-06, "loss": 0.1315, "step": 4347 }, { "epoch": 2.12, "learning_rate": 1.0394010918141547e-06, "loss": 0.1466, "step": 4348 }, { "epoch": 2.12, "learning_rate": 1.0383316280765248e-06, "loss": 0.1417, "step": 4349 }, { "epoch": 2.12, "learning_rate": 1.0372625705967893e-06, "loss": 0.1601, "step": 4350 }, { "epoch": 2.12, "learning_rate": 1.0361939196720848e-06, "loss": 0.1335, "step": 4351 }, { "epoch": 2.12, "learning_rate": 1.0351256755994324e-06, "loss": 0.1437, "step": 4352 }, { "epoch": 2.13, "learning_rate": 1.0340578386757423e-06, "loss": 0.1613, "step": 4353 }, { "epoch": 2.13, "learning_rate": 1.0329904091978094e-06, "loss": 0.1593, "step": 4354 }, { "epoch": 2.13, "learning_rate": 1.0319233874623157e-06, "loss": 0.139, "step": 4355 }, { "epoch": 2.13, "learning_rate": 1.0308567737658325e-06, "loss": 0.1524, "step": 4356 }, { "epoch": 2.13, "learning_rate": 1.0297905684048137e-06, "loss": 0.117, "step": 4357 }, { "epoch": 2.13, "learning_rate": 1.0287247716756038e-06, "loss": 0.1283, "step": 4358 }, { "epoch": 2.13, "learning_rate": 1.0276593838744303e-06, "loss": 0.1405, "step": 4359 }, { "epoch": 2.13, "learning_rate": 1.0265944052974079e-06, "loss": 0.1602, "step": 4360 }, { "epoch": 2.13, "learning_rate": 1.0255298362405393e-06, "loss": 0.1088, "step": 4361 }, { "epoch": 2.13, "learning_rate": 1.0244656769997118e-06, "loss": 0.1873, "step": 4362 }, { "epoch": 2.13, "learning_rate": 1.0234019278706981e-06, "loss": 0.156, "step": 4363 }, { "epoch": 2.13, "learning_rate": 1.0223385891491594e-06, "loss": 0.1439, "step": 4364 }, { "epoch": 2.13, "learning_rate": 1.0212756611306398e-06, "loss": 0.1292, "step": 4365 }, { "epoch": 2.13, "learning_rate": 1.0202131441105728e-06, "loss": 0.142, "step": 4366 }, { "epoch": 2.13, "learning_rate": 1.0191510383842745e-06, "loss": 0.1389, "step": 4367 }, { "epoch": 2.13, "learning_rate": 1.0180893442469482e-06, "loss": 0.1228, "step": 4368 }, { "epoch": 2.13, "learning_rate": 1.0170280619936808e-06, "loss": 0.1151, "step": 4369 }, { "epoch": 2.13, "learning_rate": 1.0159671919194483e-06, "loss": 0.1501, "step": 4370 }, { "epoch": 2.13, "learning_rate": 1.0149067343191104e-06, "loss": 0.1699, "step": 4371 }, { "epoch": 2.13, "learning_rate": 1.0138466894874116e-06, "loss": 0.1336, "step": 4372 }, { "epoch": 2.14, "learning_rate": 1.0127870577189817e-06, "loss": 0.1409, "step": 4373 }, { "epoch": 2.14, "learning_rate": 1.0117278393083357e-06, "loss": 0.1398, "step": 4374 }, { "epoch": 2.14, "learning_rate": 1.0106690345498741e-06, "loss": 0.124, "step": 4375 }, { "epoch": 2.14, "learning_rate": 1.0096106437378824e-06, "loss": 0.1373, "step": 4376 }, { "epoch": 2.14, "learning_rate": 1.0085526671665327e-06, "loss": 0.1383, "step": 4377 }, { "epoch": 2.14, "learning_rate": 1.0074951051298788e-06, "loss": 0.1805, "step": 4378 }, { "epoch": 2.14, "learning_rate": 1.006437957921861e-06, "loss": 0.1239, "step": 4379 }, { "epoch": 2.14, "learning_rate": 1.0053812258363033e-06, "loss": 0.1339, "step": 4380 }, { "epoch": 2.14, "learning_rate": 1.0043249091669168e-06, "loss": 0.1338, "step": 4381 }, { "epoch": 2.14, "learning_rate": 1.0032690082072935e-06, "loss": 0.1251, "step": 4382 }, { "epoch": 2.14, "learning_rate": 1.002213523250914e-06, "loss": 0.1441, "step": 4383 }, { "epoch": 2.14, "learning_rate": 1.00115845459114e-06, "loss": 0.1622, "step": 4384 }, { "epoch": 2.14, "learning_rate": 1.0001038025212173e-06, "loss": 0.1094, "step": 4385 }, { "epoch": 2.14, "learning_rate": 9.990495673342792e-07, "loss": 0.1152, "step": 4386 }, { "epoch": 2.14, "learning_rate": 9.979957493233406e-07, "loss": 0.1258, "step": 4387 }, { "epoch": 2.14, "learning_rate": 9.969423487812996e-07, "loss": 0.154, "step": 4388 }, { "epoch": 2.14, "learning_rate": 9.95889366000942e-07, "loss": 0.157, "step": 4389 }, { "epoch": 2.14, "learning_rate": 9.948368012749325e-07, "loss": 0.1643, "step": 4390 }, { "epoch": 2.14, "learning_rate": 9.937846548958247e-07, "loss": 0.1285, "step": 4391 }, { "epoch": 2.14, "learning_rate": 9.92732927156052e-07, "loss": 0.1305, "step": 4392 }, { "epoch": 2.15, "learning_rate": 9.916816183479336e-07, "loss": 0.1613, "step": 4393 }, { "epoch": 2.15, "learning_rate": 9.906307287636703e-07, "loss": 0.1507, "step": 4394 }, { "epoch": 2.15, "learning_rate": 9.895802586953482e-07, "loss": 0.1395, "step": 4395 }, { "epoch": 2.15, "learning_rate": 9.885302084349375e-07, "loss": 0.1371, "step": 4396 }, { "epoch": 2.15, "learning_rate": 9.874805782742897e-07, "loss": 0.1439, "step": 4397 }, { "epoch": 2.15, "learning_rate": 9.864313685051396e-07, "loss": 0.1395, "step": 4398 }, { "epoch": 2.15, "learning_rate": 9.853825794191061e-07, "loss": 0.1354, "step": 4399 }, { "epoch": 2.15, "learning_rate": 9.843342113076906e-07, "loss": 0.1718, "step": 4400 }, { "epoch": 2.15, "learning_rate": 9.832862644622796e-07, "loss": 0.1481, "step": 4401 }, { "epoch": 2.15, "learning_rate": 9.822387391741395e-07, "loss": 0.1377, "step": 4402 }, { "epoch": 2.15, "learning_rate": 9.811916357344206e-07, "loss": 0.1643, "step": 4403 }, { "epoch": 2.15, "learning_rate": 9.801449544341562e-07, "loss": 0.1224, "step": 4404 }, { "epoch": 2.15, "learning_rate": 9.790986955642615e-07, "loss": 0.1323, "step": 4405 }, { "epoch": 2.15, "learning_rate": 9.780528594155355e-07, "loss": 0.1606, "step": 4406 }, { "epoch": 2.15, "learning_rate": 9.770074462786605e-07, "loss": 0.1484, "step": 4407 }, { "epoch": 2.15, "learning_rate": 9.75962456444199e-07, "loss": 0.1817, "step": 4408 }, { "epoch": 2.15, "learning_rate": 9.749178902025964e-07, "loss": 0.1496, "step": 4409 }, { "epoch": 2.15, "learning_rate": 9.738737478441798e-07, "loss": 0.1342, "step": 4410 }, { "epoch": 2.15, "learning_rate": 9.728300296591615e-07, "loss": 0.1331, "step": 4411 }, { "epoch": 2.15, "learning_rate": 9.717867359376318e-07, "loss": 0.1383, "step": 4412 }, { "epoch": 2.15, "learning_rate": 9.707438669695668e-07, "loss": 0.1604, "step": 4413 }, { "epoch": 2.16, "learning_rate": 9.697014230448224e-07, "loss": 0.1321, "step": 4414 }, { "epoch": 2.16, "learning_rate": 9.68659404453135e-07, "loss": 0.1029, "step": 4415 }, { "epoch": 2.16, "learning_rate": 9.67617811484127e-07, "loss": 0.1434, "step": 4416 }, { "epoch": 2.16, "learning_rate": 9.665766444272989e-07, "loss": 0.1299, "step": 4417 }, { "epoch": 2.16, "learning_rate": 9.65535903572033e-07, "loss": 0.1448, "step": 4418 }, { "epoch": 2.16, "learning_rate": 9.644955892075962e-07, "loss": 0.1329, "step": 4419 }, { "epoch": 2.16, "learning_rate": 9.634557016231322e-07, "loss": 0.1448, "step": 4420 }, { "epoch": 2.16, "learning_rate": 9.62416241107671e-07, "loss": 0.1351, "step": 4421 }, { "epoch": 2.16, "learning_rate": 9.613772079501205e-07, "loss": 0.1269, "step": 4422 }, { "epoch": 2.16, "learning_rate": 9.603386024392709e-07, "loss": 0.128, "step": 4423 }, { "epoch": 2.16, "learning_rate": 9.593004248637927e-07, "loss": 0.1255, "step": 4424 }, { "epoch": 2.16, "learning_rate": 9.582626755122387e-07, "loss": 0.133, "step": 4425 }, { "epoch": 2.16, "learning_rate": 9.572253546730437e-07, "loss": 0.1443, "step": 4426 }, { "epoch": 2.16, "learning_rate": 9.561884626345206e-07, "loss": 0.1124, "step": 4427 }, { "epoch": 2.16, "learning_rate": 9.551519996848647e-07, "loss": 0.1443, "step": 4428 }, { "epoch": 2.16, "learning_rate": 9.541159661121516e-07, "loss": 0.1296, "step": 4429 }, { "epoch": 2.16, "learning_rate": 9.530803622043367e-07, "loss": 0.148, "step": 4430 }, { "epoch": 2.16, "learning_rate": 9.520451882492585e-07, "loss": 0.1479, "step": 4431 }, { "epoch": 2.16, "learning_rate": 9.510104445346347e-07, "loss": 0.1311, "step": 4432 }, { "epoch": 2.16, "learning_rate": 9.499761313480627e-07, "loss": 0.1385, "step": 4433 }, { "epoch": 2.17, "learning_rate": 9.489422489770208e-07, "loss": 0.1607, "step": 4434 }, { "epoch": 2.17, "learning_rate": 9.479087977088666e-07, "loss": 0.1643, "step": 4435 }, { "epoch": 2.17, "learning_rate": 9.468757778308396e-07, "loss": 0.1213, "step": 4436 }, { "epoch": 2.17, "learning_rate": 9.458431896300599e-07, "loss": 0.1443, "step": 4437 }, { "epoch": 2.17, "learning_rate": 9.448110333935248e-07, "loss": 0.1244, "step": 4438 }, { "epoch": 2.17, "learning_rate": 9.437793094081124e-07, "loss": 0.1155, "step": 4439 }, { "epoch": 2.17, "learning_rate": 9.427480179605835e-07, "loss": 0.1291, "step": 4440 }, { "epoch": 2.17, "learning_rate": 9.417171593375751e-07, "loss": 0.1404, "step": 4441 }, { "epoch": 2.17, "learning_rate": 9.406867338256048e-07, "loss": 0.1165, "step": 4442 }, { "epoch": 2.17, "learning_rate": 9.396567417110716e-07, "loss": 0.1538, "step": 4443 }, { "epoch": 2.17, "learning_rate": 9.386271832802518e-07, "loss": 0.1464, "step": 4444 }, { "epoch": 2.17, "learning_rate": 9.375980588193032e-07, "loss": 0.1422, "step": 4445 }, { "epoch": 2.17, "learning_rate": 9.365693686142613e-07, "loss": 0.1348, "step": 4446 }, { "epoch": 2.17, "learning_rate": 9.355411129510414e-07, "loss": 0.1285, "step": 4447 }, { "epoch": 2.17, "learning_rate": 9.345132921154373e-07, "loss": 0.1194, "step": 4448 }, { "epoch": 2.17, "learning_rate": 9.334859063931237e-07, "loss": 0.1234, "step": 4449 }, { "epoch": 2.17, "learning_rate": 9.324589560696545e-07, "loss": 0.1399, "step": 4450 }, { "epoch": 2.17, "learning_rate": 9.314324414304602e-07, "loss": 0.1352, "step": 4451 }, { "epoch": 2.17, "learning_rate": 9.30406362760852e-07, "loss": 0.1581, "step": 4452 }, { "epoch": 2.17, "learning_rate": 9.293807203460192e-07, "loss": 0.1485, "step": 4453 }, { "epoch": 2.17, "learning_rate": 9.283555144710291e-07, "loss": 0.1905, "step": 4454 }, { "epoch": 2.18, "learning_rate": 9.273307454208299e-07, "loss": 0.1493, "step": 4455 }, { "epoch": 2.18, "learning_rate": 9.26306413480248e-07, "loss": 0.1405, "step": 4456 }, { "epoch": 2.18, "learning_rate": 9.252825189339865e-07, "loss": 0.1523, "step": 4457 }, { "epoch": 2.18, "learning_rate": 9.242590620666275e-07, "loss": 0.1126, "step": 4458 }, { "epoch": 2.18, "learning_rate": 9.232360431626317e-07, "loss": 0.1166, "step": 4459 }, { "epoch": 2.18, "learning_rate": 9.222134625063395e-07, "loss": 0.1397, "step": 4460 }, { "epoch": 2.18, "learning_rate": 9.211913203819667e-07, "loss": 0.1393, "step": 4461 }, { "epoch": 2.18, "learning_rate": 9.201696170736102e-07, "loss": 0.1103, "step": 4462 }, { "epoch": 2.18, "learning_rate": 9.191483528652429e-07, "loss": 0.1246, "step": 4463 }, { "epoch": 2.18, "learning_rate": 9.181275280407151e-07, "loss": 0.1526, "step": 4464 }, { "epoch": 2.18, "learning_rate": 9.171071428837583e-07, "loss": 0.1656, "step": 4465 }, { "epoch": 2.18, "learning_rate": 9.160871976779781e-07, "loss": 0.1526, "step": 4466 }, { "epoch": 2.18, "learning_rate": 9.150676927068589e-07, "loss": 0.1269, "step": 4467 }, { "epoch": 2.18, "learning_rate": 9.140486282537645e-07, "loss": 0.1128, "step": 4468 }, { "epoch": 2.18, "learning_rate": 9.130300046019336e-07, "loss": 0.1543, "step": 4469 }, { "epoch": 2.18, "learning_rate": 9.120118220344854e-07, "loss": 0.1308, "step": 4470 }, { "epoch": 2.18, "learning_rate": 9.109940808344137e-07, "loss": 0.1389, "step": 4471 }, { "epoch": 2.18, "learning_rate": 9.099767812845898e-07, "loss": 0.1558, "step": 4472 }, { "epoch": 2.18, "learning_rate": 9.089599236677652e-07, "loss": 0.1463, "step": 4473 }, { "epoch": 2.18, "learning_rate": 9.079435082665646e-07, "loss": 0.1624, "step": 4474 }, { "epoch": 2.19, "learning_rate": 9.069275353634938e-07, "loss": 0.1477, "step": 4475 }, { "epoch": 2.19, "learning_rate": 9.059120052409326e-07, "loss": 0.123, "step": 4476 }, { "epoch": 2.19, "learning_rate": 9.048969181811382e-07, "loss": 0.1417, "step": 4477 }, { "epoch": 2.19, "learning_rate": 9.038822744662448e-07, "loss": 0.1601, "step": 4478 }, { "epoch": 2.19, "learning_rate": 9.028680743782641e-07, "loss": 0.1238, "step": 4479 }, { "epoch": 2.19, "learning_rate": 9.018543181990858e-07, "loss": 0.1358, "step": 4480 }, { "epoch": 2.19, "learning_rate": 9.008410062104728e-07, "loss": 0.1438, "step": 4481 }, { "epoch": 2.19, "learning_rate": 8.998281386940666e-07, "loss": 0.127, "step": 4482 }, { "epoch": 2.19, "learning_rate": 8.988157159313852e-07, "loss": 0.1344, "step": 4483 }, { "epoch": 2.19, "learning_rate": 8.97803738203821e-07, "loss": 0.1346, "step": 4484 }, { "epoch": 2.19, "learning_rate": 8.967922057926456e-07, "loss": 0.1502, "step": 4485 }, { "epoch": 2.19, "learning_rate": 8.957811189790067e-07, "loss": 0.1508, "step": 4486 }, { "epoch": 2.19, "learning_rate": 8.947704780439259e-07, "loss": 0.1449, "step": 4487 }, { "epoch": 2.19, "learning_rate": 8.937602832683018e-07, "loss": 0.1589, "step": 4488 }, { "epoch": 2.19, "learning_rate": 8.927505349329085e-07, "loss": 0.1529, "step": 4489 }, { "epoch": 2.19, "learning_rate": 8.917412333183984e-07, "loss": 0.1481, "step": 4490 }, { "epoch": 2.19, "learning_rate": 8.90732378705296e-07, "loss": 0.1275, "step": 4491 }, { "epoch": 2.19, "learning_rate": 8.897239713740058e-07, "loss": 0.1239, "step": 4492 }, { "epoch": 2.19, "learning_rate": 8.887160116048046e-07, "loss": 0.1409, "step": 4493 }, { "epoch": 2.19, "learning_rate": 8.877084996778454e-07, "loss": 0.1749, "step": 4494 }, { "epoch": 2.19, "learning_rate": 8.867014358731584e-07, "loss": 0.1349, "step": 4495 }, { "epoch": 2.2, "learning_rate": 8.856948204706481e-07, "loss": 0.1459, "step": 4496 }, { "epoch": 2.2, "learning_rate": 8.846886537500932e-07, "loss": 0.1346, "step": 4497 }, { "epoch": 2.2, "learning_rate": 8.836829359911502e-07, "loss": 0.1241, "step": 4498 }, { "epoch": 2.2, "learning_rate": 8.826776674733484e-07, "loss": 0.1404, "step": 4499 }, { "epoch": 2.2, "learning_rate": 8.816728484760947e-07, "loss": 0.123, "step": 4500 }, { "epoch": 2.2, "learning_rate": 8.806684792786693e-07, "loss": 0.1313, "step": 4501 }, { "epoch": 2.2, "learning_rate": 8.796645601602274e-07, "loss": 0.1622, "step": 4502 }, { "epoch": 2.2, "learning_rate": 8.786610913997986e-07, "loss": 0.1359, "step": 4503 }, { "epoch": 2.2, "learning_rate": 8.776580732762896e-07, "loss": 0.1245, "step": 4504 }, { "epoch": 2.2, "learning_rate": 8.766555060684809e-07, "loss": 0.1493, "step": 4505 }, { "epoch": 2.2, "learning_rate": 8.756533900550265e-07, "loss": 0.1635, "step": 4506 }, { "epoch": 2.2, "learning_rate": 8.746517255144557e-07, "loss": 0.1526, "step": 4507 }, { "epoch": 2.2, "learning_rate": 8.736505127251727e-07, "loss": 0.1351, "step": 4508 }, { "epoch": 2.2, "learning_rate": 8.726497519654542e-07, "loss": 0.1279, "step": 4509 }, { "epoch": 2.2, "learning_rate": 8.716494435134546e-07, "loss": 0.1327, "step": 4510 }, { "epoch": 2.2, "learning_rate": 8.706495876472012e-07, "loss": 0.1277, "step": 4511 }, { "epoch": 2.2, "learning_rate": 8.696501846445942e-07, "loss": 0.1094, "step": 4512 }, { "epoch": 2.2, "learning_rate": 8.686512347834094e-07, "loss": 0.1308, "step": 4513 }, { "epoch": 2.2, "learning_rate": 8.67652738341295e-07, "loss": 0.151, "step": 4514 }, { "epoch": 2.2, "learning_rate": 8.666546955957752e-07, "loss": 0.1648, "step": 4515 }, { "epoch": 2.21, "learning_rate": 8.65657106824248e-07, "loss": 0.1157, "step": 4516 }, { "epoch": 2.21, "learning_rate": 8.646599723039836e-07, "loss": 0.1227, "step": 4517 }, { "epoch": 2.21, "learning_rate": 8.636632923121271e-07, "loss": 0.1517, "step": 4518 }, { "epoch": 2.21, "learning_rate": 8.626670671256956e-07, "loss": 0.1426, "step": 4519 }, { "epoch": 2.21, "learning_rate": 8.616712970215831e-07, "loss": 0.135, "step": 4520 }, { "epoch": 2.21, "learning_rate": 8.606759822765534e-07, "loss": 0.1343, "step": 4521 }, { "epoch": 2.21, "learning_rate": 8.596811231672475e-07, "loss": 0.1474, "step": 4522 }, { "epoch": 2.21, "learning_rate": 8.586867199701765e-07, "loss": 0.1382, "step": 4523 }, { "epoch": 2.21, "learning_rate": 8.576927729617254e-07, "loss": 0.1235, "step": 4524 }, { "epoch": 2.21, "learning_rate": 8.566992824181547e-07, "loss": 0.1514, "step": 4525 }, { "epoch": 2.21, "learning_rate": 8.557062486155954e-07, "loss": 0.1347, "step": 4526 }, { "epoch": 2.21, "learning_rate": 8.547136718300519e-07, "loss": 0.1491, "step": 4527 }, { "epoch": 2.21, "learning_rate": 8.537215523374037e-07, "loss": 0.1211, "step": 4528 }, { "epoch": 2.21, "learning_rate": 8.527298904134005e-07, "loss": 0.1187, "step": 4529 }, { "epoch": 2.21, "learning_rate": 8.517386863336669e-07, "loss": 0.1553, "step": 4530 }, { "epoch": 2.21, "learning_rate": 8.50747940373699e-07, "loss": 0.1079, "step": 4531 }, { "epoch": 2.21, "learning_rate": 8.497576528088661e-07, "loss": 0.1314, "step": 4532 }, { "epoch": 2.21, "learning_rate": 8.48767823914409e-07, "loss": 0.1285, "step": 4533 }, { "epoch": 2.21, "learning_rate": 8.477784539654424e-07, "loss": 0.1377, "step": 4534 }, { "epoch": 2.21, "learning_rate": 8.467895432369544e-07, "loss": 0.1578, "step": 4535 }, { "epoch": 2.21, "learning_rate": 8.458010920038029e-07, "loss": 0.137, "step": 4536 }, { "epoch": 2.22, "learning_rate": 8.448131005407193e-07, "loss": 0.1608, "step": 4537 }, { "epoch": 2.22, "learning_rate": 8.438255691223062e-07, "loss": 0.1154, "step": 4538 }, { "epoch": 2.22, "learning_rate": 8.428384980230411e-07, "loss": 0.1437, "step": 4539 }, { "epoch": 2.22, "learning_rate": 8.418518875172701e-07, "loss": 0.1252, "step": 4540 }, { "epoch": 2.22, "learning_rate": 8.408657378792146e-07, "loss": 0.1719, "step": 4541 }, { "epoch": 2.22, "learning_rate": 8.398800493829653e-07, "loss": 0.136, "step": 4542 }, { "epoch": 2.22, "learning_rate": 8.388948223024851e-07, "loss": 0.1165, "step": 4543 }, { "epoch": 2.22, "learning_rate": 8.379100569116105e-07, "loss": 0.1191, "step": 4544 }, { "epoch": 2.22, "learning_rate": 8.369257534840478e-07, "loss": 0.1171, "step": 4545 }, { "epoch": 2.22, "learning_rate": 8.359419122933745e-07, "loss": 0.1111, "step": 4546 }, { "epoch": 2.22, "learning_rate": 8.349585336130425e-07, "loss": 0.1511, "step": 4547 }, { "epoch": 2.22, "learning_rate": 8.339756177163713e-07, "loss": 0.1734, "step": 4548 }, { "epoch": 2.22, "learning_rate": 8.329931648765561e-07, "loss": 0.1015, "step": 4549 }, { "epoch": 2.22, "learning_rate": 8.320111753666593e-07, "loss": 0.163, "step": 4550 }, { "epoch": 2.22, "learning_rate": 8.310296494596159e-07, "loss": 0.1209, "step": 4551 }, { "epoch": 2.22, "learning_rate": 8.300485874282344e-07, "loss": 0.1341, "step": 4552 }, { "epoch": 2.22, "learning_rate": 8.290679895451901e-07, "loss": 0.1616, "step": 4553 }, { "epoch": 2.22, "learning_rate": 8.280878560830333e-07, "loss": 0.1431, "step": 4554 }, { "epoch": 2.22, "learning_rate": 8.271081873141831e-07, "loss": 0.152, "step": 4555 }, { "epoch": 2.22, "learning_rate": 8.261289835109295e-07, "loss": 0.1451, "step": 4556 }, { "epoch": 2.23, "learning_rate": 8.25150244945433e-07, "loss": 0.1184, "step": 4557 }, { "epoch": 2.23, "learning_rate": 8.241719718897259e-07, "loss": 0.1255, "step": 4558 }, { "epoch": 2.23, "learning_rate": 8.231941646157119e-07, "loss": 0.1448, "step": 4559 }, { "epoch": 2.23, "learning_rate": 8.222168233951628e-07, "loss": 0.1391, "step": 4560 }, { "epoch": 2.23, "learning_rate": 8.212399484997218e-07, "loss": 0.1543, "step": 4561 }, { "epoch": 2.23, "learning_rate": 8.202635402009032e-07, "loss": 0.1246, "step": 4562 }, { "epoch": 2.23, "learning_rate": 8.192875987700899e-07, "loss": 0.1256, "step": 4563 }, { "epoch": 2.23, "learning_rate": 8.183121244785375e-07, "loss": 0.1273, "step": 4564 }, { "epoch": 2.23, "learning_rate": 8.173371175973707e-07, "loss": 0.1271, "step": 4565 }, { "epoch": 2.23, "learning_rate": 8.163625783975839e-07, "loss": 0.1305, "step": 4566 }, { "epoch": 2.23, "learning_rate": 8.153885071500414e-07, "loss": 0.172, "step": 4567 }, { "epoch": 2.23, "learning_rate": 8.144149041254773e-07, "loss": 0.1396, "step": 4568 }, { "epoch": 2.23, "learning_rate": 8.134417695944971e-07, "loss": 0.147, "step": 4569 }, { "epoch": 2.23, "learning_rate": 8.124691038275736e-07, "loss": 0.1285, "step": 4570 }, { "epoch": 2.23, "learning_rate": 8.114969070950526e-07, "loss": 0.145, "step": 4571 }, { "epoch": 2.23, "learning_rate": 8.105251796671465e-07, "loss": 0.123, "step": 4572 }, { "epoch": 2.23, "learning_rate": 8.095539218139375e-07, "loss": 0.1627, "step": 4573 }, { "epoch": 2.23, "learning_rate": 8.085831338053804e-07, "loss": 0.1375, "step": 4574 }, { "epoch": 2.23, "learning_rate": 8.076128159112959e-07, "loss": 0.1226, "step": 4575 }, { "epoch": 2.23, "learning_rate": 8.066429684013743e-07, "loss": 0.1342, "step": 4576 }, { "epoch": 2.23, "learning_rate": 8.056735915451786e-07, "loss": 0.1584, "step": 4577 }, { "epoch": 2.24, "learning_rate": 8.047046856121366e-07, "loss": 0.1589, "step": 4578 }, { "epoch": 2.24, "learning_rate": 8.037362508715485e-07, "loss": 0.1806, "step": 4579 }, { "epoch": 2.24, "learning_rate": 8.027682875925818e-07, "loss": 0.1191, "step": 4580 }, { "epoch": 2.24, "learning_rate": 8.018007960442734e-07, "loss": 0.1289, "step": 4581 }, { "epoch": 2.24, "learning_rate": 8.008337764955279e-07, "loss": 0.1509, "step": 4582 }, { "epoch": 2.24, "learning_rate": 7.998672292151214e-07, "loss": 0.1676, "step": 4583 }, { "epoch": 2.24, "learning_rate": 7.989011544716973e-07, "loss": 0.1445, "step": 4584 }, { "epoch": 2.24, "learning_rate": 7.97935552533767e-07, "loss": 0.112, "step": 4585 }, { "epoch": 2.24, "learning_rate": 7.969704236697115e-07, "loss": 0.1481, "step": 4586 }, { "epoch": 2.24, "learning_rate": 7.960057681477787e-07, "loss": 0.1008, "step": 4587 }, { "epoch": 2.24, "learning_rate": 7.950415862360878e-07, "loss": 0.1107, "step": 4588 }, { "epoch": 2.24, "learning_rate": 7.940778782026232e-07, "loss": 0.1521, "step": 4589 }, { "epoch": 2.24, "learning_rate": 7.931146443152404e-07, "loss": 0.1261, "step": 4590 }, { "epoch": 2.24, "learning_rate": 7.921518848416612e-07, "loss": 0.1497, "step": 4591 }, { "epoch": 2.24, "learning_rate": 7.911896000494765e-07, "loss": 0.1282, "step": 4592 }, { "epoch": 2.24, "learning_rate": 7.902277902061433e-07, "loss": 0.1303, "step": 4593 }, { "epoch": 2.24, "learning_rate": 7.8926645557899e-07, "loss": 0.1337, "step": 4594 }, { "epoch": 2.24, "learning_rate": 7.883055964352109e-07, "loss": 0.1379, "step": 4595 }, { "epoch": 2.24, "learning_rate": 7.873452130418685e-07, "loss": 0.1418, "step": 4596 }, { "epoch": 2.24, "learning_rate": 7.863853056658924e-07, "loss": 0.1765, "step": 4597 }, { "epoch": 2.25, "learning_rate": 7.854258745740795e-07, "loss": 0.1072, "step": 4598 }, { "epoch": 2.25, "learning_rate": 7.844669200330973e-07, "loss": 0.1789, "step": 4599 }, { "epoch": 2.25, "learning_rate": 7.835084423094768e-07, "loss": 0.127, "step": 4600 }, { "epoch": 2.25, "learning_rate": 7.825504416696206e-07, "loss": 0.1397, "step": 4601 }, { "epoch": 2.25, "learning_rate": 7.815929183797949e-07, "loss": 0.1337, "step": 4602 }, { "epoch": 2.25, "learning_rate": 7.80635872706135e-07, "loss": 0.1456, "step": 4603 }, { "epoch": 2.25, "learning_rate": 7.796793049146445e-07, "loss": 0.1601, "step": 4604 }, { "epoch": 2.25, "learning_rate": 7.787232152711924e-07, "loss": 0.1525, "step": 4605 }, { "epoch": 2.25, "learning_rate": 7.777676040415141e-07, "loss": 0.1337, "step": 4606 }, { "epoch": 2.25, "learning_rate": 7.768124714912156e-07, "loss": 0.1606, "step": 4607 }, { "epoch": 2.25, "learning_rate": 7.758578178857659e-07, "loss": 0.1166, "step": 4608 }, { "epoch": 2.25, "learning_rate": 7.749036434905041e-07, "loss": 0.1174, "step": 4609 }, { "epoch": 2.25, "learning_rate": 7.739499485706334e-07, "loss": 0.1624, "step": 4610 }, { "epoch": 2.25, "learning_rate": 7.729967333912255e-07, "loss": 0.1238, "step": 4611 }, { "epoch": 2.25, "learning_rate": 7.720439982172173e-07, "loss": 0.1311, "step": 4612 }, { "epoch": 2.25, "learning_rate": 7.710917433134138e-07, "loss": 0.1334, "step": 4613 }, { "epoch": 2.25, "learning_rate": 7.701399689444866e-07, "loss": 0.1675, "step": 4614 }, { "epoch": 2.25, "learning_rate": 7.691886753749728e-07, "loss": 0.1245, "step": 4615 }, { "epoch": 2.25, "learning_rate": 7.682378628692752e-07, "loss": 0.1225, "step": 4616 }, { "epoch": 2.25, "learning_rate": 7.672875316916645e-07, "loss": 0.152, "step": 4617 }, { "epoch": 2.25, "learning_rate": 7.663376821062759e-07, "loss": 0.1379, "step": 4618 }, { "epoch": 2.26, "learning_rate": 7.653883143771126e-07, "loss": 0.1577, "step": 4619 }, { "epoch": 2.26, "learning_rate": 7.644394287680438e-07, "loss": 0.1297, "step": 4620 }, { "epoch": 2.26, "learning_rate": 7.63491025542803e-07, "loss": 0.1549, "step": 4621 }, { "epoch": 2.26, "learning_rate": 7.625431049649906e-07, "loss": 0.1479, "step": 4622 }, { "epoch": 2.26, "learning_rate": 7.615956672980721e-07, "loss": 0.1572, "step": 4623 }, { "epoch": 2.26, "learning_rate": 7.60648712805381e-07, "loss": 0.1378, "step": 4624 }, { "epoch": 2.26, "learning_rate": 7.597022417501135e-07, "loss": 0.1551, "step": 4625 }, { "epoch": 2.26, "learning_rate": 7.587562543953345e-07, "loss": 0.119, "step": 4626 }, { "epoch": 2.26, "learning_rate": 7.57810751003972e-07, "loss": 0.1555, "step": 4627 }, { "epoch": 2.26, "learning_rate": 7.568657318388195e-07, "loss": 0.1102, "step": 4628 }, { "epoch": 2.26, "learning_rate": 7.559211971625385e-07, "loss": 0.1522, "step": 4629 }, { "epoch": 2.26, "learning_rate": 7.549771472376526e-07, "loss": 0.1188, "step": 4630 }, { "epoch": 2.26, "learning_rate": 7.540335823265538e-07, "loss": 0.1251, "step": 4631 }, { "epoch": 2.26, "learning_rate": 7.530905026914959e-07, "loss": 0.1583, "step": 4632 }, { "epoch": 2.26, "learning_rate": 7.521479085946013e-07, "loss": 0.1049, "step": 4633 }, { "epoch": 2.26, "learning_rate": 7.512058002978551e-07, "loss": 0.1413, "step": 4634 }, { "epoch": 2.26, "learning_rate": 7.502641780631076e-07, "loss": 0.1288, "step": 4635 }, { "epoch": 2.26, "learning_rate": 7.493230421520742e-07, "loss": 0.1307, "step": 4636 }, { "epoch": 2.26, "learning_rate": 7.483823928263357e-07, "loss": 0.1512, "step": 4637 }, { "epoch": 2.26, "learning_rate": 7.474422303473383e-07, "loss": 0.1296, "step": 4638 }, { "epoch": 2.27, "learning_rate": 7.46502554976391e-07, "loss": 0.1529, "step": 4639 }, { "epoch": 2.27, "learning_rate": 7.455633669746687e-07, "loss": 0.0994, "step": 4640 }, { "epoch": 2.27, "learning_rate": 7.4462466660321e-07, "loss": 0.1532, "step": 4641 }, { "epoch": 2.27, "learning_rate": 7.43686454122918e-07, "loss": 0.1379, "step": 4642 }, { "epoch": 2.27, "learning_rate": 7.427487297945612e-07, "loss": 0.1216, "step": 4643 }, { "epoch": 2.27, "learning_rate": 7.41811493878773e-07, "loss": 0.1344, "step": 4644 }, { "epoch": 2.27, "learning_rate": 7.408747466360483e-07, "loss": 0.1312, "step": 4645 }, { "epoch": 2.27, "learning_rate": 7.399384883267485e-07, "loss": 0.1291, "step": 4646 }, { "epoch": 2.27, "learning_rate": 7.390027192110974e-07, "loss": 0.15, "step": 4647 }, { "epoch": 2.27, "learning_rate": 7.380674395491855e-07, "loss": 0.1384, "step": 4648 }, { "epoch": 2.27, "learning_rate": 7.371326496009637e-07, "loss": 0.1467, "step": 4649 }, { "epoch": 2.27, "learning_rate": 7.361983496262504e-07, "loss": 0.1451, "step": 4650 }, { "epoch": 2.27, "learning_rate": 7.352645398847253e-07, "loss": 0.1319, "step": 4651 }, { "epoch": 2.27, "learning_rate": 7.343312206359315e-07, "loss": 0.1709, "step": 4652 }, { "epoch": 2.27, "learning_rate": 7.333983921392792e-07, "loss": 0.1423, "step": 4653 }, { "epoch": 2.27, "learning_rate": 7.324660546540385e-07, "loss": 0.1494, "step": 4654 }, { "epoch": 2.27, "learning_rate": 7.315342084393439e-07, "loss": 0.123, "step": 4655 }, { "epoch": 2.27, "learning_rate": 7.306028537541954e-07, "loss": 0.1461, "step": 4656 }, { "epoch": 2.27, "learning_rate": 7.296719908574532e-07, "loss": 0.1089, "step": 4657 }, { "epoch": 2.27, "learning_rate": 7.28741620007844e-07, "loss": 0.1606, "step": 4658 }, { "epoch": 2.27, "learning_rate": 7.278117414639557e-07, "loss": 0.1444, "step": 4659 }, { "epoch": 2.28, "learning_rate": 7.268823554842397e-07, "loss": 0.143, "step": 4660 }, { "epoch": 2.28, "learning_rate": 7.259534623270099e-07, "loss": 0.1485, "step": 4661 }, { "epoch": 2.28, "learning_rate": 7.250250622504448e-07, "loss": 0.1075, "step": 4662 }, { "epoch": 2.28, "learning_rate": 7.240971555125858e-07, "loss": 0.1357, "step": 4663 }, { "epoch": 2.28, "learning_rate": 7.231697423713355e-07, "loss": 0.135, "step": 4664 }, { "epoch": 2.28, "learning_rate": 7.222428230844608e-07, "loss": 0.1187, "step": 4665 }, { "epoch": 2.28, "learning_rate": 7.213163979095894e-07, "loss": 0.1622, "step": 4666 }, { "epoch": 2.28, "learning_rate": 7.203904671042139e-07, "loss": 0.1326, "step": 4667 }, { "epoch": 2.28, "learning_rate": 7.194650309256898e-07, "loss": 0.1403, "step": 4668 }, { "epoch": 2.28, "learning_rate": 7.185400896312328e-07, "loss": 0.1379, "step": 4669 }, { "epoch": 2.28, "learning_rate": 7.176156434779225e-07, "loss": 0.1773, "step": 4670 }, { "epoch": 2.28, "learning_rate": 7.166916927227005e-07, "loss": 0.1463, "step": 4671 }, { "epoch": 2.28, "learning_rate": 7.157682376223698e-07, "loss": 0.1292, "step": 4672 }, { "epoch": 2.28, "learning_rate": 7.148452784335977e-07, "loss": 0.1273, "step": 4673 }, { "epoch": 2.28, "learning_rate": 7.139228154129132e-07, "loss": 0.1431, "step": 4674 }, { "epoch": 2.28, "learning_rate": 7.130008488167062e-07, "loss": 0.1401, "step": 4675 }, { "epoch": 2.28, "learning_rate": 7.12079378901229e-07, "loss": 0.1638, "step": 4676 }, { "epoch": 2.28, "learning_rate": 7.111584059225957e-07, "loss": 0.1198, "step": 4677 }, { "epoch": 2.28, "learning_rate": 7.102379301367837e-07, "loss": 0.1482, "step": 4678 }, { "epoch": 2.28, "learning_rate": 7.093179517996298e-07, "loss": 0.1182, "step": 4679 }, { "epoch": 2.29, "learning_rate": 7.083984711668357e-07, "loss": 0.1136, "step": 4680 }, { "epoch": 2.29, "learning_rate": 7.07479488493962e-07, "loss": 0.1181, "step": 4681 }, { "epoch": 2.29, "learning_rate": 7.065610040364312e-07, "loss": 0.1201, "step": 4682 }, { "epoch": 2.29, "learning_rate": 7.056430180495291e-07, "loss": 0.1639, "step": 4683 }, { "epoch": 2.29, "learning_rate": 7.047255307884018e-07, "loss": 0.1392, "step": 4684 }, { "epoch": 2.29, "learning_rate": 7.038085425080554e-07, "loss": 0.1267, "step": 4685 }, { "epoch": 2.29, "learning_rate": 7.028920534633607e-07, "loss": 0.1291, "step": 4686 }, { "epoch": 2.29, "learning_rate": 7.019760639090461e-07, "loss": 0.1065, "step": 4687 }, { "epoch": 2.29, "learning_rate": 7.010605740997042e-07, "loss": 0.128, "step": 4688 }, { "epoch": 2.29, "learning_rate": 7.00145584289787e-07, "loss": 0.1372, "step": 4689 }, { "epoch": 2.29, "learning_rate": 6.992310947336076e-07, "loss": 0.117, "step": 4690 }, { "epoch": 2.29, "learning_rate": 6.983171056853397e-07, "loss": 0.1615, "step": 4691 }, { "epoch": 2.29, "learning_rate": 6.974036173990192e-07, "loss": 0.1741, "step": 4692 }, { "epoch": 2.29, "learning_rate": 6.964906301285432e-07, "loss": 0.1482, "step": 4693 }, { "epoch": 2.29, "learning_rate": 6.955781441276674e-07, "loss": 0.1276, "step": 4694 }, { "epoch": 2.29, "learning_rate": 6.946661596500093e-07, "loss": 0.1357, "step": 4695 }, { "epoch": 2.29, "learning_rate": 6.937546769490469e-07, "loss": 0.136, "step": 4696 }, { "epoch": 2.29, "learning_rate": 6.928436962781185e-07, "loss": 0.1448, "step": 4697 }, { "epoch": 2.29, "learning_rate": 6.919332178904237e-07, "loss": 0.1192, "step": 4698 }, { "epoch": 2.29, "learning_rate": 6.910232420390226e-07, "loss": 0.1327, "step": 4699 }, { "epoch": 2.29, "learning_rate": 6.901137689768342e-07, "loss": 0.1043, "step": 4700 }, { "epoch": 2.3, "learning_rate": 6.892047989566388e-07, "loss": 0.1522, "step": 4701 }, { "epoch": 2.3, "learning_rate": 6.88296332231076e-07, "loss": 0.1141, "step": 4702 }, { "epoch": 2.3, "learning_rate": 6.873883690526464e-07, "loss": 0.1352, "step": 4703 }, { "epoch": 2.3, "learning_rate": 6.864809096737116e-07, "loss": 0.1297, "step": 4704 }, { "epoch": 2.3, "learning_rate": 6.855739543464909e-07, "loss": 0.1533, "step": 4705 }, { "epoch": 2.3, "learning_rate": 6.846675033230649e-07, "loss": 0.1339, "step": 4706 }, { "epoch": 2.3, "learning_rate": 6.837615568553726e-07, "loss": 0.1229, "step": 4707 }, { "epoch": 2.3, "learning_rate": 6.828561151952157e-07, "loss": 0.1339, "step": 4708 }, { "epoch": 2.3, "learning_rate": 6.81951178594252e-07, "loss": 0.1517, "step": 4709 }, { "epoch": 2.3, "learning_rate": 6.810467473040025e-07, "loss": 0.1295, "step": 4710 }, { "epoch": 2.3, "learning_rate": 6.801428215758448e-07, "loss": 0.142, "step": 4711 }, { "epoch": 2.3, "learning_rate": 6.792394016610168e-07, "loss": 0.1435, "step": 4712 }, { "epoch": 2.3, "learning_rate": 6.783364878106171e-07, "loss": 0.1349, "step": 4713 }, { "epoch": 2.3, "learning_rate": 6.774340802756024e-07, "loss": 0.1353, "step": 4714 }, { "epoch": 2.3, "learning_rate": 6.765321793067881e-07, "loss": 0.1309, "step": 4715 }, { "epoch": 2.3, "learning_rate": 6.756307851548511e-07, "loss": 0.1436, "step": 4716 }, { "epoch": 2.3, "learning_rate": 6.747298980703243e-07, "loss": 0.1349, "step": 4717 }, { "epoch": 2.3, "learning_rate": 6.738295183036031e-07, "loss": 0.1217, "step": 4718 }, { "epoch": 2.3, "learning_rate": 6.729296461049395e-07, "loss": 0.1266, "step": 4719 }, { "epoch": 2.3, "learning_rate": 6.720302817244448e-07, "loss": 0.1801, "step": 4720 }, { "epoch": 2.31, "learning_rate": 6.711314254120887e-07, "loss": 0.1601, "step": 4721 }, { "epoch": 2.31, "learning_rate": 6.702330774177016e-07, "loss": 0.1619, "step": 4722 }, { "epoch": 2.31, "learning_rate": 6.693352379909718e-07, "loss": 0.1595, "step": 4723 }, { "epoch": 2.31, "learning_rate": 6.684379073814451e-07, "loss": 0.1422, "step": 4724 }, { "epoch": 2.31, "learning_rate": 6.675410858385268e-07, "loss": 0.1421, "step": 4725 }, { "epoch": 2.31, "learning_rate": 6.666447736114806e-07, "loss": 0.1042, "step": 4726 }, { "epoch": 2.31, "learning_rate": 6.657489709494277e-07, "loss": 0.1271, "step": 4727 }, { "epoch": 2.31, "learning_rate": 6.648536781013496e-07, "loss": 0.1652, "step": 4728 }, { "epoch": 2.31, "learning_rate": 6.639588953160858e-07, "loss": 0.1235, "step": 4729 }, { "epoch": 2.31, "learning_rate": 6.630646228423324e-07, "loss": 0.1673, "step": 4730 }, { "epoch": 2.31, "learning_rate": 6.621708609286437e-07, "loss": 0.1351, "step": 4731 }, { "epoch": 2.31, "learning_rate": 6.612776098234349e-07, "loss": 0.135, "step": 4732 }, { "epoch": 2.31, "learning_rate": 6.603848697749762e-07, "loss": 0.1246, "step": 4733 }, { "epoch": 2.31, "learning_rate": 6.594926410313965e-07, "loss": 0.1497, "step": 4734 }, { "epoch": 2.31, "learning_rate": 6.58600923840684e-07, "loss": 0.1586, "step": 4735 }, { "epoch": 2.31, "learning_rate": 6.577097184506826e-07, "loss": 0.1528, "step": 4736 }, { "epoch": 2.31, "learning_rate": 6.568190251090958e-07, "loss": 0.126, "step": 4737 }, { "epoch": 2.31, "learning_rate": 6.559288440634842e-07, "loss": 0.143, "step": 4738 }, { "epoch": 2.31, "learning_rate": 6.550391755612653e-07, "loss": 0.1098, "step": 4739 }, { "epoch": 2.31, "learning_rate": 6.541500198497139e-07, "loss": 0.1413, "step": 4740 }, { "epoch": 2.31, "learning_rate": 6.532613771759635e-07, "loss": 0.1583, "step": 4741 }, { "epoch": 2.32, "learning_rate": 6.52373247787006e-07, "loss": 0.1526, "step": 4742 }, { "epoch": 2.32, "learning_rate": 6.514856319296878e-07, "loss": 0.168, "step": 4743 }, { "epoch": 2.32, "learning_rate": 6.505985298507145e-07, "loss": 0.129, "step": 4744 }, { "epoch": 2.32, "learning_rate": 6.497119417966469e-07, "loss": 0.124, "step": 4745 }, { "epoch": 2.32, "learning_rate": 6.488258680139056e-07, "loss": 0.1182, "step": 4746 }, { "epoch": 2.32, "learning_rate": 6.479403087487677e-07, "loss": 0.1205, "step": 4747 }, { "epoch": 2.32, "learning_rate": 6.470552642473658e-07, "loss": 0.1164, "step": 4748 }, { "epoch": 2.32, "learning_rate": 6.461707347556901e-07, "loss": 0.1634, "step": 4749 }, { "epoch": 2.32, "learning_rate": 6.452867205195881e-07, "loss": 0.1465, "step": 4750 }, { "epoch": 2.32, "learning_rate": 6.444032217847629e-07, "loss": 0.1478, "step": 4751 }, { "epoch": 2.32, "learning_rate": 6.435202387967759e-07, "loss": 0.137, "step": 4752 }, { "epoch": 2.32, "learning_rate": 6.426377718010454e-07, "loss": 0.1356, "step": 4753 }, { "epoch": 2.32, "learning_rate": 6.417558210428446e-07, "loss": 0.1405, "step": 4754 }, { "epoch": 2.32, "learning_rate": 6.40874386767304e-07, "loss": 0.1029, "step": 4755 }, { "epoch": 2.32, "learning_rate": 6.399934692194093e-07, "loss": 0.1334, "step": 4756 }, { "epoch": 2.32, "learning_rate": 6.391130686440062e-07, "loss": 0.1388, "step": 4757 }, { "epoch": 2.32, "learning_rate": 6.382331852857918e-07, "loss": 0.1444, "step": 4758 }, { "epoch": 2.32, "learning_rate": 6.373538193893242e-07, "loss": 0.1287, "step": 4759 }, { "epoch": 2.32, "learning_rate": 6.364749711990142e-07, "loss": 0.1603, "step": 4760 }, { "epoch": 2.32, "learning_rate": 6.355966409591297e-07, "loss": 0.128, "step": 4761 }, { "epoch": 2.33, "learning_rate": 6.347188289137959e-07, "loss": 0.1419, "step": 4762 }, { "epoch": 2.33, "learning_rate": 6.338415353069923e-07, "loss": 0.1179, "step": 4763 }, { "epoch": 2.33, "learning_rate": 6.329647603825548e-07, "loss": 0.15, "step": 4764 }, { "epoch": 2.33, "learning_rate": 6.320885043841757e-07, "loss": 0.1648, "step": 4765 }, { "epoch": 2.33, "learning_rate": 6.312127675554022e-07, "loss": 0.1324, "step": 4766 }, { "epoch": 2.33, "learning_rate": 6.303375501396386e-07, "loss": 0.1351, "step": 4767 }, { "epoch": 2.33, "learning_rate": 6.294628523801433e-07, "loss": 0.1176, "step": 4768 }, { "epoch": 2.33, "learning_rate": 6.285886745200307e-07, "loss": 0.1525, "step": 4769 }, { "epoch": 2.33, "learning_rate": 6.277150168022703e-07, "loss": 0.122, "step": 4770 }, { "epoch": 2.33, "learning_rate": 6.268418794696884e-07, "loss": 0.1344, "step": 4771 }, { "epoch": 2.33, "learning_rate": 6.259692627649664e-07, "loss": 0.1118, "step": 4772 }, { "epoch": 2.33, "learning_rate": 6.250971669306399e-07, "loss": 0.1098, "step": 4773 }, { "epoch": 2.33, "learning_rate": 6.242255922091001e-07, "loss": 0.1846, "step": 4774 }, { "epoch": 2.33, "learning_rate": 6.233545388425937e-07, "loss": 0.1264, "step": 4775 }, { "epoch": 2.33, "learning_rate": 6.224840070732216e-07, "loss": 0.1262, "step": 4776 }, { "epoch": 2.33, "learning_rate": 6.216139971429411e-07, "loss": 0.1445, "step": 4777 }, { "epoch": 2.33, "learning_rate": 6.207445092935646e-07, "loss": 0.1224, "step": 4778 }, { "epoch": 2.33, "learning_rate": 6.19875543766758e-07, "loss": 0.1465, "step": 4779 }, { "epoch": 2.33, "learning_rate": 6.190071008040424e-07, "loss": 0.1435, "step": 4780 }, { "epoch": 2.33, "learning_rate": 6.181391806467935e-07, "loss": 0.1407, "step": 4781 }, { "epoch": 2.33, "learning_rate": 6.172717835362424e-07, "loss": 0.1234, "step": 4782 }, { "epoch": 2.34, "learning_rate": 6.164049097134756e-07, "loss": 0.1585, "step": 4783 }, { "epoch": 2.34, "learning_rate": 6.155385594194318e-07, "loss": 0.1482, "step": 4784 }, { "epoch": 2.34, "learning_rate": 6.146727328949065e-07, "loss": 0.1052, "step": 4785 }, { "epoch": 2.34, "learning_rate": 6.138074303805466e-07, "loss": 0.1604, "step": 4786 }, { "epoch": 2.34, "learning_rate": 6.12942652116858e-07, "loss": 0.1879, "step": 4787 }, { "epoch": 2.34, "learning_rate": 6.120783983441958e-07, "loss": 0.1081, "step": 4788 }, { "epoch": 2.34, "learning_rate": 6.112146693027738e-07, "loss": 0.154, "step": 4789 }, { "epoch": 2.34, "learning_rate": 6.103514652326567e-07, "loss": 0.1254, "step": 4790 }, { "epoch": 2.34, "learning_rate": 6.094887863737642e-07, "loss": 0.1142, "step": 4791 }, { "epoch": 2.34, "learning_rate": 6.086266329658716e-07, "loss": 0.2042, "step": 4792 }, { "epoch": 2.34, "learning_rate": 6.077650052486061e-07, "loss": 0.1415, "step": 4793 }, { "epoch": 2.34, "learning_rate": 6.069039034614491e-07, "loss": 0.1272, "step": 4794 }, { "epoch": 2.34, "learning_rate": 6.060433278437375e-07, "loss": 0.142, "step": 4795 }, { "epoch": 2.34, "learning_rate": 6.05183278634659e-07, "loss": 0.1501, "step": 4796 }, { "epoch": 2.34, "learning_rate": 6.043237560732587e-07, "loss": 0.1318, "step": 4797 }, { "epoch": 2.34, "learning_rate": 6.034647603984325e-07, "loss": 0.1387, "step": 4798 }, { "epoch": 2.34, "learning_rate": 6.026062918489306e-07, "loss": 0.1537, "step": 4799 }, { "epoch": 2.34, "learning_rate": 6.017483506633559e-07, "loss": 0.1383, "step": 4800 }, { "epoch": 2.34, "learning_rate": 6.008909370801669e-07, "loss": 0.1312, "step": 4801 }, { "epoch": 2.34, "learning_rate": 6.000340513376743e-07, "loss": 0.1307, "step": 4802 }, { "epoch": 2.35, "learning_rate": 5.991776936740415e-07, "loss": 0.1392, "step": 4803 }, { "epoch": 2.35, "learning_rate": 5.983218643272859e-07, "loss": 0.1327, "step": 4804 }, { "epoch": 2.35, "learning_rate": 5.974665635352772e-07, "loss": 0.1415, "step": 4805 }, { "epoch": 2.35, "learning_rate": 5.966117915357383e-07, "loss": 0.1203, "step": 4806 }, { "epoch": 2.35, "learning_rate": 5.957575485662462e-07, "loss": 0.1384, "step": 4807 }, { "epoch": 2.35, "learning_rate": 5.94903834864231e-07, "loss": 0.1284, "step": 4808 }, { "epoch": 2.35, "learning_rate": 5.940506506669747e-07, "loss": 0.128, "step": 4809 }, { "epoch": 2.35, "learning_rate": 5.931979962116113e-07, "loss": 0.1581, "step": 4810 }, { "epoch": 2.35, "learning_rate": 5.923458717351291e-07, "loss": 0.143, "step": 4811 }, { "epoch": 2.35, "learning_rate": 5.91494277474369e-07, "loss": 0.1158, "step": 4812 }, { "epoch": 2.35, "learning_rate": 5.906432136660234e-07, "loss": 0.1409, "step": 4813 }, { "epoch": 2.35, "learning_rate": 5.897926805466391e-07, "loss": 0.1497, "step": 4814 }, { "epoch": 2.35, "learning_rate": 5.889426783526139e-07, "loss": 0.1246, "step": 4815 }, { "epoch": 2.35, "learning_rate": 5.880932073201976e-07, "loss": 0.1546, "step": 4816 }, { "epoch": 2.35, "learning_rate": 5.872442676854942e-07, "loss": 0.1197, "step": 4817 }, { "epoch": 2.35, "learning_rate": 5.863958596844585e-07, "loss": 0.1784, "step": 4818 }, { "epoch": 2.35, "learning_rate": 5.855479835528988e-07, "loss": 0.1353, "step": 4819 }, { "epoch": 2.35, "learning_rate": 5.847006395264743e-07, "loss": 0.1258, "step": 4820 }, { "epoch": 2.35, "learning_rate": 5.83853827840696e-07, "loss": 0.1327, "step": 4821 }, { "epoch": 2.35, "learning_rate": 5.830075487309294e-07, "loss": 0.1404, "step": 4822 }, { "epoch": 2.35, "learning_rate": 5.821618024323897e-07, "loss": 0.1516, "step": 4823 }, { "epoch": 2.36, "learning_rate": 5.813165891801437e-07, "loss": 0.148, "step": 4824 }, { "epoch": 2.36, "learning_rate": 5.804719092091124e-07, "loss": 0.1421, "step": 4825 }, { "epoch": 2.36, "learning_rate": 5.796277627540661e-07, "loss": 0.1424, "step": 4826 }, { "epoch": 2.36, "learning_rate": 5.787841500496291e-07, "loss": 0.1194, "step": 4827 }, { "epoch": 2.36, "learning_rate": 5.779410713302755e-07, "loss": 0.1183, "step": 4828 }, { "epoch": 2.36, "learning_rate": 5.770985268303314e-07, "loss": 0.1355, "step": 4829 }, { "epoch": 2.36, "learning_rate": 5.762565167839745e-07, "loss": 0.1501, "step": 4830 }, { "epoch": 2.36, "learning_rate": 5.754150414252341e-07, "loss": 0.1328, "step": 4831 }, { "epoch": 2.36, "learning_rate": 5.745741009879921e-07, "loss": 0.1689, "step": 4832 }, { "epoch": 2.36, "learning_rate": 5.737336957059797e-07, "loss": 0.1447, "step": 4833 }, { "epoch": 2.36, "learning_rate": 5.7289382581278e-07, "loss": 0.1282, "step": 4834 }, { "epoch": 2.36, "learning_rate": 5.720544915418269e-07, "loss": 0.1483, "step": 4835 }, { "epoch": 2.36, "learning_rate": 5.712156931264074e-07, "loss": 0.1317, "step": 4836 }, { "epoch": 2.36, "learning_rate": 5.703774307996565e-07, "loss": 0.1486, "step": 4837 }, { "epoch": 2.36, "learning_rate": 5.695397047945639e-07, "loss": 0.1463, "step": 4838 }, { "epoch": 2.36, "learning_rate": 5.687025153439668e-07, "loss": 0.1605, "step": 4839 }, { "epoch": 2.36, "learning_rate": 5.678658626805545e-07, "loss": 0.1313, "step": 4840 }, { "epoch": 2.36, "learning_rate": 5.670297470368682e-07, "loss": 0.1357, "step": 4841 }, { "epoch": 2.36, "learning_rate": 5.661941686452985e-07, "loss": 0.1217, "step": 4842 }, { "epoch": 2.36, "learning_rate": 5.653591277380863e-07, "loss": 0.1926, "step": 4843 }, { "epoch": 2.37, "learning_rate": 5.645246245473252e-07, "loss": 0.118, "step": 4844 }, { "epoch": 2.37, "learning_rate": 5.636906593049568e-07, "loss": 0.1264, "step": 4845 }, { "epoch": 2.37, "learning_rate": 5.628572322427755e-07, "loss": 0.1354, "step": 4846 }, { "epoch": 2.37, "learning_rate": 5.62024343592425e-07, "loss": 0.1059, "step": 4847 }, { "epoch": 2.37, "learning_rate": 5.611919935853985e-07, "loss": 0.1382, "step": 4848 }, { "epoch": 2.37, "learning_rate": 5.603601824530405e-07, "loss": 0.155, "step": 4849 }, { "epoch": 2.37, "learning_rate": 5.595289104265458e-07, "loss": 0.1196, "step": 4850 }, { "epoch": 2.37, "learning_rate": 5.586981777369602e-07, "loss": 0.1469, "step": 4851 }, { "epoch": 2.37, "learning_rate": 5.578679846151775e-07, "loss": 0.1351, "step": 4852 }, { "epoch": 2.37, "learning_rate": 5.570383312919428e-07, "loss": 0.145, "step": 4853 }, { "epoch": 2.37, "learning_rate": 5.562092179978509e-07, "loss": 0.1441, "step": 4854 }, { "epoch": 2.37, "learning_rate": 5.553806449633459e-07, "loss": 0.1215, "step": 4855 }, { "epoch": 2.37, "learning_rate": 5.545526124187231e-07, "loss": 0.1585, "step": 4856 }, { "epoch": 2.37, "learning_rate": 5.537251205941277e-07, "loss": 0.1517, "step": 4857 }, { "epoch": 2.37, "learning_rate": 5.528981697195529e-07, "loss": 0.1409, "step": 4858 }, { "epoch": 2.37, "learning_rate": 5.520717600248423e-07, "loss": 0.1441, "step": 4859 }, { "epoch": 2.37, "learning_rate": 5.51245891739689e-07, "loss": 0.1418, "step": 4860 }, { "epoch": 2.37, "learning_rate": 5.504205650936362e-07, "loss": 0.1374, "step": 4861 }, { "epoch": 2.37, "learning_rate": 5.495957803160767e-07, "loss": 0.1156, "step": 4862 }, { "epoch": 2.37, "learning_rate": 5.487715376362521e-07, "loss": 0.1395, "step": 4863 }, { "epoch": 2.38, "learning_rate": 5.479478372832528e-07, "loss": 0.1443, "step": 4864 }, { "epoch": 2.38, "learning_rate": 5.471246794860188e-07, "loss": 0.14, "step": 4865 }, { "epoch": 2.38, "learning_rate": 5.463020644733408e-07, "loss": 0.1585, "step": 4866 }, { "epoch": 2.38, "learning_rate": 5.45479992473856e-07, "loss": 0.148, "step": 4867 }, { "epoch": 2.38, "learning_rate": 5.446584637160534e-07, "loss": 0.1248, "step": 4868 }, { "epoch": 2.38, "learning_rate": 5.438374784282696e-07, "loss": 0.1388, "step": 4869 }, { "epoch": 2.38, "learning_rate": 5.430170368386889e-07, "loss": 0.143, "step": 4870 }, { "epoch": 2.38, "learning_rate": 5.421971391753475e-07, "loss": 0.1503, "step": 4871 }, { "epoch": 2.38, "learning_rate": 5.413777856661279e-07, "loss": 0.1429, "step": 4872 }, { "epoch": 2.38, "learning_rate": 5.405589765387617e-07, "loss": 0.1489, "step": 4873 }, { "epoch": 2.38, "learning_rate": 5.397407120208308e-07, "loss": 0.1362, "step": 4874 }, { "epoch": 2.38, "learning_rate": 5.38922992339764e-07, "loss": 0.1307, "step": 4875 }, { "epoch": 2.38, "learning_rate": 5.381058177228398e-07, "loss": 0.1374, "step": 4876 }, { "epoch": 2.38, "learning_rate": 5.372891883971845e-07, "loss": 0.1359, "step": 4877 }, { "epoch": 2.38, "learning_rate": 5.364731045897731e-07, "loss": 0.161, "step": 4878 }, { "epoch": 2.38, "learning_rate": 5.35657566527428e-07, "loss": 0.1219, "step": 4879 }, { "epoch": 2.38, "learning_rate": 5.348425744368216e-07, "loss": 0.1333, "step": 4880 }, { "epoch": 2.38, "learning_rate": 5.340281285444745e-07, "loss": 0.1406, "step": 4881 }, { "epoch": 2.38, "learning_rate": 5.332142290767542e-07, "loss": 0.1384, "step": 4882 }, { "epoch": 2.38, "learning_rate": 5.324008762598768e-07, "loss": 0.1418, "step": 4883 }, { "epoch": 2.38, "learning_rate": 5.315880703199067e-07, "loss": 0.1257, "step": 4884 }, { "epoch": 2.39, "learning_rate": 5.307758114827552e-07, "loss": 0.133, "step": 4885 }, { "epoch": 2.39, "learning_rate": 5.299640999741834e-07, "loss": 0.1666, "step": 4886 }, { "epoch": 2.39, "learning_rate": 5.291529360198002e-07, "loss": 0.1333, "step": 4887 }, { "epoch": 2.39, "learning_rate": 5.283423198450607e-07, "loss": 0.1434, "step": 4888 }, { "epoch": 2.39, "learning_rate": 5.275322516752688e-07, "loss": 0.1507, "step": 4889 }, { "epoch": 2.39, "learning_rate": 5.267227317355747e-07, "loss": 0.1572, "step": 4890 }, { "epoch": 2.39, "learning_rate": 5.259137602509787e-07, "loss": 0.1241, "step": 4891 }, { "epoch": 2.39, "learning_rate": 5.251053374463264e-07, "loss": 0.1424, "step": 4892 }, { "epoch": 2.39, "learning_rate": 5.24297463546313e-07, "loss": 0.1062, "step": 4893 }, { "epoch": 2.39, "learning_rate": 5.234901387754793e-07, "loss": 0.1553, "step": 4894 }, { "epoch": 2.39, "learning_rate": 5.226833633582132e-07, "loss": 0.1703, "step": 4895 }, { "epoch": 2.39, "learning_rate": 5.218771375187526e-07, "loss": 0.1386, "step": 4896 }, { "epoch": 2.39, "learning_rate": 5.210714614811793e-07, "loss": 0.1551, "step": 4897 }, { "epoch": 2.39, "learning_rate": 5.202663354694254e-07, "loss": 0.1314, "step": 4898 }, { "epoch": 2.39, "learning_rate": 5.194617597072677e-07, "loss": 0.1498, "step": 4899 }, { "epoch": 2.39, "learning_rate": 5.186577344183305e-07, "loss": 0.1147, "step": 4900 }, { "epoch": 2.39, "learning_rate": 5.178542598260866e-07, "loss": 0.1408, "step": 4901 }, { "epoch": 2.39, "learning_rate": 5.170513361538546e-07, "loss": 0.1359, "step": 4902 }, { "epoch": 2.39, "learning_rate": 5.162489636247989e-07, "loss": 0.1568, "step": 4903 }, { "epoch": 2.39, "learning_rate": 5.154471424619334e-07, "loss": 0.1226, "step": 4904 }, { "epoch": 2.4, "learning_rate": 5.146458728881162e-07, "loss": 0.1359, "step": 4905 }, { "epoch": 2.4, "learning_rate": 5.13845155126054e-07, "loss": 0.142, "step": 4906 }, { "epoch": 2.4, "learning_rate": 5.130449893982989e-07, "loss": 0.1502, "step": 4907 }, { "epoch": 2.4, "learning_rate": 5.1224537592725e-07, "loss": 0.1284, "step": 4908 }, { "epoch": 2.4, "learning_rate": 5.11446314935152e-07, "loss": 0.1268, "step": 4909 }, { "epoch": 2.4, "learning_rate": 5.106478066440975e-07, "loss": 0.1367, "step": 4910 }, { "epoch": 2.4, "learning_rate": 5.098498512760258e-07, "loss": 0.1286, "step": 4911 }, { "epoch": 2.4, "learning_rate": 5.090524490527207e-07, "loss": 0.1184, "step": 4912 }, { "epoch": 2.4, "learning_rate": 5.082556001958133e-07, "loss": 0.1707, "step": 4913 }, { "epoch": 2.4, "learning_rate": 5.074593049267809e-07, "loss": 0.1493, "step": 4914 }, { "epoch": 2.4, "learning_rate": 5.066635634669456e-07, "loss": 0.1516, "step": 4915 }, { "epoch": 2.4, "learning_rate": 5.058683760374778e-07, "loss": 0.1343, "step": 4916 }, { "epoch": 2.4, "learning_rate": 5.050737428593933e-07, "loss": 0.1593, "step": 4917 }, { "epoch": 2.4, "learning_rate": 5.04279664153553e-07, "loss": 0.1494, "step": 4918 }, { "epoch": 2.4, "learning_rate": 5.03486140140664e-07, "loss": 0.1106, "step": 4919 }, { "epoch": 2.4, "learning_rate": 5.026931710412789e-07, "loss": 0.1422, "step": 4920 }, { "epoch": 2.4, "learning_rate": 5.019007570757972e-07, "loss": 0.1292, "step": 4921 }, { "epoch": 2.4, "learning_rate": 5.011088984644627e-07, "loss": 0.1256, "step": 4922 }, { "epoch": 2.4, "learning_rate": 5.003175954273667e-07, "loss": 0.1249, "step": 4923 }, { "epoch": 2.4, "learning_rate": 4.995268481844435e-07, "loss": 0.152, "step": 4924 }, { "epoch": 2.4, "learning_rate": 4.987366569554758e-07, "loss": 0.1398, "step": 4925 }, { "epoch": 2.41, "learning_rate": 4.979470219600896e-07, "loss": 0.1334, "step": 4926 }, { "epoch": 2.41, "learning_rate": 4.971579434177571e-07, "loss": 0.1268, "step": 4927 }, { "epoch": 2.41, "learning_rate": 4.963694215477946e-07, "loss": 0.1345, "step": 4928 }, { "epoch": 2.41, "learning_rate": 4.955814565693665e-07, "loss": 0.1372, "step": 4929 }, { "epoch": 2.41, "learning_rate": 4.947940487014804e-07, "loss": 0.1319, "step": 4930 }, { "epoch": 2.41, "learning_rate": 4.940071981629893e-07, "loss": 0.1225, "step": 4931 }, { "epoch": 2.41, "learning_rate": 4.932209051725915e-07, "loss": 0.1362, "step": 4932 }, { "epoch": 2.41, "learning_rate": 4.924351699488289e-07, "loss": 0.1293, "step": 4933 }, { "epoch": 2.41, "learning_rate": 4.916499927100918e-07, "loss": 0.147, "step": 4934 }, { "epoch": 2.41, "learning_rate": 4.908653736746113e-07, "loss": 0.1583, "step": 4935 }, { "epoch": 2.41, "learning_rate": 4.900813130604676e-07, "loss": 0.1439, "step": 4936 }, { "epoch": 2.41, "learning_rate": 4.89297811085582e-07, "loss": 0.1723, "step": 4937 }, { "epoch": 2.41, "learning_rate": 4.885148679677221e-07, "loss": 0.1566, "step": 4938 }, { "epoch": 2.41, "learning_rate": 4.877324839244998e-07, "loss": 0.1431, "step": 4939 }, { "epoch": 2.41, "learning_rate": 4.869506591733725e-07, "loss": 0.118, "step": 4940 }, { "epoch": 2.41, "learning_rate": 4.861693939316417e-07, "loss": 0.1243, "step": 4941 }, { "epoch": 2.41, "learning_rate": 4.853886884164529e-07, "loss": 0.1362, "step": 4942 }, { "epoch": 2.41, "learning_rate": 4.846085428447964e-07, "loss": 0.138, "step": 4943 }, { "epoch": 2.41, "learning_rate": 4.838289574335059e-07, "loss": 0.1542, "step": 4944 }, { "epoch": 2.41, "learning_rate": 4.830499323992622e-07, "loss": 0.1227, "step": 4945 }, { "epoch": 2.42, "learning_rate": 4.822714679585863e-07, "loss": 0.1599, "step": 4946 }, { "epoch": 2.42, "learning_rate": 4.814935643278476e-07, "loss": 0.1193, "step": 4947 }, { "epoch": 2.42, "learning_rate": 4.807162217232566e-07, "loss": 0.1395, "step": 4948 }, { "epoch": 2.42, "learning_rate": 4.799394403608682e-07, "loss": 0.1483, "step": 4949 }, { "epoch": 2.42, "learning_rate": 4.791632204565833e-07, "loss": 0.1275, "step": 4950 }, { "epoch": 2.42, "learning_rate": 4.783875622261449e-07, "loss": 0.1313, "step": 4951 }, { "epoch": 2.42, "learning_rate": 4.776124658851397e-07, "loss": 0.1346, "step": 4952 }, { "epoch": 2.42, "learning_rate": 4.768379316490001e-07, "loss": 0.1283, "step": 4953 }, { "epoch": 2.42, "learning_rate": 4.7606395973299966e-07, "loss": 0.1357, "step": 4954 }, { "epoch": 2.42, "learning_rate": 4.75290550352259e-07, "loss": 0.1372, "step": 4955 }, { "epoch": 2.42, "learning_rate": 4.745177037217391e-07, "loss": 0.1605, "step": 4956 }, { "epoch": 2.42, "learning_rate": 4.737454200562461e-07, "loss": 0.1391, "step": 4957 }, { "epoch": 2.42, "learning_rate": 4.72973699570429e-07, "loss": 0.1374, "step": 4958 }, { "epoch": 2.42, "learning_rate": 4.7220254247878126e-07, "loss": 0.1445, "step": 4959 }, { "epoch": 2.42, "learning_rate": 4.714319489956401e-07, "loss": 0.1735, "step": 4960 }, { "epoch": 2.42, "learning_rate": 4.706619193351841e-07, "loss": 0.1288, "step": 4961 }, { "epoch": 2.42, "learning_rate": 4.698924537114369e-07, "loss": 0.1336, "step": 4962 }, { "epoch": 2.42, "learning_rate": 4.6912355233826396e-07, "loss": 0.1491, "step": 4963 }, { "epoch": 2.42, "learning_rate": 4.6835521542937466e-07, "loss": 0.1488, "step": 4964 }, { "epoch": 2.42, "learning_rate": 4.675874431983218e-07, "loss": 0.1738, "step": 4965 }, { "epoch": 2.42, "learning_rate": 4.6682023585850167e-07, "loss": 0.1446, "step": 4966 }, { "epoch": 2.43, "learning_rate": 4.6605359362315195e-07, "loss": 0.1181, "step": 4967 }, { "epoch": 2.43, "learning_rate": 4.652875167053547e-07, "loss": 0.1346, "step": 4968 }, { "epoch": 2.43, "learning_rate": 4.6452200531803297e-07, "loss": 0.1195, "step": 4969 }, { "epoch": 2.43, "learning_rate": 4.6375705967395573e-07, "loss": 0.1465, "step": 4970 }, { "epoch": 2.43, "learning_rate": 4.629926799857312e-07, "loss": 0.1361, "step": 4971 }, { "epoch": 2.43, "learning_rate": 4.6222886646581364e-07, "loss": 0.1288, "step": 4972 }, { "epoch": 2.43, "learning_rate": 4.614656193264977e-07, "loss": 0.1422, "step": 4973 }, { "epoch": 2.43, "learning_rate": 4.6070293877992e-07, "loss": 0.1325, "step": 4974 }, { "epoch": 2.43, "learning_rate": 4.5994082503806296e-07, "loss": 0.1595, "step": 4975 }, { "epoch": 2.43, "learning_rate": 4.591792783127474e-07, "loss": 0.1599, "step": 4976 }, { "epoch": 2.43, "learning_rate": 4.584182988156405e-07, "loss": 0.1974, "step": 4977 }, { "epoch": 2.43, "learning_rate": 4.57657886758249e-07, "loss": 0.144, "step": 4978 }, { "epoch": 2.43, "learning_rate": 4.5689804235192177e-07, "loss": 0.1529, "step": 4979 }, { "epoch": 2.43, "learning_rate": 4.5613876580785225e-07, "loss": 0.1549, "step": 4980 }, { "epoch": 2.43, "learning_rate": 4.553800573370745e-07, "loss": 0.1164, "step": 4981 }, { "epoch": 2.43, "learning_rate": 4.54621917150464e-07, "loss": 0.1273, "step": 4982 }, { "epoch": 2.43, "learning_rate": 4.538643454587402e-07, "loss": 0.1402, "step": 4983 }, { "epoch": 2.43, "learning_rate": 4.531073424724625e-07, "loss": 0.1414, "step": 4984 }, { "epoch": 2.43, "learning_rate": 4.5235090840203414e-07, "loss": 0.1647, "step": 4985 }, { "epoch": 2.43, "learning_rate": 4.5159504345769935e-07, "loss": 0.1409, "step": 4986 }, { "epoch": 2.44, "learning_rate": 4.5083974784954357e-07, "loss": 0.149, "step": 4987 }, { "epoch": 2.44, "learning_rate": 4.500850217874941e-07, "loss": 0.1507, "step": 4988 }, { "epoch": 2.44, "learning_rate": 4.4933086548132137e-07, "loss": 0.1282, "step": 4989 }, { "epoch": 2.44, "learning_rate": 4.4857727914063673e-07, "loss": 0.1383, "step": 4990 }, { "epoch": 2.44, "learning_rate": 4.4782426297489273e-07, "loss": 0.1598, "step": 4991 }, { "epoch": 2.44, "learning_rate": 4.47071817193383e-07, "loss": 0.16, "step": 4992 }, { "epoch": 2.44, "learning_rate": 4.463199420052436e-07, "loss": 0.1187, "step": 4993 }, { "epoch": 2.44, "learning_rate": 4.4556863761945094e-07, "loss": 0.1115, "step": 4994 }, { "epoch": 2.44, "learning_rate": 4.448179042448239e-07, "loss": 0.1325, "step": 4995 }, { "epoch": 2.44, "learning_rate": 4.440677420900233e-07, "loss": 0.1046, "step": 4996 }, { "epoch": 2.44, "learning_rate": 4.433181513635493e-07, "loss": 0.1872, "step": 4997 }, { "epoch": 2.44, "learning_rate": 4.425691322737441e-07, "loss": 0.139, "step": 4998 }, { "epoch": 2.44, "learning_rate": 4.418206850287901e-07, "loss": 0.1804, "step": 4999 }, { "epoch": 2.44, "learning_rate": 4.410728098367131e-07, "loss": 0.12, "step": 5000 }, { "epoch": 2.44, "learning_rate": 4.4032550690537745e-07, "loss": 0.1488, "step": 5001 }, { "epoch": 2.44, "learning_rate": 4.3957877644249e-07, "loss": 0.127, "step": 5002 }, { "epoch": 2.44, "learning_rate": 4.38832618655598e-07, "loss": 0.1569, "step": 5003 }, { "epoch": 2.44, "learning_rate": 4.380870337520887e-07, "loss": 0.143, "step": 5004 }, { "epoch": 2.44, "learning_rate": 4.373420219391916e-07, "loss": 0.1335, "step": 5005 }, { "epoch": 2.44, "learning_rate": 4.3659758342397636e-07, "loss": 0.1314, "step": 5006 }, { "epoch": 2.44, "learning_rate": 4.3585371841335183e-07, "loss": 0.1483, "step": 5007 }, { "epoch": 2.45, "learning_rate": 4.3511042711407045e-07, "loss": 0.1432, "step": 5008 }, { "epoch": 2.45, "learning_rate": 4.3436770973272203e-07, "loss": 0.1031, "step": 5009 }, { "epoch": 2.45, "learning_rate": 4.3362556647573966e-07, "loss": 0.1457, "step": 5010 }, { "epoch": 2.45, "learning_rate": 4.328839975493948e-07, "loss": 0.1314, "step": 5011 }, { "epoch": 2.45, "learning_rate": 4.321430031597998e-07, "loss": 0.1454, "step": 5012 }, { "epoch": 2.45, "learning_rate": 4.314025835129082e-07, "loss": 0.1028, "step": 5013 }, { "epoch": 2.45, "learning_rate": 4.306627388145124e-07, "loss": 0.1244, "step": 5014 }, { "epoch": 2.45, "learning_rate": 4.29923469270247e-07, "loss": 0.1191, "step": 5015 }, { "epoch": 2.45, "learning_rate": 4.291847750855843e-07, "loss": 0.1798, "step": 5016 }, { "epoch": 2.45, "learning_rate": 4.2844665646583847e-07, "loss": 0.1419, "step": 5017 }, { "epoch": 2.45, "learning_rate": 4.2770911361616207e-07, "loss": 0.1339, "step": 5018 }, { "epoch": 2.45, "learning_rate": 4.269721467415497e-07, "loss": 0.1299, "step": 5019 }, { "epoch": 2.45, "learning_rate": 4.26235756046835e-07, "loss": 0.1401, "step": 5020 }, { "epoch": 2.45, "learning_rate": 4.254999417366909e-07, "loss": 0.1217, "step": 5021 }, { "epoch": 2.45, "learning_rate": 4.2476470401563053e-07, "loss": 0.1255, "step": 5022 }, { "epoch": 2.45, "learning_rate": 4.240300430880062e-07, "loss": 0.1993, "step": 5023 }, { "epoch": 2.45, "learning_rate": 4.232959591580116e-07, "loss": 0.1201, "step": 5024 }, { "epoch": 2.45, "learning_rate": 4.225624524296776e-07, "loss": 0.1255, "step": 5025 }, { "epoch": 2.45, "learning_rate": 4.2182952310687753e-07, "loss": 0.1265, "step": 5026 }, { "epoch": 2.45, "learning_rate": 4.210971713933218e-07, "loss": 0.1201, "step": 5027 }, { "epoch": 2.46, "learning_rate": 4.2036539749256033e-07, "loss": 0.1234, "step": 5028 }, { "epoch": 2.46, "learning_rate": 4.196342016079849e-07, "loss": 0.1216, "step": 5029 }, { "epoch": 2.46, "learning_rate": 4.1890358394282406e-07, "loss": 0.1542, "step": 5030 }, { "epoch": 2.46, "learning_rate": 4.1817354470014614e-07, "loss": 0.1299, "step": 5031 }, { "epoch": 2.46, "learning_rate": 4.174440840828603e-07, "loss": 0.1244, "step": 5032 }, { "epoch": 2.46, "learning_rate": 4.167152022937124e-07, "loss": 0.1625, "step": 5033 }, { "epoch": 2.46, "learning_rate": 4.1598689953529e-07, "loss": 0.1356, "step": 5034 }, { "epoch": 2.46, "learning_rate": 4.15259176010018e-07, "loss": 0.1169, "step": 5035 }, { "epoch": 2.46, "learning_rate": 4.1453203192016096e-07, "loss": 0.1443, "step": 5036 }, { "epoch": 2.46, "learning_rate": 4.13805467467821e-07, "loss": 0.1366, "step": 5037 }, { "epoch": 2.46, "learning_rate": 4.1307948285494113e-07, "loss": 0.1579, "step": 5038 }, { "epoch": 2.46, "learning_rate": 4.123540782833033e-07, "loss": 0.1176, "step": 5039 }, { "epoch": 2.46, "learning_rate": 4.1162925395452665e-07, "loss": 0.1425, "step": 5040 }, { "epoch": 2.46, "learning_rate": 4.109050100700693e-07, "loss": 0.161, "step": 5041 }, { "epoch": 2.46, "learning_rate": 4.1018134683122906e-07, "loss": 0.1405, "step": 5042 }, { "epoch": 2.46, "learning_rate": 4.094582644391409e-07, "loss": 0.1615, "step": 5043 }, { "epoch": 2.46, "learning_rate": 4.0873576309477994e-07, "loss": 0.1296, "step": 5044 }, { "epoch": 2.46, "learning_rate": 4.0801384299895966e-07, "loss": 0.1232, "step": 5045 }, { "epoch": 2.46, "learning_rate": 4.072925043523307e-07, "loss": 0.1249, "step": 5046 }, { "epoch": 2.46, "learning_rate": 4.0657174735538263e-07, "loss": 0.1612, "step": 5047 }, { "epoch": 2.46, "learning_rate": 4.0585157220844357e-07, "loss": 0.1365, "step": 5048 }, { "epoch": 2.47, "learning_rate": 4.0513197911167977e-07, "loss": 0.1322, "step": 5049 }, { "epoch": 2.47, "learning_rate": 4.0441296826509683e-07, "loss": 0.1285, "step": 5050 }, { "epoch": 2.47, "learning_rate": 4.036945398685369e-07, "loss": 0.1227, "step": 5051 }, { "epoch": 2.47, "learning_rate": 4.029766941216806e-07, "loss": 0.1341, "step": 5052 }, { "epoch": 2.47, "learning_rate": 4.0225943122404657e-07, "loss": 0.1736, "step": 5053 }, { "epoch": 2.47, "learning_rate": 4.015427513749928e-07, "loss": 0.1552, "step": 5054 }, { "epoch": 2.47, "learning_rate": 4.008266547737127e-07, "loss": 0.1328, "step": 5055 }, { "epoch": 2.47, "learning_rate": 4.0011114161924076e-07, "loss": 0.1244, "step": 5056 }, { "epoch": 2.47, "learning_rate": 3.9939621211044665e-07, "loss": 0.1886, "step": 5057 }, { "epoch": 2.47, "learning_rate": 3.986818664460382e-07, "loss": 0.1197, "step": 5058 }, { "epoch": 2.47, "learning_rate": 3.979681048245626e-07, "loss": 0.1591, "step": 5059 }, { "epoch": 2.47, "learning_rate": 3.972549274444032e-07, "loss": 0.1495, "step": 5060 }, { "epoch": 2.47, "learning_rate": 3.9654233450378055e-07, "loss": 0.1546, "step": 5061 }, { "epoch": 2.47, "learning_rate": 3.958303262007551e-07, "loss": 0.1668, "step": 5062 }, { "epoch": 2.47, "learning_rate": 3.951189027332222e-07, "loss": 0.1347, "step": 5063 }, { "epoch": 2.47, "learning_rate": 3.944080642989168e-07, "loss": 0.1541, "step": 5064 }, { "epoch": 2.47, "learning_rate": 3.936978110954093e-07, "loss": 0.1396, "step": 5065 }, { "epoch": 2.47, "learning_rate": 3.9298814332010853e-07, "loss": 0.1654, "step": 5066 }, { "epoch": 2.47, "learning_rate": 3.922790611702601e-07, "loss": 0.1669, "step": 5067 }, { "epoch": 2.47, "learning_rate": 3.915705648429477e-07, "loss": 0.1118, "step": 5068 }, { "epoch": 2.48, "learning_rate": 3.908626545350919e-07, "loss": 0.1303, "step": 5069 }, { "epoch": 2.48, "learning_rate": 3.9015533044344964e-07, "loss": 0.1228, "step": 5070 }, { "epoch": 2.48, "learning_rate": 3.8944859276461607e-07, "loss": 0.1226, "step": 5071 }, { "epoch": 2.48, "learning_rate": 3.8874244169502197e-07, "loss": 0.1398, "step": 5072 }, { "epoch": 2.48, "learning_rate": 3.880368774309354e-07, "loss": 0.1307, "step": 5073 }, { "epoch": 2.48, "learning_rate": 3.873319001684625e-07, "loss": 0.1355, "step": 5074 }, { "epoch": 2.48, "learning_rate": 3.866275101035463e-07, "loss": 0.1351, "step": 5075 }, { "epoch": 2.48, "learning_rate": 3.8592370743196494e-07, "loss": 0.1525, "step": 5076 }, { "epoch": 2.48, "learning_rate": 3.852204923493341e-07, "loss": 0.1551, "step": 5077 }, { "epoch": 2.48, "learning_rate": 3.8451786505110583e-07, "loss": 0.1437, "step": 5078 }, { "epoch": 2.48, "learning_rate": 3.8381582573257053e-07, "loss": 0.1281, "step": 5079 }, { "epoch": 2.48, "learning_rate": 3.831143745888527e-07, "loss": 0.1442, "step": 5080 }, { "epoch": 2.48, "learning_rate": 3.824135118149153e-07, "loss": 0.1222, "step": 5081 }, { "epoch": 2.48, "learning_rate": 3.8171323760555654e-07, "loss": 0.1229, "step": 5082 }, { "epoch": 2.48, "learning_rate": 3.810135521554112e-07, "loss": 0.1289, "step": 5083 }, { "epoch": 2.48, "learning_rate": 3.803144556589516e-07, "loss": 0.1427, "step": 5084 }, { "epoch": 2.48, "learning_rate": 3.7961594831048525e-07, "loss": 0.1791, "step": 5085 }, { "epoch": 2.48, "learning_rate": 3.789180303041551e-07, "loss": 0.1252, "step": 5086 }, { "epoch": 2.48, "learning_rate": 3.782207018339426e-07, "loss": 0.1121, "step": 5087 }, { "epoch": 2.48, "learning_rate": 3.7752396309366335e-07, "loss": 0.158, "step": 5088 }, { "epoch": 2.48, "learning_rate": 3.7682781427697017e-07, "loss": 0.1789, "step": 5089 }, { "epoch": 2.49, "learning_rate": 3.761322555773517e-07, "loss": 0.1332, "step": 5090 }, { "epoch": 2.49, "learning_rate": 3.754372871881312e-07, "loss": 0.1407, "step": 5091 }, { "epoch": 2.49, "learning_rate": 3.7474290930247043e-07, "loss": 0.1365, "step": 5092 }, { "epoch": 2.49, "learning_rate": 3.7404912211336465e-07, "loss": 0.1495, "step": 5093 }, { "epoch": 2.49, "learning_rate": 3.733559258136468e-07, "loss": 0.1211, "step": 5094 }, { "epoch": 2.49, "learning_rate": 3.7266332059598427e-07, "loss": 0.1756, "step": 5095 }, { "epoch": 2.49, "learning_rate": 3.7197130665288044e-07, "loss": 0.1473, "step": 5096 }, { "epoch": 2.49, "learning_rate": 3.712798841766743e-07, "loss": 0.1576, "step": 5097 }, { "epoch": 2.49, "learning_rate": 3.7058905335954075e-07, "loss": 0.1456, "step": 5098 }, { "epoch": 2.49, "learning_rate": 3.698988143934912e-07, "loss": 0.1514, "step": 5099 }, { "epoch": 2.49, "learning_rate": 3.692091674703704e-07, "loss": 0.1607, "step": 5100 }, { "epoch": 2.49, "learning_rate": 3.685201127818602e-07, "loss": 0.1273, "step": 5101 }, { "epoch": 2.49, "learning_rate": 3.678316505194771e-07, "loss": 0.1138, "step": 5102 }, { "epoch": 2.49, "learning_rate": 3.671437808745726e-07, "loss": 0.1266, "step": 5103 }, { "epoch": 2.49, "learning_rate": 3.664565040383344e-07, "loss": 0.1372, "step": 5104 }, { "epoch": 2.49, "learning_rate": 3.6576982020178565e-07, "loss": 0.1575, "step": 5105 }, { "epoch": 2.49, "learning_rate": 3.6508372955578407e-07, "loss": 0.1434, "step": 5106 }, { "epoch": 2.49, "learning_rate": 3.643982322910217e-07, "loss": 0.1416, "step": 5107 }, { "epoch": 2.49, "learning_rate": 3.6371332859802656e-07, "loss": 0.1431, "step": 5108 }, { "epoch": 2.49, "learning_rate": 3.6302901866716256e-07, "loss": 0.1414, "step": 5109 }, { "epoch": 2.5, "learning_rate": 3.623453026886264e-07, "loss": 0.1685, "step": 5110 }, { "epoch": 2.5, "learning_rate": 3.6166218085245204e-07, "loss": 0.1229, "step": 5111 }, { "epoch": 2.5, "learning_rate": 3.609796533485069e-07, "loss": 0.1501, "step": 5112 }, { "epoch": 2.5, "learning_rate": 3.6029772036649284e-07, "loss": 0.1313, "step": 5113 }, { "epoch": 2.5, "learning_rate": 3.5961638209594807e-07, "loss": 0.1288, "step": 5114 }, { "epoch": 2.5, "learning_rate": 3.5893563872624437e-07, "loss": 0.173, "step": 5115 }, { "epoch": 2.5, "learning_rate": 3.5825549044658735e-07, "loss": 0.177, "step": 5116 }, { "epoch": 2.5, "learning_rate": 3.5757593744602e-07, "loss": 0.1208, "step": 5117 }, { "epoch": 2.5, "learning_rate": 3.5689697991341643e-07, "loss": 0.1699, "step": 5118 }, { "epoch": 2.5, "learning_rate": 3.5621861803748826e-07, "loss": 0.136, "step": 5119 }, { "epoch": 2.5, "learning_rate": 3.5554085200677964e-07, "loss": 0.1399, "step": 5120 }, { "epoch": 2.5, "learning_rate": 3.5486368200966984e-07, "loss": 0.1184, "step": 5121 }, { "epoch": 2.5, "learning_rate": 3.541871082343715e-07, "loss": 0.1147, "step": 5122 }, { "epoch": 2.5, "learning_rate": 3.5351113086893316e-07, "loss": 0.1448, "step": 5123 }, { "epoch": 2.5, "learning_rate": 3.5283575010123724e-07, "loss": 0.1734, "step": 5124 }, { "epoch": 2.5, "learning_rate": 3.5216096611899924e-07, "loss": 0.1723, "step": 5125 }, { "epoch": 2.5, "learning_rate": 3.5148677910976954e-07, "loss": 0.1318, "step": 5126 }, { "epoch": 2.5, "learning_rate": 3.508131892609321e-07, "loss": 0.1429, "step": 5127 }, { "epoch": 2.5, "learning_rate": 3.501401967597054e-07, "loss": 0.1282, "step": 5128 }, { "epoch": 2.5, "learning_rate": 3.49467801793143e-07, "loss": 0.1484, "step": 5129 }, { "epoch": 2.5, "learning_rate": 3.4879600454813006e-07, "loss": 0.1562, "step": 5130 }, { "epoch": 2.51, "learning_rate": 3.4812480521138727e-07, "loss": 0.1228, "step": 5131 }, { "epoch": 2.51, "learning_rate": 3.4745420396946754e-07, "loss": 0.1419, "step": 5132 }, { "epoch": 2.51, "learning_rate": 3.4678420100875994e-07, "loss": 0.1223, "step": 5133 }, { "epoch": 2.51, "learning_rate": 3.4611479651548456e-07, "loss": 0.1447, "step": 5134 }, { "epoch": 2.51, "learning_rate": 3.4544599067569795e-07, "loss": 0.1391, "step": 5135 }, { "epoch": 2.51, "learning_rate": 3.447777836752883e-07, "loss": 0.1791, "step": 5136 }, { "epoch": 2.51, "learning_rate": 3.4411017569997704e-07, "loss": 0.1269, "step": 5137 }, { "epoch": 2.51, "learning_rate": 3.434431669353211e-07, "loss": 0.1291, "step": 5138 }, { "epoch": 2.51, "learning_rate": 3.427767575667093e-07, "loss": 0.1446, "step": 5139 }, { "epoch": 2.51, "learning_rate": 3.4211094777936383e-07, "loss": 0.1594, "step": 5140 }, { "epoch": 2.51, "learning_rate": 3.414457377583413e-07, "loss": 0.1107, "step": 5141 }, { "epoch": 2.51, "learning_rate": 3.4078112768853034e-07, "loss": 0.1429, "step": 5142 }, { "epoch": 2.51, "learning_rate": 3.401171177546547e-07, "loss": 0.1363, "step": 5143 }, { "epoch": 2.51, "learning_rate": 3.394537081412691e-07, "loss": 0.1236, "step": 5144 }, { "epoch": 2.51, "learning_rate": 3.387908990327626e-07, "loss": 0.1778, "step": 5145 }, { "epoch": 2.51, "learning_rate": 3.3812869061335655e-07, "loss": 0.1355, "step": 5146 }, { "epoch": 2.51, "learning_rate": 3.3746708306710674e-07, "loss": 0.1219, "step": 5147 }, { "epoch": 2.51, "learning_rate": 3.368060765779016e-07, "loss": 0.155, "step": 5148 }, { "epoch": 2.51, "learning_rate": 3.361456713294617e-07, "loss": 0.1402, "step": 5149 }, { "epoch": 2.51, "learning_rate": 3.3548586750534074e-07, "loss": 0.1572, "step": 5150 }, { "epoch": 2.52, "learning_rate": 3.348266652889251e-07, "loss": 0.1478, "step": 5151 }, { "epoch": 2.52, "learning_rate": 3.3416806486343436e-07, "loss": 0.1402, "step": 5152 }, { "epoch": 2.52, "learning_rate": 3.3351006641192065e-07, "loss": 0.1765, "step": 5153 }, { "epoch": 2.52, "learning_rate": 3.328526701172696e-07, "loss": 0.1364, "step": 5154 }, { "epoch": 2.52, "learning_rate": 3.321958761621985e-07, "loss": 0.1431, "step": 5155 }, { "epoch": 2.52, "learning_rate": 3.315396847292571e-07, "loss": 0.1833, "step": 5156 }, { "epoch": 2.52, "learning_rate": 3.3088409600082775e-07, "loss": 0.1488, "step": 5157 }, { "epoch": 2.52, "learning_rate": 3.3022911015912653e-07, "loss": 0.109, "step": 5158 }, { "epoch": 2.52, "learning_rate": 3.295747273861999e-07, "loss": 0.147, "step": 5159 }, { "epoch": 2.52, "learning_rate": 3.289209478639288e-07, "loss": 0.1362, "step": 5160 }, { "epoch": 2.52, "learning_rate": 3.282677717740254e-07, "loss": 0.1503, "step": 5161 }, { "epoch": 2.52, "learning_rate": 3.276151992980331e-07, "loss": 0.1651, "step": 5162 }, { "epoch": 2.52, "learning_rate": 3.269632306173304e-07, "loss": 0.1772, "step": 5163 }, { "epoch": 2.52, "learning_rate": 3.263118659131248e-07, "loss": 0.1447, "step": 5164 }, { "epoch": 2.52, "learning_rate": 3.256611053664585e-07, "loss": 0.1442, "step": 5165 }, { "epoch": 2.52, "learning_rate": 3.250109491582043e-07, "loss": 0.1454, "step": 5166 }, { "epoch": 2.52, "learning_rate": 3.243613974690668e-07, "loss": 0.1432, "step": 5167 }, { "epoch": 2.52, "learning_rate": 3.237124504795841e-07, "loss": 0.1625, "step": 5168 }, { "epoch": 2.52, "learning_rate": 3.2306410837012524e-07, "loss": 0.1466, "step": 5169 }, { "epoch": 2.52, "learning_rate": 3.2241637132089024e-07, "loss": 0.1345, "step": 5170 }, { "epoch": 2.52, "learning_rate": 3.217692395119132e-07, "loss": 0.159, "step": 5171 }, { "epoch": 2.53, "learning_rate": 3.2112271312305737e-07, "loss": 0.1143, "step": 5172 }, { "epoch": 2.53, "learning_rate": 3.2047679233402043e-07, "loss": 0.158, "step": 5173 }, { "epoch": 2.53, "learning_rate": 3.1983147732432996e-07, "loss": 0.1456, "step": 5174 }, { "epoch": 2.53, "learning_rate": 3.1918676827334544e-07, "loss": 0.1439, "step": 5175 }, { "epoch": 2.53, "learning_rate": 3.1854266536025775e-07, "loss": 0.1382, "step": 5176 }, { "epoch": 2.53, "learning_rate": 3.178991687640898e-07, "loss": 0.1847, "step": 5177 }, { "epoch": 2.53, "learning_rate": 3.172562786636965e-07, "loss": 0.1538, "step": 5178 }, { "epoch": 2.53, "learning_rate": 3.166139952377631e-07, "loss": 0.1383, "step": 5179 }, { "epoch": 2.53, "learning_rate": 3.159723186648067e-07, "loss": 0.1413, "step": 5180 }, { "epoch": 2.53, "learning_rate": 3.1533124912317525e-07, "loss": 0.122, "step": 5181 }, { "epoch": 2.53, "learning_rate": 3.146907867910484e-07, "loss": 0.147, "step": 5182 }, { "epoch": 2.53, "learning_rate": 3.140509318464374e-07, "loss": 0.1433, "step": 5183 }, { "epoch": 2.53, "learning_rate": 3.1341168446718475e-07, "loss": 0.1378, "step": 5184 }, { "epoch": 2.53, "learning_rate": 3.127730448309629e-07, "loss": 0.1644, "step": 5185 }, { "epoch": 2.53, "learning_rate": 3.1213501311527674e-07, "loss": 0.1305, "step": 5186 }, { "epoch": 2.53, "learning_rate": 3.114975894974603e-07, "loss": 0.1282, "step": 5187 }, { "epoch": 2.53, "learning_rate": 3.108607741546815e-07, "loss": 0.1702, "step": 5188 }, { "epoch": 2.53, "learning_rate": 3.1022456726393645e-07, "loss": 0.1445, "step": 5189 }, { "epoch": 2.53, "learning_rate": 3.0958896900205424e-07, "loss": 0.1498, "step": 5190 }, { "epoch": 2.53, "learning_rate": 3.0895397954569336e-07, "loss": 0.1566, "step": 5191 }, { "epoch": 2.54, "learning_rate": 3.083195990713428e-07, "loss": 0.1593, "step": 5192 }, { "epoch": 2.54, "learning_rate": 3.076858277553241e-07, "loss": 0.1449, "step": 5193 }, { "epoch": 2.54, "learning_rate": 3.0705266577378823e-07, "loss": 0.1312, "step": 5194 }, { "epoch": 2.54, "learning_rate": 3.0642011330271645e-07, "loss": 0.1357, "step": 5195 }, { "epoch": 2.54, "learning_rate": 3.057881705179219e-07, "loss": 0.1573, "step": 5196 }, { "epoch": 2.54, "learning_rate": 3.051568375950467e-07, "loss": 0.1303, "step": 5197 }, { "epoch": 2.54, "learning_rate": 3.0452611470956546e-07, "loss": 0.1209, "step": 5198 }, { "epoch": 2.54, "learning_rate": 3.038960020367812e-07, "loss": 0.1608, "step": 5199 }, { "epoch": 2.54, "learning_rate": 3.0326649975182866e-07, "loss": 0.1203, "step": 5200 }, { "epoch": 2.54, "learning_rate": 3.026376080296714e-07, "loss": 0.1843, "step": 5201 }, { "epoch": 2.54, "learning_rate": 3.0200932704510523e-07, "loss": 0.1758, "step": 5202 }, { "epoch": 2.54, "learning_rate": 3.0138165697275597e-07, "loss": 0.1474, "step": 5203 }, { "epoch": 2.54, "learning_rate": 3.0075459798707824e-07, "loss": 0.1563, "step": 5204 }, { "epoch": 2.54, "learning_rate": 3.0012815026235764e-07, "loss": 0.1322, "step": 5205 }, { "epoch": 2.54, "learning_rate": 2.9950231397270937e-07, "loss": 0.1417, "step": 5206 }, { "epoch": 2.54, "learning_rate": 2.988770892920795e-07, "loss": 0.1596, "step": 5207 }, { "epoch": 2.54, "learning_rate": 2.9825247639424433e-07, "loss": 0.1453, "step": 5208 }, { "epoch": 2.54, "learning_rate": 2.976284754528092e-07, "loss": 0.1304, "step": 5209 }, { "epoch": 2.54, "learning_rate": 2.9700508664120913e-07, "loss": 0.1127, "step": 5210 }, { "epoch": 2.54, "learning_rate": 2.963823101327104e-07, "loss": 0.1766, "step": 5211 }, { "epoch": 2.54, "learning_rate": 2.957601461004073e-07, "loss": 0.1597, "step": 5212 }, { "epoch": 2.55, "learning_rate": 2.9513859471722527e-07, "loss": 0.1176, "step": 5213 }, { "epoch": 2.55, "learning_rate": 2.9451765615591985e-07, "loss": 0.1261, "step": 5214 }, { "epoch": 2.55, "learning_rate": 2.938973305890747e-07, "loss": 0.1077, "step": 5215 }, { "epoch": 2.55, "learning_rate": 2.9327761818910337e-07, "loss": 0.1678, "step": 5216 }, { "epoch": 2.55, "learning_rate": 2.926585191282508e-07, "loss": 0.1672, "step": 5217 }, { "epoch": 2.55, "learning_rate": 2.9204003357858944e-07, "loss": 0.1322, "step": 5218 }, { "epoch": 2.55, "learning_rate": 2.9142216171202135e-07, "loss": 0.1379, "step": 5219 }, { "epoch": 2.55, "learning_rate": 2.908049037002797e-07, "loss": 0.1697, "step": 5220 }, { "epoch": 2.55, "learning_rate": 2.90188259714925e-07, "loss": 0.13, "step": 5221 }, { "epoch": 2.55, "learning_rate": 2.895722299273487e-07, "loss": 0.1342, "step": 5222 }, { "epoch": 2.55, "learning_rate": 2.8895681450877066e-07, "loss": 0.133, "step": 5223 }, { "epoch": 2.55, "learning_rate": 2.883420136302403e-07, "loss": 0.1534, "step": 5224 }, { "epoch": 2.55, "learning_rate": 2.877278274626352e-07, "loss": 0.1595, "step": 5225 }, { "epoch": 2.55, "learning_rate": 2.8711425617666404e-07, "loss": 0.1366, "step": 5226 }, { "epoch": 2.55, "learning_rate": 2.8650129994286375e-07, "loss": 0.1256, "step": 5227 }, { "epoch": 2.55, "learning_rate": 2.8588895893159966e-07, "loss": 0.15, "step": 5228 }, { "epoch": 2.55, "learning_rate": 2.8527723331306655e-07, "loss": 0.1411, "step": 5229 }, { "epoch": 2.55, "learning_rate": 2.8466612325728836e-07, "loss": 0.1553, "step": 5230 }, { "epoch": 2.55, "learning_rate": 2.840556289341173e-07, "loss": 0.1777, "step": 5231 }, { "epoch": 2.55, "learning_rate": 2.8344575051323516e-07, "loss": 0.1727, "step": 5232 }, { "epoch": 2.56, "learning_rate": 2.8283648816415263e-07, "loss": 0.1389, "step": 5233 }, { "epoch": 2.56, "learning_rate": 2.8222784205620886e-07, "loss": 0.1609, "step": 5234 }, { "epoch": 2.56, "learning_rate": 2.8161981235857145e-07, "loss": 0.1426, "step": 5235 }, { "epoch": 2.56, "learning_rate": 2.8101239924023635e-07, "loss": 0.141, "step": 5236 }, { "epoch": 2.56, "learning_rate": 2.804056028700297e-07, "loss": 0.1122, "step": 5237 }, { "epoch": 2.56, "learning_rate": 2.797994234166046e-07, "loss": 0.1611, "step": 5238 }, { "epoch": 2.56, "learning_rate": 2.7919386104844375e-07, "loss": 0.1286, "step": 5239 }, { "epoch": 2.56, "learning_rate": 2.7858891593385757e-07, "loss": 0.1487, "step": 5240 }, { "epoch": 2.56, "learning_rate": 2.7798458824098487e-07, "loss": 0.1334, "step": 5241 }, { "epoch": 2.56, "learning_rate": 2.773808781377943e-07, "loss": 0.1633, "step": 5242 }, { "epoch": 2.56, "learning_rate": 2.767777857920806e-07, "loss": 0.2, "step": 5243 }, { "epoch": 2.56, "learning_rate": 2.7617531137146906e-07, "loss": 0.1311, "step": 5244 }, { "epoch": 2.56, "learning_rate": 2.7557345504341167e-07, "loss": 0.1322, "step": 5245 }, { "epoch": 2.56, "learning_rate": 2.749722169751887e-07, "loss": 0.1734, "step": 5246 }, { "epoch": 2.56, "learning_rate": 2.743715973339095e-07, "loss": 0.1249, "step": 5247 }, { "epoch": 2.56, "learning_rate": 2.737715962865112e-07, "loss": 0.1359, "step": 5248 }, { "epoch": 2.56, "learning_rate": 2.7317221399975805e-07, "loss": 0.1626, "step": 5249 }, { "epoch": 2.56, "learning_rate": 2.7257345064024413e-07, "loss": 0.145, "step": 5250 }, { "epoch": 2.56, "learning_rate": 2.7197530637438935e-07, "loss": 0.1909, "step": 5251 }, { "epoch": 2.56, "learning_rate": 2.7137778136844375e-07, "loss": 0.149, "step": 5252 }, { "epoch": 2.56, "learning_rate": 2.707808757884836e-07, "loss": 0.1357, "step": 5253 }, { "epoch": 2.57, "learning_rate": 2.701845898004138e-07, "loss": 0.1509, "step": 5254 }, { "epoch": 2.57, "learning_rate": 2.6958892356996585e-07, "loss": 0.1251, "step": 5255 }, { "epoch": 2.57, "learning_rate": 2.689938772627007e-07, "loss": 0.1388, "step": 5256 }, { "epoch": 2.57, "learning_rate": 2.6839945104400687e-07, "loss": 0.1462, "step": 5257 }, { "epoch": 2.57, "learning_rate": 2.678056450790995e-07, "loss": 0.1616, "step": 5258 }, { "epoch": 2.57, "learning_rate": 2.6721245953302136e-07, "loss": 0.1469, "step": 5259 }, { "epoch": 2.57, "learning_rate": 2.666198945706433e-07, "loss": 0.1607, "step": 5260 }, { "epoch": 2.57, "learning_rate": 2.6602795035666316e-07, "loss": 0.1462, "step": 5261 }, { "epoch": 2.57, "learning_rate": 2.654366270556072e-07, "loss": 0.1636, "step": 5262 }, { "epoch": 2.57, "learning_rate": 2.6484592483182884e-07, "loss": 0.1507, "step": 5263 }, { "epoch": 2.57, "learning_rate": 2.6425584384950826e-07, "loss": 0.1658, "step": 5264 }, { "epoch": 2.57, "learning_rate": 2.6366638427265307e-07, "loss": 0.1194, "step": 5265 }, { "epoch": 2.57, "learning_rate": 2.6307754626509806e-07, "loss": 0.1398, "step": 5266 }, { "epoch": 2.57, "learning_rate": 2.6248932999050653e-07, "loss": 0.1376, "step": 5267 }, { "epoch": 2.57, "learning_rate": 2.6190173561236684e-07, "loss": 0.1304, "step": 5268 }, { "epoch": 2.57, "learning_rate": 2.6131476329399716e-07, "loss": 0.1553, "step": 5269 }, { "epoch": 2.57, "learning_rate": 2.607284131985405e-07, "loss": 0.1208, "step": 5270 }, { "epoch": 2.57, "learning_rate": 2.6014268548896713e-07, "loss": 0.175, "step": 5271 }, { "epoch": 2.57, "learning_rate": 2.595575803280762e-07, "loss": 0.1671, "step": 5272 }, { "epoch": 2.57, "learning_rate": 2.5897309787849155e-07, "loss": 0.1561, "step": 5273 }, { "epoch": 2.58, "learning_rate": 2.583892383026651e-07, "loss": 0.1461, "step": 5274 }, { "epoch": 2.58, "learning_rate": 2.578060017628761e-07, "loss": 0.1511, "step": 5275 }, { "epoch": 2.58, "learning_rate": 2.572233884212294e-07, "loss": 0.1212, "step": 5276 }, { "epoch": 2.58, "learning_rate": 2.5664139843965763e-07, "loss": 0.1412, "step": 5277 }, { "epoch": 2.58, "learning_rate": 2.5606003197991975e-07, "loss": 0.1632, "step": 5278 }, { "epoch": 2.58, "learning_rate": 2.554792892036009e-07, "loss": 0.1794, "step": 5279 }, { "epoch": 2.58, "learning_rate": 2.548991702721143e-07, "loss": 0.1279, "step": 5280 }, { "epoch": 2.58, "learning_rate": 2.543196753466981e-07, "loss": 0.1685, "step": 5281 }, { "epoch": 2.58, "learning_rate": 2.5374080458841875e-07, "loss": 0.1286, "step": 5282 }, { "epoch": 2.58, "learning_rate": 2.531625581581676e-07, "loss": 0.1228, "step": 5283 }, { "epoch": 2.58, "learning_rate": 2.525849362166635e-07, "loss": 0.1552, "step": 5284 }, { "epoch": 2.58, "learning_rate": 2.520079389244509e-07, "loss": 0.1277, "step": 5285 }, { "epoch": 2.58, "learning_rate": 2.5143156644190147e-07, "loss": 0.1594, "step": 5286 }, { "epoch": 2.58, "learning_rate": 2.508558189292132e-07, "loss": 0.1324, "step": 5287 }, { "epoch": 2.58, "learning_rate": 2.5028069654640997e-07, "loss": 0.1619, "step": 5288 }, { "epoch": 2.58, "learning_rate": 2.4970619945334173e-07, "loss": 0.1425, "step": 5289 }, { "epoch": 2.58, "learning_rate": 2.491323278096849e-07, "loss": 0.1535, "step": 5290 }, { "epoch": 2.58, "learning_rate": 2.485590817749417e-07, "loss": 0.127, "step": 5291 }, { "epoch": 2.58, "learning_rate": 2.4798646150844123e-07, "loss": 0.1566, "step": 5292 }, { "epoch": 2.58, "learning_rate": 2.4741446716933874e-07, "loss": 0.127, "step": 5293 }, { "epoch": 2.58, "learning_rate": 2.468430989166148e-07, "loss": 0.1865, "step": 5294 }, { "epoch": 2.59, "learning_rate": 2.462723569090758e-07, "loss": 0.1608, "step": 5295 }, { "epoch": 2.59, "learning_rate": 2.4570224130535417e-07, "loss": 0.1659, "step": 5296 }, { "epoch": 2.59, "learning_rate": 2.4513275226390963e-07, "loss": 0.1665, "step": 5297 }, { "epoch": 2.59, "learning_rate": 2.445638899430258e-07, "loss": 0.1285, "step": 5298 }, { "epoch": 2.59, "learning_rate": 2.439956545008135e-07, "loss": 0.1668, "step": 5299 }, { "epoch": 2.59, "learning_rate": 2.4342804609520847e-07, "loss": 0.1559, "step": 5300 }, { "epoch": 2.59, "learning_rate": 2.4286106488397215e-07, "loss": 0.1309, "step": 5301 }, { "epoch": 2.59, "learning_rate": 2.422947110246929e-07, "loss": 0.156, "step": 5302 }, { "epoch": 2.59, "learning_rate": 2.417289846747831e-07, "loss": 0.1311, "step": 5303 }, { "epoch": 2.59, "learning_rate": 2.411638859914814e-07, "loss": 0.1545, "step": 5304 }, { "epoch": 2.59, "learning_rate": 2.4059941513185245e-07, "loss": 0.164, "step": 5305 }, { "epoch": 2.59, "learning_rate": 2.400355722527853e-07, "loss": 0.1536, "step": 5306 }, { "epoch": 2.59, "learning_rate": 2.394723575109964e-07, "loss": 0.1718, "step": 5307 }, { "epoch": 2.59, "learning_rate": 2.3890977106302505e-07, "loss": 0.1419, "step": 5308 }, { "epoch": 2.59, "learning_rate": 2.3834781306523808e-07, "loss": 0.1295, "step": 5309 }, { "epoch": 2.59, "learning_rate": 2.3778648367382558e-07, "loss": 0.1716, "step": 5310 }, { "epoch": 2.59, "learning_rate": 2.372257830448052e-07, "loss": 0.1524, "step": 5311 }, { "epoch": 2.59, "learning_rate": 2.36665711334019e-07, "loss": 0.1556, "step": 5312 }, { "epoch": 2.59, "learning_rate": 2.3610626869713326e-07, "loss": 0.1679, "step": 5313 }, { "epoch": 2.59, "learning_rate": 2.3554745528964058e-07, "loss": 0.1597, "step": 5314 }, { "epoch": 2.6, "learning_rate": 2.349892712668575e-07, "loss": 0.1566, "step": 5315 }, { "epoch": 2.6, "learning_rate": 2.3443171678392745e-07, "loss": 0.1435, "step": 5316 }, { "epoch": 2.6, "learning_rate": 2.3387479199581675e-07, "loss": 0.1437, "step": 5317 }, { "epoch": 2.6, "learning_rate": 2.333184970573188e-07, "loss": 0.1649, "step": 5318 }, { "epoch": 2.6, "learning_rate": 2.3276283212305017e-07, "loss": 0.1805, "step": 5319 }, { "epoch": 2.6, "learning_rate": 2.322077973474529e-07, "loss": 0.1546, "step": 5320 }, { "epoch": 2.6, "learning_rate": 2.3165339288479498e-07, "loss": 0.1472, "step": 5321 }, { "epoch": 2.6, "learning_rate": 2.3109961888916732e-07, "loss": 0.1597, "step": 5322 }, { "epoch": 2.6, "learning_rate": 2.3054647551448705e-07, "loss": 0.1687, "step": 5323 }, { "epoch": 2.6, "learning_rate": 2.2999396291449566e-07, "loss": 0.149, "step": 5324 }, { "epoch": 2.6, "learning_rate": 2.2944208124275808e-07, "loss": 0.1561, "step": 5325 }, { "epoch": 2.6, "learning_rate": 2.288908306526666e-07, "loss": 0.164, "step": 5326 }, { "epoch": 2.6, "learning_rate": 2.2834021129743534e-07, "loss": 0.1615, "step": 5327 }, { "epoch": 2.6, "learning_rate": 2.2779022333010408e-07, "loss": 0.1461, "step": 5328 }, { "epoch": 2.6, "learning_rate": 2.2724086690353807e-07, "loss": 0.1474, "step": 5329 }, { "epoch": 2.6, "learning_rate": 2.266921421704252e-07, "loss": 0.1597, "step": 5330 }, { "epoch": 2.6, "learning_rate": 2.2614404928327922e-07, "loss": 0.1515, "step": 5331 }, { "epoch": 2.6, "learning_rate": 2.2559658839443777e-07, "loss": 0.1315, "step": 5332 }, { "epoch": 2.6, "learning_rate": 2.2504975965606245e-07, "loss": 0.1452, "step": 5333 }, { "epoch": 2.6, "learning_rate": 2.2450356322013922e-07, "loss": 0.1287, "step": 5334 }, { "epoch": 2.6, "learning_rate": 2.239579992384791e-07, "loss": 0.1531, "step": 5335 }, { "epoch": 2.61, "learning_rate": 2.2341306786271695e-07, "loss": 0.166, "step": 5336 }, { "epoch": 2.61, "learning_rate": 2.2286876924431156e-07, "loss": 0.1402, "step": 5337 }, { "epoch": 2.61, "learning_rate": 2.2232510353454585e-07, "loss": 0.1396, "step": 5338 }, { "epoch": 2.61, "learning_rate": 2.2178207088452698e-07, "loss": 0.1509, "step": 5339 }, { "epoch": 2.61, "learning_rate": 2.212396714451856e-07, "loss": 0.1358, "step": 5340 }, { "epoch": 2.61, "learning_rate": 2.20697905367277e-07, "loss": 0.1754, "step": 5341 }, { "epoch": 2.61, "learning_rate": 2.2015677280138147e-07, "loss": 0.1259, "step": 5342 }, { "epoch": 2.61, "learning_rate": 2.1961627389790102e-07, "loss": 0.1419, "step": 5343 }, { "epoch": 2.61, "learning_rate": 2.1907640880706265e-07, "loss": 0.1316, "step": 5344 }, { "epoch": 2.61, "learning_rate": 2.1853717767891692e-07, "loss": 0.1271, "step": 5345 }, { "epoch": 2.61, "learning_rate": 2.1799858066333917e-07, "loss": 0.1492, "step": 5346 }, { "epoch": 2.61, "learning_rate": 2.1746061791002692e-07, "loss": 0.1528, "step": 5347 }, { "epoch": 2.61, "learning_rate": 2.169232895685028e-07, "loss": 0.1881, "step": 5348 }, { "epoch": 2.61, "learning_rate": 2.1638659578811228e-07, "loss": 0.1255, "step": 5349 }, { "epoch": 2.61, "learning_rate": 2.1585053671802414e-07, "loss": 0.2089, "step": 5350 }, { "epoch": 2.61, "learning_rate": 2.1531511250723224e-07, "loss": 0.1587, "step": 5351 }, { "epoch": 2.61, "learning_rate": 2.1478032330455255e-07, "loss": 0.1584, "step": 5352 }, { "epoch": 2.61, "learning_rate": 2.1424616925862478e-07, "loss": 0.134, "step": 5353 }, { "epoch": 2.61, "learning_rate": 2.1371265051791295e-07, "loss": 0.1599, "step": 5354 }, { "epoch": 2.61, "learning_rate": 2.1317976723070317e-07, "loss": 0.1309, "step": 5355 }, { "epoch": 2.62, "learning_rate": 2.1264751954510664e-07, "loss": 0.1238, "step": 5356 }, { "epoch": 2.62, "learning_rate": 2.1211590760905647e-07, "loss": 0.1353, "step": 5357 }, { "epoch": 2.62, "learning_rate": 2.115849315703089e-07, "loss": 0.1706, "step": 5358 }, { "epoch": 2.62, "learning_rate": 2.1105459157644502e-07, "loss": 0.1396, "step": 5359 }, { "epoch": 2.62, "learning_rate": 2.1052488777486751e-07, "loss": 0.1762, "step": 5360 }, { "epoch": 2.62, "learning_rate": 2.0999582031280358e-07, "loss": 0.1572, "step": 5361 }, { "epoch": 2.62, "learning_rate": 2.0946738933730254e-07, "loss": 0.1439, "step": 5362 }, { "epoch": 2.62, "learning_rate": 2.0893959499523693e-07, "loss": 0.1374, "step": 5363 }, { "epoch": 2.62, "learning_rate": 2.0841243743330243e-07, "loss": 0.179, "step": 5364 }, { "epoch": 2.62, "learning_rate": 2.0788591679801796e-07, "loss": 0.1592, "step": 5365 }, { "epoch": 2.62, "learning_rate": 2.0736003323572618e-07, "loss": 0.1689, "step": 5366 }, { "epoch": 2.62, "learning_rate": 2.0683478689259072e-07, "loss": 0.128, "step": 5367 }, { "epoch": 2.62, "learning_rate": 2.0631017791459984e-07, "loss": 0.1232, "step": 5368 }, { "epoch": 2.62, "learning_rate": 2.057862064475638e-07, "loss": 0.1738, "step": 5369 }, { "epoch": 2.62, "learning_rate": 2.052628726371153e-07, "loss": 0.1767, "step": 5370 }, { "epoch": 2.62, "learning_rate": 2.0474017662871076e-07, "loss": 0.1562, "step": 5371 }, { "epoch": 2.62, "learning_rate": 2.042181185676298e-07, "loss": 0.1282, "step": 5372 }, { "epoch": 2.62, "learning_rate": 2.03696698598973e-07, "loss": 0.1599, "step": 5373 }, { "epoch": 2.62, "learning_rate": 2.0317591686766474e-07, "loss": 0.1441, "step": 5374 }, { "epoch": 2.62, "learning_rate": 2.026557735184509e-07, "loss": 0.1706, "step": 5375 }, { "epoch": 2.62, "learning_rate": 2.021362686959022e-07, "loss": 0.1449, "step": 5376 }, { "epoch": 2.63, "learning_rate": 2.01617402544409e-07, "loss": 0.1211, "step": 5377 }, { "epoch": 2.63, "learning_rate": 2.01099175208187e-07, "loss": 0.1428, "step": 5378 }, { "epoch": 2.63, "learning_rate": 2.0058158683127215e-07, "loss": 0.1744, "step": 5379 }, { "epoch": 2.63, "learning_rate": 2.0006463755752325e-07, "loss": 0.1435, "step": 5380 }, { "epoch": 2.63, "learning_rate": 1.9954832753062287e-07, "loss": 0.1464, "step": 5381 }, { "epoch": 2.63, "learning_rate": 1.9903265689407398e-07, "loss": 0.2184, "step": 5382 }, { "epoch": 2.63, "learning_rate": 1.9851762579120275e-07, "loss": 0.1537, "step": 5383 }, { "epoch": 2.63, "learning_rate": 1.9800323436515805e-07, "loss": 0.1371, "step": 5384 }, { "epoch": 2.63, "learning_rate": 1.9748948275890995e-07, "loss": 0.1499, "step": 5385 }, { "epoch": 2.63, "learning_rate": 1.969763711152517e-07, "loss": 0.1606, "step": 5386 }, { "epoch": 2.63, "learning_rate": 1.9646389957679784e-07, "loss": 0.1574, "step": 5387 }, { "epoch": 2.63, "learning_rate": 1.9595206828598558e-07, "loss": 0.1806, "step": 5388 }, { "epoch": 2.63, "learning_rate": 1.954408773850733e-07, "loss": 0.176, "step": 5389 }, { "epoch": 2.63, "learning_rate": 1.9493032701614234e-07, "loss": 0.1625, "step": 5390 }, { "epoch": 2.63, "learning_rate": 1.9442041732109645e-07, "loss": 0.132, "step": 5391 }, { "epoch": 2.63, "learning_rate": 1.9391114844166003e-07, "loss": 0.1707, "step": 5392 }, { "epoch": 2.63, "learning_rate": 1.9340252051937957e-07, "loss": 0.1757, "step": 5393 }, { "epoch": 2.63, "learning_rate": 1.9289453369562367e-07, "loss": 0.1418, "step": 5394 }, { "epoch": 2.63, "learning_rate": 1.9238718811158352e-07, "loss": 0.149, "step": 5395 }, { "epoch": 2.63, "learning_rate": 1.9188048390827052e-07, "loss": 0.1835, "step": 5396 }, { "epoch": 2.64, "learning_rate": 1.9137442122651977e-07, "loss": 0.1401, "step": 5397 }, { "epoch": 2.64, "learning_rate": 1.9086900020698601e-07, "loss": 0.1688, "step": 5398 }, { "epoch": 2.64, "learning_rate": 1.9036422099014684e-07, "loss": 0.1479, "step": 5399 }, { "epoch": 2.64, "learning_rate": 1.8986008371630116e-07, "loss": 0.1728, "step": 5400 }, { "epoch": 2.64, "learning_rate": 1.893565885255691e-07, "loss": 0.128, "step": 5401 }, { "epoch": 2.64, "learning_rate": 1.8885373555789399e-07, "loss": 0.1531, "step": 5402 }, { "epoch": 2.64, "learning_rate": 1.883515249530385e-07, "loss": 0.1488, "step": 5403 }, { "epoch": 2.64, "learning_rate": 1.878499568505876e-07, "loss": 0.1281, "step": 5404 }, { "epoch": 2.64, "learning_rate": 1.8734903138994758e-07, "loss": 0.1584, "step": 5405 }, { "epoch": 2.64, "learning_rate": 1.8684874871034708e-07, "loss": 0.1272, "step": 5406 }, { "epoch": 2.64, "learning_rate": 1.8634910895083458e-07, "loss": 0.1555, "step": 5407 }, { "epoch": 2.64, "learning_rate": 1.8585011225028095e-07, "loss": 0.1673, "step": 5408 }, { "epoch": 2.64, "learning_rate": 1.8535175874737772e-07, "loss": 0.1756, "step": 5409 }, { "epoch": 2.64, "learning_rate": 1.848540485806377e-07, "loss": 0.1326, "step": 5410 }, { "epoch": 2.64, "learning_rate": 1.8435698188839558e-07, "loss": 0.1759, "step": 5411 }, { "epoch": 2.64, "learning_rate": 1.838605588088066e-07, "loss": 0.1692, "step": 5412 }, { "epoch": 2.64, "learning_rate": 1.8336477947984628e-07, "loss": 0.1516, "step": 5413 }, { "epoch": 2.64, "learning_rate": 1.8286964403931324e-07, "loss": 0.1818, "step": 5414 }, { "epoch": 2.64, "learning_rate": 1.8237515262482575e-07, "loss": 0.1507, "step": 5415 }, { "epoch": 2.64, "learning_rate": 1.818813053738233e-07, "loss": 0.1442, "step": 5416 }, { "epoch": 2.65, "learning_rate": 1.813881024235664e-07, "loss": 0.1396, "step": 5417 }, { "epoch": 2.65, "learning_rate": 1.808955439111365e-07, "loss": 0.1681, "step": 5418 }, { "epoch": 2.65, "learning_rate": 1.8040362997343518e-07, "loss": 0.135, "step": 5419 }, { "epoch": 2.65, "learning_rate": 1.799123607471867e-07, "loss": 0.1522, "step": 5420 }, { "epoch": 2.65, "learning_rate": 1.7942173636893462e-07, "loss": 0.1578, "step": 5421 }, { "epoch": 2.65, "learning_rate": 1.7893175697504373e-07, "loss": 0.143, "step": 5422 }, { "epoch": 2.65, "learning_rate": 1.7844242270169954e-07, "loss": 0.1296, "step": 5423 }, { "epoch": 2.65, "learning_rate": 1.779537336849077e-07, "loss": 0.1428, "step": 5424 }, { "epoch": 2.65, "learning_rate": 1.7746569006049596e-07, "loss": 0.1608, "step": 5425 }, { "epoch": 2.65, "learning_rate": 1.7697829196411054e-07, "loss": 0.1404, "step": 5426 }, { "epoch": 2.65, "learning_rate": 1.7649153953122083e-07, "loss": 0.1572, "step": 5427 }, { "epoch": 2.65, "learning_rate": 1.7600543289711446e-07, "loss": 0.1465, "step": 5428 }, { "epoch": 2.65, "learning_rate": 1.755199721969006e-07, "loss": 0.1534, "step": 5429 }, { "epoch": 2.65, "learning_rate": 1.750351575655093e-07, "loss": 0.196, "step": 5430 }, { "epoch": 2.65, "learning_rate": 1.7455098913769037e-07, "loss": 0.1371, "step": 5431 }, { "epoch": 2.65, "learning_rate": 1.7406746704801331e-07, "loss": 0.1838, "step": 5432 }, { "epoch": 2.65, "learning_rate": 1.735845914308701e-07, "loss": 0.1714, "step": 5433 }, { "epoch": 2.65, "learning_rate": 1.7310236242047086e-07, "loss": 0.1586, "step": 5434 }, { "epoch": 2.65, "learning_rate": 1.7262078015084753e-07, "loss": 0.1363, "step": 5435 }, { "epoch": 2.65, "learning_rate": 1.7213984475585143e-07, "loss": 0.1787, "step": 5436 }, { "epoch": 2.65, "learning_rate": 1.7165955636915395e-07, "loss": 0.1424, "step": 5437 }, { "epoch": 2.66, "learning_rate": 1.711799151242477e-07, "loss": 0.1212, "step": 5438 }, { "epoch": 2.66, "learning_rate": 1.7070092115444414e-07, "loss": 0.1565, "step": 5439 }, { "epoch": 2.66, "learning_rate": 1.7022257459287623e-07, "loss": 0.1057, "step": 5440 }, { "epoch": 2.66, "learning_rate": 1.6974487557249563e-07, "loss": 0.1397, "step": 5441 }, { "epoch": 2.66, "learning_rate": 1.6926782422607452e-07, "loss": 0.1722, "step": 5442 }, { "epoch": 2.66, "learning_rate": 1.6879142068620514e-07, "loss": 0.1384, "step": 5443 }, { "epoch": 2.66, "learning_rate": 1.6831566508529962e-07, "loss": 0.1564, "step": 5444 }, { "epoch": 2.66, "learning_rate": 1.678405575555908e-07, "loss": 0.1485, "step": 5445 }, { "epoch": 2.66, "learning_rate": 1.6736609822913004e-07, "loss": 0.1639, "step": 5446 }, { "epoch": 2.66, "learning_rate": 1.6689228723778927e-07, "loss": 0.1916, "step": 5447 }, { "epoch": 2.66, "learning_rate": 1.6641912471325987e-07, "loss": 0.1188, "step": 5448 }, { "epoch": 2.66, "learning_rate": 1.6594661078705305e-07, "loss": 0.1613, "step": 5449 }, { "epoch": 2.66, "learning_rate": 1.6547474559050036e-07, "loss": 0.1473, "step": 5450 }, { "epoch": 2.66, "learning_rate": 1.6500352925475278e-07, "loss": 0.1447, "step": 5451 }, { "epoch": 2.66, "learning_rate": 1.645329619107805e-07, "loss": 0.1497, "step": 5452 }, { "epoch": 2.66, "learning_rate": 1.6406304368937336e-07, "loss": 0.1268, "step": 5453 }, { "epoch": 2.66, "learning_rate": 1.6359377472114106e-07, "loss": 0.1373, "step": 5454 }, { "epoch": 2.66, "learning_rate": 1.6312515513651316e-07, "loss": 0.1556, "step": 5455 }, { "epoch": 2.66, "learning_rate": 1.6265718506573768e-07, "loss": 0.1464, "step": 5456 }, { "epoch": 2.66, "learning_rate": 1.6218986463888337e-07, "loss": 0.1413, "step": 5457 }, { "epoch": 2.67, "learning_rate": 1.6172319398583797e-07, "loss": 0.1591, "step": 5458 }, { "epoch": 2.67, "learning_rate": 1.6125717323630773e-07, "loss": 0.1477, "step": 5459 }, { "epoch": 2.67, "learning_rate": 1.607918025198199e-07, "loss": 0.1324, "step": 5460 }, { "epoch": 2.67, "learning_rate": 1.603270819657199e-07, "loss": 0.1151, "step": 5461 }, { "epoch": 2.67, "learning_rate": 1.5986301170317215e-07, "loss": 0.1834, "step": 5462 }, { "epoch": 2.67, "learning_rate": 1.5939959186116184e-07, "loss": 0.1717, "step": 5463 }, { "epoch": 2.67, "learning_rate": 1.5893682256849146e-07, "loss": 0.1506, "step": 5464 }, { "epoch": 2.67, "learning_rate": 1.5847470395378457e-07, "loss": 0.1555, "step": 5465 }, { "epoch": 2.67, "learning_rate": 1.5801323614548281e-07, "loss": 0.1529, "step": 5466 }, { "epoch": 2.67, "learning_rate": 1.5755241927184667e-07, "loss": 0.1362, "step": 5467 }, { "epoch": 2.67, "learning_rate": 1.5709225346095618e-07, "loss": 0.1313, "step": 5468 }, { "epoch": 2.67, "learning_rate": 1.5663273884071045e-07, "loss": 0.1579, "step": 5469 }, { "epoch": 2.67, "learning_rate": 1.561738755388284e-07, "loss": 0.1646, "step": 5470 }, { "epoch": 2.67, "learning_rate": 1.5571566368284608e-07, "loss": 0.1681, "step": 5471 }, { "epoch": 2.67, "learning_rate": 1.5525810340011965e-07, "loss": 0.1356, "step": 5472 }, { "epoch": 2.67, "learning_rate": 1.5480119481782378e-07, "loss": 0.135, "step": 5473 }, { "epoch": 2.67, "learning_rate": 1.5434493806295248e-07, "loss": 0.1455, "step": 5474 }, { "epoch": 2.67, "learning_rate": 1.538893332623187e-07, "loss": 0.1599, "step": 5475 }, { "epoch": 2.67, "learning_rate": 1.5343438054255337e-07, "loss": 0.1177, "step": 5476 }, { "epoch": 2.67, "learning_rate": 1.529800800301065e-07, "loss": 0.1687, "step": 5477 }, { "epoch": 2.67, "learning_rate": 1.5252643185124704e-07, "loss": 0.1441, "step": 5478 }, { "epoch": 2.68, "learning_rate": 1.520734361320622e-07, "loss": 0.1693, "step": 5479 }, { "epoch": 2.68, "learning_rate": 1.516210929984588e-07, "loss": 0.1472, "step": 5480 }, { "epoch": 2.68, "learning_rate": 1.5116940257616147e-07, "loss": 0.1262, "step": 5481 }, { "epoch": 2.68, "learning_rate": 1.5071836499071346e-07, "loss": 0.1587, "step": 5482 }, { "epoch": 2.68, "learning_rate": 1.502679803674767e-07, "loss": 0.134, "step": 5483 }, { "epoch": 2.68, "learning_rate": 1.4981824883163132e-07, "loss": 0.1897, "step": 5484 }, { "epoch": 2.68, "learning_rate": 1.4936917050817707e-07, "loss": 0.1573, "step": 5485 }, { "epoch": 2.68, "learning_rate": 1.489207455219302e-07, "loss": 0.1968, "step": 5486 }, { "epoch": 2.68, "learning_rate": 1.484729739975277e-07, "loss": 0.1417, "step": 5487 }, { "epoch": 2.68, "learning_rate": 1.4802585605942332e-07, "loss": 0.1553, "step": 5488 }, { "epoch": 2.68, "learning_rate": 1.475793918318888e-07, "loss": 0.1454, "step": 5489 }, { "epoch": 2.68, "learning_rate": 1.4713358143901624e-07, "loss": 0.1544, "step": 5490 }, { "epoch": 2.68, "learning_rate": 1.4668842500471403e-07, "loss": 0.1527, "step": 5491 }, { "epoch": 2.68, "learning_rate": 1.4624392265270903e-07, "loss": 0.174, "step": 5492 }, { "epoch": 2.68, "learning_rate": 1.458000745065477e-07, "loss": 0.169, "step": 5493 }, { "epoch": 2.68, "learning_rate": 1.4535688068959275e-07, "loss": 0.1456, "step": 5494 }, { "epoch": 2.68, "learning_rate": 1.4491434132502728e-07, "loss": 0.1588, "step": 5495 }, { "epoch": 2.68, "learning_rate": 1.4447245653585047e-07, "loss": 0.161, "step": 5496 }, { "epoch": 2.68, "learning_rate": 1.440312264448801e-07, "loss": 0.1578, "step": 5497 }, { "epoch": 2.68, "learning_rate": 1.435906511747523e-07, "loss": 0.1738, "step": 5498 }, { "epoch": 2.69, "learning_rate": 1.431507308479213e-07, "loss": 0.1338, "step": 5499 }, { "epoch": 2.69, "learning_rate": 1.427114655866593e-07, "loss": 0.1481, "step": 5500 }, { "epoch": 2.69, "learning_rate": 1.4227285551305593e-07, "loss": 0.1796, "step": 5501 }, { "epoch": 2.69, "learning_rate": 1.418349007490194e-07, "loss": 0.1465, "step": 5502 }, { "epoch": 2.69, "learning_rate": 1.4139760141627462e-07, "loss": 0.1279, "step": 5503 }, { "epoch": 2.69, "learning_rate": 1.4096095763636547e-07, "loss": 0.1455, "step": 5504 }, { "epoch": 2.69, "learning_rate": 1.405249695306532e-07, "loss": 0.1564, "step": 5505 }, { "epoch": 2.69, "learning_rate": 1.4008963722031754e-07, "loss": 0.1555, "step": 5506 }, { "epoch": 2.69, "learning_rate": 1.396549608263545e-07, "loss": 0.1413, "step": 5507 }, { "epoch": 2.69, "learning_rate": 1.3922094046957907e-07, "loss": 0.1323, "step": 5508 }, { "epoch": 2.69, "learning_rate": 1.3878757627062256e-07, "loss": 0.1426, "step": 5509 }, { "epoch": 2.69, "learning_rate": 1.3835486834993556e-07, "loss": 0.1682, "step": 5510 }, { "epoch": 2.69, "learning_rate": 1.379228168277852e-07, "loss": 0.1399, "step": 5511 }, { "epoch": 2.69, "learning_rate": 1.3749142182425678e-07, "loss": 0.1531, "step": 5512 }, { "epoch": 2.69, "learning_rate": 1.3706068345925195e-07, "loss": 0.1833, "step": 5513 }, { "epoch": 2.69, "learning_rate": 1.3663060185249126e-07, "loss": 0.1357, "step": 5514 }, { "epoch": 2.69, "learning_rate": 1.3620117712351222e-07, "loss": 0.1868, "step": 5515 }, { "epoch": 2.69, "learning_rate": 1.3577240939166876e-07, "loss": 0.1496, "step": 5516 }, { "epoch": 2.69, "learning_rate": 1.3534429877613443e-07, "loss": 0.1787, "step": 5517 }, { "epoch": 2.69, "learning_rate": 1.3491684539589768e-07, "loss": 0.1579, "step": 5518 }, { "epoch": 2.69, "learning_rate": 1.3449004936976623e-07, "loss": 0.1548, "step": 5519 }, { "epoch": 2.7, "learning_rate": 1.3406391081636384e-07, "loss": 0.1531, "step": 5520 }, { "epoch": 2.7, "learning_rate": 1.336384298541321e-07, "loss": 0.1922, "step": 5521 }, { "epoch": 2.7, "learning_rate": 1.3321360660132925e-07, "loss": 0.1406, "step": 5522 }, { "epoch": 2.7, "learning_rate": 1.3278944117603192e-07, "loss": 0.1817, "step": 5523 }, { "epoch": 2.7, "learning_rate": 1.3236593369613305e-07, "loss": 0.1963, "step": 5524 }, { "epoch": 2.7, "learning_rate": 1.319430842793426e-07, "loss": 0.1558, "step": 5525 }, { "epoch": 2.7, "learning_rate": 1.31520893043188e-07, "loss": 0.1393, "step": 5526 }, { "epoch": 2.7, "learning_rate": 1.3109936010501367e-07, "loss": 0.1455, "step": 5527 }, { "epoch": 2.7, "learning_rate": 1.306784855819801e-07, "loss": 0.1827, "step": 5528 }, { "epoch": 2.7, "learning_rate": 1.3025826959106673e-07, "loss": 0.1396, "step": 5529 }, { "epoch": 2.7, "learning_rate": 1.298387122490688e-07, "loss": 0.1437, "step": 5530 }, { "epoch": 2.7, "learning_rate": 1.2941981367259825e-07, "loss": 0.1951, "step": 5531 }, { "epoch": 2.7, "learning_rate": 1.290015739780845e-07, "loss": 0.1267, "step": 5532 }, { "epoch": 2.7, "learning_rate": 1.2858399328177312e-07, "loss": 0.1883, "step": 5533 }, { "epoch": 2.7, "learning_rate": 1.281670716997277e-07, "loss": 0.1539, "step": 5534 }, { "epoch": 2.7, "learning_rate": 1.2775080934782718e-07, "loss": 0.1687, "step": 5535 }, { "epoch": 2.7, "learning_rate": 1.2733520634176877e-07, "loss": 0.1622, "step": 5536 }, { "epoch": 2.7, "learning_rate": 1.269202627970656e-07, "loss": 0.1703, "step": 5537 }, { "epoch": 2.7, "learning_rate": 1.265059788290468e-07, "loss": 0.135, "step": 5538 }, { "epoch": 2.7, "learning_rate": 1.2609235455285973e-07, "loss": 0.1444, "step": 5539 }, { "epoch": 2.71, "learning_rate": 1.256793900834677e-07, "loss": 0.2059, "step": 5540 }, { "epoch": 2.71, "learning_rate": 1.252670855356497e-07, "loss": 0.1496, "step": 5541 }, { "epoch": 2.71, "learning_rate": 1.2485544102400298e-07, "loss": 0.1445, "step": 5542 }, { "epoch": 2.71, "learning_rate": 1.244444566629399e-07, "loss": 0.1257, "step": 5543 }, { "epoch": 2.71, "learning_rate": 1.2403413256669078e-07, "loss": 0.152, "step": 5544 }, { "epoch": 2.71, "learning_rate": 1.236244688493013e-07, "loss": 0.1372, "step": 5545 }, { "epoch": 2.71, "learning_rate": 1.2321546562463344e-07, "loss": 0.1577, "step": 5546 }, { "epoch": 2.71, "learning_rate": 1.22807123006366e-07, "loss": 0.197, "step": 5547 }, { "epoch": 2.71, "learning_rate": 1.2239944110799458e-07, "loss": 0.1647, "step": 5548 }, { "epoch": 2.71, "learning_rate": 1.219924200428313e-07, "loss": 0.1496, "step": 5549 }, { "epoch": 2.71, "learning_rate": 1.2158605992400346e-07, "loss": 0.1554, "step": 5550 }, { "epoch": 2.71, "learning_rate": 1.2118036086445545e-07, "loss": 0.1702, "step": 5551 }, { "epoch": 2.71, "learning_rate": 1.2077532297694734e-07, "loss": 0.1274, "step": 5552 }, { "epoch": 2.71, "learning_rate": 1.20370946374056e-07, "loss": 0.1616, "step": 5553 }, { "epoch": 2.71, "learning_rate": 1.1996723116817516e-07, "loss": 0.1338, "step": 5554 }, { "epoch": 2.71, "learning_rate": 1.195641774715134e-07, "loss": 0.2013, "step": 5555 }, { "epoch": 2.71, "learning_rate": 1.191617853960958e-07, "loss": 0.1372, "step": 5556 }, { "epoch": 2.71, "learning_rate": 1.1876005505376376e-07, "loss": 0.1603, "step": 5557 }, { "epoch": 2.71, "learning_rate": 1.1835898655617434e-07, "loss": 0.1561, "step": 5558 }, { "epoch": 2.71, "learning_rate": 1.179585800148017e-07, "loss": 0.1685, "step": 5559 }, { "epoch": 2.71, "learning_rate": 1.1755883554093517e-07, "loss": 0.1469, "step": 5560 }, { "epoch": 2.72, "learning_rate": 1.1715975324568002e-07, "loss": 0.1672, "step": 5561 }, { "epoch": 2.72, "learning_rate": 1.167613332399578e-07, "loss": 0.1294, "step": 5562 }, { "epoch": 2.72, "learning_rate": 1.1636357563450551e-07, "loss": 0.1755, "step": 5563 }, { "epoch": 2.72, "learning_rate": 1.1596648053987691e-07, "loss": 0.1629, "step": 5564 }, { "epoch": 2.72, "learning_rate": 1.1557004806644068e-07, "loss": 0.1791, "step": 5565 }, { "epoch": 2.72, "learning_rate": 1.15174278324382e-07, "loss": 0.1731, "step": 5566 }, { "epoch": 2.72, "learning_rate": 1.1477917142370176e-07, "loss": 0.1634, "step": 5567 }, { "epoch": 2.72, "learning_rate": 1.1438472747421575e-07, "loss": 0.1348, "step": 5568 }, { "epoch": 2.72, "learning_rate": 1.139909465855571e-07, "loss": 0.1651, "step": 5569 }, { "epoch": 2.72, "learning_rate": 1.1359782886717324e-07, "loss": 0.1532, "step": 5570 }, { "epoch": 2.72, "learning_rate": 1.1320537442832735e-07, "loss": 0.1414, "step": 5571 }, { "epoch": 2.72, "learning_rate": 1.1281358337809995e-07, "loss": 0.1777, "step": 5572 }, { "epoch": 2.72, "learning_rate": 1.1242245582538447e-07, "loss": 0.1728, "step": 5573 }, { "epoch": 2.72, "learning_rate": 1.1203199187889257e-07, "loss": 0.1739, "step": 5574 }, { "epoch": 2.72, "learning_rate": 1.1164219164714996e-07, "loss": 0.1502, "step": 5575 }, { "epoch": 2.72, "learning_rate": 1.1125305523849772e-07, "loss": 0.1588, "step": 5576 }, { "epoch": 2.72, "learning_rate": 1.1086458276109325e-07, "loss": 0.1609, "step": 5577 }, { "epoch": 2.72, "learning_rate": 1.1047677432290876e-07, "loss": 0.1235, "step": 5578 }, { "epoch": 2.72, "learning_rate": 1.1008963003173307e-07, "loss": 0.1612, "step": 5579 }, { "epoch": 2.72, "learning_rate": 1.0970314999516868e-07, "loss": 0.1525, "step": 5580 }, { "epoch": 2.73, "learning_rate": 1.0931733432063468e-07, "loss": 0.1405, "step": 5581 }, { "epoch": 2.73, "learning_rate": 1.0893218311536501e-07, "loss": 0.146, "step": 5582 }, { "epoch": 2.73, "learning_rate": 1.0854769648640879e-07, "loss": 0.1877, "step": 5583 }, { "epoch": 2.73, "learning_rate": 1.0816387454063104e-07, "loss": 0.1585, "step": 5584 }, { "epoch": 2.73, "learning_rate": 1.0778071738471202e-07, "loss": 0.1427, "step": 5585 }, { "epoch": 2.73, "learning_rate": 1.0739822512514653e-07, "loss": 0.1572, "step": 5586 }, { "epoch": 2.73, "learning_rate": 1.0701639786824508e-07, "loss": 0.15, "step": 5587 }, { "epoch": 2.73, "learning_rate": 1.066352357201328e-07, "loss": 0.1599, "step": 5588 }, { "epoch": 2.73, "learning_rate": 1.0625473878675047e-07, "loss": 0.158, "step": 5589 }, { "epoch": 2.73, "learning_rate": 1.0587490717385462e-07, "loss": 0.1285, "step": 5590 }, { "epoch": 2.73, "learning_rate": 1.0549574098701526e-07, "loss": 0.1296, "step": 5591 }, { "epoch": 2.73, "learning_rate": 1.051172403316189e-07, "loss": 0.1711, "step": 5592 }, { "epoch": 2.73, "learning_rate": 1.0473940531286558e-07, "loss": 0.1344, "step": 5593 }, { "epoch": 2.73, "learning_rate": 1.0436223603577239e-07, "loss": 0.1383, "step": 5594 }, { "epoch": 2.73, "learning_rate": 1.0398573260516937e-07, "loss": 0.1691, "step": 5595 }, { "epoch": 2.73, "learning_rate": 1.036098951257028e-07, "loss": 0.1511, "step": 5596 }, { "epoch": 2.73, "learning_rate": 1.032347237018333e-07, "loss": 0.1759, "step": 5597 }, { "epoch": 2.73, "learning_rate": 1.0286021843783606e-07, "loss": 0.1722, "step": 5598 }, { "epoch": 2.73, "learning_rate": 1.0248637943780198e-07, "loss": 0.1426, "step": 5599 }, { "epoch": 2.73, "learning_rate": 1.0211320680563658e-07, "loss": 0.1732, "step": 5600 }, { "epoch": 2.73, "learning_rate": 1.0174070064505909e-07, "loss": 0.1487, "step": 5601 }, { "epoch": 2.74, "learning_rate": 1.0136886105960503e-07, "loss": 0.1202, "step": 5602 }, { "epoch": 2.74, "learning_rate": 1.0099768815262367e-07, "loss": 0.1578, "step": 5603 }, { "epoch": 2.74, "learning_rate": 1.0062718202727944e-07, "loss": 0.1515, "step": 5604 }, { "epoch": 2.74, "learning_rate": 1.0025734278655108e-07, "loss": 0.1752, "step": 5605 }, { "epoch": 2.74, "learning_rate": 9.988817053323246e-08, "loss": 0.1631, "step": 5606 }, { "epoch": 2.74, "learning_rate": 9.951966536993097e-08, "loss": 0.1604, "step": 5607 }, { "epoch": 2.74, "learning_rate": 9.91518273990702e-08, "loss": 0.1713, "step": 5608 }, { "epoch": 2.74, "learning_rate": 9.878465672288757e-08, "loss": 0.1534, "step": 5609 }, { "epoch": 2.74, "learning_rate": 9.841815344343419e-08, "loss": 0.1623, "step": 5610 }, { "epoch": 2.74, "learning_rate": 9.805231766257722e-08, "loss": 0.1386, "step": 5611 }, { "epoch": 2.74, "learning_rate": 9.76871494819967e-08, "loss": 0.1885, "step": 5612 }, { "epoch": 2.74, "learning_rate": 9.732264900318866e-08, "loss": 0.1569, "step": 5613 }, { "epoch": 2.74, "learning_rate": 9.695881632746235e-08, "loss": 0.1535, "step": 5614 }, { "epoch": 2.74, "learning_rate": 9.659565155594214e-08, "loss": 0.2038, "step": 5615 }, { "epoch": 2.74, "learning_rate": 9.623315478956646e-08, "loss": 0.1487, "step": 5616 }, { "epoch": 2.74, "learning_rate": 9.587132612908778e-08, "loss": 0.1558, "step": 5617 }, { "epoch": 2.74, "learning_rate": 9.551016567507371e-08, "loss": 0.1535, "step": 5618 }, { "epoch": 2.74, "learning_rate": 9.514967352790532e-08, "loss": 0.1718, "step": 5619 }, { "epoch": 2.74, "learning_rate": 9.478984978777778e-08, "loss": 0.1615, "step": 5620 }, { "epoch": 2.74, "learning_rate": 9.443069455470217e-08, "loss": 0.1338, "step": 5621 }, { "epoch": 2.75, "learning_rate": 9.407220792850114e-08, "loss": 0.1455, "step": 5622 }, { "epoch": 2.75, "learning_rate": 9.371439000881388e-08, "loss": 0.1693, "step": 5623 }, { "epoch": 2.75, "learning_rate": 9.335724089509252e-08, "loss": 0.1522, "step": 5624 }, { "epoch": 2.75, "learning_rate": 9.300076068660319e-08, "loss": 0.1965, "step": 5625 }, { "epoch": 2.75, "learning_rate": 9.264494948242692e-08, "loss": 0.153, "step": 5626 }, { "epoch": 2.75, "learning_rate": 9.228980738145793e-08, "loss": 0.1833, "step": 5627 }, { "epoch": 2.75, "learning_rate": 9.193533448240505e-08, "loss": 0.1432, "step": 5628 }, { "epoch": 2.75, "learning_rate": 9.158153088379085e-08, "loss": 0.1579, "step": 5629 }, { "epoch": 2.75, "learning_rate": 9.122839668395194e-08, "loss": 0.1558, "step": 5630 }, { "epoch": 2.75, "learning_rate": 9.087593198103872e-08, "loss": 0.1414, "step": 5631 }, { "epoch": 2.75, "learning_rate": 9.052413687301559e-08, "loss": 0.1512, "step": 5632 }, { "epoch": 2.75, "learning_rate": 9.017301145766155e-08, "loss": 0.1536, "step": 5633 }, { "epoch": 2.75, "learning_rate": 8.982255583256827e-08, "loss": 0.1575, "step": 5634 }, { "epoch": 2.75, "learning_rate": 8.947277009514199e-08, "loss": 0.1619, "step": 5635 }, { "epoch": 2.75, "learning_rate": 8.912365434260272e-08, "loss": 0.1534, "step": 5636 }, { "epoch": 2.75, "learning_rate": 8.877520867198341e-08, "loss": 0.1799, "step": 5637 }, { "epoch": 2.75, "learning_rate": 8.842743318013214e-08, "loss": 0.1423, "step": 5638 }, { "epoch": 2.75, "learning_rate": 8.808032796371019e-08, "loss": 0.1519, "step": 5639 }, { "epoch": 2.75, "learning_rate": 8.773389311919206e-08, "loss": 0.1726, "step": 5640 }, { "epoch": 2.75, "learning_rate": 8.738812874286656e-08, "loss": 0.1383, "step": 5641 }, { "epoch": 2.75, "learning_rate": 8.704303493083516e-08, "loss": 0.1579, "step": 5642 }, { "epoch": 2.76, "learning_rate": 8.669861177901445e-08, "loss": 0.1306, "step": 5643 }, { "epoch": 2.76, "learning_rate": 8.635485938313342e-08, "loss": 0.1722, "step": 5644 }, { "epoch": 2.76, "learning_rate": 8.60117778387351e-08, "loss": 0.1727, "step": 5645 }, { "epoch": 2.76, "learning_rate": 8.566936724117625e-08, "loss": 0.174, "step": 5646 }, { "epoch": 2.76, "learning_rate": 8.53276276856263e-08, "loss": 0.1643, "step": 5647 }, { "epoch": 2.76, "learning_rate": 8.498655926706956e-08, "loss": 0.1571, "step": 5648 }, { "epoch": 2.76, "learning_rate": 8.464616208030269e-08, "loss": 0.1716, "step": 5649 }, { "epoch": 2.76, "learning_rate": 8.430643621993529e-08, "loss": 0.1583, "step": 5650 }, { "epoch": 2.76, "learning_rate": 8.396738178039237e-08, "loss": 0.1868, "step": 5651 }, { "epoch": 2.76, "learning_rate": 8.362899885591052e-08, "loss": 0.1504, "step": 5652 }, { "epoch": 2.76, "learning_rate": 8.329128754054061e-08, "loss": 0.1391, "step": 5653 }, { "epoch": 2.76, "learning_rate": 8.295424792814616e-08, "loss": 0.1598, "step": 5654 }, { "epoch": 2.76, "learning_rate": 8.261788011240473e-08, "loss": 0.1547, "step": 5655 }, { "epoch": 2.76, "learning_rate": 8.228218418680628e-08, "loss": 0.127, "step": 5656 }, { "epoch": 2.76, "learning_rate": 8.194716024465505e-08, "loss": 0.1406, "step": 5657 }, { "epoch": 2.76, "learning_rate": 8.16128083790682e-08, "loss": 0.123, "step": 5658 }, { "epoch": 2.76, "learning_rate": 8.127912868297533e-08, "loss": 0.1716, "step": 5659 }, { "epoch": 2.76, "learning_rate": 8.094612124911999e-08, "loss": 0.1755, "step": 5660 }, { "epoch": 2.76, "learning_rate": 8.061378617005871e-08, "loss": 0.148, "step": 5661 }, { "epoch": 2.76, "learning_rate": 8.028212353816095e-08, "loss": 0.138, "step": 5662 }, { "epoch": 2.77, "learning_rate": 7.995113344560961e-08, "loss": 0.162, "step": 5663 }, { "epoch": 2.77, "learning_rate": 7.962081598440058e-08, "loss": 0.1583, "step": 5664 }, { "epoch": 2.77, "learning_rate": 7.929117124634261e-08, "loss": 0.1592, "step": 5665 }, { "epoch": 2.77, "learning_rate": 7.896219932305744e-08, "loss": 0.1858, "step": 5666 }, { "epoch": 2.77, "learning_rate": 7.863390030597968e-08, "loss": 0.1546, "step": 5667 }, { "epoch": 2.77, "learning_rate": 7.830627428635773e-08, "loss": 0.1313, "step": 5668 }, { "epoch": 2.77, "learning_rate": 7.79793213552521e-08, "loss": 0.1681, "step": 5669 }, { "epoch": 2.77, "learning_rate": 7.765304160353676e-08, "loss": 0.1331, "step": 5670 }, { "epoch": 2.77, "learning_rate": 7.732743512189777e-08, "loss": 0.151, "step": 5671 }, { "epoch": 2.77, "learning_rate": 7.70025020008347e-08, "loss": 0.1502, "step": 5672 }, { "epoch": 2.77, "learning_rate": 7.667824233066029e-08, "loss": 0.1446, "step": 5673 }, { "epoch": 2.77, "learning_rate": 7.63546562014994e-08, "loss": 0.1709, "step": 5674 }, { "epoch": 2.77, "learning_rate": 7.603174370328981e-08, "loss": 0.1241, "step": 5675 }, { "epoch": 2.77, "learning_rate": 7.570950492578277e-08, "loss": 0.142, "step": 5676 }, { "epoch": 2.77, "learning_rate": 7.538793995854083e-08, "loss": 0.1538, "step": 5677 }, { "epoch": 2.77, "learning_rate": 7.50670488909408e-08, "loss": 0.1528, "step": 5678 }, { "epoch": 2.77, "learning_rate": 7.474683181217162e-08, "loss": 0.1648, "step": 5679 }, { "epoch": 2.77, "learning_rate": 7.442728881123406e-08, "loss": 0.1637, "step": 5680 }, { "epoch": 2.77, "learning_rate": 7.410841997694319e-08, "loss": 0.184, "step": 5681 }, { "epoch": 2.77, "learning_rate": 7.37902253979253e-08, "loss": 0.1891, "step": 5682 }, { "epoch": 2.77, "learning_rate": 7.347270516261995e-08, "loss": 0.139, "step": 5683 }, { "epoch": 2.78, "learning_rate": 7.315585935927904e-08, "loss": 0.1433, "step": 5684 }, { "epoch": 2.78, "learning_rate": 7.283968807596714e-08, "loss": 0.1282, "step": 5685 }, { "epoch": 2.78, "learning_rate": 7.252419140056089e-08, "loss": 0.168, "step": 5686 }, { "epoch": 2.78, "learning_rate": 7.22093694207504e-08, "loss": 0.1565, "step": 5687 }, { "epoch": 2.78, "learning_rate": 7.189522222403767e-08, "loss": 0.1824, "step": 5688 }, { "epoch": 2.78, "learning_rate": 7.15817498977367e-08, "loss": 0.1352, "step": 5689 }, { "epoch": 2.78, "learning_rate": 7.126895252897476e-08, "loss": 0.1466, "step": 5690 }, { "epoch": 2.78, "learning_rate": 7.095683020469091e-08, "loss": 0.1803, "step": 5691 }, { "epoch": 2.78, "learning_rate": 7.064538301163659e-08, "loss": 0.1646, "step": 5692 }, { "epoch": 2.78, "learning_rate": 7.033461103637618e-08, "loss": 0.1578, "step": 5693 }, { "epoch": 2.78, "learning_rate": 7.002451436528613e-08, "loss": 0.1725, "step": 5694 }, { "epoch": 2.78, "learning_rate": 6.971509308455499e-08, "loss": 0.1614, "step": 5695 }, { "epoch": 2.78, "learning_rate": 6.940634728018342e-08, "loss": 0.167, "step": 5696 }, { "epoch": 2.78, "learning_rate": 6.909827703798472e-08, "loss": 0.1844, "step": 5697 }, { "epoch": 2.78, "learning_rate": 6.879088244358456e-08, "loss": 0.1389, "step": 5698 }, { "epoch": 2.78, "learning_rate": 6.848416358242016e-08, "loss": 0.1508, "step": 5699 }, { "epoch": 2.78, "learning_rate": 6.817812053974165e-08, "loss": 0.1532, "step": 5700 }, { "epoch": 2.78, "learning_rate": 6.7872753400611e-08, "loss": 0.1324, "step": 5701 }, { "epoch": 2.78, "learning_rate": 6.756806224990198e-08, "loss": 0.1811, "step": 5702 }, { "epoch": 2.78, "learning_rate": 6.726404717230156e-08, "loss": 0.1727, "step": 5703 }, { "epoch": 2.79, "learning_rate": 6.696070825230716e-08, "loss": 0.1277, "step": 5704 }, { "epoch": 2.79, "learning_rate": 6.665804557422967e-08, "loss": 0.1697, "step": 5705 }, { "epoch": 2.79, "learning_rate": 6.635605922219152e-08, "loss": 0.149, "step": 5706 }, { "epoch": 2.79, "learning_rate": 6.605474928012722e-08, "loss": 0.1617, "step": 5707 }, { "epoch": 2.79, "learning_rate": 6.575411583178287e-08, "loss": 0.1667, "step": 5708 }, { "epoch": 2.79, "learning_rate": 6.545415896071744e-08, "loss": 0.1193, "step": 5709 }, { "epoch": 2.79, "learning_rate": 6.515487875030063e-08, "loss": 0.1597, "step": 5710 }, { "epoch": 2.79, "learning_rate": 6.485627528371508e-08, "loss": 0.1882, "step": 5711 }, { "epoch": 2.79, "learning_rate": 6.45583486439555e-08, "loss": 0.1784, "step": 5712 }, { "epoch": 2.79, "learning_rate": 6.426109891382731e-08, "loss": 0.1844, "step": 5713 }, { "epoch": 2.79, "learning_rate": 6.396452617594861e-08, "loss": 0.1693, "step": 5714 }, { "epoch": 2.79, "learning_rate": 6.36686305127493e-08, "loss": 0.1785, "step": 5715 }, { "epoch": 2.79, "learning_rate": 6.33734120064708e-08, "loss": 0.1683, "step": 5716 }, { "epoch": 2.79, "learning_rate": 6.307887073916663e-08, "loss": 0.1715, "step": 5717 }, { "epoch": 2.79, "learning_rate": 6.278500679270216e-08, "loss": 0.2071, "step": 5718 }, { "epoch": 2.79, "learning_rate": 6.249182024875428e-08, "loss": 0.153, "step": 5719 }, { "epoch": 2.79, "learning_rate": 6.219931118881111e-08, "loss": 0.1605, "step": 5720 }, { "epoch": 2.79, "learning_rate": 6.19074796941735e-08, "loss": 0.1546, "step": 5721 }, { "epoch": 2.79, "learning_rate": 6.161632584595317e-08, "loss": 0.1568, "step": 5722 }, { "epoch": 2.79, "learning_rate": 6.132584972507349e-08, "loss": 0.1674, "step": 5723 }, { "epoch": 2.79, "learning_rate": 6.10360514122707e-08, "loss": 0.1403, "step": 5724 }, { "epoch": 2.8, "learning_rate": 6.074693098809092e-08, "loss": 0.1574, "step": 5725 }, { "epoch": 2.8, "learning_rate": 6.045848853289265e-08, "loss": 0.1404, "step": 5726 }, { "epoch": 2.8, "learning_rate": 6.017072412684621e-08, "loss": 0.1456, "step": 5727 }, { "epoch": 2.8, "learning_rate": 5.988363784993318e-08, "loss": 0.1706, "step": 5728 }, { "epoch": 2.8, "learning_rate": 5.959722978194638e-08, "loss": 0.1427, "step": 5729 }, { "epoch": 2.8, "learning_rate": 5.9311500002491026e-08, "loss": 0.1783, "step": 5730 }, { "epoch": 2.8, "learning_rate": 5.9026448590982497e-08, "loss": 0.1642, "step": 5731 }, { "epoch": 2.8, "learning_rate": 5.8742075626648797e-08, "loss": 0.149, "step": 5732 }, { "epoch": 2.8, "learning_rate": 5.845838118852865e-08, "loss": 0.156, "step": 5733 }, { "epoch": 2.8, "learning_rate": 5.817536535547258e-08, "loss": 0.1639, "step": 5734 }, { "epoch": 2.8, "learning_rate": 5.789302820614212e-08, "loss": 0.1612, "step": 5735 }, { "epoch": 2.8, "learning_rate": 5.761136981901061e-08, "loss": 0.1541, "step": 5736 }, { "epoch": 2.8, "learning_rate": 5.7330390272362647e-08, "loss": 0.1626, "step": 5737 }, { "epoch": 2.8, "learning_rate": 5.7050089644293815e-08, "loss": 0.1578, "step": 5738 }, { "epoch": 2.8, "learning_rate": 5.6770468012711524e-08, "loss": 0.1478, "step": 5739 }, { "epoch": 2.8, "learning_rate": 5.649152545533332e-08, "loss": 0.1442, "step": 5740 }, { "epoch": 2.8, "learning_rate": 5.621326204968996e-08, "loss": 0.1613, "step": 5741 }, { "epoch": 2.8, "learning_rate": 5.593567787312154e-08, "loss": 0.186, "step": 5742 }, { "epoch": 2.8, "learning_rate": 5.56587730027805e-08, "loss": 0.1565, "step": 5743 }, { "epoch": 2.8, "learning_rate": 5.5382547515629995e-08, "loss": 0.1776, "step": 5744 }, { "epoch": 2.81, "learning_rate": 5.510700148844444e-08, "loss": 0.1645, "step": 5745 }, { "epoch": 2.81, "learning_rate": 5.483213499780926e-08, "loss": 0.205, "step": 5746 }, { "epoch": 2.81, "learning_rate": 5.455794812012138e-08, "loss": 0.1705, "step": 5747 }, { "epoch": 2.81, "learning_rate": 5.428444093158902e-08, "loss": 0.1254, "step": 5748 }, { "epoch": 2.81, "learning_rate": 5.4011613508230806e-08, "loss": 0.1816, "step": 5749 }, { "epoch": 2.81, "learning_rate": 5.3739465925876364e-08, "loss": 0.1486, "step": 5750 }, { "epoch": 2.81, "learning_rate": 5.346799826016713e-08, "loss": 0.1955, "step": 5751 }, { "epoch": 2.81, "learning_rate": 5.319721058655497e-08, "loss": 0.1478, "step": 5752 }, { "epoch": 2.81, "learning_rate": 5.2927102980303014e-08, "loss": 0.1467, "step": 5753 }, { "epoch": 2.81, "learning_rate": 5.265767551648565e-08, "loss": 0.1895, "step": 5754 }, { "epoch": 2.81, "learning_rate": 5.238892826998715e-08, "loss": 0.1697, "step": 5755 }, { "epoch": 2.81, "learning_rate": 5.212086131550387e-08, "loss": 0.1415, "step": 5756 }, { "epoch": 2.81, "learning_rate": 5.1853474727542876e-08, "loss": 0.1538, "step": 5757 }, { "epoch": 2.81, "learning_rate": 5.158676858042167e-08, "loss": 0.1636, "step": 5758 }, { "epoch": 2.81, "learning_rate": 5.132074294826844e-08, "loss": 0.1721, "step": 5759 }, { "epoch": 2.81, "learning_rate": 5.105539790502351e-08, "loss": 0.2172, "step": 5760 }, { "epoch": 2.81, "learning_rate": 5.0790733524436756e-08, "loss": 0.1732, "step": 5761 }, { "epoch": 2.81, "learning_rate": 5.052674988006962e-08, "loss": 0.1558, "step": 5762 }, { "epoch": 2.81, "learning_rate": 5.02634470452934e-08, "loss": 0.2193, "step": 5763 }, { "epoch": 2.81, "learning_rate": 5.0000825093291513e-08, "loss": 0.1535, "step": 5764 }, { "epoch": 2.81, "learning_rate": 4.973888409705696e-08, "loss": 0.1841, "step": 5765 }, { "epoch": 2.82, "learning_rate": 4.9477624129394e-08, "loss": 0.1827, "step": 5766 }, { "epoch": 2.82, "learning_rate": 4.921704526291815e-08, "loss": 0.1536, "step": 5767 }, { "epoch": 2.82, "learning_rate": 4.895714757005426e-08, "loss": 0.1647, "step": 5768 }, { "epoch": 2.82, "learning_rate": 4.8697931123039e-08, "loss": 0.1738, "step": 5769 }, { "epoch": 2.82, "learning_rate": 4.843939599391917e-08, "loss": 0.168, "step": 5770 }, { "epoch": 2.82, "learning_rate": 4.8181542254552016e-08, "loss": 0.1602, "step": 5771 }, { "epoch": 2.82, "learning_rate": 4.792436997660605e-08, "loss": 0.182, "step": 5772 }, { "epoch": 2.82, "learning_rate": 4.766787923156046e-08, "loss": 0.1808, "step": 5773 }, { "epoch": 2.82, "learning_rate": 4.741207009070409e-08, "loss": 0.163, "step": 5774 }, { "epoch": 2.82, "learning_rate": 4.715694262513698e-08, "loss": 0.1765, "step": 5775 }, { "epoch": 2.82, "learning_rate": 4.6902496905769366e-08, "loss": 0.1556, "step": 5776 }, { "epoch": 2.82, "learning_rate": 4.664873300332273e-08, "loss": 0.1829, "step": 5777 }, { "epoch": 2.82, "learning_rate": 4.63956509883276e-08, "loss": 0.1824, "step": 5778 }, { "epoch": 2.82, "learning_rate": 4.614325093112687e-08, "loss": 0.1471, "step": 5779 }, { "epoch": 2.82, "learning_rate": 4.589153290187276e-08, "loss": 0.1413, "step": 5780 }, { "epoch": 2.82, "learning_rate": 4.5640496970527356e-08, "loss": 0.1378, "step": 5781 }, { "epoch": 2.82, "learning_rate": 4.5390143206864855e-08, "loss": 0.1398, "step": 5782 }, { "epoch": 2.82, "learning_rate": 4.514047168046792e-08, "loss": 0.1548, "step": 5783 }, { "epoch": 2.82, "learning_rate": 4.4891482460731326e-08, "loss": 0.1629, "step": 5784 }, { "epoch": 2.82, "learning_rate": 4.464317561685944e-08, "loss": 0.1638, "step": 5785 }, { "epoch": 2.83, "learning_rate": 4.4395551217866206e-08, "loss": 0.1405, "step": 5786 }, { "epoch": 2.83, "learning_rate": 4.414860933257742e-08, "loss": 0.1757, "step": 5787 }, { "epoch": 2.83, "learning_rate": 4.390235002962817e-08, "loss": 0.1573, "step": 5788 }, { "epoch": 2.83, "learning_rate": 4.365677337746399e-08, "loss": 0.1612, "step": 5789 }, { "epoch": 2.83, "learning_rate": 4.3411879444340835e-08, "loss": 0.1485, "step": 5790 }, { "epoch": 2.83, "learning_rate": 4.3167668298324805e-08, "loss": 0.1521, "step": 5791 }, { "epoch": 2.83, "learning_rate": 4.292414000729217e-08, "loss": 0.1483, "step": 5792 }, { "epoch": 2.83, "learning_rate": 4.2681294638929616e-08, "loss": 0.1853, "step": 5793 }, { "epoch": 2.83, "learning_rate": 4.243913226073398e-08, "loss": 0.1951, "step": 5794 }, { "epoch": 2.83, "learning_rate": 4.2197652940011714e-08, "loss": 0.1714, "step": 5795 }, { "epoch": 2.83, "learning_rate": 4.1956856743880226e-08, "loss": 0.1844, "step": 5796 }, { "epoch": 2.83, "learning_rate": 4.171674373926654e-08, "loss": 0.1491, "step": 5797 }, { "epoch": 2.83, "learning_rate": 4.1477313992908384e-08, "loss": 0.1493, "step": 5798 }, { "epoch": 2.83, "learning_rate": 4.123856757135281e-08, "loss": 0.1775, "step": 5799 }, { "epoch": 2.83, "learning_rate": 4.100050454095727e-08, "loss": 0.1743, "step": 5800 }, { "epoch": 2.83, "learning_rate": 4.076312496788914e-08, "loss": 0.1873, "step": 5801 }, { "epoch": 2.83, "learning_rate": 4.0526428918126195e-08, "loss": 0.1722, "step": 5802 }, { "epoch": 2.83, "learning_rate": 4.029041645745607e-08, "loss": 0.143, "step": 5803 }, { "epoch": 2.83, "learning_rate": 4.0055087651476586e-08, "loss": 0.1438, "step": 5804 }, { "epoch": 2.83, "learning_rate": 3.982044256559459e-08, "loss": 0.1196, "step": 5805 }, { "epoch": 2.83, "learning_rate": 3.9586481265028456e-08, "loss": 0.2011, "step": 5806 }, { "epoch": 2.84, "learning_rate": 3.935320381480506e-08, "loss": 0.1392, "step": 5807 }, { "epoch": 2.84, "learning_rate": 3.9120610279762e-08, "loss": 0.1615, "step": 5808 }, { "epoch": 2.84, "learning_rate": 3.888870072454698e-08, "loss": 0.188, "step": 5809 }, { "epoch": 2.84, "learning_rate": 3.865747521361651e-08, "loss": 0.1563, "step": 5810 }, { "epoch": 2.84, "learning_rate": 3.842693381123835e-08, "loss": 0.1699, "step": 5811 }, { "epoch": 2.84, "learning_rate": 3.81970765814893e-08, "loss": 0.1687, "step": 5812 }, { "epoch": 2.84, "learning_rate": 3.796790358825575e-08, "loss": 0.1591, "step": 5813 }, { "epoch": 2.84, "learning_rate": 3.7739414895234804e-08, "loss": 0.1676, "step": 5814 }, { "epoch": 2.84, "learning_rate": 3.751161056593261e-08, "loss": 0.1718, "step": 5815 }, { "epoch": 2.84, "learning_rate": 3.7284490663665464e-08, "loss": 0.1894, "step": 5816 }, { "epoch": 2.84, "learning_rate": 3.7058055251559535e-08, "loss": 0.1916, "step": 5817 }, { "epoch": 2.84, "learning_rate": 3.683230439255059e-08, "loss": 0.1611, "step": 5818 }, { "epoch": 2.84, "learning_rate": 3.660723814938344e-08, "loss": 0.1367, "step": 5819 }, { "epoch": 2.84, "learning_rate": 3.63828565846136e-08, "loss": 0.1777, "step": 5820 }, { "epoch": 2.84, "learning_rate": 3.615915976060674e-08, "loss": 0.1715, "step": 5821 }, { "epoch": 2.84, "learning_rate": 3.593614773953646e-08, "loss": 0.1601, "step": 5822 }, { "epoch": 2.84, "learning_rate": 3.571382058338735e-08, "loss": 0.173, "step": 5823 }, { "epoch": 2.84, "learning_rate": 3.54921783539533e-08, "loss": 0.1768, "step": 5824 }, { "epoch": 2.84, "learning_rate": 3.527122111283754e-08, "loss": 0.1466, "step": 5825 }, { "epoch": 2.84, "learning_rate": 3.5050948921453445e-08, "loss": 0.1475, "step": 5826 }, { "epoch": 2.85, "learning_rate": 3.483136184102398e-08, "loss": 0.2041, "step": 5827 }, { "epoch": 2.85, "learning_rate": 3.461245993258117e-08, "loss": 0.1507, "step": 5828 }, { "epoch": 2.85, "learning_rate": 3.4394243256966885e-08, "loss": 0.181, "step": 5829 }, { "epoch": 2.85, "learning_rate": 3.417671187483235e-08, "loss": 0.1262, "step": 5830 }, { "epoch": 2.85, "learning_rate": 3.3959865846638906e-08, "loss": 0.1674, "step": 5831 }, { "epoch": 2.85, "learning_rate": 3.3743705232656687e-08, "loss": 0.1532, "step": 5832 }, { "epoch": 2.85, "learning_rate": 3.3528230092965686e-08, "loss": 0.1561, "step": 5833 }, { "epoch": 2.85, "learning_rate": 3.3313440487455493e-08, "loss": 0.2179, "step": 5834 }, { "epoch": 2.85, "learning_rate": 3.3099336475825025e-08, "loss": 0.1714, "step": 5835 }, { "epoch": 2.85, "learning_rate": 3.2885918117582236e-08, "loss": 0.1685, "step": 5836 }, { "epoch": 2.85, "learning_rate": 3.267318547204551e-08, "loss": 0.1801, "step": 5837 }, { "epoch": 2.85, "learning_rate": 3.2461138598341155e-08, "loss": 0.1451, "step": 5838 }, { "epoch": 2.85, "learning_rate": 3.224977755540648e-08, "loss": 0.1797, "step": 5839 }, { "epoch": 2.85, "learning_rate": 3.2039102401987264e-08, "loss": 0.1744, "step": 5840 }, { "epoch": 2.85, "learning_rate": 3.182911319663862e-08, "loss": 0.1633, "step": 5841 }, { "epoch": 2.85, "learning_rate": 3.161980999772524e-08, "loss": 0.1599, "step": 5842 }, { "epoch": 2.85, "learning_rate": 3.1411192863421424e-08, "loss": 0.1955, "step": 5843 }, { "epoch": 2.85, "learning_rate": 3.1203261851709964e-08, "loss": 0.1374, "step": 5844 }, { "epoch": 2.85, "learning_rate": 3.099601702038379e-08, "loss": 0.1559, "step": 5845 }, { "epoch": 2.85, "learning_rate": 3.0789458427044894e-08, "loss": 0.1412, "step": 5846 }, { "epoch": 2.85, "learning_rate": 3.0583586129104284e-08, "loss": 0.167, "step": 5847 }, { "epoch": 2.86, "learning_rate": 3.037840018378202e-08, "loss": 0.1585, "step": 5848 }, { "epoch": 2.86, "learning_rate": 3.017390064810832e-08, "loss": 0.1459, "step": 5849 }, { "epoch": 2.86, "learning_rate": 2.9970087578921616e-08, "loss": 0.1773, "step": 5850 }, { "epoch": 2.86, "learning_rate": 2.9766961032869913e-08, "loss": 0.1939, "step": 5851 }, { "epoch": 2.86, "learning_rate": 2.9564521066411112e-08, "loss": 0.2194, "step": 5852 }, { "epoch": 2.86, "learning_rate": 2.9362767735811038e-08, "loss": 0.1547, "step": 5853 }, { "epoch": 2.86, "learning_rate": 2.916170109714511e-08, "loss": 0.1659, "step": 5854 }, { "epoch": 2.86, "learning_rate": 2.8961321206298355e-08, "loss": 0.1601, "step": 5855 }, { "epoch": 2.86, "learning_rate": 2.8761628118964825e-08, "loss": 0.1658, "step": 5856 }, { "epoch": 2.86, "learning_rate": 2.856262189064679e-08, "loss": 0.1672, "step": 5857 }, { "epoch": 2.86, "learning_rate": 2.8364302576656955e-08, "loss": 0.1399, "step": 5858 }, { "epoch": 2.86, "learning_rate": 2.816667023211622e-08, "loss": 0.1808, "step": 5859 }, { "epoch": 2.86, "learning_rate": 2.7969724911954545e-08, "loss": 0.1453, "step": 5860 }, { "epoch": 2.86, "learning_rate": 2.7773466670911464e-08, "loss": 0.1516, "step": 5861 }, { "epoch": 2.86, "learning_rate": 2.757789556353474e-08, "loss": 0.1716, "step": 5862 }, { "epoch": 2.86, "learning_rate": 2.738301164418228e-08, "loss": 0.1714, "step": 5863 }, { "epoch": 2.86, "learning_rate": 2.7188814967019915e-08, "loss": 0.1566, "step": 5864 }, { "epoch": 2.86, "learning_rate": 2.6995305586023092e-08, "loss": 0.1665, "step": 5865 }, { "epoch": 2.86, "learning_rate": 2.6802483554976277e-08, "loss": 0.1834, "step": 5866 }, { "epoch": 2.86, "learning_rate": 2.6610348927472162e-08, "loss": 0.141, "step": 5867 }, { "epoch": 2.87, "learning_rate": 2.641890175691303e-08, "loss": 0.1495, "step": 5868 }, { "epoch": 2.87, "learning_rate": 2.62281420965102e-08, "loss": 0.1457, "step": 5869 }, { "epoch": 2.87, "learning_rate": 2.6038069999283212e-08, "loss": 0.1857, "step": 5870 }, { "epoch": 2.87, "learning_rate": 2.5848685518061468e-08, "loss": 0.1452, "step": 5871 }, { "epoch": 2.87, "learning_rate": 2.565998870548231e-08, "loss": 0.1746, "step": 5872 }, { "epoch": 2.87, "learning_rate": 2.5471979613992394e-08, "loss": 0.2, "step": 5873 }, { "epoch": 2.87, "learning_rate": 2.528465829584742e-08, "loss": 0.1561, "step": 5874 }, { "epoch": 2.87, "learning_rate": 2.5098024803111574e-08, "loss": 0.165, "step": 5875 }, { "epoch": 2.87, "learning_rate": 2.491207918765809e-08, "loss": 0.1626, "step": 5876 }, { "epoch": 2.87, "learning_rate": 2.4726821501168675e-08, "loss": 0.1861, "step": 5877 }, { "epoch": 2.87, "learning_rate": 2.4542251795134365e-08, "loss": 0.1464, "step": 5878 }, { "epoch": 2.87, "learning_rate": 2.435837012085468e-08, "loss": 0.1929, "step": 5879 }, { "epoch": 2.87, "learning_rate": 2.4175176529437626e-08, "loss": 0.1661, "step": 5880 }, { "epoch": 2.87, "learning_rate": 2.3992671071800256e-08, "loss": 0.1515, "step": 5881 }, { "epoch": 2.87, "learning_rate": 2.381085379866893e-08, "loss": 0.1678, "step": 5882 }, { "epoch": 2.87, "learning_rate": 2.3629724760577398e-08, "loss": 0.1516, "step": 5883 }, { "epoch": 2.87, "learning_rate": 2.3449284007869556e-08, "loss": 0.1764, "step": 5884 }, { "epoch": 2.87, "learning_rate": 2.3269531590696947e-08, "loss": 0.1615, "step": 5885 }, { "epoch": 2.87, "learning_rate": 2.309046755902017e-08, "loss": 0.1547, "step": 5886 }, { "epoch": 2.87, "learning_rate": 2.2912091962608574e-08, "loss": 0.1467, "step": 5887 }, { "epoch": 2.88, "learning_rate": 2.273440485104028e-08, "loss": 0.204, "step": 5888 }, { "epoch": 2.88, "learning_rate": 2.2557406273701622e-08, "loss": 0.1282, "step": 5889 }, { "epoch": 2.88, "learning_rate": 2.2381096279787683e-08, "loss": 0.1235, "step": 5890 }, { "epoch": 2.88, "learning_rate": 2.2205474918302605e-08, "loss": 0.1583, "step": 5891 }, { "epoch": 2.88, "learning_rate": 2.203054223805873e-08, "loss": 0.1501, "step": 5892 }, { "epoch": 2.88, "learning_rate": 2.185629828767688e-08, "loss": 0.1444, "step": 5893 }, { "epoch": 2.88, "learning_rate": 2.168274311558666e-08, "loss": 0.1506, "step": 5894 }, { "epoch": 2.88, "learning_rate": 2.1509876770026418e-08, "loss": 0.148, "step": 5895 }, { "epoch": 2.88, "learning_rate": 2.1337699299042724e-08, "loss": 0.1351, "step": 5896 }, { "epoch": 2.88, "learning_rate": 2.116621075049091e-08, "loss": 0.158, "step": 5897 }, { "epoch": 2.88, "learning_rate": 2.099541117203424e-08, "loss": 0.1814, "step": 5898 }, { "epoch": 2.88, "learning_rate": 2.082530061114557e-08, "loss": 0.1517, "step": 5899 }, { "epoch": 2.88, "learning_rate": 2.0655879115105138e-08, "loss": 0.1614, "step": 5900 }, { "epoch": 2.88, "learning_rate": 2.0487146731002493e-08, "loss": 0.1835, "step": 5901 }, { "epoch": 2.88, "learning_rate": 2.0319103505735405e-08, "loss": 0.2498, "step": 5902 }, { "epoch": 2.88, "learning_rate": 2.0151749486009843e-08, "loss": 0.1516, "step": 5903 }, { "epoch": 2.88, "learning_rate": 1.9985084718339986e-08, "loss": 0.1691, "step": 5904 }, { "epoch": 2.88, "learning_rate": 1.981910924904934e-08, "loss": 0.1531, "step": 5905 }, { "epoch": 2.88, "learning_rate": 1.9653823124269334e-08, "loss": 0.1593, "step": 5906 }, { "epoch": 2.88, "learning_rate": 1.9489226389939885e-08, "loss": 0.1556, "step": 5907 }, { "epoch": 2.88, "learning_rate": 1.9325319091808847e-08, "loss": 0.1836, "step": 5908 }, { "epoch": 2.89, "learning_rate": 1.9162101275432832e-08, "loss": 0.1275, "step": 5909 }, { "epoch": 2.89, "learning_rate": 1.8999572986177217e-08, "loss": 0.1596, "step": 5910 }, { "epoch": 2.89, "learning_rate": 1.883773426921476e-08, "loss": 0.1688, "step": 5911 }, { "epoch": 2.89, "learning_rate": 1.867658516952753e-08, "loss": 0.1567, "step": 5912 }, { "epoch": 2.89, "learning_rate": 1.8516125731905253e-08, "loss": 0.208, "step": 5913 }, { "epoch": 2.89, "learning_rate": 1.835635600094643e-08, "loss": 0.1465, "step": 5914 }, { "epoch": 2.89, "learning_rate": 1.8197276021057473e-08, "loss": 0.1802, "step": 5915 }, { "epoch": 2.89, "learning_rate": 1.803888583645358e-08, "loss": 0.2637, "step": 5916 }, { "epoch": 2.89, "learning_rate": 1.7881185491157583e-08, "loss": 0.1888, "step": 5917 }, { "epoch": 2.89, "learning_rate": 1.7724175029000813e-08, "loss": 0.1698, "step": 5918 }, { "epoch": 2.89, "learning_rate": 1.756785449362336e-08, "loss": 0.1571, "step": 5919 }, { "epoch": 2.89, "learning_rate": 1.741222392847297e-08, "loss": 0.1459, "step": 5920 }, { "epoch": 2.89, "learning_rate": 1.72572833768056e-08, "loss": 0.1954, "step": 5921 }, { "epoch": 2.89, "learning_rate": 1.7103032881685966e-08, "loss": 0.1764, "step": 5922 }, { "epoch": 2.89, "learning_rate": 1.6949472485986163e-08, "loss": 0.1667, "step": 5923 }, { "epoch": 2.89, "learning_rate": 1.6796602232387328e-08, "loss": 0.1737, "step": 5924 }, { "epoch": 2.89, "learning_rate": 1.6644422163378526e-08, "loss": 0.1696, "step": 5925 }, { "epoch": 2.89, "learning_rate": 1.6492932321256484e-08, "loss": 0.1711, "step": 5926 }, { "epoch": 2.89, "learning_rate": 1.634213274812668e-08, "loss": 0.2014, "step": 5927 }, { "epoch": 2.89, "learning_rate": 1.6192023485902542e-08, "loss": 0.1923, "step": 5928 }, { "epoch": 2.9, "learning_rate": 1.6042604576305408e-08, "loss": 0.1592, "step": 5929 }, { "epoch": 2.9, "learning_rate": 1.58938760608654e-08, "loss": 0.159, "step": 5930 }, { "epoch": 2.9, "learning_rate": 1.574583798092e-08, "loss": 0.1674, "step": 5931 }, { "epoch": 2.9, "learning_rate": 1.5598490377614907e-08, "loss": 0.1754, "step": 5932 }, { "epoch": 2.9, "learning_rate": 1.5451833291904582e-08, "loss": 0.1581, "step": 5933 }, { "epoch": 2.9, "learning_rate": 1.530586676455059e-08, "loss": 0.1913, "step": 5934 }, { "epoch": 2.9, "learning_rate": 1.5160590836123256e-08, "loss": 0.1719, "step": 5935 }, { "epoch": 2.9, "learning_rate": 1.5016005547000833e-08, "loss": 0.2052, "step": 5936 }, { "epoch": 2.9, "learning_rate": 1.487211093736951e-08, "loss": 0.1544, "step": 5937 }, { "epoch": 2.9, "learning_rate": 1.4728907047223406e-08, "loss": 0.1912, "step": 5938 }, { "epoch": 2.9, "learning_rate": 1.4586393916364849e-08, "loss": 0.1833, "step": 5939 }, { "epoch": 2.9, "learning_rate": 1.4444571584404099e-08, "loss": 0.1369, "step": 5940 }, { "epoch": 2.9, "learning_rate": 1.4303440090759346e-08, "loss": 0.1646, "step": 5941 }, { "epoch": 2.9, "learning_rate": 1.4162999474657268e-08, "loss": 0.1902, "step": 5942 }, { "epoch": 2.9, "learning_rate": 1.4023249775131642e-08, "loss": 0.2003, "step": 5943 }, { "epoch": 2.9, "learning_rate": 1.388419103102473e-08, "loss": 0.1792, "step": 5944 }, { "epoch": 2.9, "learning_rate": 1.3745823280986725e-08, "loss": 0.1683, "step": 5945 }, { "epoch": 2.9, "learning_rate": 1.3608146563476033e-08, "loss": 0.1567, "step": 5946 }, { "epoch": 2.9, "learning_rate": 1.3471160916758152e-08, "loss": 0.1518, "step": 5947 }, { "epoch": 2.9, "learning_rate": 1.3334866378907351e-08, "loss": 0.1567, "step": 5948 }, { "epoch": 2.9, "learning_rate": 1.319926298780555e-08, "loss": 0.1843, "step": 5949 }, { "epoch": 2.91, "learning_rate": 1.3064350781142598e-08, "loss": 0.1699, "step": 5950 }, { "epoch": 2.91, "learning_rate": 1.2930129796415724e-08, "loss": 0.158, "step": 5951 }, { "epoch": 2.91, "learning_rate": 1.279660007093092e-08, "loss": 0.137, "step": 5952 }, { "epoch": 2.91, "learning_rate": 1.2663761641801553e-08, "loss": 0.1742, "step": 5953 }, { "epoch": 2.91, "learning_rate": 1.2531614545948368e-08, "loss": 0.1775, "step": 5954 }, { "epoch": 2.91, "learning_rate": 1.2400158820101426e-08, "loss": 0.1328, "step": 5955 }, { "epoch": 2.91, "learning_rate": 1.2269394500796782e-08, "loss": 0.172, "step": 5956 }, { "epoch": 2.91, "learning_rate": 1.2139321624379807e-08, "loss": 0.1975, "step": 5957 }, { "epoch": 2.91, "learning_rate": 1.2009940227002969e-08, "loss": 0.1469, "step": 5958 }, { "epoch": 2.91, "learning_rate": 1.1881250344626393e-08, "loss": 0.1678, "step": 5959 }, { "epoch": 2.91, "learning_rate": 1.175325201301869e-08, "loss": 0.1632, "step": 5960 }, { "epoch": 2.91, "learning_rate": 1.1625945267755845e-08, "loss": 0.1783, "step": 5961 }, { "epoch": 2.91, "learning_rate": 1.1499330144221221e-08, "loss": 0.1599, "step": 5962 }, { "epoch": 2.91, "learning_rate": 1.1373406677607224e-08, "loss": 0.2078, "step": 5963 }, { "epoch": 2.91, "learning_rate": 1.1248174902912245e-08, "loss": 0.147, "step": 5964 }, { "epoch": 2.91, "learning_rate": 1.112363485494372e-08, "loss": 0.2081, "step": 5965 }, { "epoch": 2.91, "learning_rate": 1.099978656831674e-08, "loss": 0.1786, "step": 5966 }, { "epoch": 2.91, "learning_rate": 1.0876630077453487e-08, "loss": 0.1516, "step": 5967 }, { "epoch": 2.91, "learning_rate": 1.0754165416584638e-08, "loss": 0.163, "step": 5968 }, { "epoch": 2.91, "learning_rate": 1.0632392619747412e-08, "loss": 0.1809, "step": 5969 }, { "epoch": 2.92, "learning_rate": 1.0511311720788342e-08, "loss": 0.1969, "step": 5970 }, { "epoch": 2.92, "learning_rate": 1.0390922753360234e-08, "loss": 0.1714, "step": 5971 }, { "epoch": 2.92, "learning_rate": 1.0271225750924652e-08, "loss": 0.1987, "step": 5972 }, { "epoch": 2.92, "learning_rate": 1.0152220746749709e-08, "loss": 0.183, "step": 5973 }, { "epoch": 2.92, "learning_rate": 1.0033907773912276e-08, "loss": 0.1864, "step": 5974 }, { "epoch": 2.92, "learning_rate": 9.916286865296054e-09, "loss": 0.1705, "step": 5975 }, { "epoch": 2.92, "learning_rate": 9.799358053592945e-09, "loss": 0.1442, "step": 5976 }, { "epoch": 2.92, "learning_rate": 9.683121371302506e-09, "loss": 0.1904, "step": 5977 }, { "epoch": 2.92, "learning_rate": 9.56757685073112e-09, "loss": 0.1831, "step": 5978 }, { "epoch": 2.92, "learning_rate": 9.452724523993927e-09, "loss": 0.1651, "step": 5979 }, { "epoch": 2.92, "learning_rate": 9.338564423012896e-09, "loss": 0.1898, "step": 5980 }, { "epoch": 2.92, "learning_rate": 9.225096579517922e-09, "loss": 0.2092, "step": 5981 }, { "epoch": 2.92, "learning_rate": 9.112321025046278e-09, "loss": 0.1509, "step": 5982 }, { "epoch": 2.92, "learning_rate": 9.00023779094289e-09, "loss": 0.1507, "step": 5983 }, { "epoch": 2.92, "learning_rate": 8.888846908360338e-09, "loss": 0.1489, "step": 5984 }, { "epoch": 2.92, "learning_rate": 8.778148408258858e-09, "loss": 0.2155, "step": 5985 }, { "epoch": 2.92, "learning_rate": 8.668142321406059e-09, "loss": 0.1699, "step": 5986 }, { "epoch": 2.92, "learning_rate": 8.558828678377207e-09, "loss": 0.179, "step": 5987 }, { "epoch": 2.92, "learning_rate": 8.45020750955522e-09, "loss": 0.1536, "step": 5988 }, { "epoch": 2.92, "learning_rate": 8.342278845129837e-09, "loss": 0.1829, "step": 5989 }, { "epoch": 2.92, "learning_rate": 8.235042715099561e-09, "loss": 0.1813, "step": 5990 }, { "epoch": 2.93, "learning_rate": 8.128499149269164e-09, "loss": 0.1642, "step": 5991 }, { "epoch": 2.93, "learning_rate": 8.02264817725218e-09, "loss": 0.1665, "step": 5992 }, { "epoch": 2.93, "learning_rate": 7.917489828468128e-09, "loss": 0.1771, "step": 5993 }, { "epoch": 2.93, "learning_rate": 7.813024132145297e-09, "loss": 0.1742, "step": 5994 }, { "epoch": 2.93, "learning_rate": 7.70925111731935e-09, "loss": 0.1829, "step": 5995 }, { "epoch": 2.93, "learning_rate": 7.606170812832214e-09, "loss": 0.1652, "step": 5996 }, { "epoch": 2.93, "learning_rate": 7.503783247335139e-09, "loss": 0.1857, "step": 5997 }, { "epoch": 2.93, "learning_rate": 7.4020884492848055e-09, "loss": 0.1895, "step": 5998 }, { "epoch": 2.93, "learning_rate": 7.3010864469474915e-09, "loss": 0.1892, "step": 5999 }, { "epoch": 2.93, "learning_rate": 7.20077726839491e-09, "loss": 0.1365, "step": 6000 }, { "epoch": 2.93, "learning_rate": 7.101160941507812e-09, "loss": 0.16, "step": 6001 }, { "epoch": 2.93, "learning_rate": 7.002237493972941e-09, "loss": 0.144, "step": 6002 }, { "epoch": 2.93, "learning_rate": 6.904006953285802e-09, "loss": 0.1697, "step": 6003 }, { "epoch": 2.93, "learning_rate": 6.806469346748723e-09, "loss": 0.1574, "step": 6004 }, { "epoch": 2.93, "learning_rate": 6.709624701470852e-09, "loss": 0.1479, "step": 6005 }, { "epoch": 2.93, "learning_rate": 6.6134730443701e-09, "loss": 0.1381, "step": 6006 }, { "epoch": 2.93, "learning_rate": 6.518014402170369e-09, "loss": 0.1933, "step": 6007 }, { "epoch": 2.93, "learning_rate": 6.423248801403492e-09, "loss": 0.1683, "step": 6008 }, { "epoch": 2.93, "learning_rate": 6.329176268409232e-09, "loss": 0.1775, "step": 6009 }, { "epoch": 2.93, "learning_rate": 6.235796829333896e-09, "loss": 0.2326, "step": 6010 }, { "epoch": 2.94, "learning_rate": 6.143110510131445e-09, "loss": 0.1607, "step": 6011 }, { "epoch": 2.94, "learning_rate": 6.051117336563495e-09, "loss": 0.1874, "step": 6012 }, { "epoch": 2.94, "learning_rate": 5.959817334198481e-09, "loss": 0.1798, "step": 6013 }, { "epoch": 2.94, "learning_rate": 5.869210528412772e-09, "loss": 0.1615, "step": 6014 }, { "epoch": 2.94, "learning_rate": 5.779296944389279e-09, "loss": 0.1834, "step": 6015 }, { "epoch": 2.94, "learning_rate": 5.690076607119121e-09, "loss": 0.1703, "step": 6016 }, { "epoch": 2.94, "learning_rate": 5.601549541399964e-09, "loss": 0.167, "step": 6017 }, { "epoch": 2.94, "learning_rate": 5.513715771837125e-09, "loss": 0.1779, "step": 6018 }, { "epoch": 2.94, "learning_rate": 5.426575322843574e-09, "loss": 0.1824, "step": 6019 }, { "epoch": 2.94, "learning_rate": 5.340128218638829e-09, "loss": 0.177, "step": 6020 }, { "epoch": 2.94, "learning_rate": 5.254374483250613e-09, "loss": 0.1673, "step": 6021 }, { "epoch": 2.94, "learning_rate": 5.169314140512916e-09, "loss": 0.1586, "step": 6022 }, { "epoch": 2.94, "learning_rate": 5.084947214067659e-09, "loss": 0.1646, "step": 6023 }, { "epoch": 2.94, "learning_rate": 5.001273727364142e-09, "loss": 0.1484, "step": 6024 }, { "epoch": 2.94, "learning_rate": 4.918293703658483e-09, "loss": 0.1939, "step": 6025 }, { "epoch": 2.94, "learning_rate": 4.836007166014179e-09, "loss": 0.1738, "step": 6026 }, { "epoch": 2.94, "learning_rate": 4.754414137302099e-09, "loss": 0.1767, "step": 6027 }, { "epoch": 2.94, "learning_rate": 4.673514640200216e-09, "loss": 0.1655, "step": 6028 }, { "epoch": 2.94, "learning_rate": 4.593308697194155e-09, "loss": 0.1209, "step": 6029 }, { "epoch": 2.94, "learning_rate": 4.5137963305760815e-09, "loss": 0.2233, "step": 6030 }, { "epoch": 2.94, "learning_rate": 4.434977562446097e-09, "loss": 0.159, "step": 6031 }, { "epoch": 2.95, "learning_rate": 4.356852414710844e-09, "loss": 0.1382, "step": 6032 }, { "epoch": 2.95, "learning_rate": 4.279420909084897e-09, "loss": 0.1924, "step": 6033 }, { "epoch": 2.95, "learning_rate": 4.202683067089375e-09, "loss": 0.1398, "step": 6034 }, { "epoch": 2.95, "learning_rate": 4.126638910053049e-09, "loss": 0.1592, "step": 6035 }, { "epoch": 2.95, "learning_rate": 4.051288459111513e-09, "loss": 0.1722, "step": 6036 }, { "epoch": 2.95, "learning_rate": 3.9766317352080145e-09, "loss": 0.1771, "step": 6037 }, { "epoch": 2.95, "learning_rate": 3.9026687590923426e-09, "loss": 0.199, "step": 6038 }, { "epoch": 2.95, "learning_rate": 3.829399551322222e-09, "loss": 0.1836, "step": 6039 }, { "epoch": 2.95, "learning_rate": 3.756824132262471e-09, "loss": 0.1724, "step": 6040 }, { "epoch": 2.95, "learning_rate": 3.6849425220841783e-09, "loss": 0.1443, "step": 6041 }, { "epoch": 2.95, "learning_rate": 3.613754740766362e-09, "loss": 0.1481, "step": 6042 }, { "epoch": 2.95, "learning_rate": 3.5432608080951392e-09, "loss": 0.1479, "step": 6043 }, { "epoch": 2.95, "learning_rate": 3.4734607436637257e-09, "loss": 0.1556, "step": 6044 }, { "epoch": 2.95, "learning_rate": 3.4043545668724363e-09, "loss": 0.1661, "step": 6045 }, { "epoch": 2.95, "learning_rate": 3.335942296928685e-09, "loss": 0.1802, "step": 6046 }, { "epoch": 2.95, "learning_rate": 3.268223952846983e-09, "loss": 0.1499, "step": 6047 }, { "epoch": 2.95, "learning_rate": 3.2011995534489415e-09, "loss": 0.1933, "step": 6048 }, { "epoch": 2.95, "learning_rate": 3.1348691173638256e-09, "loss": 0.1486, "step": 6049 }, { "epoch": 2.95, "learning_rate": 3.0692326630271663e-09, "loss": 0.167, "step": 6050 }, { "epoch": 2.95, "learning_rate": 3.0042902086824254e-09, "loss": 0.1785, "step": 6051 }, { "epoch": 2.96, "learning_rate": 2.9400417723793316e-09, "loss": 0.1713, "step": 6052 }, { "epoch": 2.96, "learning_rate": 2.876487371975545e-09, "loss": 0.1924, "step": 6053 }, { "epoch": 2.96, "learning_rate": 2.8136270251352682e-09, "loss": 0.1539, "step": 6054 }, { "epoch": 2.96, "learning_rate": 2.751460749330359e-09, "loss": 0.1907, "step": 6055 }, { "epoch": 2.96, "learning_rate": 2.6899885618386636e-09, "loss": 0.1552, "step": 6056 }, { "epoch": 2.96, "learning_rate": 2.6292104797465133e-09, "loss": 0.2001, "step": 6057 }, { "epoch": 2.96, "learning_rate": 2.56912651994623e-09, "loss": 0.1568, "step": 6058 }, { "epoch": 2.96, "learning_rate": 2.509736699137788e-09, "loss": 0.1639, "step": 6059 }, { "epoch": 2.96, "learning_rate": 2.451041033828261e-09, "loss": 0.1521, "step": 6060 }, { "epoch": 2.96, "learning_rate": 2.3930395403312654e-09, "loss": 0.1866, "step": 6061 }, { "epoch": 2.96, "learning_rate": 2.335732234767796e-09, "loss": 0.176, "step": 6062 }, { "epoch": 2.96, "learning_rate": 2.2791191330659436e-09, "loss": 0.1479, "step": 6063 }, { "epoch": 2.96, "learning_rate": 2.2232002509611773e-09, "loss": 0.1654, "step": 6064 }, { "epoch": 2.96, "learning_rate": 2.167975603995509e-09, "loss": 0.2226, "step": 6065 }, { "epoch": 2.96, "learning_rate": 2.1134452075177724e-09, "loss": 0.1698, "step": 6066 }, { "epoch": 2.96, "learning_rate": 2.059609076684732e-09, "loss": 0.1657, "step": 6067 }, { "epoch": 2.96, "learning_rate": 2.006467226459141e-09, "loss": 0.1779, "step": 6068 }, { "epoch": 2.96, "learning_rate": 1.9540196716116843e-09, "loss": 0.2283, "step": 6069 }, { "epoch": 2.96, "learning_rate": 1.90226642671959e-09, "loss": 0.1589, "step": 6070 }, { "epoch": 2.96, "learning_rate": 1.8512075061674629e-09, "loss": 0.1858, "step": 6071 }, { "epoch": 2.96, "learning_rate": 1.8008429241461733e-09, "loss": 0.1829, "step": 6072 }, { "epoch": 2.97, "learning_rate": 1.7511726946545239e-09, "loss": 0.1587, "step": 6073 }, { "epoch": 2.97, "learning_rate": 1.7021968314978598e-09, "loss": 0.1808, "step": 6074 }, { "epoch": 2.97, "learning_rate": 1.6539153482883486e-09, "loss": 0.2145, "step": 6075 }, { "epoch": 2.97, "learning_rate": 1.6063282584455331e-09, "loss": 0.1616, "step": 6076 }, { "epoch": 2.97, "learning_rate": 1.5594355751960554e-09, "loss": 0.1688, "step": 6077 }, { "epoch": 2.97, "learning_rate": 1.5132373115731013e-09, "loss": 0.1629, "step": 6078 }, { "epoch": 2.97, "learning_rate": 1.4677334804172326e-09, "loss": 0.1498, "step": 6079 }, { "epoch": 2.97, "learning_rate": 1.4229240943755552e-09, "loss": 0.2034, "step": 6080 }, { "epoch": 2.97, "learning_rate": 1.3788091659025504e-09, "loss": 0.2126, "step": 6081 }, { "epoch": 2.97, "learning_rate": 1.3353887072595218e-09, "loss": 0.175, "step": 6082 }, { "epoch": 2.97, "learning_rate": 1.2926627305151484e-09, "loss": 0.161, "step": 6083 }, { "epoch": 2.97, "learning_rate": 1.2506312475446536e-09, "loss": 0.1576, "step": 6084 }, { "epoch": 2.97, "learning_rate": 1.2092942700298038e-09, "loss": 0.1599, "step": 6085 }, { "epoch": 2.97, "learning_rate": 1.1686518094602972e-09, "loss": 0.2019, "step": 6086 }, { "epoch": 2.97, "learning_rate": 1.1287038771323755e-09, "loss": 0.1632, "step": 6087 }, { "epoch": 2.97, "learning_rate": 1.0894504841491016e-09, "loss": 0.1764, "step": 6088 }, { "epoch": 2.97, "learning_rate": 1.0508916414206371e-09, "loss": 0.2081, "step": 6089 }, { "epoch": 2.97, "learning_rate": 1.0130273596642426e-09, "loss": 0.1869, "step": 6090 }, { "epoch": 2.97, "learning_rate": 9.758576494034444e-10, "loss": 0.1784, "step": 6091 }, { "epoch": 2.97, "learning_rate": 9.393825209697005e-10, "loss": 0.1927, "step": 6092 }, { "epoch": 2.98, "learning_rate": 9.036019845007349e-10, "loss": 0.1656, "step": 6093 }, { "epoch": 2.98, "learning_rate": 8.685160499416479e-10, "loss": 0.2107, "step": 6094 }, { "epoch": 2.98, "learning_rate": 8.341247270440833e-10, "loss": 0.1485, "step": 6095 }, { "epoch": 2.98, "learning_rate": 8.004280253670616e-10, "loss": 0.1774, "step": 6096 }, { "epoch": 2.98, "learning_rate": 7.674259542758689e-10, "loss": 0.2018, "step": 6097 }, { "epoch": 2.98, "learning_rate": 7.351185229434455e-10, "loss": 0.17, "step": 6098 }, { "epoch": 2.98, "learning_rate": 7.035057403492751e-10, "loss": 0.1925, "step": 6099 }, { "epoch": 2.98, "learning_rate": 6.725876152796629e-10, "loss": 0.1764, "step": 6100 }, { "epoch": 2.98, "learning_rate": 6.423641563285676e-10, "loss": 0.176, "step": 6101 }, { "epoch": 2.98, "learning_rate": 6.128353718956592e-10, "loss": 0.1806, "step": 6102 }, { "epoch": 2.98, "learning_rate": 5.840012701885389e-10, "loss": 0.1672, "step": 6103 }, { "epoch": 2.98, "learning_rate": 5.558618592216291e-10, "loss": 0.199, "step": 6104 }, { "epoch": 2.98, "learning_rate": 5.284171468156186e-10, "loss": 0.1707, "step": 6105 }, { "epoch": 2.98, "learning_rate": 5.016671405988494e-10, "loss": 0.1759, "step": 6106 }, { "epoch": 2.98, "learning_rate": 4.756118480059302e-10, "loss": 0.1743, "step": 6107 }, { "epoch": 2.98, "learning_rate": 4.502512762788458e-10, "loss": 0.1651, "step": 6108 }, { "epoch": 2.98, "learning_rate": 4.2558543246640214e-10, "loss": 0.2191, "step": 6109 }, { "epoch": 2.98, "learning_rate": 4.0161432342450404e-10, "loss": 0.1855, "step": 6110 }, { "epoch": 2.98, "learning_rate": 3.7833795581504463e-10, "loss": 0.1602, "step": 6111 }, { "epoch": 2.98, "learning_rate": 3.5575633610784866e-10, "loss": 0.1903, "step": 6112 }, { "epoch": 2.98, "learning_rate": 3.33869470579562e-10, "loss": 0.1772, "step": 6113 }, { "epoch": 2.99, "learning_rate": 3.1267736531309654e-10, "loss": 0.1817, "step": 6114 }, { "epoch": 2.99, "learning_rate": 2.9218002619874066e-10, "loss": 0.188, "step": 6115 }, { "epoch": 2.99, "learning_rate": 2.7237745893332615e-10, "loss": 0.1673, "step": 6116 }, { "epoch": 2.99, "learning_rate": 2.532696690210612e-10, "loss": 0.1604, "step": 6117 }, { "epoch": 2.99, "learning_rate": 2.3485666177269775e-10, "loss": 0.1886, "step": 6118 }, { "epoch": 2.99, "learning_rate": 2.1713844230608627e-10, "loss": 0.18, "step": 6119 }, { "epoch": 2.99, "learning_rate": 2.0011501554562108e-10, "loss": 0.1526, "step": 6120 }, { "epoch": 2.99, "learning_rate": 1.8378638622307266e-10, "loss": 0.1929, "step": 6121 }, { "epoch": 2.99, "learning_rate": 1.6815255887647763e-10, "loss": 0.1565, "step": 6122 }, { "epoch": 2.99, "learning_rate": 1.5321353785152648e-10, "loss": 0.1886, "step": 6123 }, { "epoch": 2.99, "learning_rate": 1.3896932730017577e-10, "loss": 0.1934, "step": 6124 }, { "epoch": 2.99, "learning_rate": 1.2541993118148077e-10, "loss": 0.1989, "step": 6125 }, { "epoch": 2.99, "learning_rate": 1.1256535326159556e-10, "loss": 0.172, "step": 6126 }, { "epoch": 2.99, "learning_rate": 1.0040559711294028e-10, "loss": 0.204, "step": 6127 }, { "epoch": 2.99, "learning_rate": 8.894066611531138e-11, "loss": 0.1483, "step": 6128 }, { "epoch": 2.99, "learning_rate": 7.817056345560403e-11, "loss": 0.147, "step": 6129 }, { "epoch": 2.99, "learning_rate": 6.809529212697951e-11, "loss": 0.1836, "step": 6130 }, { "epoch": 2.99, "learning_rate": 5.871485492997541e-11, "loss": 0.1702, "step": 6131 }, { "epoch": 2.99, "learning_rate": 5.002925447167295e-11, "loss": 0.1933, "step": 6132 }, { "epoch": 2.99, "learning_rate": 4.203849316625208e-11, "loss": 0.1672, "step": 6133 }, { "epoch": 3.0, "learning_rate": 3.4742573234436414e-11, "loss": 0.2067, "step": 6134 }, { "epoch": 3.0, "learning_rate": 2.814149670404831e-11, "loss": 0.1854, "step": 6135 }, { "epoch": 3.0, "learning_rate": 2.2235265410286423e-11, "loss": 0.1483, "step": 6136 }, { "epoch": 3.0, "learning_rate": 1.7023880994337937e-11, "loss": 0.1891, "step": 6137 }, { "epoch": 3.0, "learning_rate": 1.250734490476635e-11, "loss": 0.1746, "step": 6138 }, { "epoch": 3.0, "learning_rate": 8.685658396678786e-12, "loss": 0.1969, "step": 6139 }, { "epoch": 3.0, "learning_rate": 5.558822532558683e-12, "loss": 0.179, "step": 6140 }, { "epoch": 3.0, "learning_rate": 3.1268381817106673e-12, "loss": 0.1588, "step": 6141 }, { "epoch": 3.0, "learning_rate": 1.3897060194278944e-12, "loss": 0.1686, "step": 6142 }, { "epoch": 3.0, "learning_rate": 3.4742652893493546e-13, "loss": 0.1805, "step": 6143 }, { "epoch": 3.0, "learning_rate": 0.0, "loss": 0.1211, "step": 6144 }, { "epoch": 3.0, "step": 6144, "total_flos": 4.3738597291940905e+18, "train_loss": 0.2402623662055703, "train_runtime": 31568.2451, "train_samples_per_second": 24.902, "train_steps_per_second": 0.195 } ], "logging_steps": 1.0, "max_steps": 6144, "num_train_epochs": 3, "save_steps": 500, "total_flos": 4.3738597291940905e+18, "trial_name": null, "trial_params": null }