{ "best_metric": null, "best_model_checkpoint": null, "epoch": 20.0, "eval_steps": 500, "global_step": 2800, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.01, "learning_rate": 1.1904761904761904e-06, "loss": 3.9258, "step": 1 }, { "epoch": 0.01, "learning_rate": 2.3809523809523808e-06, "loss": 3.8945, "step": 2 }, { "epoch": 0.02, "learning_rate": 3.5714285714285714e-06, "loss": 3.8945, "step": 3 }, { "epoch": 0.03, "learning_rate": 4.7619047619047615e-06, "loss": 3.9062, "step": 4 }, { "epoch": 0.04, "learning_rate": 5.9523809523809525e-06, "loss": 3.918, "step": 5 }, { "epoch": 0.04, "learning_rate": 7.142857142857143e-06, "loss": 3.9062, "step": 6 }, { "epoch": 0.05, "learning_rate": 8.333333333333334e-06, "loss": 3.8594, "step": 7 }, { "epoch": 0.06, "learning_rate": 9.523809523809523e-06, "loss": 3.8633, "step": 8 }, { "epoch": 0.06, "learning_rate": 1.0714285714285714e-05, "loss": 3.8789, "step": 9 }, { "epoch": 0.07, "learning_rate": 1.1904761904761905e-05, "loss": 3.8555, "step": 10 }, { "epoch": 0.08, "learning_rate": 1.3095238095238096e-05, "loss": 3.8164, "step": 11 }, { "epoch": 0.09, "learning_rate": 1.4285714285714285e-05, "loss": 3.8125, "step": 12 }, { "epoch": 0.09, "learning_rate": 1.5476190476190476e-05, "loss": 3.8945, "step": 13 }, { "epoch": 0.1, "learning_rate": 1.6666666666666667e-05, "loss": 3.8242, "step": 14 }, { "epoch": 0.11, "learning_rate": 1.785714285714286e-05, "loss": 3.875, "step": 15 }, { "epoch": 0.11, "learning_rate": 1.9047619047619046e-05, "loss": 3.8086, "step": 16 }, { "epoch": 0.12, "learning_rate": 2.023809523809524e-05, "loss": 3.7656, "step": 17 }, { "epoch": 0.13, "learning_rate": 2.1428571428571428e-05, "loss": 3.8164, "step": 18 }, { "epoch": 0.14, "learning_rate": 2.261904761904762e-05, "loss": 3.7891, "step": 19 }, { "epoch": 0.14, "learning_rate": 2.380952380952381e-05, "loss": 3.7812, "step": 20 }, { "epoch": 0.15, "learning_rate": 2.5e-05, "loss": 3.7461, "step": 21 }, { "epoch": 0.16, "learning_rate": 2.6190476190476192e-05, "loss": 3.793, "step": 22 }, { "epoch": 0.16, "learning_rate": 2.7380952380952383e-05, "loss": 3.8008, "step": 23 }, { "epoch": 0.17, "learning_rate": 2.857142857142857e-05, "loss": 3.7539, "step": 24 }, { "epoch": 0.18, "learning_rate": 2.9761904761904762e-05, "loss": 3.6953, "step": 25 }, { "epoch": 0.19, "learning_rate": 3.095238095238095e-05, "loss": 3.7109, "step": 26 }, { "epoch": 0.19, "learning_rate": 3.2142857142857144e-05, "loss": 3.6562, "step": 27 }, { "epoch": 0.2, "learning_rate": 3.3333333333333335e-05, "loss": 3.6758, "step": 28 }, { "epoch": 0.21, "learning_rate": 3.4523809523809526e-05, "loss": 3.6875, "step": 29 }, { "epoch": 0.21, "learning_rate": 3.571428571428572e-05, "loss": 3.7188, "step": 30 }, { "epoch": 0.22, "learning_rate": 3.690476190476191e-05, "loss": 3.6328, "step": 31 }, { "epoch": 0.23, "learning_rate": 3.809523809523809e-05, "loss": 3.6406, "step": 32 }, { "epoch": 0.24, "learning_rate": 3.928571428571429e-05, "loss": 3.6094, "step": 33 }, { "epoch": 0.24, "learning_rate": 4.047619047619048e-05, "loss": 3.5547, "step": 34 }, { "epoch": 0.25, "learning_rate": 4.166666666666667e-05, "loss": 3.4922, "step": 35 }, { "epoch": 0.26, "learning_rate": 4.2857142857142856e-05, "loss": 3.5977, "step": 36 }, { "epoch": 0.26, "learning_rate": 4.404761904761905e-05, "loss": 3.4922, "step": 37 }, { "epoch": 0.27, "learning_rate": 4.523809523809524e-05, "loss": 3.4883, "step": 38 }, { "epoch": 0.28, "learning_rate": 4.642857142857143e-05, "loss": 3.3477, "step": 39 }, { "epoch": 0.29, "learning_rate": 4.761904761904762e-05, "loss": 3.3008, "step": 40 }, { "epoch": 0.29, "learning_rate": 4.880952380952381e-05, "loss": 3.3398, "step": 41 }, { "epoch": 0.3, "learning_rate": 5e-05, "loss": 3.2969, "step": 42 }, { "epoch": 0.31, "learning_rate": 5.119047619047619e-05, "loss": 3.2422, "step": 43 }, { "epoch": 0.31, "learning_rate": 5.2380952380952384e-05, "loss": 3.2109, "step": 44 }, { "epoch": 0.32, "learning_rate": 5.3571428571428575e-05, "loss": 3.1406, "step": 45 }, { "epoch": 0.33, "learning_rate": 5.4761904761904766e-05, "loss": 3.2031, "step": 46 }, { "epoch": 0.34, "learning_rate": 5.595238095238096e-05, "loss": 3.3398, "step": 47 }, { "epoch": 0.34, "learning_rate": 5.714285714285714e-05, "loss": 3.1523, "step": 48 }, { "epoch": 0.35, "learning_rate": 5.833333333333334e-05, "loss": 3.1055, "step": 49 }, { "epoch": 0.36, "learning_rate": 5.9523809523809524e-05, "loss": 3.043, "step": 50 }, { "epoch": 0.36, "learning_rate": 6.0714285714285715e-05, "loss": 3.0352, "step": 51 }, { "epoch": 0.37, "learning_rate": 6.19047619047619e-05, "loss": 3.1562, "step": 52 }, { "epoch": 0.38, "learning_rate": 6.30952380952381e-05, "loss": 3.0859, "step": 53 }, { "epoch": 0.39, "learning_rate": 6.428571428571429e-05, "loss": 2.9219, "step": 54 }, { "epoch": 0.39, "learning_rate": 6.547619047619048e-05, "loss": 2.9297, "step": 55 }, { "epoch": 0.4, "learning_rate": 6.666666666666667e-05, "loss": 2.9414, "step": 56 }, { "epoch": 0.41, "learning_rate": 6.785714285714286e-05, "loss": 2.9375, "step": 57 }, { "epoch": 0.41, "learning_rate": 6.904761904761905e-05, "loss": 2.8633, "step": 58 }, { "epoch": 0.42, "learning_rate": 7.023809523809524e-05, "loss": 2.8359, "step": 59 }, { "epoch": 0.43, "learning_rate": 7.142857142857143e-05, "loss": 2.8164, "step": 60 }, { "epoch": 0.44, "learning_rate": 7.261904761904762e-05, "loss": 3.0117, "step": 61 }, { "epoch": 0.44, "learning_rate": 7.380952380952382e-05, "loss": 2.8867, "step": 62 }, { "epoch": 0.45, "learning_rate": 7.500000000000001e-05, "loss": 2.75, "step": 63 }, { "epoch": 0.46, "learning_rate": 7.619047619047618e-05, "loss": 2.8359, "step": 64 }, { "epoch": 0.46, "learning_rate": 7.738095238095239e-05, "loss": 2.8281, "step": 65 }, { "epoch": 0.47, "learning_rate": 7.857142857142858e-05, "loss": 2.8203, "step": 66 }, { "epoch": 0.48, "learning_rate": 7.976190476190477e-05, "loss": 2.8672, "step": 67 }, { "epoch": 0.49, "learning_rate": 8.095238095238096e-05, "loss": 2.7656, "step": 68 }, { "epoch": 0.49, "learning_rate": 8.214285714285714e-05, "loss": 2.8203, "step": 69 }, { "epoch": 0.5, "learning_rate": 8.333333333333334e-05, "loss": 2.6719, "step": 70 }, { "epoch": 0.51, "learning_rate": 8.452380952380952e-05, "loss": 2.8008, "step": 71 }, { "epoch": 0.51, "learning_rate": 8.571428571428571e-05, "loss": 2.8516, "step": 72 }, { "epoch": 0.52, "learning_rate": 8.690476190476192e-05, "loss": 2.6953, "step": 73 }, { "epoch": 0.53, "learning_rate": 8.80952380952381e-05, "loss": 2.6367, "step": 74 }, { "epoch": 0.54, "learning_rate": 8.92857142857143e-05, "loss": 2.5352, "step": 75 }, { "epoch": 0.54, "learning_rate": 9.047619047619048e-05, "loss": 2.8438, "step": 76 }, { "epoch": 0.55, "learning_rate": 9.166666666666667e-05, "loss": 2.7109, "step": 77 }, { "epoch": 0.56, "learning_rate": 9.285714285714286e-05, "loss": 2.5039, "step": 78 }, { "epoch": 0.56, "learning_rate": 9.404761904761905e-05, "loss": 2.5, "step": 79 }, { "epoch": 0.57, "learning_rate": 9.523809523809524e-05, "loss": 2.4766, "step": 80 }, { "epoch": 0.58, "learning_rate": 9.642857142857143e-05, "loss": 2.457, "step": 81 }, { "epoch": 0.59, "learning_rate": 9.761904761904762e-05, "loss": 2.5703, "step": 82 }, { "epoch": 0.59, "learning_rate": 9.880952380952381e-05, "loss": 2.5195, "step": 83 }, { "epoch": 0.6, "learning_rate": 0.0001, "loss": 2.5, "step": 84 }, { "epoch": 0.61, "learning_rate": 9.99999665512283e-05, "loss": 2.4727, "step": 85 }, { "epoch": 0.61, "learning_rate": 9.999986620495793e-05, "loss": 2.4688, "step": 86 }, { "epoch": 0.62, "learning_rate": 9.999969896132315e-05, "loss": 2.457, "step": 87 }, { "epoch": 0.63, "learning_rate": 9.999946482054772e-05, "loss": 2.6641, "step": 88 }, { "epoch": 0.64, "learning_rate": 9.999916378294493e-05, "loss": 2.3125, "step": 89 }, { "epoch": 0.64, "learning_rate": 9.999879584891754e-05, "loss": 2.3125, "step": 90 }, { "epoch": 0.65, "learning_rate": 9.999836101895783e-05, "loss": 2.2812, "step": 91 }, { "epoch": 0.66, "learning_rate": 9.999785929364756e-05, "loss": 2.3477, "step": 92 }, { "epoch": 0.66, "learning_rate": 9.999729067365803e-05, "loss": 2.4258, "step": 93 }, { "epoch": 0.67, "learning_rate": 9.999665515975006e-05, "loss": 2.4648, "step": 94 }, { "epoch": 0.68, "learning_rate": 9.999595275277389e-05, "loss": 2.3281, "step": 95 }, { "epoch": 0.69, "learning_rate": 9.999518345366932e-05, "loss": 2.3828, "step": 96 }, { "epoch": 0.69, "learning_rate": 9.999434726346564e-05, "loss": 1.9785, "step": 97 }, { "epoch": 0.7, "learning_rate": 9.999344418328162e-05, "loss": 2.2891, "step": 98 }, { "epoch": 0.71, "learning_rate": 9.999247421432554e-05, "loss": 2.3359, "step": 99 }, { "epoch": 0.71, "learning_rate": 9.999143735789518e-05, "loss": 2.168, "step": 100 }, { "epoch": 0.72, "learning_rate": 9.999033361537779e-05, "loss": 2.1934, "step": 101 }, { "epoch": 0.73, "learning_rate": 9.998916298825014e-05, "loss": 2.4922, "step": 102 }, { "epoch": 0.74, "learning_rate": 9.998792547807845e-05, "loss": 2.1699, "step": 103 }, { "epoch": 0.74, "learning_rate": 9.998662108651848e-05, "loss": 2.0586, "step": 104 }, { "epoch": 0.75, "learning_rate": 9.99852498153154e-05, "loss": 2.0723, "step": 105 }, { "epoch": 0.76, "learning_rate": 9.998381166630395e-05, "loss": 2.3633, "step": 106 }, { "epoch": 0.76, "learning_rate": 9.998230664140826e-05, "loss": 2.0898, "step": 107 }, { "epoch": 0.77, "learning_rate": 9.9980734742642e-05, "loss": 2.209, "step": 108 }, { "epoch": 0.78, "learning_rate": 9.997909597210828e-05, "loss": 2.1484, "step": 109 }, { "epoch": 0.79, "learning_rate": 9.997739033199974e-05, "loss": 2.084, "step": 110 }, { "epoch": 0.79, "learning_rate": 9.997561782459837e-05, "loss": 2.0352, "step": 111 }, { "epoch": 0.8, "learning_rate": 9.997377845227576e-05, "loss": 1.9668, "step": 112 }, { "epoch": 0.81, "learning_rate": 9.997187221749286e-05, "loss": 2.2012, "step": 113 }, { "epoch": 0.81, "learning_rate": 9.996989912280014e-05, "loss": 2.0996, "step": 114 }, { "epoch": 0.82, "learning_rate": 9.996785917083749e-05, "loss": 2.0215, "step": 115 }, { "epoch": 0.83, "learning_rate": 9.996575236433428e-05, "loss": 2.0176, "step": 116 }, { "epoch": 0.84, "learning_rate": 9.996357870610931e-05, "loss": 1.9004, "step": 117 }, { "epoch": 0.84, "learning_rate": 9.996133819907081e-05, "loss": 1.8848, "step": 118 }, { "epoch": 0.85, "learning_rate": 9.99590308462165e-05, "loss": 1.9922, "step": 119 }, { "epoch": 0.86, "learning_rate": 9.995665665063349e-05, "loss": 2.1523, "step": 120 }, { "epoch": 0.86, "learning_rate": 9.995421561549833e-05, "loss": 2.0137, "step": 121 }, { "epoch": 0.87, "learning_rate": 9.9951707744077e-05, "loss": 2.2441, "step": 122 }, { "epoch": 0.88, "learning_rate": 9.994913303972494e-05, "loss": 2.0117, "step": 123 }, { "epoch": 0.89, "learning_rate": 9.994649150588693e-05, "loss": 2.1328, "step": 124 }, { "epoch": 0.89, "learning_rate": 9.994378314609725e-05, "loss": 2.0918, "step": 125 }, { "epoch": 0.9, "learning_rate": 9.994100796397954e-05, "loss": 2.1973, "step": 126 }, { "epoch": 0.91, "learning_rate": 9.993816596324685e-05, "loss": 2.0996, "step": 127 }, { "epoch": 0.91, "learning_rate": 9.993525714770166e-05, "loss": 1.8906, "step": 128 }, { "epoch": 0.92, "learning_rate": 9.993228152123581e-05, "loss": 2.1602, "step": 129 }, { "epoch": 0.93, "learning_rate": 9.992923908783055e-05, "loss": 1.8594, "step": 130 }, { "epoch": 0.94, "learning_rate": 9.992612985155647e-05, "loss": 2.2383, "step": 131 }, { "epoch": 0.94, "learning_rate": 9.992295381657361e-05, "loss": 1.9551, "step": 132 }, { "epoch": 0.95, "learning_rate": 9.991971098713136e-05, "loss": 2.0879, "step": 133 }, { "epoch": 0.96, "learning_rate": 9.991640136756842e-05, "loss": 2.0078, "step": 134 }, { "epoch": 0.96, "learning_rate": 9.991302496231294e-05, "loss": 1.9199, "step": 135 }, { "epoch": 0.97, "learning_rate": 9.990958177588236e-05, "loss": 1.9531, "step": 136 }, { "epoch": 0.98, "learning_rate": 9.990607181288352e-05, "loss": 1.6777, "step": 137 }, { "epoch": 0.99, "learning_rate": 9.990249507801256e-05, "loss": 2.1328, "step": 138 }, { "epoch": 0.99, "learning_rate": 9.989885157605496e-05, "loss": 1.9199, "step": 139 }, { "epoch": 1.0, "learning_rate": 9.989514131188559e-05, "loss": 2.2012, "step": 140 }, { "epoch": 1.01, "learning_rate": 9.989136429046856e-05, "loss": 1.6445, "step": 141 }, { "epoch": 1.01, "learning_rate": 9.988752051685736e-05, "loss": 1.623, "step": 142 }, { "epoch": 1.02, "learning_rate": 9.988360999619476e-05, "loss": 1.4434, "step": 143 }, { "epoch": 1.03, "learning_rate": 9.987963273371286e-05, "loss": 1.3242, "step": 144 }, { "epoch": 1.04, "learning_rate": 9.987558873473302e-05, "loss": 1.498, "step": 145 }, { "epoch": 1.04, "learning_rate": 9.987147800466593e-05, "loss": 1.4609, "step": 146 }, { "epoch": 1.05, "learning_rate": 9.986730054901153e-05, "loss": 1.6914, "step": 147 }, { "epoch": 1.06, "learning_rate": 9.986305637335907e-05, "loss": 1.5391, "step": 148 }, { "epoch": 1.06, "learning_rate": 9.985874548338702e-05, "loss": 1.4902, "step": 149 }, { "epoch": 1.07, "learning_rate": 9.985436788486318e-05, "loss": 1.4668, "step": 150 }, { "epoch": 1.08, "learning_rate": 9.984992358364451e-05, "loss": 1.4688, "step": 151 }, { "epoch": 1.09, "learning_rate": 9.984541258567731e-05, "loss": 1.5918, "step": 152 }, { "epoch": 1.09, "learning_rate": 9.984083489699706e-05, "loss": 1.5, "step": 153 }, { "epoch": 1.1, "learning_rate": 9.983619052372848e-05, "loss": 1.6504, "step": 154 }, { "epoch": 1.11, "learning_rate": 9.983147947208552e-05, "loss": 1.5176, "step": 155 }, { "epoch": 1.11, "learning_rate": 9.98267017483713e-05, "loss": 1.2656, "step": 156 }, { "epoch": 1.12, "learning_rate": 9.982185735897824e-05, "loss": 1.5996, "step": 157 }, { "epoch": 1.13, "learning_rate": 9.981694631038785e-05, "loss": 1.6016, "step": 158 }, { "epoch": 1.14, "learning_rate": 9.98119686091709e-05, "loss": 1.4609, "step": 159 }, { "epoch": 1.14, "learning_rate": 9.980692426198728e-05, "loss": 1.5391, "step": 160 }, { "epoch": 1.15, "learning_rate": 9.980181327558609e-05, "loss": 1.332, "step": 161 }, { "epoch": 1.16, "learning_rate": 9.97966356568056e-05, "loss": 1.6211, "step": 162 }, { "epoch": 1.16, "learning_rate": 9.979139141257319e-05, "loss": 1.4316, "step": 163 }, { "epoch": 1.17, "learning_rate": 9.978608054990539e-05, "loss": 1.6699, "step": 164 }, { "epoch": 1.18, "learning_rate": 9.97807030759079e-05, "loss": 1.4453, "step": 165 }, { "epoch": 1.19, "learning_rate": 9.977525899777548e-05, "loss": 1.4043, "step": 166 }, { "epoch": 1.19, "learning_rate": 9.976974832279209e-05, "loss": 1.459, "step": 167 }, { "epoch": 1.2, "learning_rate": 9.97641710583307e-05, "loss": 1.3496, "step": 168 }, { "epoch": 1.21, "learning_rate": 9.975852721185344e-05, "loss": 1.6484, "step": 169 }, { "epoch": 1.21, "learning_rate": 9.975281679091148e-05, "loss": 1.2637, "step": 170 }, { "epoch": 1.22, "learning_rate": 9.97470398031451e-05, "loss": 1.2705, "step": 171 }, { "epoch": 1.23, "learning_rate": 9.974119625628361e-05, "loss": 1.4492, "step": 172 }, { "epoch": 1.24, "learning_rate": 9.97352861581454e-05, "loss": 1.4219, "step": 173 }, { "epoch": 1.24, "learning_rate": 9.972930951663789e-05, "loss": 1.4512, "step": 174 }, { "epoch": 1.25, "learning_rate": 9.972326633975752e-05, "loss": 1.6289, "step": 175 }, { "epoch": 1.26, "learning_rate": 9.971715663558979e-05, "loss": 1.4863, "step": 176 }, { "epoch": 1.26, "learning_rate": 9.971098041230915e-05, "loss": 1.4062, "step": 177 }, { "epoch": 1.27, "learning_rate": 9.970473767817911e-05, "loss": 1.3672, "step": 178 }, { "epoch": 1.28, "learning_rate": 9.969842844155214e-05, "loss": 1.4062, "step": 179 }, { "epoch": 1.29, "learning_rate": 9.969205271086968e-05, "loss": 1.1543, "step": 180 }, { "epoch": 1.29, "learning_rate": 9.968561049466214e-05, "loss": 1.4629, "step": 181 }, { "epoch": 1.3, "learning_rate": 9.967910180154889e-05, "loss": 1.5156, "step": 182 }, { "epoch": 1.31, "learning_rate": 9.967252664023824e-05, "loss": 1.4395, "step": 183 }, { "epoch": 1.31, "learning_rate": 9.966588501952746e-05, "loss": 1.3887, "step": 184 }, { "epoch": 1.32, "learning_rate": 9.965917694830268e-05, "loss": 1.3418, "step": 185 }, { "epoch": 1.33, "learning_rate": 9.965240243553898e-05, "loss": 1.3496, "step": 186 }, { "epoch": 1.34, "learning_rate": 9.964556149030033e-05, "loss": 1.4824, "step": 187 }, { "epoch": 1.34, "learning_rate": 9.963865412173957e-05, "loss": 1.3867, "step": 188 }, { "epoch": 1.35, "learning_rate": 9.963168033909842e-05, "loss": 1.3232, "step": 189 }, { "epoch": 1.36, "learning_rate": 9.962464015170747e-05, "loss": 1.2197, "step": 190 }, { "epoch": 1.36, "learning_rate": 9.961753356898615e-05, "loss": 1.4414, "step": 191 }, { "epoch": 1.37, "learning_rate": 9.961036060044268e-05, "loss": 1.2129, "step": 192 }, { "epoch": 1.38, "learning_rate": 9.960312125567419e-05, "loss": 1.3223, "step": 193 }, { "epoch": 1.39, "learning_rate": 9.959581554436653e-05, "loss": 1.4492, "step": 194 }, { "epoch": 1.39, "learning_rate": 9.95884434762944e-05, "loss": 1.3945, "step": 195 }, { "epoch": 1.4, "learning_rate": 9.958100506132127e-05, "loss": 1.2246, "step": 196 }, { "epoch": 1.41, "learning_rate": 9.957350030939935e-05, "loss": 1.3867, "step": 197 }, { "epoch": 1.41, "learning_rate": 9.956592923056965e-05, "loss": 1.2529, "step": 198 }, { "epoch": 1.42, "learning_rate": 9.955829183496189e-05, "loss": 1.4785, "step": 199 }, { "epoch": 1.43, "learning_rate": 9.955058813279455e-05, "loss": 1.4395, "step": 200 }, { "epoch": 1.44, "learning_rate": 9.954281813437477e-05, "loss": 1.5605, "step": 201 }, { "epoch": 1.44, "learning_rate": 9.953498185009846e-05, "loss": 1.2949, "step": 202 }, { "epoch": 1.45, "learning_rate": 9.952707929045018e-05, "loss": 1.3154, "step": 203 }, { "epoch": 1.46, "learning_rate": 9.951911046600313e-05, "loss": 1.2324, "step": 204 }, { "epoch": 1.46, "learning_rate": 9.951107538741927e-05, "loss": 1.3574, "step": 205 }, { "epoch": 1.47, "learning_rate": 9.950297406544907e-05, "loss": 1.4707, "step": 206 }, { "epoch": 1.48, "learning_rate": 9.949480651093175e-05, "loss": 1.4375, "step": 207 }, { "epoch": 1.49, "learning_rate": 9.948657273479507e-05, "loss": 1.5488, "step": 208 }, { "epoch": 1.49, "learning_rate": 9.947827274805544e-05, "loss": 1.4141, "step": 209 }, { "epoch": 1.5, "learning_rate": 9.946990656181781e-05, "loss": 1.3516, "step": 210 }, { "epoch": 1.51, "learning_rate": 9.946147418727574e-05, "loss": 1.3408, "step": 211 }, { "epoch": 1.51, "learning_rate": 9.945297563571135e-05, "loss": 1.3496, "step": 212 }, { "epoch": 1.52, "learning_rate": 9.944441091849524e-05, "loss": 1.0674, "step": 213 }, { "epoch": 1.53, "learning_rate": 9.943578004708663e-05, "loss": 1.2793, "step": 214 }, { "epoch": 1.54, "learning_rate": 9.942708303303315e-05, "loss": 1.373, "step": 215 }, { "epoch": 1.54, "learning_rate": 9.941831988797104e-05, "loss": 1.3809, "step": 216 }, { "epoch": 1.55, "learning_rate": 9.940949062362491e-05, "loss": 1.4141, "step": 217 }, { "epoch": 1.56, "learning_rate": 9.940059525180788e-05, "loss": 1.3809, "step": 218 }, { "epoch": 1.56, "learning_rate": 9.939163378442156e-05, "loss": 1.4668, "step": 219 }, { "epoch": 1.57, "learning_rate": 9.938260623345591e-05, "loss": 1.0645, "step": 220 }, { "epoch": 1.58, "learning_rate": 9.937351261098937e-05, "loss": 1.498, "step": 221 }, { "epoch": 1.59, "learning_rate": 9.936435292918877e-05, "loss": 1.2988, "step": 222 }, { "epoch": 1.59, "learning_rate": 9.93551272003093e-05, "loss": 1.3457, "step": 223 }, { "epoch": 1.6, "learning_rate": 9.934583543669453e-05, "loss": 1.3301, "step": 224 }, { "epoch": 1.61, "learning_rate": 9.93364776507764e-05, "loss": 1.1602, "step": 225 }, { "epoch": 1.61, "learning_rate": 9.932705385507515e-05, "loss": 1.1621, "step": 226 }, { "epoch": 1.62, "learning_rate": 9.931756406219936e-05, "loss": 1.2559, "step": 227 }, { "epoch": 1.63, "learning_rate": 9.930800828484592e-05, "loss": 1.4355, "step": 228 }, { "epoch": 1.64, "learning_rate": 9.929838653579997e-05, "loss": 1.2656, "step": 229 }, { "epoch": 1.64, "learning_rate": 9.928869882793495e-05, "loss": 1.2393, "step": 230 }, { "epoch": 1.65, "learning_rate": 9.927894517421252e-05, "loss": 1.1885, "step": 231 }, { "epoch": 1.66, "learning_rate": 9.926912558768262e-05, "loss": 1.2891, "step": 232 }, { "epoch": 1.66, "learning_rate": 9.925924008148335e-05, "loss": 1.3945, "step": 233 }, { "epoch": 1.67, "learning_rate": 9.924928866884103e-05, "loss": 1.3418, "step": 234 }, { "epoch": 1.68, "learning_rate": 9.923927136307019e-05, "loss": 1.4629, "step": 235 }, { "epoch": 1.69, "learning_rate": 9.922918817757345e-05, "loss": 1.0576, "step": 236 }, { "epoch": 1.69, "learning_rate": 9.921903912584165e-05, "loss": 1.2676, "step": 237 }, { "epoch": 1.7, "learning_rate": 9.920882422145372e-05, "loss": 1.5, "step": 238 }, { "epoch": 1.71, "learning_rate": 9.919854347807669e-05, "loss": 1.4121, "step": 239 }, { "epoch": 1.71, "learning_rate": 9.918819690946567e-05, "loss": 1.1406, "step": 240 }, { "epoch": 1.72, "learning_rate": 9.917778452946391e-05, "loss": 1.2871, "step": 241 }, { "epoch": 1.73, "learning_rate": 9.916730635200264e-05, "loss": 1.1875, "step": 242 }, { "epoch": 1.74, "learning_rate": 9.915676239110112e-05, "loss": 1.2041, "step": 243 }, { "epoch": 1.74, "learning_rate": 9.914615266086668e-05, "loss": 1.252, "step": 244 }, { "epoch": 1.75, "learning_rate": 9.913547717549463e-05, "loss": 1.416, "step": 245 }, { "epoch": 1.76, "learning_rate": 9.912473594926821e-05, "loss": 1.3613, "step": 246 }, { "epoch": 1.76, "learning_rate": 9.911392899655867e-05, "loss": 1.416, "step": 247 }, { "epoch": 1.77, "learning_rate": 9.910305633182518e-05, "loss": 1.1562, "step": 248 }, { "epoch": 1.78, "learning_rate": 9.909211796961484e-05, "loss": 1.3711, "step": 249 }, { "epoch": 1.79, "learning_rate": 9.908111392456262e-05, "loss": 1.3867, "step": 250 }, { "epoch": 1.79, "learning_rate": 9.907004421139144e-05, "loss": 1.2637, "step": 251 }, { "epoch": 1.8, "learning_rate": 9.905890884491195e-05, "loss": 1.2578, "step": 252 }, { "epoch": 1.81, "learning_rate": 9.904770784002279e-05, "loss": 1.3105, "step": 253 }, { "epoch": 1.81, "learning_rate": 9.903644121171035e-05, "loss": 1.3105, "step": 254 }, { "epoch": 1.82, "learning_rate": 9.90251089750488e-05, "loss": 1.3945, "step": 255 }, { "epoch": 1.83, "learning_rate": 9.901371114520012e-05, "loss": 1.2852, "step": 256 }, { "epoch": 1.84, "learning_rate": 9.900224773741406e-05, "loss": 1.3047, "step": 257 }, { "epoch": 1.84, "learning_rate": 9.899071876702809e-05, "loss": 1.6133, "step": 258 }, { "epoch": 1.85, "learning_rate": 9.89791242494674e-05, "loss": 1.3281, "step": 259 }, { "epoch": 1.86, "learning_rate": 9.89674642002449e-05, "loss": 1.3457, "step": 260 }, { "epoch": 1.86, "learning_rate": 9.895573863496114e-05, "loss": 1.0859, "step": 261 }, { "epoch": 1.87, "learning_rate": 9.894394756930436e-05, "loss": 1.2803, "step": 262 }, { "epoch": 1.88, "learning_rate": 9.893209101905044e-05, "loss": 1.1699, "step": 263 }, { "epoch": 1.89, "learning_rate": 9.892016900006284e-05, "loss": 1.2441, "step": 264 }, { "epoch": 1.89, "learning_rate": 9.890818152829266e-05, "loss": 1.1641, "step": 265 }, { "epoch": 1.9, "learning_rate": 9.889612861977853e-05, "loss": 1.1953, "step": 266 }, { "epoch": 1.91, "learning_rate": 9.888401029064667e-05, "loss": 1.2324, "step": 267 }, { "epoch": 1.91, "learning_rate": 9.887182655711077e-05, "loss": 1.2734, "step": 268 }, { "epoch": 1.92, "learning_rate": 9.88595774354721e-05, "loss": 1.3105, "step": 269 }, { "epoch": 1.93, "learning_rate": 9.884726294211937e-05, "loss": 1.3516, "step": 270 }, { "epoch": 1.94, "learning_rate": 9.883488309352877e-05, "loss": 1.2344, "step": 271 }, { "epoch": 1.94, "learning_rate": 9.882243790626393e-05, "loss": 1.3223, "step": 272 }, { "epoch": 1.95, "learning_rate": 9.88099273969759e-05, "loss": 1.3418, "step": 273 }, { "epoch": 1.96, "learning_rate": 9.879735158240313e-05, "loss": 1.252, "step": 274 }, { "epoch": 1.96, "learning_rate": 9.878471047937143e-05, "loss": 1.3135, "step": 275 }, { "epoch": 1.97, "learning_rate": 9.877200410479399e-05, "loss": 1.3184, "step": 276 }, { "epoch": 1.98, "learning_rate": 9.87592324756713e-05, "loss": 1.3496, "step": 277 }, { "epoch": 1.99, "learning_rate": 9.874639560909117e-05, "loss": 1.1738, "step": 278 }, { "epoch": 1.99, "learning_rate": 9.873349352222873e-05, "loss": 1.1396, "step": 279 }, { "epoch": 2.0, "learning_rate": 9.872052623234632e-05, "loss": 1.0957, "step": 280 }, { "epoch": 2.01, "learning_rate": 9.87074937567935e-05, "loss": 0.8799, "step": 281 }, { "epoch": 2.01, "learning_rate": 9.869439611300713e-05, "loss": 0.7891, "step": 282 }, { "epoch": 2.02, "learning_rate": 9.86812333185112e-05, "loss": 0.8672, "step": 283 }, { "epoch": 2.03, "learning_rate": 9.866800539091688e-05, "loss": 0.7168, "step": 284 }, { "epoch": 2.04, "learning_rate": 9.865471234792248e-05, "loss": 0.7939, "step": 285 }, { "epoch": 2.04, "learning_rate": 9.864135420731344e-05, "loss": 0.8506, "step": 286 }, { "epoch": 2.05, "learning_rate": 9.86279309869623e-05, "loss": 0.7354, "step": 287 }, { "epoch": 2.06, "learning_rate": 9.861444270482868e-05, "loss": 0.6904, "step": 288 }, { "epoch": 2.06, "learning_rate": 9.860088937895923e-05, "loss": 0.7188, "step": 289 }, { "epoch": 2.07, "learning_rate": 9.858727102748763e-05, "loss": 0.75, "step": 290 }, { "epoch": 2.08, "learning_rate": 9.857358766863456e-05, "loss": 0.918, "step": 291 }, { "epoch": 2.09, "learning_rate": 9.85598393207077e-05, "loss": 0.8662, "step": 292 }, { "epoch": 2.09, "learning_rate": 9.854602600210165e-05, "loss": 0.7705, "step": 293 }, { "epoch": 2.1, "learning_rate": 9.853214773129796e-05, "loss": 0.8994, "step": 294 }, { "epoch": 2.11, "learning_rate": 9.851820452686507e-05, "loss": 0.793, "step": 295 }, { "epoch": 2.11, "learning_rate": 9.850419640745831e-05, "loss": 0.7881, "step": 296 }, { "epoch": 2.12, "learning_rate": 9.849012339181983e-05, "loss": 0.998, "step": 297 }, { "epoch": 2.13, "learning_rate": 9.847598549877866e-05, "loss": 0.7529, "step": 298 }, { "epoch": 2.14, "learning_rate": 9.846178274725059e-05, "loss": 0.7139, "step": 299 }, { "epoch": 2.14, "learning_rate": 9.844751515623824e-05, "loss": 0.7881, "step": 300 }, { "epoch": 2.15, "learning_rate": 9.843318274483089e-05, "loss": 0.9189, "step": 301 }, { "epoch": 2.16, "learning_rate": 9.841878553220465e-05, "loss": 0.7271, "step": 302 }, { "epoch": 2.16, "learning_rate": 9.840432353762225e-05, "loss": 0.7939, "step": 303 }, { "epoch": 2.17, "learning_rate": 9.838979678043315e-05, "loss": 0.8252, "step": 304 }, { "epoch": 2.18, "learning_rate": 9.837520528007341e-05, "loss": 0.8535, "step": 305 }, { "epoch": 2.19, "learning_rate": 9.836054905606578e-05, "loss": 0.8047, "step": 306 }, { "epoch": 2.19, "learning_rate": 9.834582812801954e-05, "loss": 0.689, "step": 307 }, { "epoch": 2.2, "learning_rate": 9.833104251563056e-05, "loss": 0.8828, "step": 308 }, { "epoch": 2.21, "learning_rate": 9.83161922386813e-05, "loss": 0.7666, "step": 309 }, { "epoch": 2.21, "learning_rate": 9.830127731704066e-05, "loss": 0.7285, "step": 310 }, { "epoch": 2.22, "learning_rate": 9.828629777066411e-05, "loss": 0.7329, "step": 311 }, { "epoch": 2.23, "learning_rate": 9.827125361959353e-05, "loss": 0.7402, "step": 312 }, { "epoch": 2.24, "learning_rate": 9.825614488395724e-05, "loss": 0.8984, "step": 313 }, { "epoch": 2.24, "learning_rate": 9.824097158397001e-05, "loss": 0.8076, "step": 314 }, { "epoch": 2.25, "learning_rate": 9.822573373993295e-05, "loss": 0.7119, "step": 315 }, { "epoch": 2.26, "learning_rate": 9.821043137223356e-05, "loss": 0.8643, "step": 316 }, { "epoch": 2.26, "learning_rate": 9.819506450134566e-05, "loss": 0.7881, "step": 317 }, { "epoch": 2.27, "learning_rate": 9.817963314782934e-05, "loss": 0.8076, "step": 318 }, { "epoch": 2.28, "learning_rate": 9.816413733233103e-05, "loss": 0.6494, "step": 319 }, { "epoch": 2.29, "learning_rate": 9.814857707558335e-05, "loss": 0.875, "step": 320 }, { "epoch": 2.29, "learning_rate": 9.813295239840516e-05, "loss": 0.751, "step": 321 }, { "epoch": 2.3, "learning_rate": 9.811726332170153e-05, "loss": 0.7656, "step": 322 }, { "epoch": 2.31, "learning_rate": 9.810150986646363e-05, "loss": 0.7871, "step": 323 }, { "epoch": 2.31, "learning_rate": 9.808569205376884e-05, "loss": 0.5781, "step": 324 }, { "epoch": 2.32, "learning_rate": 9.806980990478062e-05, "loss": 0.7441, "step": 325 }, { "epoch": 2.33, "learning_rate": 9.80538634407485e-05, "loss": 0.7344, "step": 326 }, { "epoch": 2.34, "learning_rate": 9.803785268300806e-05, "loss": 0.7197, "step": 327 }, { "epoch": 2.34, "learning_rate": 9.802177765298091e-05, "loss": 0.6689, "step": 328 }, { "epoch": 2.35, "learning_rate": 9.800563837217464e-05, "loss": 0.7686, "step": 329 }, { "epoch": 2.36, "learning_rate": 9.798943486218284e-05, "loss": 0.9209, "step": 330 }, { "epoch": 2.36, "learning_rate": 9.797316714468498e-05, "loss": 0.667, "step": 331 }, { "epoch": 2.37, "learning_rate": 9.795683524144649e-05, "loss": 0.7588, "step": 332 }, { "epoch": 2.38, "learning_rate": 9.794043917431863e-05, "loss": 0.8066, "step": 333 }, { "epoch": 2.39, "learning_rate": 9.792397896523858e-05, "loss": 0.7168, "step": 334 }, { "epoch": 2.39, "learning_rate": 9.790745463622924e-05, "loss": 0.9395, "step": 335 }, { "epoch": 2.4, "learning_rate": 9.789086620939936e-05, "loss": 0.7871, "step": 336 }, { "epoch": 2.41, "learning_rate": 9.787421370694345e-05, "loss": 0.7588, "step": 337 }, { "epoch": 2.41, "learning_rate": 9.785749715114177e-05, "loss": 0.7363, "step": 338 }, { "epoch": 2.42, "learning_rate": 9.784071656436017e-05, "loss": 0.9658, "step": 339 }, { "epoch": 2.43, "learning_rate": 9.782387196905034e-05, "loss": 0.833, "step": 340 }, { "epoch": 2.44, "learning_rate": 9.780696338774949e-05, "loss": 0.8105, "step": 341 }, { "epoch": 2.44, "learning_rate": 9.778999084308043e-05, "loss": 0.8701, "step": 342 }, { "epoch": 2.45, "learning_rate": 9.777295435775165e-05, "loss": 0.7412, "step": 343 }, { "epoch": 2.46, "learning_rate": 9.775585395455708e-05, "loss": 0.6558, "step": 344 }, { "epoch": 2.46, "learning_rate": 9.773868965637624e-05, "loss": 0.752, "step": 345 }, { "epoch": 2.47, "learning_rate": 9.772146148617414e-05, "loss": 0.7432, "step": 346 }, { "epoch": 2.48, "learning_rate": 9.770416946700121e-05, "loss": 0.7598, "step": 347 }, { "epoch": 2.49, "learning_rate": 9.76868136219933e-05, "loss": 0.7324, "step": 348 }, { "epoch": 2.49, "learning_rate": 9.766939397437171e-05, "loss": 0.9355, "step": 349 }, { "epoch": 2.5, "learning_rate": 9.765191054744305e-05, "loss": 0.7275, "step": 350 }, { "epoch": 2.51, "learning_rate": 9.763436336459931e-05, "loss": 0.8877, "step": 351 }, { "epoch": 2.51, "learning_rate": 9.761675244931772e-05, "loss": 0.8955, "step": 352 }, { "epoch": 2.52, "learning_rate": 9.759907782516085e-05, "loss": 0.7422, "step": 353 }, { "epoch": 2.53, "learning_rate": 9.758133951577648e-05, "loss": 0.8594, "step": 354 }, { "epoch": 2.54, "learning_rate": 9.756353754489758e-05, "loss": 0.6914, "step": 355 }, { "epoch": 2.54, "learning_rate": 9.754567193634232e-05, "loss": 0.9629, "step": 356 }, { "epoch": 2.55, "learning_rate": 9.752774271401402e-05, "loss": 0.9414, "step": 357 }, { "epoch": 2.56, "learning_rate": 9.750974990190106e-05, "loss": 0.7119, "step": 358 }, { "epoch": 2.56, "learning_rate": 9.749169352407699e-05, "loss": 0.8262, "step": 359 }, { "epoch": 2.57, "learning_rate": 9.747357360470033e-05, "loss": 0.6943, "step": 360 }, { "epoch": 2.58, "learning_rate": 9.745539016801462e-05, "loss": 0.7939, "step": 361 }, { "epoch": 2.59, "learning_rate": 9.743714323834844e-05, "loss": 0.8018, "step": 362 }, { "epoch": 2.59, "learning_rate": 9.741883284011527e-05, "loss": 0.6963, "step": 363 }, { "epoch": 2.6, "learning_rate": 9.740045899781352e-05, "loss": 0.8965, "step": 364 }, { "epoch": 2.61, "learning_rate": 9.738202173602651e-05, "loss": 0.7285, "step": 365 }, { "epoch": 2.61, "learning_rate": 9.736352107942236e-05, "loss": 0.9277, "step": 366 }, { "epoch": 2.62, "learning_rate": 9.734495705275406e-05, "loss": 0.8203, "step": 367 }, { "epoch": 2.63, "learning_rate": 9.732632968085936e-05, "loss": 0.8174, "step": 368 }, { "epoch": 2.64, "learning_rate": 9.730763898866078e-05, "loss": 0.8955, "step": 369 }, { "epoch": 2.64, "learning_rate": 9.728888500116551e-05, "loss": 0.6309, "step": 370 }, { "epoch": 2.65, "learning_rate": 9.727006774346551e-05, "loss": 0.7622, "step": 371 }, { "epoch": 2.66, "learning_rate": 9.725118724073731e-05, "loss": 0.7422, "step": 372 }, { "epoch": 2.66, "learning_rate": 9.723224351824213e-05, "loss": 0.7578, "step": 373 }, { "epoch": 2.67, "learning_rate": 9.721323660132571e-05, "loss": 0.7012, "step": 374 }, { "epoch": 2.68, "learning_rate": 9.719416651541839e-05, "loss": 0.7969, "step": 375 }, { "epoch": 2.69, "learning_rate": 9.717503328603498e-05, "loss": 0.7192, "step": 376 }, { "epoch": 2.69, "learning_rate": 9.715583693877484e-05, "loss": 0.8828, "step": 377 }, { "epoch": 2.7, "learning_rate": 9.713657749932172e-05, "loss": 0.7207, "step": 378 }, { "epoch": 2.71, "learning_rate": 9.71172549934438e-05, "loss": 0.75, "step": 379 }, { "epoch": 2.71, "learning_rate": 9.709786944699364e-05, "loss": 0.7402, "step": 380 }, { "epoch": 2.72, "learning_rate": 9.707842088590816e-05, "loss": 0.7139, "step": 381 }, { "epoch": 2.73, "learning_rate": 9.705890933620858e-05, "loss": 0.8047, "step": 382 }, { "epoch": 2.74, "learning_rate": 9.703933482400038e-05, "loss": 0.6929, "step": 383 }, { "epoch": 2.74, "learning_rate": 9.701969737547331e-05, "loss": 0.7026, "step": 384 }, { "epoch": 2.75, "learning_rate": 9.699999701690133e-05, "loss": 0.707, "step": 385 }, { "epoch": 2.76, "learning_rate": 9.69802337746425e-05, "loss": 0.6602, "step": 386 }, { "epoch": 2.76, "learning_rate": 9.696040767513911e-05, "loss": 0.8018, "step": 387 }, { "epoch": 2.77, "learning_rate": 9.694051874491748e-05, "loss": 0.793, "step": 388 }, { "epoch": 2.78, "learning_rate": 9.692056701058805e-05, "loss": 0.8291, "step": 389 }, { "epoch": 2.79, "learning_rate": 9.690055249884524e-05, "loss": 0.8359, "step": 390 }, { "epoch": 2.79, "learning_rate": 9.688047523646748e-05, "loss": 1.1475, "step": 391 }, { "epoch": 2.8, "learning_rate": 9.686033525031719e-05, "loss": 1.0, "step": 392 }, { "epoch": 2.81, "learning_rate": 9.684013256734065e-05, "loss": 0.7773, "step": 393 }, { "epoch": 2.81, "learning_rate": 9.681986721456806e-05, "loss": 0.7188, "step": 394 }, { "epoch": 2.82, "learning_rate": 9.679953921911349e-05, "loss": 0.8574, "step": 395 }, { "epoch": 2.83, "learning_rate": 9.677914860817476e-05, "loss": 0.6753, "step": 396 }, { "epoch": 2.84, "learning_rate": 9.675869540903355e-05, "loss": 0.9355, "step": 397 }, { "epoch": 2.84, "learning_rate": 9.67381796490552e-05, "loss": 0.8076, "step": 398 }, { "epoch": 2.85, "learning_rate": 9.671760135568881e-05, "loss": 0.832, "step": 399 }, { "epoch": 2.86, "learning_rate": 9.669696055646713e-05, "loss": 0.9033, "step": 400 }, { "epoch": 2.86, "learning_rate": 9.667625727900652e-05, "loss": 0.835, "step": 401 }, { "epoch": 2.87, "learning_rate": 9.665549155100694e-05, "loss": 1.0742, "step": 402 }, { "epoch": 2.88, "learning_rate": 9.663466340025194e-05, "loss": 0.9502, "step": 403 }, { "epoch": 2.89, "learning_rate": 9.661377285460855e-05, "loss": 1.001, "step": 404 }, { "epoch": 2.89, "learning_rate": 9.659281994202729e-05, "loss": 0.9863, "step": 405 }, { "epoch": 2.9, "learning_rate": 9.657180469054213e-05, "loss": 0.7432, "step": 406 }, { "epoch": 2.91, "learning_rate": 9.655072712827043e-05, "loss": 0.8945, "step": 407 }, { "epoch": 2.91, "learning_rate": 9.652958728341296e-05, "loss": 0.7988, "step": 408 }, { "epoch": 2.92, "learning_rate": 9.650838518425377e-05, "loss": 1.1289, "step": 409 }, { "epoch": 2.93, "learning_rate": 9.648712085916026e-05, "loss": 0.8262, "step": 410 }, { "epoch": 2.94, "learning_rate": 9.6465794336583e-05, "loss": 0.8457, "step": 411 }, { "epoch": 2.94, "learning_rate": 9.644440564505588e-05, "loss": 0.9473, "step": 412 }, { "epoch": 2.95, "learning_rate": 9.642295481319588e-05, "loss": 0.9805, "step": 413 }, { "epoch": 2.96, "learning_rate": 9.64014418697032e-05, "loss": 0.9141, "step": 414 }, { "epoch": 2.96, "learning_rate": 9.637986684336107e-05, "loss": 0.8779, "step": 415 }, { "epoch": 2.97, "learning_rate": 9.635822976303581e-05, "loss": 0.7988, "step": 416 }, { "epoch": 2.98, "learning_rate": 9.63365306576768e-05, "loss": 0.7197, "step": 417 }, { "epoch": 2.99, "learning_rate": 9.631476955631635e-05, "loss": 0.874, "step": 418 }, { "epoch": 2.99, "learning_rate": 9.629294648806976e-05, "loss": 1.0723, "step": 419 }, { "epoch": 3.0, "learning_rate": 9.627106148213522e-05, "loss": 0.7266, "step": 420 }, { "epoch": 3.01, "learning_rate": 9.624911456779378e-05, "loss": 0.3872, "step": 421 }, { "epoch": 3.01, "learning_rate": 9.622710577440936e-05, "loss": 0.5806, "step": 422 }, { "epoch": 3.02, "learning_rate": 9.620503513142861e-05, "loss": 0.4761, "step": 423 }, { "epoch": 3.03, "learning_rate": 9.6182902668381e-05, "loss": 0.5537, "step": 424 }, { "epoch": 3.04, "learning_rate": 9.616070841487865e-05, "loss": 0.5093, "step": 425 }, { "epoch": 3.04, "learning_rate": 9.61384524006164e-05, "loss": 0.5264, "step": 426 }, { "epoch": 3.05, "learning_rate": 9.61161346553717e-05, "loss": 0.5762, "step": 427 }, { "epoch": 3.06, "learning_rate": 9.609375520900459e-05, "loss": 0.4512, "step": 428 }, { "epoch": 3.06, "learning_rate": 9.607131409145766e-05, "loss": 0.4688, "step": 429 }, { "epoch": 3.07, "learning_rate": 9.604881133275605e-05, "loss": 0.6299, "step": 430 }, { "epoch": 3.08, "learning_rate": 9.602624696300733e-05, "loss": 0.4829, "step": 431 }, { "epoch": 3.09, "learning_rate": 9.600362101240152e-05, "loss": 0.519, "step": 432 }, { "epoch": 3.09, "learning_rate": 9.598093351121103e-05, "loss": 0.689, "step": 433 }, { "epoch": 3.1, "learning_rate": 9.595818448979061e-05, "loss": 0.4102, "step": 434 }, { "epoch": 3.11, "learning_rate": 9.593537397857736e-05, "loss": 0.5059, "step": 435 }, { "epoch": 3.11, "learning_rate": 9.59125020080906e-05, "loss": 0.4668, "step": 436 }, { "epoch": 3.12, "learning_rate": 9.588956860893191e-05, "loss": 0.4932, "step": 437 }, { "epoch": 3.13, "learning_rate": 9.586657381178505e-05, "loss": 0.4634, "step": 438 }, { "epoch": 3.14, "learning_rate": 9.584351764741594e-05, "loss": 0.4238, "step": 439 }, { "epoch": 3.14, "learning_rate": 9.582040014667258e-05, "loss": 0.4429, "step": 440 }, { "epoch": 3.15, "learning_rate": 9.579722134048506e-05, "loss": 0.407, "step": 441 }, { "epoch": 3.16, "learning_rate": 9.577398125986547e-05, "loss": 0.3555, "step": 442 }, { "epoch": 3.16, "learning_rate": 9.575067993590791e-05, "loss": 0.4287, "step": 443 }, { "epoch": 3.17, "learning_rate": 9.572731739978839e-05, "loss": 0.5806, "step": 444 }, { "epoch": 3.18, "learning_rate": 9.570389368276486e-05, "loss": 0.4839, "step": 445 }, { "epoch": 3.19, "learning_rate": 9.56804088161771e-05, "loss": 0.4595, "step": 446 }, { "epoch": 3.19, "learning_rate": 9.565686283144669e-05, "loss": 0.5259, "step": 447 }, { "epoch": 3.2, "learning_rate": 9.563325576007701e-05, "loss": 0.5449, "step": 448 }, { "epoch": 3.21, "learning_rate": 9.560958763365316e-05, "loss": 0.4253, "step": 449 }, { "epoch": 3.21, "learning_rate": 9.558585848384194e-05, "loss": 0.353, "step": 450 }, { "epoch": 3.22, "learning_rate": 9.556206834239177e-05, "loss": 0.4624, "step": 451 }, { "epoch": 3.23, "learning_rate": 9.553821724113268e-05, "loss": 0.4868, "step": 452 }, { "epoch": 3.24, "learning_rate": 9.55143052119763e-05, "loss": 0.5, "step": 453 }, { "epoch": 3.24, "learning_rate": 9.549033228691575e-05, "loss": 0.3589, "step": 454 }, { "epoch": 3.25, "learning_rate": 9.546629849802562e-05, "loss": 0.5537, "step": 455 }, { "epoch": 3.26, "learning_rate": 9.544220387746192e-05, "loss": 0.4824, "step": 456 }, { "epoch": 3.26, "learning_rate": 9.541804845746208e-05, "loss": 0.3845, "step": 457 }, { "epoch": 3.27, "learning_rate": 9.539383227034487e-05, "loss": 0.4424, "step": 458 }, { "epoch": 3.28, "learning_rate": 9.536955534851037e-05, "loss": 0.4912, "step": 459 }, { "epoch": 3.29, "learning_rate": 9.534521772443988e-05, "loss": 0.3691, "step": 460 }, { "epoch": 3.29, "learning_rate": 9.532081943069597e-05, "loss": 0.4287, "step": 461 }, { "epoch": 3.3, "learning_rate": 9.529636049992234e-05, "loss": 0.5239, "step": 462 }, { "epoch": 3.31, "learning_rate": 9.527184096484385e-05, "loss": 0.4204, "step": 463 }, { "epoch": 3.31, "learning_rate": 9.524726085826644e-05, "loss": 0.4482, "step": 464 }, { "epoch": 3.32, "learning_rate": 9.522262021307707e-05, "loss": 0.3882, "step": 465 }, { "epoch": 3.33, "learning_rate": 9.519791906224371e-05, "loss": 0.4194, "step": 466 }, { "epoch": 3.34, "learning_rate": 9.51731574388153e-05, "loss": 0.3521, "step": 467 }, { "epoch": 3.34, "learning_rate": 9.514833537592166e-05, "loss": 0.5159, "step": 468 }, { "epoch": 3.35, "learning_rate": 9.51234529067735e-05, "loss": 0.4868, "step": 469 }, { "epoch": 3.36, "learning_rate": 9.509851006466234e-05, "loss": 0.3755, "step": 470 }, { "epoch": 3.36, "learning_rate": 9.50735068829605e-05, "loss": 0.4067, "step": 471 }, { "epoch": 3.37, "learning_rate": 9.504844339512095e-05, "loss": 0.5586, "step": 472 }, { "epoch": 3.38, "learning_rate": 9.502331963467748e-05, "loss": 0.5889, "step": 473 }, { "epoch": 3.39, "learning_rate": 9.499813563524438e-05, "loss": 0.3799, "step": 474 }, { "epoch": 3.39, "learning_rate": 9.497289143051664e-05, "loss": 0.5024, "step": 475 }, { "epoch": 3.4, "learning_rate": 9.494758705426978e-05, "loss": 0.3931, "step": 476 }, { "epoch": 3.41, "learning_rate": 9.492222254035977e-05, "loss": 0.4033, "step": 477 }, { "epoch": 3.41, "learning_rate": 9.48967979227231e-05, "loss": 0.5889, "step": 478 }, { "epoch": 3.42, "learning_rate": 9.487131323537668e-05, "loss": 0.4966, "step": 479 }, { "epoch": 3.43, "learning_rate": 9.484576851241773e-05, "loss": 0.5483, "step": 480 }, { "epoch": 3.44, "learning_rate": 9.482016378802388e-05, "loss": 0.3979, "step": 481 }, { "epoch": 3.44, "learning_rate": 9.479449909645296e-05, "loss": 0.6338, "step": 482 }, { "epoch": 3.45, "learning_rate": 9.476877447204308e-05, "loss": 0.5259, "step": 483 }, { "epoch": 3.46, "learning_rate": 9.474298994921251e-05, "loss": 0.499, "step": 484 }, { "epoch": 3.46, "learning_rate": 9.471714556245969e-05, "loss": 0.583, "step": 485 }, { "epoch": 3.47, "learning_rate": 9.469124134636316e-05, "loss": 0.4688, "step": 486 }, { "epoch": 3.48, "learning_rate": 9.466527733558144e-05, "loss": 0.4312, "step": 487 }, { "epoch": 3.49, "learning_rate": 9.463925356485313e-05, "loss": 0.5454, "step": 488 }, { "epoch": 3.49, "learning_rate": 9.461317006899675e-05, "loss": 0.5103, "step": 489 }, { "epoch": 3.5, "learning_rate": 9.458702688291073e-05, "loss": 0.5537, "step": 490 }, { "epoch": 3.51, "learning_rate": 9.456082404157338e-05, "loss": 0.7695, "step": 491 }, { "epoch": 3.51, "learning_rate": 9.45345615800428e-05, "loss": 0.4844, "step": 492 }, { "epoch": 3.52, "learning_rate": 9.45082395334569e-05, "loss": 0.5835, "step": 493 }, { "epoch": 3.53, "learning_rate": 9.448185793703326e-05, "loss": 0.5928, "step": 494 }, { "epoch": 3.54, "learning_rate": 9.445541682606916e-05, "loss": 0.4434, "step": 495 }, { "epoch": 3.54, "learning_rate": 9.442891623594153e-05, "loss": 0.6338, "step": 496 }, { "epoch": 3.55, "learning_rate": 9.440235620210683e-05, "loss": 0.4326, "step": 497 }, { "epoch": 3.56, "learning_rate": 9.437573676010109e-05, "loss": 0.5425, "step": 498 }, { "epoch": 3.56, "learning_rate": 9.434905794553983e-05, "loss": 0.564, "step": 499 }, { "epoch": 3.57, "learning_rate": 9.432231979411798e-05, "loss": 0.5171, "step": 500 }, { "epoch": 3.58, "learning_rate": 9.429552234160987e-05, "loss": 0.6284, "step": 501 }, { "epoch": 3.59, "learning_rate": 9.426866562386917e-05, "loss": 0.4336, "step": 502 }, { "epoch": 3.59, "learning_rate": 9.424174967682888e-05, "loss": 0.4526, "step": 503 }, { "epoch": 3.6, "learning_rate": 9.421477453650118e-05, "loss": 0.5586, "step": 504 }, { "epoch": 3.61, "learning_rate": 9.41877402389775e-05, "loss": 0.5933, "step": 505 }, { "epoch": 3.61, "learning_rate": 9.41606468204284e-05, "loss": 0.5498, "step": 506 }, { "epoch": 3.62, "learning_rate": 9.413349431710353e-05, "loss": 0.5723, "step": 507 }, { "epoch": 3.63, "learning_rate": 9.410628276533163e-05, "loss": 0.4441, "step": 508 }, { "epoch": 3.64, "learning_rate": 9.407901220152038e-05, "loss": 0.4707, "step": 509 }, { "epoch": 3.64, "learning_rate": 9.40516826621565e-05, "loss": 0.5005, "step": 510 }, { "epoch": 3.65, "learning_rate": 9.402429418380554e-05, "loss": 0.5986, "step": 511 }, { "epoch": 3.66, "learning_rate": 9.399684680311196e-05, "loss": 0.6426, "step": 512 }, { "epoch": 3.66, "learning_rate": 9.3969340556799e-05, "loss": 0.4341, "step": 513 }, { "epoch": 3.67, "learning_rate": 9.394177548166864e-05, "loss": 0.5083, "step": 514 }, { "epoch": 3.68, "learning_rate": 9.391415161460162e-05, "loss": 0.5444, "step": 515 }, { "epoch": 3.69, "learning_rate": 9.388646899255733e-05, "loss": 0.4292, "step": 516 }, { "epoch": 3.69, "learning_rate": 9.385872765257373e-05, "loss": 0.5386, "step": 517 }, { "epoch": 3.7, "learning_rate": 9.38309276317674e-05, "loss": 0.4106, "step": 518 }, { "epoch": 3.71, "learning_rate": 9.380306896733336e-05, "loss": 0.5532, "step": 519 }, { "epoch": 3.71, "learning_rate": 9.377515169654518e-05, "loss": 0.4941, "step": 520 }, { "epoch": 3.72, "learning_rate": 9.374717585675476e-05, "loss": 0.5366, "step": 521 }, { "epoch": 3.73, "learning_rate": 9.371914148539243e-05, "loss": 0.752, "step": 522 }, { "epoch": 3.74, "learning_rate": 9.369104861996677e-05, "loss": 0.5986, "step": 523 }, { "epoch": 3.74, "learning_rate": 9.366289729806468e-05, "loss": 0.6279, "step": 524 }, { "epoch": 3.75, "learning_rate": 9.363468755735123e-05, "loss": 0.4033, "step": 525 }, { "epoch": 3.76, "learning_rate": 9.360641943556969e-05, "loss": 0.6108, "step": 526 }, { "epoch": 3.76, "learning_rate": 9.357809297054139e-05, "loss": 0.6797, "step": 527 }, { "epoch": 3.77, "learning_rate": 9.354970820016576e-05, "loss": 0.6968, "step": 528 }, { "epoch": 3.78, "learning_rate": 9.352126516242021e-05, "loss": 0.5576, "step": 529 }, { "epoch": 3.79, "learning_rate": 9.349276389536017e-05, "loss": 0.7036, "step": 530 }, { "epoch": 3.79, "learning_rate": 9.34642044371189e-05, "loss": 0.5742, "step": 531 }, { "epoch": 3.8, "learning_rate": 9.343558682590756e-05, "loss": 0.6689, "step": 532 }, { "epoch": 3.81, "learning_rate": 9.340691110001511e-05, "loss": 0.5068, "step": 533 }, { "epoch": 3.81, "learning_rate": 9.337817729780827e-05, "loss": 0.4404, "step": 534 }, { "epoch": 3.82, "learning_rate": 9.334938545773142e-05, "loss": 0.5806, "step": 535 }, { "epoch": 3.83, "learning_rate": 9.332053561830669e-05, "loss": 0.647, "step": 536 }, { "epoch": 3.84, "learning_rate": 9.329162781813369e-05, "loss": 0.4556, "step": 537 }, { "epoch": 3.84, "learning_rate": 9.326266209588966e-05, "loss": 0.5107, "step": 538 }, { "epoch": 3.85, "learning_rate": 9.323363849032933e-05, "loss": 0.5298, "step": 539 }, { "epoch": 3.86, "learning_rate": 9.320455704028481e-05, "loss": 0.7114, "step": 540 }, { "epoch": 3.86, "learning_rate": 9.31754177846657e-05, "loss": 0.4893, "step": 541 }, { "epoch": 3.87, "learning_rate": 9.314622076245887e-05, "loss": 0.5332, "step": 542 }, { "epoch": 3.88, "learning_rate": 9.311696601272852e-05, "loss": 0.5078, "step": 543 }, { "epoch": 3.89, "learning_rate": 9.308765357461604e-05, "loss": 0.6377, "step": 544 }, { "epoch": 3.89, "learning_rate": 9.305828348734005e-05, "loss": 0.5122, "step": 545 }, { "epoch": 3.9, "learning_rate": 9.302885579019627e-05, "loss": 0.6201, "step": 546 }, { "epoch": 3.91, "learning_rate": 9.299937052255752e-05, "loss": 0.5791, "step": 547 }, { "epoch": 3.91, "learning_rate": 9.296982772387365e-05, "loss": 0.5288, "step": 548 }, { "epoch": 3.92, "learning_rate": 9.294022743367145e-05, "loss": 0.4883, "step": 549 }, { "epoch": 3.93, "learning_rate": 9.291056969155469e-05, "loss": 0.7383, "step": 550 }, { "epoch": 3.94, "learning_rate": 9.288085453720394e-05, "loss": 0.4385, "step": 551 }, { "epoch": 3.94, "learning_rate": 9.285108201037662e-05, "loss": 0.5073, "step": 552 }, { "epoch": 3.95, "learning_rate": 9.282125215090694e-05, "loss": 0.4043, "step": 553 }, { "epoch": 3.96, "learning_rate": 9.279136499870574e-05, "loss": 0.6162, "step": 554 }, { "epoch": 3.96, "learning_rate": 9.276142059376057e-05, "loss": 0.5986, "step": 555 }, { "epoch": 3.97, "learning_rate": 9.27314189761356e-05, "loss": 0.6479, "step": 556 }, { "epoch": 3.98, "learning_rate": 9.270136018597151e-05, "loss": 0.8301, "step": 557 }, { "epoch": 3.99, "learning_rate": 9.267124426348548e-05, "loss": 0.6924, "step": 558 }, { "epoch": 3.99, "learning_rate": 9.264107124897113e-05, "loss": 0.4478, "step": 559 }, { "epoch": 4.0, "learning_rate": 9.261084118279847e-05, "loss": 0.4409, "step": 560 }, { "epoch": 4.01, "learning_rate": 9.258055410541385e-05, "loss": 0.3467, "step": 561 }, { "epoch": 4.01, "learning_rate": 9.255021005733989e-05, "loss": 0.335, "step": 562 }, { "epoch": 4.02, "learning_rate": 9.251980907917544e-05, "loss": 0.2866, "step": 563 }, { "epoch": 4.03, "learning_rate": 9.24893512115955e-05, "loss": 0.3098, "step": 564 }, { "epoch": 4.04, "learning_rate": 9.245883649535123e-05, "loss": 0.2983, "step": 565 }, { "epoch": 4.04, "learning_rate": 9.24282649712698e-05, "loss": 0.3325, "step": 566 }, { "epoch": 4.05, "learning_rate": 9.239763668025439e-05, "loss": 0.2937, "step": 567 }, { "epoch": 4.06, "learning_rate": 9.236695166328419e-05, "loss": 0.2521, "step": 568 }, { "epoch": 4.06, "learning_rate": 9.233620996141421e-05, "loss": 0.3516, "step": 569 }, { "epoch": 4.07, "learning_rate": 9.230541161577535e-05, "loss": 0.3245, "step": 570 }, { "epoch": 4.08, "learning_rate": 9.227455666757429e-05, "loss": 0.2839, "step": 571 }, { "epoch": 4.09, "learning_rate": 9.224364515809343e-05, "loss": 0.2791, "step": 572 }, { "epoch": 4.09, "learning_rate": 9.221267712869084e-05, "loss": 0.4272, "step": 573 }, { "epoch": 4.1, "learning_rate": 9.218165262080023e-05, "loss": 0.3684, "step": 574 }, { "epoch": 4.11, "learning_rate": 9.215057167593087e-05, "loss": 0.2695, "step": 575 }, { "epoch": 4.11, "learning_rate": 9.211943433566755e-05, "loss": 0.2488, "step": 576 }, { "epoch": 4.12, "learning_rate": 9.208824064167047e-05, "loss": 0.2268, "step": 577 }, { "epoch": 4.13, "learning_rate": 9.205699063567527e-05, "loss": 0.2986, "step": 578 }, { "epoch": 4.14, "learning_rate": 9.202568435949294e-05, "loss": 0.2322, "step": 579 }, { "epoch": 4.14, "learning_rate": 9.199432185500973e-05, "loss": 0.2053, "step": 580 }, { "epoch": 4.15, "learning_rate": 9.196290316418711e-05, "loss": 0.2217, "step": 581 }, { "epoch": 4.16, "learning_rate": 9.193142832906179e-05, "loss": 0.4053, "step": 582 }, { "epoch": 4.16, "learning_rate": 9.189989739174552e-05, "loss": 0.2201, "step": 583 }, { "epoch": 4.17, "learning_rate": 9.186831039442514e-05, "loss": 0.3105, "step": 584 }, { "epoch": 4.18, "learning_rate": 9.183666737936251e-05, "loss": 0.2233, "step": 585 }, { "epoch": 4.19, "learning_rate": 9.180496838889445e-05, "loss": 0.2712, "step": 586 }, { "epoch": 4.19, "learning_rate": 9.177321346543262e-05, "loss": 0.3401, "step": 587 }, { "epoch": 4.2, "learning_rate": 9.174140265146356e-05, "loss": 0.2474, "step": 588 }, { "epoch": 4.21, "learning_rate": 9.170953598954859e-05, "loss": 0.3126, "step": 589 }, { "epoch": 4.21, "learning_rate": 9.167761352232371e-05, "loss": 0.3491, "step": 590 }, { "epoch": 4.22, "learning_rate": 9.164563529249963e-05, "loss": 0.335, "step": 591 }, { "epoch": 4.23, "learning_rate": 9.161360134286166e-05, "loss": 0.304, "step": 592 }, { "epoch": 4.24, "learning_rate": 9.158151171626961e-05, "loss": 0.28, "step": 593 }, { "epoch": 4.24, "learning_rate": 9.154936645565787e-05, "loss": 0.3916, "step": 594 }, { "epoch": 4.25, "learning_rate": 9.15171656040352e-05, "loss": 0.3127, "step": 595 }, { "epoch": 4.26, "learning_rate": 9.148490920448477e-05, "loss": 0.269, "step": 596 }, { "epoch": 4.26, "learning_rate": 9.145259730016402e-05, "loss": 0.3088, "step": 597 }, { "epoch": 4.27, "learning_rate": 9.142022993430475e-05, "loss": 0.2593, "step": 598 }, { "epoch": 4.28, "learning_rate": 9.138780715021285e-05, "loss": 0.304, "step": 599 }, { "epoch": 4.29, "learning_rate": 9.135532899126844e-05, "loss": 0.2727, "step": 600 }, { "epoch": 4.29, "learning_rate": 9.13227955009257e-05, "loss": 0.281, "step": 601 }, { "epoch": 4.3, "learning_rate": 9.129020672271283e-05, "loss": 0.2522, "step": 602 }, { "epoch": 4.31, "learning_rate": 9.125756270023203e-05, "loss": 0.2812, "step": 603 }, { "epoch": 4.31, "learning_rate": 9.122486347715937e-05, "loss": 0.385, "step": 604 }, { "epoch": 4.32, "learning_rate": 9.119210909724485e-05, "loss": 0.2375, "step": 605 }, { "epoch": 4.33, "learning_rate": 9.115929960431217e-05, "loss": 0.1622, "step": 606 }, { "epoch": 4.34, "learning_rate": 9.112643504225886e-05, "loss": 0.3853, "step": 607 }, { "epoch": 4.34, "learning_rate": 9.109351545505607e-05, "loss": 0.2944, "step": 608 }, { "epoch": 4.35, "learning_rate": 9.10605408867486e-05, "loss": 0.2717, "step": 609 }, { "epoch": 4.36, "learning_rate": 9.10275113814548e-05, "loss": 0.3911, "step": 610 }, { "epoch": 4.36, "learning_rate": 9.099442698336649e-05, "loss": 0.3142, "step": 611 }, { "epoch": 4.37, "learning_rate": 9.096128773674902e-05, "loss": 0.3228, "step": 612 }, { "epoch": 4.38, "learning_rate": 9.092809368594107e-05, "loss": 0.3643, "step": 613 }, { "epoch": 4.39, "learning_rate": 9.08948448753546e-05, "loss": 0.3813, "step": 614 }, { "epoch": 4.39, "learning_rate": 9.086154134947494e-05, "loss": 0.3303, "step": 615 }, { "epoch": 4.4, "learning_rate": 9.082818315286055e-05, "loss": 0.2783, "step": 616 }, { "epoch": 4.41, "learning_rate": 9.079477033014306e-05, "loss": 0.3271, "step": 617 }, { "epoch": 4.41, "learning_rate": 9.076130292602717e-05, "loss": 0.2041, "step": 618 }, { "epoch": 4.42, "learning_rate": 9.072778098529064e-05, "loss": 0.3035, "step": 619 }, { "epoch": 4.43, "learning_rate": 9.069420455278419e-05, "loss": 0.3057, "step": 620 }, { "epoch": 4.44, "learning_rate": 9.06605736734314e-05, "loss": 0.2742, "step": 621 }, { "epoch": 4.44, "learning_rate": 9.062688839222877e-05, "loss": 0.3132, "step": 622 }, { "epoch": 4.45, "learning_rate": 9.059314875424553e-05, "loss": 0.256, "step": 623 }, { "epoch": 4.46, "learning_rate": 9.055935480462367e-05, "loss": 0.3308, "step": 624 }, { "epoch": 4.46, "learning_rate": 9.052550658857783e-05, "loss": 0.3706, "step": 625 }, { "epoch": 4.47, "learning_rate": 9.049160415139525e-05, "loss": 0.3552, "step": 626 }, { "epoch": 4.48, "learning_rate": 9.045764753843575e-05, "loss": 0.1973, "step": 627 }, { "epoch": 4.49, "learning_rate": 9.042363679513158e-05, "loss": 0.2075, "step": 628 }, { "epoch": 4.49, "learning_rate": 9.038957196698748e-05, "loss": 0.2339, "step": 629 }, { "epoch": 4.5, "learning_rate": 9.035545309958046e-05, "loss": 0.3701, "step": 630 }, { "epoch": 4.51, "learning_rate": 9.032128023855994e-05, "loss": 0.2737, "step": 631 }, { "epoch": 4.51, "learning_rate": 9.028705342964753e-05, "loss": 0.332, "step": 632 }, { "epoch": 4.52, "learning_rate": 9.025277271863699e-05, "loss": 0.2615, "step": 633 }, { "epoch": 4.53, "learning_rate": 9.021843815139423e-05, "loss": 0.3477, "step": 634 }, { "epoch": 4.54, "learning_rate": 9.018404977385723e-05, "loss": 0.3235, "step": 635 }, { "epoch": 4.54, "learning_rate": 9.014960763203592e-05, "loss": 0.448, "step": 636 }, { "epoch": 4.55, "learning_rate": 9.011511177201225e-05, "loss": 0.292, "step": 637 }, { "epoch": 4.56, "learning_rate": 9.008056223993993e-05, "loss": 0.2659, "step": 638 }, { "epoch": 4.56, "learning_rate": 9.004595908204456e-05, "loss": 0.2734, "step": 639 }, { "epoch": 4.57, "learning_rate": 9.001130234462347e-05, "loss": 0.5747, "step": 640 }, { "epoch": 4.58, "learning_rate": 8.997659207404566e-05, "loss": 0.2314, "step": 641 }, { "epoch": 4.59, "learning_rate": 8.994182831675176e-05, "loss": 0.3687, "step": 642 }, { "epoch": 4.59, "learning_rate": 8.990701111925399e-05, "loss": 0.269, "step": 643 }, { "epoch": 4.6, "learning_rate": 8.987214052813604e-05, "loss": 0.3162, "step": 644 }, { "epoch": 4.61, "learning_rate": 8.983721659005305e-05, "loss": 0.3853, "step": 645 }, { "epoch": 4.61, "learning_rate": 8.980223935173153e-05, "loss": 0.3716, "step": 646 }, { "epoch": 4.62, "learning_rate": 8.97672088599693e-05, "loss": 0.344, "step": 647 }, { "epoch": 4.63, "learning_rate": 8.973212516163545e-05, "loss": 0.3179, "step": 648 }, { "epoch": 4.64, "learning_rate": 8.969698830367024e-05, "loss": 0.2395, "step": 649 }, { "epoch": 4.64, "learning_rate": 8.966179833308506e-05, "loss": 0.3159, "step": 650 }, { "epoch": 4.65, "learning_rate": 8.962655529696236e-05, "loss": 0.2839, "step": 651 }, { "epoch": 4.66, "learning_rate": 8.959125924245559e-05, "loss": 0.3071, "step": 652 }, { "epoch": 4.66, "learning_rate": 8.955591021678913e-05, "loss": 0.3359, "step": 653 }, { "epoch": 4.67, "learning_rate": 8.952050826725826e-05, "loss": 0.3955, "step": 654 }, { "epoch": 4.68, "learning_rate": 8.948505344122904e-05, "loss": 0.3721, "step": 655 }, { "epoch": 4.69, "learning_rate": 8.944954578613827e-05, "loss": 0.5745, "step": 656 }, { "epoch": 4.69, "learning_rate": 8.941398534949345e-05, "loss": 0.3384, "step": 657 }, { "epoch": 4.7, "learning_rate": 8.937837217887273e-05, "loss": 0.4028, "step": 658 }, { "epoch": 4.71, "learning_rate": 8.934270632192474e-05, "loss": 0.4673, "step": 659 }, { "epoch": 4.71, "learning_rate": 8.930698782636867e-05, "loss": 0.3984, "step": 660 }, { "epoch": 4.72, "learning_rate": 8.927121673999411e-05, "loss": 0.3665, "step": 661 }, { "epoch": 4.73, "learning_rate": 8.9235393110661e-05, "loss": 0.3584, "step": 662 }, { "epoch": 4.74, "learning_rate": 8.919951698629962e-05, "loss": 0.4966, "step": 663 }, { "epoch": 4.74, "learning_rate": 8.916358841491046e-05, "loss": 0.3936, "step": 664 }, { "epoch": 4.75, "learning_rate": 8.912760744456415e-05, "loss": 0.3948, "step": 665 }, { "epoch": 4.76, "learning_rate": 8.90915741234015e-05, "loss": 0.3472, "step": 666 }, { "epoch": 4.76, "learning_rate": 8.90554884996333e-05, "loss": 0.3525, "step": 667 }, { "epoch": 4.77, "learning_rate": 8.901935062154034e-05, "loss": 0.3201, "step": 668 }, { "epoch": 4.78, "learning_rate": 8.898316053747334e-05, "loss": 0.4888, "step": 669 }, { "epoch": 4.79, "learning_rate": 8.894691829585285e-05, "loss": 0.3745, "step": 670 }, { "epoch": 4.79, "learning_rate": 8.89106239451692e-05, "loss": 0.4299, "step": 671 }, { "epoch": 4.8, "learning_rate": 8.887427753398248e-05, "loss": 0.377, "step": 672 }, { "epoch": 4.81, "learning_rate": 8.883787911092235e-05, "loss": 0.2729, "step": 673 }, { "epoch": 4.81, "learning_rate": 8.880142872468815e-05, "loss": 0.2388, "step": 674 }, { "epoch": 4.82, "learning_rate": 8.876492642404869e-05, "loss": 0.3877, "step": 675 }, { "epoch": 4.83, "learning_rate": 8.872837225784226e-05, "loss": 0.2964, "step": 676 }, { "epoch": 4.84, "learning_rate": 8.869176627497654e-05, "loss": 0.3662, "step": 677 }, { "epoch": 4.84, "learning_rate": 8.865510852442854e-05, "loss": 0.3389, "step": 678 }, { "epoch": 4.85, "learning_rate": 8.861839905524452e-05, "loss": 0.3779, "step": 679 }, { "epoch": 4.86, "learning_rate": 8.858163791653994e-05, "loss": 0.3545, "step": 680 }, { "epoch": 4.86, "learning_rate": 8.854482515749943e-05, "loss": 0.3179, "step": 681 }, { "epoch": 4.87, "learning_rate": 8.85079608273766e-05, "loss": 0.3215, "step": 682 }, { "epoch": 4.88, "learning_rate": 8.847104497549417e-05, "loss": 0.5205, "step": 683 }, { "epoch": 4.89, "learning_rate": 8.84340776512437e-05, "loss": 0.397, "step": 684 }, { "epoch": 4.89, "learning_rate": 8.839705890408564e-05, "loss": 0.2817, "step": 685 }, { "epoch": 4.9, "learning_rate": 8.835998878354931e-05, "loss": 0.3689, "step": 686 }, { "epoch": 4.91, "learning_rate": 8.832286733923266e-05, "loss": 0.3916, "step": 687 }, { "epoch": 4.91, "learning_rate": 8.828569462080238e-05, "loss": 0.4082, "step": 688 }, { "epoch": 4.92, "learning_rate": 8.824847067799374e-05, "loss": 0.259, "step": 689 }, { "epoch": 4.93, "learning_rate": 8.821119556061055e-05, "loss": 0.4072, "step": 690 }, { "epoch": 4.94, "learning_rate": 8.817386931852507e-05, "loss": 0.3521, "step": 691 }, { "epoch": 4.94, "learning_rate": 8.813649200167799e-05, "loss": 0.3174, "step": 692 }, { "epoch": 4.95, "learning_rate": 8.809906366007832e-05, "loss": 0.2385, "step": 693 }, { "epoch": 4.96, "learning_rate": 8.806158434380333e-05, "loss": 0.1899, "step": 694 }, { "epoch": 4.96, "learning_rate": 8.802405410299854e-05, "loss": 0.3579, "step": 695 }, { "epoch": 4.97, "learning_rate": 8.798647298787754e-05, "loss": 0.4351, "step": 696 }, { "epoch": 4.98, "learning_rate": 8.7948841048722e-05, "loss": 0.3948, "step": 697 }, { "epoch": 4.99, "learning_rate": 8.791115833588164e-05, "loss": 0.2502, "step": 698 }, { "epoch": 4.99, "learning_rate": 8.787342489977409e-05, "loss": 0.5435, "step": 699 }, { "epoch": 5.0, "learning_rate": 8.783564079088477e-05, "loss": 0.3455, "step": 700 }, { "epoch": 5.01, "learning_rate": 8.7797806059767e-05, "loss": 0.1833, "step": 701 }, { "epoch": 5.01, "learning_rate": 8.775992075704182e-05, "loss": 0.3127, "step": 702 }, { "epoch": 5.02, "learning_rate": 8.772198493339784e-05, "loss": 0.2632, "step": 703 }, { "epoch": 5.03, "learning_rate": 8.76839986395914e-05, "loss": 0.3398, "step": 704 }, { "epoch": 5.04, "learning_rate": 8.764596192644623e-05, "loss": 0.1427, "step": 705 }, { "epoch": 5.04, "learning_rate": 8.760787484485363e-05, "loss": 0.1605, "step": 706 }, { "epoch": 5.05, "learning_rate": 8.756973744577221e-05, "loss": 0.2048, "step": 707 }, { "epoch": 5.06, "learning_rate": 8.753154978022795e-05, "loss": 0.132, "step": 708 }, { "epoch": 5.06, "learning_rate": 8.749331189931409e-05, "loss": 0.2598, "step": 709 }, { "epoch": 5.07, "learning_rate": 8.7455023854191e-05, "loss": 0.2383, "step": 710 }, { "epoch": 5.08, "learning_rate": 8.741668569608622e-05, "loss": 0.2284, "step": 711 }, { "epoch": 5.09, "learning_rate": 8.737829747629432e-05, "loss": 0.2576, "step": 712 }, { "epoch": 5.09, "learning_rate": 8.733985924617687e-05, "loss": 0.218, "step": 713 }, { "epoch": 5.1, "learning_rate": 8.73013710571623e-05, "loss": 0.2046, "step": 714 }, { "epoch": 5.11, "learning_rate": 8.726283296074596e-05, "loss": 0.2556, "step": 715 }, { "epoch": 5.11, "learning_rate": 8.722424500848987e-05, "loss": 0.1673, "step": 716 }, { "epoch": 5.12, "learning_rate": 8.718560725202288e-05, "loss": 0.181, "step": 717 }, { "epoch": 5.13, "learning_rate": 8.714691974304035e-05, "loss": 0.188, "step": 718 }, { "epoch": 5.14, "learning_rate": 8.71081825333043e-05, "loss": 0.1929, "step": 719 }, { "epoch": 5.14, "learning_rate": 8.706939567464321e-05, "loss": 0.1958, "step": 720 }, { "epoch": 5.15, "learning_rate": 8.7030559218952e-05, "loss": 0.3145, "step": 721 }, { "epoch": 5.16, "learning_rate": 8.699167321819191e-05, "loss": 0.122, "step": 722 }, { "epoch": 5.16, "learning_rate": 8.695273772439052e-05, "loss": 0.2727, "step": 723 }, { "epoch": 5.17, "learning_rate": 8.691375278964162e-05, "loss": 0.1995, "step": 724 }, { "epoch": 5.18, "learning_rate": 8.68747184661051e-05, "loss": 0.2373, "step": 725 }, { "epoch": 5.19, "learning_rate": 8.6835634806007e-05, "loss": 0.1971, "step": 726 }, { "epoch": 5.19, "learning_rate": 8.679650186163932e-05, "loss": 0.2627, "step": 727 }, { "epoch": 5.2, "learning_rate": 8.675731968536002e-05, "loss": 0.1627, "step": 728 }, { "epoch": 5.21, "learning_rate": 8.671808832959294e-05, "loss": 0.3511, "step": 729 }, { "epoch": 5.21, "learning_rate": 8.66788078468277e-05, "loss": 0.3406, "step": 730 }, { "epoch": 5.22, "learning_rate": 8.663947828961963e-05, "loss": 0.1603, "step": 731 }, { "epoch": 5.23, "learning_rate": 8.660009971058978e-05, "loss": 0.2217, "step": 732 }, { "epoch": 5.24, "learning_rate": 8.656067216242474e-05, "loss": 0.2908, "step": 733 }, { "epoch": 5.24, "learning_rate": 8.652119569787663e-05, "loss": 0.2244, "step": 734 }, { "epoch": 5.25, "learning_rate": 8.648167036976302e-05, "loss": 0.2634, "step": 735 }, { "epoch": 5.26, "learning_rate": 8.644209623096686e-05, "loss": 0.2756, "step": 736 }, { "epoch": 5.26, "learning_rate": 8.64024733344364e-05, "loss": 0.1934, "step": 737 }, { "epoch": 5.27, "learning_rate": 8.636280173318515e-05, "loss": 0.3037, "step": 738 }, { "epoch": 5.28, "learning_rate": 8.632308148029173e-05, "loss": 0.4241, "step": 739 }, { "epoch": 5.29, "learning_rate": 8.628331262889991e-05, "loss": 0.1355, "step": 740 }, { "epoch": 5.29, "learning_rate": 8.624349523221847e-05, "loss": 0.2842, "step": 741 }, { "epoch": 5.3, "learning_rate": 8.620362934352109e-05, "loss": 0.2239, "step": 742 }, { "epoch": 5.31, "learning_rate": 8.616371501614638e-05, "loss": 0.1906, "step": 743 }, { "epoch": 5.31, "learning_rate": 8.612375230349778e-05, "loss": 0.2185, "step": 744 }, { "epoch": 5.32, "learning_rate": 8.608374125904341e-05, "loss": 0.2158, "step": 745 }, { "epoch": 5.33, "learning_rate": 8.604368193631611e-05, "loss": 0.269, "step": 746 }, { "epoch": 5.34, "learning_rate": 8.600357438891325e-05, "loss": 0.2349, "step": 747 }, { "epoch": 5.34, "learning_rate": 8.596341867049677e-05, "loss": 0.2017, "step": 748 }, { "epoch": 5.35, "learning_rate": 8.592321483479305e-05, "loss": 0.3022, "step": 749 }, { "epoch": 5.36, "learning_rate": 8.588296293559286e-05, "loss": 0.196, "step": 750 }, { "epoch": 5.36, "learning_rate": 8.584266302675124e-05, "loss": 0.1853, "step": 751 }, { "epoch": 5.37, "learning_rate": 8.58023151621875e-05, "loss": 0.1501, "step": 752 }, { "epoch": 5.38, "learning_rate": 8.576191939588509e-05, "loss": 0.2834, "step": 753 }, { "epoch": 5.39, "learning_rate": 8.572147578189159e-05, "loss": 0.2251, "step": 754 }, { "epoch": 5.39, "learning_rate": 8.568098437431854e-05, "loss": 0.1956, "step": 755 }, { "epoch": 5.4, "learning_rate": 8.564044522734147e-05, "loss": 0.2776, "step": 756 }, { "epoch": 5.41, "learning_rate": 8.559985839519975e-05, "loss": 0.238, "step": 757 }, { "epoch": 5.41, "learning_rate": 8.555922393219658e-05, "loss": 0.249, "step": 758 }, { "epoch": 5.42, "learning_rate": 8.551854189269888e-05, "loss": 0.2393, "step": 759 }, { "epoch": 5.43, "learning_rate": 8.54778123311372e-05, "loss": 0.1621, "step": 760 }, { "epoch": 5.44, "learning_rate": 8.543703530200572e-05, "loss": 0.2421, "step": 761 }, { "epoch": 5.44, "learning_rate": 8.539621085986208e-05, "loss": 0.1825, "step": 762 }, { "epoch": 5.45, "learning_rate": 8.535533905932738e-05, "loss": 0.2947, "step": 763 }, { "epoch": 5.46, "learning_rate": 8.531441995508609e-05, "loss": 0.1938, "step": 764 }, { "epoch": 5.46, "learning_rate": 8.527345360188596e-05, "loss": 0.2039, "step": 765 }, { "epoch": 5.47, "learning_rate": 8.523244005453795e-05, "loss": 0.1265, "step": 766 }, { "epoch": 5.48, "learning_rate": 8.519137936791617e-05, "loss": 0.2295, "step": 767 }, { "epoch": 5.49, "learning_rate": 8.515027159695781e-05, "loss": 0.2419, "step": 768 }, { "epoch": 5.49, "learning_rate": 8.510911679666304e-05, "loss": 0.243, "step": 769 }, { "epoch": 5.5, "learning_rate": 8.506791502209496e-05, "loss": 0.1797, "step": 770 }, { "epoch": 5.51, "learning_rate": 8.502666632837954e-05, "loss": 0.2546, "step": 771 }, { "epoch": 5.51, "learning_rate": 8.498537077070548e-05, "loss": 0.1945, "step": 772 }, { "epoch": 5.52, "learning_rate": 8.494402840432423e-05, "loss": 0.2267, "step": 773 }, { "epoch": 5.53, "learning_rate": 8.490263928454983e-05, "loss": 0.3799, "step": 774 }, { "epoch": 5.54, "learning_rate": 8.486120346675888e-05, "loss": 0.3062, "step": 775 }, { "epoch": 5.54, "learning_rate": 8.481972100639049e-05, "loss": 0.2715, "step": 776 }, { "epoch": 5.55, "learning_rate": 8.477819195894614e-05, "loss": 0.2549, "step": 777 }, { "epoch": 5.56, "learning_rate": 8.473661637998966e-05, "loss": 0.2632, "step": 778 }, { "epoch": 5.56, "learning_rate": 8.469499432514712e-05, "loss": 0.1971, "step": 779 }, { "epoch": 5.57, "learning_rate": 8.465332585010682e-05, "loss": 0.2107, "step": 780 }, { "epoch": 5.58, "learning_rate": 8.46116110106191e-05, "loss": 0.2363, "step": 781 }, { "epoch": 5.59, "learning_rate": 8.456984986249636e-05, "loss": 0.2847, "step": 782 }, { "epoch": 5.59, "learning_rate": 8.452804246161299e-05, "loss": 0.2957, "step": 783 }, { "epoch": 5.6, "learning_rate": 8.448618886390522e-05, "loss": 0.2913, "step": 784 }, { "epoch": 5.61, "learning_rate": 8.444428912537111e-05, "loss": 0.1957, "step": 785 }, { "epoch": 5.61, "learning_rate": 8.440234330207047e-05, "loss": 0.2428, "step": 786 }, { "epoch": 5.62, "learning_rate": 8.436035145012473e-05, "loss": 0.2571, "step": 787 }, { "epoch": 5.63, "learning_rate": 8.431831362571691e-05, "loss": 0.2546, "step": 788 }, { "epoch": 5.64, "learning_rate": 8.427622988509161e-05, "loss": 0.1317, "step": 789 }, { "epoch": 5.64, "learning_rate": 8.423410028455474e-05, "loss": 0.2402, "step": 790 }, { "epoch": 5.65, "learning_rate": 8.419192488047369e-05, "loss": 0.1776, "step": 791 }, { "epoch": 5.66, "learning_rate": 8.414970372927704e-05, "loss": 0.3433, "step": 792 }, { "epoch": 5.66, "learning_rate": 8.410743688745464e-05, "loss": 0.2917, "step": 793 }, { "epoch": 5.67, "learning_rate": 8.406512441155745e-05, "loss": 0.2998, "step": 794 }, { "epoch": 5.68, "learning_rate": 8.402276635819746e-05, "loss": 0.158, "step": 795 }, { "epoch": 5.69, "learning_rate": 8.398036278404767e-05, "loss": 0.1938, "step": 796 }, { "epoch": 5.69, "learning_rate": 8.393791374584202e-05, "loss": 0.292, "step": 797 }, { "epoch": 5.7, "learning_rate": 8.389541930037516e-05, "loss": 0.2827, "step": 798 }, { "epoch": 5.71, "learning_rate": 8.385287950450263e-05, "loss": 0.2095, "step": 799 }, { "epoch": 5.71, "learning_rate": 8.38102944151406e-05, "loss": 0.2983, "step": 800 }, { "epoch": 5.72, "learning_rate": 8.376766408926576e-05, "loss": 0.2866, "step": 801 }, { "epoch": 5.73, "learning_rate": 8.372498858391544e-05, "loss": 0.1755, "step": 802 }, { "epoch": 5.74, "learning_rate": 8.368226795618737e-05, "loss": 0.2004, "step": 803 }, { "epoch": 5.74, "learning_rate": 8.363950226323963e-05, "loss": 0.3513, "step": 804 }, { "epoch": 5.75, "learning_rate": 8.359669156229062e-05, "loss": 0.2826, "step": 805 }, { "epoch": 5.76, "learning_rate": 8.355383591061898e-05, "loss": 0.1951, "step": 806 }, { "epoch": 5.76, "learning_rate": 8.351093536556344e-05, "loss": 0.2869, "step": 807 }, { "epoch": 5.77, "learning_rate": 8.346798998452282e-05, "loss": 0.2996, "step": 808 }, { "epoch": 5.78, "learning_rate": 8.342499982495595e-05, "loss": 0.2158, "step": 809 }, { "epoch": 5.79, "learning_rate": 8.338196494438153e-05, "loss": 0.3228, "step": 810 }, { "epoch": 5.79, "learning_rate": 8.333888540037813e-05, "loss": 0.2207, "step": 811 }, { "epoch": 5.8, "learning_rate": 8.329576125058406e-05, "loss": 0.1558, "step": 812 }, { "epoch": 5.81, "learning_rate": 8.325259255269731e-05, "loss": 0.3579, "step": 813 }, { "epoch": 5.81, "learning_rate": 8.320937936447548e-05, "loss": 0.2119, "step": 814 }, { "epoch": 5.82, "learning_rate": 8.316612174373571e-05, "loss": 0.3374, "step": 815 }, { "epoch": 5.83, "learning_rate": 8.312281974835452e-05, "loss": 0.2084, "step": 816 }, { "epoch": 5.84, "learning_rate": 8.307947343626791e-05, "loss": 0.3301, "step": 817 }, { "epoch": 5.84, "learning_rate": 8.303608286547109e-05, "loss": 0.2354, "step": 818 }, { "epoch": 5.85, "learning_rate": 8.29926480940185e-05, "loss": 0.3062, "step": 819 }, { "epoch": 5.86, "learning_rate": 8.294916918002376e-05, "loss": 0.3477, "step": 820 }, { "epoch": 5.86, "learning_rate": 8.290564618165948e-05, "loss": 0.2808, "step": 821 }, { "epoch": 5.87, "learning_rate": 8.286207915715733e-05, "loss": 0.2473, "step": 822 }, { "epoch": 5.88, "learning_rate": 8.281846816480784e-05, "loss": 0.2483, "step": 823 }, { "epoch": 5.89, "learning_rate": 8.277481326296038e-05, "loss": 0.3738, "step": 824 }, { "epoch": 5.89, "learning_rate": 8.273111451002305e-05, "loss": 0.2498, "step": 825 }, { "epoch": 5.9, "learning_rate": 8.268737196446264e-05, "loss": 0.2865, "step": 826 }, { "epoch": 5.91, "learning_rate": 8.26435856848045e-05, "loss": 0.2422, "step": 827 }, { "epoch": 5.91, "learning_rate": 8.259975572963257e-05, "loss": 0.2542, "step": 828 }, { "epoch": 5.92, "learning_rate": 8.255588215758916e-05, "loss": 0.3137, "step": 829 }, { "epoch": 5.93, "learning_rate": 8.251196502737494e-05, "loss": 0.2952, "step": 830 }, { "epoch": 5.94, "learning_rate": 8.246800439774889e-05, "loss": 0.2139, "step": 831 }, { "epoch": 5.94, "learning_rate": 8.242400032752813e-05, "loss": 0.2222, "step": 832 }, { "epoch": 5.95, "learning_rate": 8.237995287558802e-05, "loss": 0.2825, "step": 833 }, { "epoch": 5.96, "learning_rate": 8.233586210086181e-05, "loss": 0.1975, "step": 834 }, { "epoch": 5.96, "learning_rate": 8.229172806234085e-05, "loss": 0.3669, "step": 835 }, { "epoch": 5.97, "learning_rate": 8.224755081907427e-05, "loss": 0.2123, "step": 836 }, { "epoch": 5.98, "learning_rate": 8.220333043016908e-05, "loss": 0.3105, "step": 837 }, { "epoch": 5.99, "learning_rate": 8.215906695478996e-05, "loss": 0.303, "step": 838 }, { "epoch": 5.99, "learning_rate": 8.21147604521593e-05, "loss": 0.2649, "step": 839 }, { "epoch": 6.0, "learning_rate": 8.2070410981557e-05, "loss": 0.2101, "step": 840 }, { "epoch": 6.01, "learning_rate": 8.20260186023205e-05, "loss": 0.2268, "step": 841 }, { "epoch": 6.01, "learning_rate": 8.198158337384457e-05, "loss": 0.1444, "step": 842 }, { "epoch": 6.02, "learning_rate": 8.19371053555814e-05, "loss": 0.2413, "step": 843 }, { "epoch": 6.03, "learning_rate": 8.189258460704038e-05, "loss": 0.2667, "step": 844 }, { "epoch": 6.04, "learning_rate": 8.184802118778812e-05, "loss": 0.207, "step": 845 }, { "epoch": 6.04, "learning_rate": 8.180341515744824e-05, "loss": 0.1433, "step": 846 }, { "epoch": 6.05, "learning_rate": 8.175876657570143e-05, "loss": 0.2949, "step": 847 }, { "epoch": 6.06, "learning_rate": 8.171407550228532e-05, "loss": 0.1644, "step": 848 }, { "epoch": 6.06, "learning_rate": 8.166934199699433e-05, "loss": 0.1445, "step": 849 }, { "epoch": 6.07, "learning_rate": 8.162456611967973e-05, "loss": 0.2118, "step": 850 }, { "epoch": 6.08, "learning_rate": 8.157974793024943e-05, "loss": 0.2546, "step": 851 }, { "epoch": 6.09, "learning_rate": 8.153488748866796e-05, "loss": 0.1009, "step": 852 }, { "epoch": 6.09, "learning_rate": 8.148998485495638e-05, "loss": 0.1865, "step": 853 }, { "epoch": 6.1, "learning_rate": 8.144504008919222e-05, "loss": 0.1312, "step": 854 }, { "epoch": 6.11, "learning_rate": 8.140005325150939e-05, "loss": 0.1772, "step": 855 }, { "epoch": 6.11, "learning_rate": 8.135502440209804e-05, "loss": 0.1553, "step": 856 }, { "epoch": 6.12, "learning_rate": 8.130995360120454e-05, "loss": 0.203, "step": 857 }, { "epoch": 6.13, "learning_rate": 8.126484090913147e-05, "loss": 0.1915, "step": 858 }, { "epoch": 6.14, "learning_rate": 8.121968638623733e-05, "loss": 0.1753, "step": 859 }, { "epoch": 6.14, "learning_rate": 8.117449009293668e-05, "loss": 0.1619, "step": 860 }, { "epoch": 6.15, "learning_rate": 8.112925208969995e-05, "loss": 0.1912, "step": 861 }, { "epoch": 6.16, "learning_rate": 8.108397243705335e-05, "loss": 0.2454, "step": 862 }, { "epoch": 6.16, "learning_rate": 8.103865119557884e-05, "loss": 0.1772, "step": 863 }, { "epoch": 6.17, "learning_rate": 8.0993288425914e-05, "loss": 0.194, "step": 864 }, { "epoch": 6.18, "learning_rate": 8.094788418875201e-05, "loss": 0.1808, "step": 865 }, { "epoch": 6.19, "learning_rate": 8.090243854484149e-05, "loss": 0.144, "step": 866 }, { "epoch": 6.19, "learning_rate": 8.08569515549865e-05, "loss": 0.2112, "step": 867 }, { "epoch": 6.2, "learning_rate": 8.081142328004637e-05, "loss": 0.203, "step": 868 }, { "epoch": 6.21, "learning_rate": 8.076585378093571e-05, "loss": 0.1335, "step": 869 }, { "epoch": 6.21, "learning_rate": 8.072024311862426e-05, "loss": 0.1669, "step": 870 }, { "epoch": 6.22, "learning_rate": 8.067459135413686e-05, "loss": 0.1437, "step": 871 }, { "epoch": 6.23, "learning_rate": 8.062889854855333e-05, "loss": 0.2239, "step": 872 }, { "epoch": 6.24, "learning_rate": 8.058316476300838e-05, "loss": 0.1633, "step": 873 }, { "epoch": 6.24, "learning_rate": 8.053739005869157e-05, "loss": 0.1168, "step": 874 }, { "epoch": 6.25, "learning_rate": 8.049157449684723e-05, "loss": 0.1109, "step": 875 }, { "epoch": 6.26, "learning_rate": 8.044571813877431e-05, "loss": 0.1283, "step": 876 }, { "epoch": 6.26, "learning_rate": 8.039982104582638e-05, "loss": 0.1658, "step": 877 }, { "epoch": 6.27, "learning_rate": 8.035388327941147e-05, "loss": 0.0621, "step": 878 }, { "epoch": 6.28, "learning_rate": 8.030790490099207e-05, "loss": 0.1772, "step": 879 }, { "epoch": 6.29, "learning_rate": 8.0261885972085e-05, "loss": 0.1737, "step": 880 }, { "epoch": 6.29, "learning_rate": 8.021582655426129e-05, "loss": 0.1523, "step": 881 }, { "epoch": 6.3, "learning_rate": 8.016972670914624e-05, "loss": 0.1487, "step": 882 }, { "epoch": 6.31, "learning_rate": 8.012358649841912e-05, "loss": 0.1704, "step": 883 }, { "epoch": 6.31, "learning_rate": 8.00774059838133e-05, "loss": 0.1759, "step": 884 }, { "epoch": 6.32, "learning_rate": 8.003118522711601e-05, "loss": 0.099, "step": 885 }, { "epoch": 6.33, "learning_rate": 7.998492429016837e-05, "loss": 0.1292, "step": 886 }, { "epoch": 6.34, "learning_rate": 7.993862323486525e-05, "loss": 0.115, "step": 887 }, { "epoch": 6.34, "learning_rate": 7.989228212315516e-05, "loss": 0.2158, "step": 888 }, { "epoch": 6.35, "learning_rate": 7.984590101704025e-05, "loss": 0.2172, "step": 889 }, { "epoch": 6.36, "learning_rate": 7.979947997857617e-05, "loss": 0.1553, "step": 890 }, { "epoch": 6.36, "learning_rate": 7.975301906987198e-05, "loss": 0.1495, "step": 891 }, { "epoch": 6.37, "learning_rate": 7.970651835309009e-05, "loss": 0.1606, "step": 892 }, { "epoch": 6.38, "learning_rate": 7.965997789044615e-05, "loss": 0.1874, "step": 893 }, { "epoch": 6.39, "learning_rate": 7.961339774420907e-05, "loss": 0.1799, "step": 894 }, { "epoch": 6.39, "learning_rate": 7.956677797670074e-05, "loss": 0.1362, "step": 895 }, { "epoch": 6.4, "learning_rate": 7.952011865029614e-05, "loss": 0.1504, "step": 896 }, { "epoch": 6.41, "learning_rate": 7.947341982742315e-05, "loss": 0.1853, "step": 897 }, { "epoch": 6.41, "learning_rate": 7.942668157056254e-05, "loss": 0.1646, "step": 898 }, { "epoch": 6.42, "learning_rate": 7.937990394224774e-05, "loss": 0.1544, "step": 899 }, { "epoch": 6.43, "learning_rate": 7.933308700506497e-05, "loss": 0.0704, "step": 900 }, { "epoch": 6.44, "learning_rate": 7.928623082165297e-05, "loss": 0.1415, "step": 901 }, { "epoch": 6.44, "learning_rate": 7.923933545470301e-05, "loss": 0.0901, "step": 902 }, { "epoch": 6.45, "learning_rate": 7.919240096695877e-05, "loss": 0.3038, "step": 903 }, { "epoch": 6.46, "learning_rate": 7.914542742121633e-05, "loss": 0.1935, "step": 904 }, { "epoch": 6.46, "learning_rate": 7.909841488032395e-05, "loss": 0.1886, "step": 905 }, { "epoch": 6.47, "learning_rate": 7.905136340718211e-05, "loss": 0.1522, "step": 906 }, { "epoch": 6.48, "learning_rate": 7.900427306474339e-05, "loss": 0.1505, "step": 907 }, { "epoch": 6.49, "learning_rate": 7.895714391601232e-05, "loss": 0.1989, "step": 908 }, { "epoch": 6.49, "learning_rate": 7.890997602404541e-05, "loss": 0.1909, "step": 909 }, { "epoch": 6.5, "learning_rate": 7.886276945195099e-05, "loss": 0.1724, "step": 910 }, { "epoch": 6.51, "learning_rate": 7.881552426288909e-05, "loss": 0.1178, "step": 911 }, { "epoch": 6.51, "learning_rate": 7.876824052007149e-05, "loss": 0.1318, "step": 912 }, { "epoch": 6.52, "learning_rate": 7.872091828676151e-05, "loss": 0.2234, "step": 913 }, { "epoch": 6.53, "learning_rate": 7.867355762627397e-05, "loss": 0.243, "step": 914 }, { "epoch": 6.54, "learning_rate": 7.86261586019751e-05, "loss": 0.2461, "step": 915 }, { "epoch": 6.54, "learning_rate": 7.857872127728248e-05, "loss": 0.2756, "step": 916 }, { "epoch": 6.55, "learning_rate": 7.853124571566491e-05, "loss": 0.1384, "step": 917 }, { "epoch": 6.56, "learning_rate": 7.848373198064236e-05, "loss": 0.1555, "step": 918 }, { "epoch": 6.56, "learning_rate": 7.843618013578586e-05, "loss": 0.2112, "step": 919 }, { "epoch": 6.57, "learning_rate": 7.838859024471748e-05, "loss": 0.1863, "step": 920 }, { "epoch": 6.58, "learning_rate": 7.83409623711101e-05, "loss": 0.1394, "step": 921 }, { "epoch": 6.59, "learning_rate": 7.829329657868753e-05, "loss": 0.1412, "step": 922 }, { "epoch": 6.59, "learning_rate": 7.824559293122423e-05, "loss": 0.2047, "step": 923 }, { "epoch": 6.6, "learning_rate": 7.819785149254532e-05, "loss": 0.3013, "step": 924 }, { "epoch": 6.61, "learning_rate": 7.815007232652654e-05, "loss": 0.1176, "step": 925 }, { "epoch": 6.61, "learning_rate": 7.810225549709404e-05, "loss": 0.1792, "step": 926 }, { "epoch": 6.62, "learning_rate": 7.805440106822442e-05, "loss": 0.1477, "step": 927 }, { "epoch": 6.63, "learning_rate": 7.800650910394449e-05, "loss": 0.2539, "step": 928 }, { "epoch": 6.64, "learning_rate": 7.795857966833142e-05, "loss": 0.2354, "step": 929 }, { "epoch": 6.64, "learning_rate": 7.791061282551237e-05, "loss": 0.2001, "step": 930 }, { "epoch": 6.65, "learning_rate": 7.786260863966468e-05, "loss": 0.1757, "step": 931 }, { "epoch": 6.66, "learning_rate": 7.781456717501557e-05, "loss": 0.1921, "step": 932 }, { "epoch": 6.66, "learning_rate": 7.776648849584214e-05, "loss": 0.1177, "step": 933 }, { "epoch": 6.67, "learning_rate": 7.771837266647131e-05, "loss": 0.2764, "step": 934 }, { "epoch": 6.68, "learning_rate": 7.767021975127972e-05, "loss": 0.2502, "step": 935 }, { "epoch": 6.69, "learning_rate": 7.762202981469357e-05, "loss": 0.1474, "step": 936 }, { "epoch": 6.69, "learning_rate": 7.757380292118865e-05, "loss": 0.167, "step": 937 }, { "epoch": 6.7, "learning_rate": 7.752553913529018e-05, "loss": 0.2162, "step": 938 }, { "epoch": 6.71, "learning_rate": 7.747723852157271e-05, "loss": 0.1195, "step": 939 }, { "epoch": 6.71, "learning_rate": 7.74289011446601e-05, "loss": 0.1802, "step": 940 }, { "epoch": 6.72, "learning_rate": 7.73805270692254e-05, "loss": 0.1719, "step": 941 }, { "epoch": 6.73, "learning_rate": 7.733211635999072e-05, "loss": 0.1899, "step": 942 }, { "epoch": 6.74, "learning_rate": 7.728366908172722e-05, "loss": 0.2852, "step": 943 }, { "epoch": 6.74, "learning_rate": 7.7235185299255e-05, "loss": 0.182, "step": 944 }, { "epoch": 6.75, "learning_rate": 7.718666507744294e-05, "loss": 0.1082, "step": 945 }, { "epoch": 6.76, "learning_rate": 7.713810848120872e-05, "loss": 0.14, "step": 946 }, { "epoch": 6.76, "learning_rate": 7.708951557551873e-05, "loss": 0.2124, "step": 947 }, { "epoch": 6.77, "learning_rate": 7.704088642538782e-05, "loss": 0.1958, "step": 948 }, { "epoch": 6.78, "learning_rate": 7.699222109587945e-05, "loss": 0.23, "step": 949 }, { "epoch": 6.79, "learning_rate": 7.694351965210542e-05, "loss": 0.2002, "step": 950 }, { "epoch": 6.79, "learning_rate": 7.689478215922588e-05, "loss": 0.1907, "step": 951 }, { "epoch": 6.8, "learning_rate": 7.68460086824492e-05, "loss": 0.1824, "step": 952 }, { "epoch": 6.81, "learning_rate": 7.679719928703188e-05, "loss": 0.2188, "step": 953 }, { "epoch": 6.81, "learning_rate": 7.674835403827852e-05, "loss": 0.2295, "step": 954 }, { "epoch": 6.82, "learning_rate": 7.669947300154163e-05, "loss": 0.1444, "step": 955 }, { "epoch": 6.83, "learning_rate": 7.665055624222166e-05, "loss": 0.1832, "step": 956 }, { "epoch": 6.84, "learning_rate": 7.660160382576683e-05, "loss": 0.1967, "step": 957 }, { "epoch": 6.84, "learning_rate": 7.655261581767305e-05, "loss": 0.248, "step": 958 }, { "epoch": 6.85, "learning_rate": 7.650359228348389e-05, "loss": 0.2788, "step": 959 }, { "epoch": 6.86, "learning_rate": 7.645453328879042e-05, "loss": 0.1614, "step": 960 }, { "epoch": 6.86, "learning_rate": 7.640543889923118e-05, "loss": 0.1764, "step": 961 }, { "epoch": 6.87, "learning_rate": 7.635630918049201e-05, "loss": 0.1523, "step": 962 }, { "epoch": 6.88, "learning_rate": 7.63071441983061e-05, "loss": 0.1556, "step": 963 }, { "epoch": 6.89, "learning_rate": 7.625794401845377e-05, "loss": 0.209, "step": 964 }, { "epoch": 6.89, "learning_rate": 7.620870870676242e-05, "loss": 0.2058, "step": 965 }, { "epoch": 6.9, "learning_rate": 7.61594383291065e-05, "loss": 0.3086, "step": 966 }, { "epoch": 6.91, "learning_rate": 7.611013295140736e-05, "loss": 0.2114, "step": 967 }, { "epoch": 6.91, "learning_rate": 7.606079263963317e-05, "loss": 0.2827, "step": 968 }, { "epoch": 6.92, "learning_rate": 7.601141745979884e-05, "loss": 0.1653, "step": 969 }, { "epoch": 6.93, "learning_rate": 7.596200747796593e-05, "loss": 0.2996, "step": 970 }, { "epoch": 6.94, "learning_rate": 7.591256276024257e-05, "loss": 0.1503, "step": 971 }, { "epoch": 6.94, "learning_rate": 7.586308337278336e-05, "loss": 0.1764, "step": 972 }, { "epoch": 6.95, "learning_rate": 7.581356938178929e-05, "loss": 0.1266, "step": 973 }, { "epoch": 6.96, "learning_rate": 7.576402085350764e-05, "loss": 0.1481, "step": 974 }, { "epoch": 6.96, "learning_rate": 7.571443785423196e-05, "loss": 0.1654, "step": 975 }, { "epoch": 6.97, "learning_rate": 7.566482045030179e-05, "loss": 0.2117, "step": 976 }, { "epoch": 6.98, "learning_rate": 7.561516870810281e-05, "loss": 0.2241, "step": 977 }, { "epoch": 6.99, "learning_rate": 7.556548269406663e-05, "loss": 0.1512, "step": 978 }, { "epoch": 6.99, "learning_rate": 7.551576247467067e-05, "loss": 0.1004, "step": 979 }, { "epoch": 7.0, "learning_rate": 7.546600811643816e-05, "loss": 0.3015, "step": 980 }, { "epoch": 7.01, "learning_rate": 7.541621968593798e-05, "loss": 0.0996, "step": 981 }, { "epoch": 7.01, "learning_rate": 7.536639724978458e-05, "loss": 0.1273, "step": 982 }, { "epoch": 7.02, "learning_rate": 7.531654087463796e-05, "loss": 0.1339, "step": 983 }, { "epoch": 7.03, "learning_rate": 7.52666506272035e-05, "loss": 0.0937, "step": 984 }, { "epoch": 7.04, "learning_rate": 7.521672657423188e-05, "loss": 0.1754, "step": 985 }, { "epoch": 7.04, "learning_rate": 7.516676878251907e-05, "loss": 0.121, "step": 986 }, { "epoch": 7.05, "learning_rate": 7.51167773189061e-05, "loss": 0.1879, "step": 987 }, { "epoch": 7.06, "learning_rate": 7.50667522502791e-05, "loss": 0.1144, "step": 988 }, { "epoch": 7.06, "learning_rate": 7.501669364356918e-05, "loss": 0.1241, "step": 989 }, { "epoch": 7.07, "learning_rate": 7.496660156575226e-05, "loss": 0.1148, "step": 990 }, { "epoch": 7.08, "learning_rate": 7.491647608384909e-05, "loss": 0.2286, "step": 991 }, { "epoch": 7.09, "learning_rate": 7.486631726492512e-05, "loss": 0.0773, "step": 992 }, { "epoch": 7.09, "learning_rate": 7.481612517609037e-05, "loss": 0.1666, "step": 993 }, { "epoch": 7.1, "learning_rate": 7.476589988449939e-05, "loss": 0.1398, "step": 994 }, { "epoch": 7.11, "learning_rate": 7.471564145735115e-05, "loss": 0.1267, "step": 995 }, { "epoch": 7.11, "learning_rate": 7.466534996188897e-05, "loss": 0.1467, "step": 996 }, { "epoch": 7.12, "learning_rate": 7.461502546540037e-05, "loss": 0.0851, "step": 997 }, { "epoch": 7.13, "learning_rate": 7.45646680352171e-05, "loss": 0.1072, "step": 998 }, { "epoch": 7.14, "learning_rate": 7.451427773871489e-05, "loss": 0.1427, "step": 999 }, { "epoch": 7.14, "learning_rate": 7.446385464331348e-05, "loss": 0.1046, "step": 1000 }, { "epoch": 7.15, "learning_rate": 7.441339881647653e-05, "loss": 0.1356, "step": 1001 }, { "epoch": 7.16, "learning_rate": 7.436291032571142e-05, "loss": 0.124, "step": 1002 }, { "epoch": 7.16, "learning_rate": 7.431238923856929e-05, "loss": 0.1045, "step": 1003 }, { "epoch": 7.17, "learning_rate": 7.426183562264488e-05, "loss": 0.1361, "step": 1004 }, { "epoch": 7.18, "learning_rate": 7.421124954557641e-05, "loss": 0.1464, "step": 1005 }, { "epoch": 7.19, "learning_rate": 7.41606310750456e-05, "loss": 0.0969, "step": 1006 }, { "epoch": 7.19, "learning_rate": 7.410998027877744e-05, "loss": 0.1346, "step": 1007 }, { "epoch": 7.2, "learning_rate": 7.405929722454026e-05, "loss": 0.1434, "step": 1008 }, { "epoch": 7.21, "learning_rate": 7.400858198014544e-05, "loss": 0.1064, "step": 1009 }, { "epoch": 7.21, "learning_rate": 7.395783461344754e-05, "loss": 0.1102, "step": 1010 }, { "epoch": 7.22, "learning_rate": 7.3907055192344e-05, "loss": 0.118, "step": 1011 }, { "epoch": 7.23, "learning_rate": 7.385624378477521e-05, "loss": 0.1222, "step": 1012 }, { "epoch": 7.24, "learning_rate": 7.380540045872433e-05, "loss": 0.1233, "step": 1013 }, { "epoch": 7.24, "learning_rate": 7.375452528221722e-05, "loss": 0.074, "step": 1014 }, { "epoch": 7.25, "learning_rate": 7.370361832332242e-05, "loss": 0.124, "step": 1015 }, { "epoch": 7.26, "learning_rate": 7.365267965015086e-05, "loss": 0.1475, "step": 1016 }, { "epoch": 7.26, "learning_rate": 7.360170933085604e-05, "loss": 0.0868, "step": 1017 }, { "epoch": 7.27, "learning_rate": 7.355070743363374e-05, "loss": 0.1705, "step": 1018 }, { "epoch": 7.28, "learning_rate": 7.349967402672196e-05, "loss": 0.1008, "step": 1019 }, { "epoch": 7.29, "learning_rate": 7.344860917840091e-05, "loss": 0.1377, "step": 1020 }, { "epoch": 7.29, "learning_rate": 7.339751295699287e-05, "loss": 0.0909, "step": 1021 }, { "epoch": 7.3, "learning_rate": 7.334638543086203e-05, "loss": 0.174, "step": 1022 }, { "epoch": 7.31, "learning_rate": 7.329522666841456e-05, "loss": 0.1506, "step": 1023 }, { "epoch": 7.31, "learning_rate": 7.324403673809831e-05, "loss": 0.098, "step": 1024 }, { "epoch": 7.32, "learning_rate": 7.319281570840292e-05, "loss": 0.1403, "step": 1025 }, { "epoch": 7.33, "learning_rate": 7.314156364785962e-05, "loss": 0.1337, "step": 1026 }, { "epoch": 7.34, "learning_rate": 7.309028062504116e-05, "loss": 0.1343, "step": 1027 }, { "epoch": 7.34, "learning_rate": 7.303896670856167e-05, "loss": 0.1298, "step": 1028 }, { "epoch": 7.35, "learning_rate": 7.298762196707668e-05, "loss": 0.1165, "step": 1029 }, { "epoch": 7.36, "learning_rate": 7.29362464692829e-05, "loss": 0.1653, "step": 1030 }, { "epoch": 7.36, "learning_rate": 7.288484028391825e-05, "loss": 0.0936, "step": 1031 }, { "epoch": 7.37, "learning_rate": 7.283340347976166e-05, "loss": 0.139, "step": 1032 }, { "epoch": 7.38, "learning_rate": 7.278193612563307e-05, "loss": 0.118, "step": 1033 }, { "epoch": 7.39, "learning_rate": 7.273043829039325e-05, "loss": 0.0905, "step": 1034 }, { "epoch": 7.39, "learning_rate": 7.26789100429438e-05, "loss": 0.082, "step": 1035 }, { "epoch": 7.4, "learning_rate": 7.262735145222696e-05, "loss": 0.2288, "step": 1036 }, { "epoch": 7.41, "learning_rate": 7.257576258722559e-05, "loss": 0.147, "step": 1037 }, { "epoch": 7.41, "learning_rate": 7.252414351696306e-05, "loss": 0.1344, "step": 1038 }, { "epoch": 7.42, "learning_rate": 7.247249431050314e-05, "loss": 0.1637, "step": 1039 }, { "epoch": 7.43, "learning_rate": 7.242081503694995e-05, "loss": 0.1638, "step": 1040 }, { "epoch": 7.44, "learning_rate": 7.236910576544782e-05, "loss": 0.1804, "step": 1041 }, { "epoch": 7.44, "learning_rate": 7.23173665651812e-05, "loss": 0.1167, "step": 1042 }, { "epoch": 7.45, "learning_rate": 7.226559750537462e-05, "loss": 0.1186, "step": 1043 }, { "epoch": 7.46, "learning_rate": 7.22137986552925e-05, "loss": 0.2211, "step": 1044 }, { "epoch": 7.46, "learning_rate": 7.216197008423919e-05, "loss": 0.1593, "step": 1045 }, { "epoch": 7.47, "learning_rate": 7.211011186155878e-05, "loss": 0.0903, "step": 1046 }, { "epoch": 7.48, "learning_rate": 7.205822405663499e-05, "loss": 0.1556, "step": 1047 }, { "epoch": 7.49, "learning_rate": 7.200630673889117e-05, "loss": 0.0966, "step": 1048 }, { "epoch": 7.49, "learning_rate": 7.195435997779014e-05, "loss": 0.1301, "step": 1049 }, { "epoch": 7.5, "learning_rate": 7.190238384283412e-05, "loss": 0.144, "step": 1050 }, { "epoch": 7.51, "learning_rate": 7.185037840356461e-05, "loss": 0.1959, "step": 1051 }, { "epoch": 7.51, "learning_rate": 7.179834372956236e-05, "loss": 0.0911, "step": 1052 }, { "epoch": 7.52, "learning_rate": 7.174627989044716e-05, "loss": 0.1809, "step": 1053 }, { "epoch": 7.53, "learning_rate": 7.169418695587791e-05, "loss": 0.093, "step": 1054 }, { "epoch": 7.54, "learning_rate": 7.164206499555239e-05, "loss": 0.1162, "step": 1055 }, { "epoch": 7.54, "learning_rate": 7.15899140792072e-05, "loss": 0.1019, "step": 1056 }, { "epoch": 7.55, "learning_rate": 7.153773427661774e-05, "loss": 0.1128, "step": 1057 }, { "epoch": 7.56, "learning_rate": 7.1485525657598e-05, "loss": 0.2412, "step": 1058 }, { "epoch": 7.56, "learning_rate": 7.143328829200053e-05, "loss": 0.1647, "step": 1059 }, { "epoch": 7.57, "learning_rate": 7.13810222497164e-05, "loss": 0.1716, "step": 1060 }, { "epoch": 7.58, "learning_rate": 7.132872760067497e-05, "loss": 0.1409, "step": 1061 }, { "epoch": 7.59, "learning_rate": 7.127640441484394e-05, "loss": 0.0478, "step": 1062 }, { "epoch": 7.59, "learning_rate": 7.122405276222914e-05, "loss": 0.1909, "step": 1063 }, { "epoch": 7.6, "learning_rate": 7.117167271287453e-05, "loss": 0.2739, "step": 1064 }, { "epoch": 7.61, "learning_rate": 7.1119264336862e-05, "loss": 0.1089, "step": 1065 }, { "epoch": 7.61, "learning_rate": 7.106682770431144e-05, "loss": 0.1174, "step": 1066 }, { "epoch": 7.62, "learning_rate": 7.101436288538043e-05, "loss": 0.0968, "step": 1067 }, { "epoch": 7.63, "learning_rate": 7.096186995026439e-05, "loss": 0.229, "step": 1068 }, { "epoch": 7.64, "learning_rate": 7.090934896919621e-05, "loss": 0.0862, "step": 1069 }, { "epoch": 7.64, "learning_rate": 7.085680001244644e-05, "loss": 0.1155, "step": 1070 }, { "epoch": 7.65, "learning_rate": 7.080422315032298e-05, "loss": 0.2079, "step": 1071 }, { "epoch": 7.66, "learning_rate": 7.07516184531711e-05, "loss": 0.0733, "step": 1072 }, { "epoch": 7.66, "learning_rate": 7.069898599137328e-05, "loss": 0.1811, "step": 1073 }, { "epoch": 7.67, "learning_rate": 7.064632583534917e-05, "loss": 0.2446, "step": 1074 }, { "epoch": 7.68, "learning_rate": 7.059363805555549e-05, "loss": 0.1173, "step": 1075 }, { "epoch": 7.69, "learning_rate": 7.054092272248589e-05, "loss": 0.1152, "step": 1076 }, { "epoch": 7.69, "learning_rate": 7.048817990667092e-05, "loss": 0.1771, "step": 1077 }, { "epoch": 7.7, "learning_rate": 7.043540967867782e-05, "loss": 0.1423, "step": 1078 }, { "epoch": 7.71, "learning_rate": 7.03826121091106e-05, "loss": 0.1818, "step": 1079 }, { "epoch": 7.71, "learning_rate": 7.032978726860981e-05, "loss": 0.1442, "step": 1080 }, { "epoch": 7.72, "learning_rate": 7.027693522785251e-05, "loss": 0.1948, "step": 1081 }, { "epoch": 7.73, "learning_rate": 7.022405605755208e-05, "loss": 0.158, "step": 1082 }, { "epoch": 7.74, "learning_rate": 7.017114982845833e-05, "loss": 0.1187, "step": 1083 }, { "epoch": 7.74, "learning_rate": 7.011821661135713e-05, "loss": 0.0932, "step": 1084 }, { "epoch": 7.75, "learning_rate": 7.006525647707054e-05, "loss": 0.2288, "step": 1085 }, { "epoch": 7.76, "learning_rate": 7.001226949645662e-05, "loss": 0.1403, "step": 1086 }, { "epoch": 7.76, "learning_rate": 6.995925574040936e-05, "loss": 0.1332, "step": 1087 }, { "epoch": 7.77, "learning_rate": 6.990621527985856e-05, "loss": 0.1025, "step": 1088 }, { "epoch": 7.78, "learning_rate": 6.985314818576972e-05, "loss": 0.0963, "step": 1089 }, { "epoch": 7.79, "learning_rate": 6.980005452914405e-05, "loss": 0.1445, "step": 1090 }, { "epoch": 7.79, "learning_rate": 6.97469343810182e-05, "loss": 0.1062, "step": 1091 }, { "epoch": 7.8, "learning_rate": 6.969378781246436e-05, "loss": 0.1611, "step": 1092 }, { "epoch": 7.81, "learning_rate": 6.964061489459002e-05, "loss": 0.1288, "step": 1093 }, { "epoch": 7.81, "learning_rate": 6.958741569853793e-05, "loss": 0.1174, "step": 1094 }, { "epoch": 7.82, "learning_rate": 6.9534190295486e-05, "loss": 0.1564, "step": 1095 }, { "epoch": 7.83, "learning_rate": 6.948093875664718e-05, "loss": 0.1132, "step": 1096 }, { "epoch": 7.84, "learning_rate": 6.942766115326945e-05, "loss": 0.2043, "step": 1097 }, { "epoch": 7.84, "learning_rate": 6.93743575566356e-05, "loss": 0.1368, "step": 1098 }, { "epoch": 7.85, "learning_rate": 6.932102803806324e-05, "loss": 0.1519, "step": 1099 }, { "epoch": 7.86, "learning_rate": 6.926767266890465e-05, "loss": 0.1536, "step": 1100 }, { "epoch": 7.86, "learning_rate": 6.921429152054668e-05, "loss": 0.0951, "step": 1101 }, { "epoch": 7.87, "learning_rate": 6.916088466441068e-05, "loss": 0.2603, "step": 1102 }, { "epoch": 7.88, "learning_rate": 6.91074521719524e-05, "loss": 0.0985, "step": 1103 }, { "epoch": 7.89, "learning_rate": 6.905399411466189e-05, "loss": 0.1571, "step": 1104 }, { "epoch": 7.89, "learning_rate": 6.900051056406341e-05, "loss": 0.1362, "step": 1105 }, { "epoch": 7.9, "learning_rate": 6.894700159171534e-05, "loss": 0.1055, "step": 1106 }, { "epoch": 7.91, "learning_rate": 6.889346726921004e-05, "loss": 0.1448, "step": 1107 }, { "epoch": 7.91, "learning_rate": 6.883990766817377e-05, "loss": 0.1132, "step": 1108 }, { "epoch": 7.92, "learning_rate": 6.87863228602667e-05, "loss": 0.1794, "step": 1109 }, { "epoch": 7.93, "learning_rate": 6.873271291718264e-05, "loss": 0.1669, "step": 1110 }, { "epoch": 7.94, "learning_rate": 6.867907791064905e-05, "loss": 0.2944, "step": 1111 }, { "epoch": 7.94, "learning_rate": 6.862541791242698e-05, "loss": 0.1079, "step": 1112 }, { "epoch": 7.95, "learning_rate": 6.857173299431083e-05, "loss": 0.1393, "step": 1113 }, { "epoch": 7.96, "learning_rate": 6.851802322812839e-05, "loss": 0.1599, "step": 1114 }, { "epoch": 7.96, "learning_rate": 6.846428868574069e-05, "loss": 0.1741, "step": 1115 }, { "epoch": 7.97, "learning_rate": 6.84105294390419e-05, "loss": 0.0963, "step": 1116 }, { "epoch": 7.98, "learning_rate": 6.835674555995928e-05, "loss": 0.2058, "step": 1117 }, { "epoch": 7.99, "learning_rate": 6.8302937120453e-05, "loss": 0.2096, "step": 1118 }, { "epoch": 7.99, "learning_rate": 6.82491041925161e-05, "loss": 0.1661, "step": 1119 }, { "epoch": 8.0, "learning_rate": 6.819524684817438e-05, "loss": 0.1873, "step": 1120 }, { "epoch": 8.01, "learning_rate": 6.814136515948636e-05, "loss": 0.1348, "step": 1121 }, { "epoch": 8.01, "learning_rate": 6.808745919854307e-05, "loss": 0.1381, "step": 1122 }, { "epoch": 8.02, "learning_rate": 6.803352903746803e-05, "loss": 0.105, "step": 1123 }, { "epoch": 8.03, "learning_rate": 6.797957474841716e-05, "loss": 0.1212, "step": 1124 }, { "epoch": 8.04, "learning_rate": 6.792559640357863e-05, "loss": 0.0898, "step": 1125 }, { "epoch": 8.04, "learning_rate": 6.787159407517285e-05, "loss": 0.1111, "step": 1126 }, { "epoch": 8.05, "learning_rate": 6.781756783545225e-05, "loss": 0.0886, "step": 1127 }, { "epoch": 8.06, "learning_rate": 6.776351775670129e-05, "loss": 0.0923, "step": 1128 }, { "epoch": 8.06, "learning_rate": 6.770944391123633e-05, "loss": 0.1081, "step": 1129 }, { "epoch": 8.07, "learning_rate": 6.76553463714055e-05, "loss": 0.0708, "step": 1130 }, { "epoch": 8.08, "learning_rate": 6.760122520958867e-05, "loss": 0.1169, "step": 1131 }, { "epoch": 8.09, "learning_rate": 6.754708049819728e-05, "loss": 0.0969, "step": 1132 }, { "epoch": 8.09, "learning_rate": 6.749291230967431e-05, "loss": 0.1034, "step": 1133 }, { "epoch": 8.1, "learning_rate": 6.743872071649411e-05, "loss": 0.0597, "step": 1134 }, { "epoch": 8.11, "learning_rate": 6.73845057911624e-05, "loss": 0.0567, "step": 1135 }, { "epoch": 8.11, "learning_rate": 6.733026760621607e-05, "loss": 0.0748, "step": 1136 }, { "epoch": 8.12, "learning_rate": 6.727600623422314e-05, "loss": 0.0736, "step": 1137 }, { "epoch": 8.13, "learning_rate": 6.722172174778267e-05, "loss": 0.0808, "step": 1138 }, { "epoch": 8.14, "learning_rate": 6.716741421952464e-05, "loss": 0.0831, "step": 1139 }, { "epoch": 8.14, "learning_rate": 6.711308372210983e-05, "loss": 0.0969, "step": 1140 }, { "epoch": 8.15, "learning_rate": 6.70587303282298e-05, "loss": 0.1414, "step": 1141 }, { "epoch": 8.16, "learning_rate": 6.700435411060673e-05, "loss": 0.0893, "step": 1142 }, { "epoch": 8.16, "learning_rate": 6.69499551419933e-05, "loss": 0.1046, "step": 1143 }, { "epoch": 8.17, "learning_rate": 6.689553349517268e-05, "loss": 0.1012, "step": 1144 }, { "epoch": 8.18, "learning_rate": 6.684108924295834e-05, "loss": 0.0637, "step": 1145 }, { "epoch": 8.19, "learning_rate": 6.678662245819402e-05, "loss": 0.0849, "step": 1146 }, { "epoch": 8.19, "learning_rate": 6.67321332137536e-05, "loss": 0.0789, "step": 1147 }, { "epoch": 8.2, "learning_rate": 6.667762158254104e-05, "loss": 0.0682, "step": 1148 }, { "epoch": 8.21, "learning_rate": 6.662308763749019e-05, "loss": 0.0752, "step": 1149 }, { "epoch": 8.21, "learning_rate": 6.65685314515648e-05, "loss": 0.0778, "step": 1150 }, { "epoch": 8.22, "learning_rate": 6.651395309775837e-05, "loss": 0.0627, "step": 1151 }, { "epoch": 8.23, "learning_rate": 6.645935264909404e-05, "loss": 0.0946, "step": 1152 }, { "epoch": 8.24, "learning_rate": 6.640473017862454e-05, "loss": 0.0612, "step": 1153 }, { "epoch": 8.24, "learning_rate": 6.635008575943208e-05, "loss": 0.0564, "step": 1154 }, { "epoch": 8.25, "learning_rate": 6.629541946462817e-05, "loss": 0.0707, "step": 1155 }, { "epoch": 8.26, "learning_rate": 6.624073136735363e-05, "loss": 0.0952, "step": 1156 }, { "epoch": 8.26, "learning_rate": 6.618602154077844e-05, "loss": 0.0996, "step": 1157 }, { "epoch": 8.27, "learning_rate": 6.613129005810169e-05, "loss": 0.0916, "step": 1158 }, { "epoch": 8.28, "learning_rate": 6.60765369925514e-05, "loss": 0.0869, "step": 1159 }, { "epoch": 8.29, "learning_rate": 6.602176241738449e-05, "loss": 0.0838, "step": 1160 }, { "epoch": 8.29, "learning_rate": 6.596696640588663e-05, "loss": 0.1028, "step": 1161 }, { "epoch": 8.3, "learning_rate": 6.59121490313722e-05, "loss": 0.064, "step": 1162 }, { "epoch": 8.31, "learning_rate": 6.585731036718417e-05, "loss": 0.0626, "step": 1163 }, { "epoch": 8.31, "learning_rate": 6.580245048669395e-05, "loss": 0.079, "step": 1164 }, { "epoch": 8.32, "learning_rate": 6.574756946330138e-05, "loss": 0.0564, "step": 1165 }, { "epoch": 8.33, "learning_rate": 6.569266737043458e-05, "loss": 0.103, "step": 1166 }, { "epoch": 8.34, "learning_rate": 6.563774428154983e-05, "loss": 0.1471, "step": 1167 }, { "epoch": 8.34, "learning_rate": 6.558280027013154e-05, "loss": 0.0413, "step": 1168 }, { "epoch": 8.35, "learning_rate": 6.552783540969212e-05, "loss": 0.106, "step": 1169 }, { "epoch": 8.36, "learning_rate": 6.547284977377182e-05, "loss": 0.094, "step": 1170 }, { "epoch": 8.36, "learning_rate": 6.541784343593873e-05, "loss": 0.0589, "step": 1171 }, { "epoch": 8.37, "learning_rate": 6.536281646978862e-05, "loss": 0.0929, "step": 1172 }, { "epoch": 8.38, "learning_rate": 6.530776894894489e-05, "loss": 0.0453, "step": 1173 }, { "epoch": 8.39, "learning_rate": 6.525270094705838e-05, "loss": 0.0538, "step": 1174 }, { "epoch": 8.39, "learning_rate": 6.51976125378074e-05, "loss": 0.0878, "step": 1175 }, { "epoch": 8.4, "learning_rate": 6.514250379489753e-05, "loss": 0.1053, "step": 1176 }, { "epoch": 8.41, "learning_rate": 6.508737479206157e-05, "loss": 0.0602, "step": 1177 }, { "epoch": 8.41, "learning_rate": 6.503222560305941e-05, "loss": 0.1005, "step": 1178 }, { "epoch": 8.42, "learning_rate": 6.497705630167793e-05, "loss": 0.1171, "step": 1179 }, { "epoch": 8.43, "learning_rate": 6.492186696173097e-05, "loss": 0.1165, "step": 1180 }, { "epoch": 8.44, "learning_rate": 6.486665765705915e-05, "loss": 0.0884, "step": 1181 }, { "epoch": 8.44, "learning_rate": 6.481142846152982e-05, "loss": 0.0748, "step": 1182 }, { "epoch": 8.45, "learning_rate": 6.475617944903693e-05, "loss": 0.061, "step": 1183 }, { "epoch": 8.46, "learning_rate": 6.47009106935009e-05, "loss": 0.1065, "step": 1184 }, { "epoch": 8.46, "learning_rate": 6.464562226886866e-05, "loss": 0.1077, "step": 1185 }, { "epoch": 8.47, "learning_rate": 6.459031424911339e-05, "loss": 0.0736, "step": 1186 }, { "epoch": 8.48, "learning_rate": 6.453498670823449e-05, "loss": 0.0858, "step": 1187 }, { "epoch": 8.49, "learning_rate": 6.447963972025751e-05, "loss": 0.0789, "step": 1188 }, { "epoch": 8.49, "learning_rate": 6.4424273359234e-05, "loss": 0.0569, "step": 1189 }, { "epoch": 8.5, "learning_rate": 6.436888769924142e-05, "loss": 0.1543, "step": 1190 }, { "epoch": 8.51, "learning_rate": 6.431348281438306e-05, "loss": 0.0955, "step": 1191 }, { "epoch": 8.51, "learning_rate": 6.425805877878793e-05, "loss": 0.1246, "step": 1192 }, { "epoch": 8.52, "learning_rate": 6.42026156666107e-05, "loss": 0.0952, "step": 1193 }, { "epoch": 8.53, "learning_rate": 6.414715355203148e-05, "loss": 0.0876, "step": 1194 }, { "epoch": 8.54, "learning_rate": 6.409167250925589e-05, "loss": 0.0744, "step": 1195 }, { "epoch": 8.54, "learning_rate": 6.403617261251484e-05, "loss": 0.1061, "step": 1196 }, { "epoch": 8.55, "learning_rate": 6.398065393606445e-05, "loss": 0.0938, "step": 1197 }, { "epoch": 8.56, "learning_rate": 6.392511655418599e-05, "loss": 0.0517, "step": 1198 }, { "epoch": 8.56, "learning_rate": 6.386956054118574e-05, "loss": 0.0598, "step": 1199 }, { "epoch": 8.57, "learning_rate": 6.381398597139492e-05, "loss": 0.064, "step": 1200 }, { "epoch": 8.58, "learning_rate": 6.375839291916957e-05, "loss": 0.1614, "step": 1201 }, { "epoch": 8.59, "learning_rate": 6.370278145889048e-05, "loss": 0.1089, "step": 1202 }, { "epoch": 8.59, "learning_rate": 6.364715166496303e-05, "loss": 0.0897, "step": 1203 }, { "epoch": 8.6, "learning_rate": 6.359150361181715e-05, "loss": 0.1501, "step": 1204 }, { "epoch": 8.61, "learning_rate": 6.353583737390723e-05, "loss": 0.0737, "step": 1205 }, { "epoch": 8.61, "learning_rate": 6.348015302571192e-05, "loss": 0.1003, "step": 1206 }, { "epoch": 8.62, "learning_rate": 6.342445064173417e-05, "loss": 0.1155, "step": 1207 }, { "epoch": 8.63, "learning_rate": 6.336873029650104e-05, "loss": 0.0975, "step": 1208 }, { "epoch": 8.64, "learning_rate": 6.331299206456358e-05, "loss": 0.0739, "step": 1209 }, { "epoch": 8.64, "learning_rate": 6.325723602049684e-05, "loss": 0.0353, "step": 1210 }, { "epoch": 8.65, "learning_rate": 6.320146223889966e-05, "loss": 0.1294, "step": 1211 }, { "epoch": 8.66, "learning_rate": 6.314567079439459e-05, "loss": 0.1348, "step": 1212 }, { "epoch": 8.66, "learning_rate": 6.30898617616279e-05, "loss": 0.0754, "step": 1213 }, { "epoch": 8.67, "learning_rate": 6.303403521526928e-05, "loss": 0.0776, "step": 1214 }, { "epoch": 8.68, "learning_rate": 6.297819123001193e-05, "loss": 0.1458, "step": 1215 }, { "epoch": 8.69, "learning_rate": 6.292232988057235e-05, "loss": 0.1002, "step": 1216 }, { "epoch": 8.69, "learning_rate": 6.28664512416903e-05, "loss": 0.1447, "step": 1217 }, { "epoch": 8.7, "learning_rate": 6.281055538812861e-05, "loss": 0.066, "step": 1218 }, { "epoch": 8.71, "learning_rate": 6.275464239467325e-05, "loss": 0.1165, "step": 1219 }, { "epoch": 8.71, "learning_rate": 6.269871233613301e-05, "loss": 0.1375, "step": 1220 }, { "epoch": 8.72, "learning_rate": 6.264276528733955e-05, "loss": 0.0796, "step": 1221 }, { "epoch": 8.73, "learning_rate": 6.258680132314731e-05, "loss": 0.076, "step": 1222 }, { "epoch": 8.74, "learning_rate": 6.253082051843332e-05, "loss": 0.1095, "step": 1223 }, { "epoch": 8.74, "learning_rate": 6.247482294809712e-05, "loss": 0.062, "step": 1224 }, { "epoch": 8.75, "learning_rate": 6.241880868706075e-05, "loss": 0.1236, "step": 1225 }, { "epoch": 8.76, "learning_rate": 6.236277781026849e-05, "loss": 0.0823, "step": 1226 }, { "epoch": 8.76, "learning_rate": 6.230673039268693e-05, "loss": 0.1379, "step": 1227 }, { "epoch": 8.77, "learning_rate": 6.225066650930476e-05, "loss": 0.0832, "step": 1228 }, { "epoch": 8.78, "learning_rate": 6.219458623513268e-05, "loss": 0.0459, "step": 1229 }, { "epoch": 8.79, "learning_rate": 6.213848964520338e-05, "loss": 0.0727, "step": 1230 }, { "epoch": 8.79, "learning_rate": 6.208237681457132e-05, "loss": 0.1042, "step": 1231 }, { "epoch": 8.8, "learning_rate": 6.202624781831268e-05, "loss": 0.092, "step": 1232 }, { "epoch": 8.81, "learning_rate": 6.197010273152534e-05, "loss": 0.1084, "step": 1233 }, { "epoch": 8.81, "learning_rate": 6.191394162932867e-05, "loss": 0.0796, "step": 1234 }, { "epoch": 8.82, "learning_rate": 6.185776458686344e-05, "loss": 0.0673, "step": 1235 }, { "epoch": 8.83, "learning_rate": 6.18015716792918e-05, "loss": 0.0617, "step": 1236 }, { "epoch": 8.84, "learning_rate": 6.174536298179707e-05, "loss": 0.0586, "step": 1237 }, { "epoch": 8.84, "learning_rate": 6.168913856958374e-05, "loss": 0.1507, "step": 1238 }, { "epoch": 8.85, "learning_rate": 6.163289851787731e-05, "loss": 0.0529, "step": 1239 }, { "epoch": 8.86, "learning_rate": 6.157664290192421e-05, "loss": 0.1055, "step": 1240 }, { "epoch": 8.86, "learning_rate": 6.152037179699168e-05, "loss": 0.0879, "step": 1241 }, { "epoch": 8.87, "learning_rate": 6.146408527836771e-05, "loss": 0.0685, "step": 1242 }, { "epoch": 8.88, "learning_rate": 6.140778342136088e-05, "loss": 0.1185, "step": 1243 }, { "epoch": 8.89, "learning_rate": 6.135146630130034e-05, "loss": 0.0723, "step": 1244 }, { "epoch": 8.89, "learning_rate": 6.129513399353558e-05, "loss": 0.0938, "step": 1245 }, { "epoch": 8.9, "learning_rate": 6.123878657343648e-05, "loss": 0.0575, "step": 1246 }, { "epoch": 8.91, "learning_rate": 6.118242411639314e-05, "loss": 0.1221, "step": 1247 }, { "epoch": 8.91, "learning_rate": 6.112604669781572e-05, "loss": 0.0705, "step": 1248 }, { "epoch": 8.92, "learning_rate": 6.106965439313448e-05, "loss": 0.0887, "step": 1249 }, { "epoch": 8.93, "learning_rate": 6.1013247277799535e-05, "loss": 0.1726, "step": 1250 }, { "epoch": 8.94, "learning_rate": 6.095682542728083e-05, "loss": 0.1548, "step": 1251 }, { "epoch": 8.94, "learning_rate": 6.090038891706801e-05, "loss": 0.0696, "step": 1252 }, { "epoch": 8.95, "learning_rate": 6.084393782267039e-05, "loss": 0.1312, "step": 1253 }, { "epoch": 8.96, "learning_rate": 6.078747221961675e-05, "loss": 0.1166, "step": 1254 }, { "epoch": 8.96, "learning_rate": 6.0730992183455285e-05, "loss": 0.1281, "step": 1255 }, { "epoch": 8.97, "learning_rate": 6.067449778975349e-05, "loss": 0.0837, "step": 1256 }, { "epoch": 8.98, "learning_rate": 6.061798911409813e-05, "loss": 0.0341, "step": 1257 }, { "epoch": 8.99, "learning_rate": 6.0561466232094996e-05, "loss": 0.0962, "step": 1258 }, { "epoch": 8.99, "learning_rate": 6.050492921936897e-05, "loss": 0.0633, "step": 1259 }, { "epoch": 9.0, "learning_rate": 6.044837815156377e-05, "loss": 0.1289, "step": 1260 }, { "epoch": 9.01, "learning_rate": 6.039181310434193e-05, "loss": 0.0606, "step": 1261 }, { "epoch": 9.01, "learning_rate": 6.0335234153384735e-05, "loss": 0.0689, "step": 1262 }, { "epoch": 9.02, "learning_rate": 6.027864137439202e-05, "loss": 0.0864, "step": 1263 }, { "epoch": 9.03, "learning_rate": 6.022203484308216e-05, "loss": 0.0859, "step": 1264 }, { "epoch": 9.04, "learning_rate": 6.016541463519191e-05, "loss": 0.1049, "step": 1265 }, { "epoch": 9.04, "learning_rate": 6.0108780826476306e-05, "loss": 0.0666, "step": 1266 }, { "epoch": 9.05, "learning_rate": 6.005213349270864e-05, "loss": 0.0666, "step": 1267 }, { "epoch": 9.06, "learning_rate": 5.9995472709680234e-05, "loss": 0.11, "step": 1268 }, { "epoch": 9.06, "learning_rate": 5.993879855320043e-05, "loss": 0.1593, "step": 1269 }, { "epoch": 9.07, "learning_rate": 5.988211109909647e-05, "loss": 0.058, "step": 1270 }, { "epoch": 9.08, "learning_rate": 5.982541042321338e-05, "loss": 0.0923, "step": 1271 }, { "epoch": 9.09, "learning_rate": 5.976869660141389e-05, "loss": 0.054, "step": 1272 }, { "epoch": 9.09, "learning_rate": 5.9711969709578306e-05, "loss": 0.0425, "step": 1273 }, { "epoch": 9.1, "learning_rate": 5.9655229823604406e-05, "loss": 0.0574, "step": 1274 }, { "epoch": 9.11, "learning_rate": 5.9598477019407374e-05, "loss": 0.0607, "step": 1275 }, { "epoch": 9.11, "learning_rate": 5.954171137291968e-05, "loss": 0.0885, "step": 1276 }, { "epoch": 9.12, "learning_rate": 5.948493296009098e-05, "loss": 0.0555, "step": 1277 }, { "epoch": 9.13, "learning_rate": 5.9428141856887986e-05, "loss": 0.0757, "step": 1278 }, { "epoch": 9.14, "learning_rate": 5.937133813929442e-05, "loss": 0.0861, "step": 1279 }, { "epoch": 9.14, "learning_rate": 5.931452188331083e-05, "loss": 0.0814, "step": 1280 }, { "epoch": 9.15, "learning_rate": 5.9257693164954606e-05, "loss": 0.1142, "step": 1281 }, { "epoch": 9.16, "learning_rate": 5.920085206025978e-05, "loss": 0.0623, "step": 1282 }, { "epoch": 9.16, "learning_rate": 5.9143998645276946e-05, "loss": 0.1515, "step": 1283 }, { "epoch": 9.17, "learning_rate": 5.908713299607318e-05, "loss": 0.0823, "step": 1284 }, { "epoch": 9.18, "learning_rate": 5.9030255188731944e-05, "loss": 0.0688, "step": 1285 }, { "epoch": 9.19, "learning_rate": 5.897336529935292e-05, "loss": 0.0405, "step": 1286 }, { "epoch": 9.19, "learning_rate": 5.891646340405201e-05, "loss": 0.1487, "step": 1287 }, { "epoch": 9.2, "learning_rate": 5.885954957896115e-05, "loss": 0.1345, "step": 1288 }, { "epoch": 9.21, "learning_rate": 5.8802623900228224e-05, "loss": 0.058, "step": 1289 }, { "epoch": 9.21, "learning_rate": 5.874568644401701e-05, "loss": 0.0909, "step": 1290 }, { "epoch": 9.22, "learning_rate": 5.8688737286507045e-05, "loss": 0.0582, "step": 1291 }, { "epoch": 9.23, "learning_rate": 5.863177650389347e-05, "loss": 0.08, "step": 1292 }, { "epoch": 9.24, "learning_rate": 5.857480417238702e-05, "loss": 0.0817, "step": 1293 }, { "epoch": 9.24, "learning_rate": 5.8517820368213874e-05, "loss": 0.0564, "step": 1294 }, { "epoch": 9.25, "learning_rate": 5.8460825167615585e-05, "loss": 0.0741, "step": 1295 }, { "epoch": 9.26, "learning_rate": 5.8403818646848915e-05, "loss": 0.0594, "step": 1296 }, { "epoch": 9.26, "learning_rate": 5.834680088218579e-05, "loss": 0.0573, "step": 1297 }, { "epoch": 9.27, "learning_rate": 5.8289771949913176e-05, "loss": 0.1451, "step": 1298 }, { "epoch": 9.28, "learning_rate": 5.823273192633297e-05, "loss": 0.0456, "step": 1299 }, { "epoch": 9.29, "learning_rate": 5.8175680887761955e-05, "loss": 0.0425, "step": 1300 }, { "epoch": 9.29, "learning_rate": 5.811861891053158e-05, "loss": 0.079, "step": 1301 }, { "epoch": 9.3, "learning_rate": 5.8061546070987994e-05, "loss": 0.0782, "step": 1302 }, { "epoch": 9.31, "learning_rate": 5.800446244549185e-05, "loss": 0.0497, "step": 1303 }, { "epoch": 9.31, "learning_rate": 5.794736811041821e-05, "loss": 0.062, "step": 1304 }, { "epoch": 9.32, "learning_rate": 5.789026314215651e-05, "loss": 0.0636, "step": 1305 }, { "epoch": 9.33, "learning_rate": 5.783314761711038e-05, "loss": 0.0544, "step": 1306 }, { "epoch": 9.34, "learning_rate": 5.7776021611697604e-05, "loss": 0.0757, "step": 1307 }, { "epoch": 9.34, "learning_rate": 5.771888520234997e-05, "loss": 0.0687, "step": 1308 }, { "epoch": 9.35, "learning_rate": 5.766173846551316e-05, "loss": 0.0474, "step": 1309 }, { "epoch": 9.36, "learning_rate": 5.7604581477646726e-05, "loss": 0.0601, "step": 1310 }, { "epoch": 9.36, "learning_rate": 5.754741431522389e-05, "loss": 0.0654, "step": 1311 }, { "epoch": 9.37, "learning_rate": 5.749023705473153e-05, "loss": 0.0908, "step": 1312 }, { "epoch": 9.38, "learning_rate": 5.743304977266999e-05, "loss": 0.0544, "step": 1313 }, { "epoch": 9.39, "learning_rate": 5.737585254555307e-05, "loss": 0.0508, "step": 1314 }, { "epoch": 9.39, "learning_rate": 5.731864544990782e-05, "loss": 0.0607, "step": 1315 }, { "epoch": 9.4, "learning_rate": 5.726142856227452e-05, "loss": 0.0553, "step": 1316 }, { "epoch": 9.41, "learning_rate": 5.7204201959206584e-05, "loss": 0.1112, "step": 1317 }, { "epoch": 9.41, "learning_rate": 5.714696571727037e-05, "loss": 0.0168, "step": 1318 }, { "epoch": 9.42, "learning_rate": 5.708971991304518e-05, "loss": 0.0792, "step": 1319 }, { "epoch": 9.43, "learning_rate": 5.703246462312307e-05, "loss": 0.1127, "step": 1320 }, { "epoch": 9.44, "learning_rate": 5.6975199924108826e-05, "loss": 0.1392, "step": 1321 }, { "epoch": 9.44, "learning_rate": 5.6917925892619774e-05, "loss": 0.045, "step": 1322 }, { "epoch": 9.45, "learning_rate": 5.686064260528578e-05, "loss": 0.0956, "step": 1323 }, { "epoch": 9.46, "learning_rate": 5.6803350138749034e-05, "loss": 0.0729, "step": 1324 }, { "epoch": 9.46, "learning_rate": 5.674604856966407e-05, "loss": 0.1003, "step": 1325 }, { "epoch": 9.47, "learning_rate": 5.6688737974697556e-05, "loss": 0.1255, "step": 1326 }, { "epoch": 9.48, "learning_rate": 5.6631418430528284e-05, "loss": 0.0533, "step": 1327 }, { "epoch": 9.49, "learning_rate": 5.6574090013846946e-05, "loss": 0.0735, "step": 1328 }, { "epoch": 9.49, "learning_rate": 5.6516752801356177e-05, "loss": 0.0631, "step": 1329 }, { "epoch": 9.5, "learning_rate": 5.645940686977033e-05, "loss": 0.1166, "step": 1330 }, { "epoch": 9.51, "learning_rate": 5.640205229581546e-05, "loss": 0.0882, "step": 1331 }, { "epoch": 9.51, "learning_rate": 5.634468915622915e-05, "loss": 0.1089, "step": 1332 }, { "epoch": 9.52, "learning_rate": 5.6287317527760485e-05, "loss": 0.053, "step": 1333 }, { "epoch": 9.53, "learning_rate": 5.622993748716987e-05, "loss": 0.0798, "step": 1334 }, { "epoch": 9.54, "learning_rate": 5.617254911122898e-05, "loss": 0.1088, "step": 1335 }, { "epoch": 9.54, "learning_rate": 5.6115152476720635e-05, "loss": 0.0837, "step": 1336 }, { "epoch": 9.55, "learning_rate": 5.6057747660438744e-05, "loss": 0.0604, "step": 1337 }, { "epoch": 9.56, "learning_rate": 5.600033473918811e-05, "loss": 0.1462, "step": 1338 }, { "epoch": 9.56, "learning_rate": 5.594291378978439e-05, "loss": 0.1118, "step": 1339 }, { "epoch": 9.57, "learning_rate": 5.5885484889054016e-05, "loss": 0.0389, "step": 1340 }, { "epoch": 9.58, "learning_rate": 5.582804811383402e-05, "loss": 0.0414, "step": 1341 }, { "epoch": 9.59, "learning_rate": 5.577060354097199e-05, "loss": 0.0872, "step": 1342 }, { "epoch": 9.59, "learning_rate": 5.5713151247325936e-05, "loss": 0.1338, "step": 1343 }, { "epoch": 9.6, "learning_rate": 5.565569130976422e-05, "loss": 0.0842, "step": 1344 }, { "epoch": 9.61, "learning_rate": 5.559822380516539e-05, "loss": 0.0894, "step": 1345 }, { "epoch": 9.61, "learning_rate": 5.554074881041817e-05, "loss": 0.0894, "step": 1346 }, { "epoch": 9.62, "learning_rate": 5.548326640242127e-05, "loss": 0.0754, "step": 1347 }, { "epoch": 9.63, "learning_rate": 5.542577665808332e-05, "loss": 0.1153, "step": 1348 }, { "epoch": 9.64, "learning_rate": 5.536827965432277e-05, "loss": 0.1393, "step": 1349 }, { "epoch": 9.64, "learning_rate": 5.531077546806782e-05, "loss": 0.0661, "step": 1350 }, { "epoch": 9.65, "learning_rate": 5.52532641762562e-05, "loss": 0.0472, "step": 1351 }, { "epoch": 9.66, "learning_rate": 5.5195745855835226e-05, "loss": 0.1109, "step": 1352 }, { "epoch": 9.66, "learning_rate": 5.5138220583761556e-05, "loss": 0.0421, "step": 1353 }, { "epoch": 9.67, "learning_rate": 5.508068843700121e-05, "loss": 0.1256, "step": 1354 }, { "epoch": 9.68, "learning_rate": 5.502314949252935e-05, "loss": 0.1505, "step": 1355 }, { "epoch": 9.69, "learning_rate": 5.496560382733028e-05, "loss": 0.0875, "step": 1356 }, { "epoch": 9.69, "learning_rate": 5.490805151839724e-05, "loss": 0.1029, "step": 1357 }, { "epoch": 9.7, "learning_rate": 5.4850492642732406e-05, "loss": 0.0696, "step": 1358 }, { "epoch": 9.71, "learning_rate": 5.479292727734674e-05, "loss": 0.0943, "step": 1359 }, { "epoch": 9.71, "learning_rate": 5.4735355499259855e-05, "loss": 0.1837, "step": 1360 }, { "epoch": 9.72, "learning_rate": 5.467777738549997e-05, "loss": 0.0845, "step": 1361 }, { "epoch": 9.73, "learning_rate": 5.462019301310378e-05, "loss": 0.0508, "step": 1362 }, { "epoch": 9.74, "learning_rate": 5.456260245911633e-05, "loss": 0.0217, "step": 1363 }, { "epoch": 9.74, "learning_rate": 5.4505005800590945e-05, "loss": 0.0674, "step": 1364 }, { "epoch": 9.75, "learning_rate": 5.444740311458915e-05, "loss": 0.1296, "step": 1365 }, { "epoch": 9.76, "learning_rate": 5.4389794478180486e-05, "loss": 0.0514, "step": 1366 }, { "epoch": 9.76, "learning_rate": 5.43321799684425e-05, "loss": 0.0719, "step": 1367 }, { "epoch": 9.77, "learning_rate": 5.427455966246057e-05, "loss": 0.0966, "step": 1368 }, { "epoch": 9.78, "learning_rate": 5.4216933637327806e-05, "loss": 0.0418, "step": 1369 }, { "epoch": 9.79, "learning_rate": 5.415930197014503e-05, "loss": 0.0747, "step": 1370 }, { "epoch": 9.79, "learning_rate": 5.410166473802057e-05, "loss": 0.1136, "step": 1371 }, { "epoch": 9.8, "learning_rate": 5.4044022018070214e-05, "loss": 0.067, "step": 1372 }, { "epoch": 9.81, "learning_rate": 5.398637388741709e-05, "loss": 0.0541, "step": 1373 }, { "epoch": 9.81, "learning_rate": 5.392872042319155e-05, "loss": 0.0438, "step": 1374 }, { "epoch": 9.82, "learning_rate": 5.387106170253113e-05, "loss": 0.0614, "step": 1375 }, { "epoch": 9.83, "learning_rate": 5.3813397802580334e-05, "loss": 0.025, "step": 1376 }, { "epoch": 9.84, "learning_rate": 5.375572880049062e-05, "loss": 0.0532, "step": 1377 }, { "epoch": 9.84, "learning_rate": 5.369805477342032e-05, "loss": 0.0906, "step": 1378 }, { "epoch": 9.85, "learning_rate": 5.36403757985344e-05, "loss": 0.0882, "step": 1379 }, { "epoch": 9.86, "learning_rate": 5.358269195300454e-05, "loss": 0.1021, "step": 1380 }, { "epoch": 9.86, "learning_rate": 5.352500331400886e-05, "loss": 0.1397, "step": 1381 }, { "epoch": 9.87, "learning_rate": 5.3467309958731934e-05, "loss": 0.0646, "step": 1382 }, { "epoch": 9.88, "learning_rate": 5.340961196436465e-05, "loss": 0.0809, "step": 1383 }, { "epoch": 9.89, "learning_rate": 5.335190940810407e-05, "loss": 0.1239, "step": 1384 }, { "epoch": 9.89, "learning_rate": 5.329420236715339e-05, "loss": 0.036, "step": 1385 }, { "epoch": 9.9, "learning_rate": 5.3236490918721794e-05, "loss": 0.0451, "step": 1386 }, { "epoch": 9.91, "learning_rate": 5.317877514002436e-05, "loss": 0.0688, "step": 1387 }, { "epoch": 9.91, "learning_rate": 5.312105510828196e-05, "loss": 0.0477, "step": 1388 }, { "epoch": 9.92, "learning_rate": 5.3063330900721175e-05, "loss": 0.0483, "step": 1389 }, { "epoch": 9.93, "learning_rate": 5.3005602594574145e-05, "loss": 0.069, "step": 1390 }, { "epoch": 9.94, "learning_rate": 5.2947870267078514e-05, "loss": 0.0455, "step": 1391 }, { "epoch": 9.94, "learning_rate": 5.289013399547732e-05, "loss": 0.0898, "step": 1392 }, { "epoch": 9.95, "learning_rate": 5.2832393857018815e-05, "loss": 0.0242, "step": 1393 }, { "epoch": 9.96, "learning_rate": 5.27746499289565e-05, "loss": 0.0921, "step": 1394 }, { "epoch": 9.96, "learning_rate": 5.27169022885489e-05, "loss": 0.1093, "step": 1395 }, { "epoch": 9.97, "learning_rate": 5.265915101305952e-05, "loss": 0.0569, "step": 1396 }, { "epoch": 9.98, "learning_rate": 5.260139617975673e-05, "loss": 0.0842, "step": 1397 }, { "epoch": 9.99, "learning_rate": 5.2543637865913675e-05, "loss": 0.0945, "step": 1398 }, { "epoch": 9.99, "learning_rate": 5.248587614880812e-05, "loss": 0.1096, "step": 1399 }, { "epoch": 10.0, "learning_rate": 5.242811110572242e-05, "loss": 0.0966, "step": 1400 }, { "epoch": 10.01, "learning_rate": 5.237034281394335e-05, "loss": 0.0817, "step": 1401 }, { "epoch": 10.01, "learning_rate": 5.2312571350762054e-05, "loss": 0.1007, "step": 1402 }, { "epoch": 10.02, "learning_rate": 5.225479679347392e-05, "loss": 0.0355, "step": 1403 }, { "epoch": 10.03, "learning_rate": 5.219701921937845e-05, "loss": 0.0681, "step": 1404 }, { "epoch": 10.04, "learning_rate": 5.213923870577921e-05, "loss": 0.0567, "step": 1405 }, { "epoch": 10.04, "learning_rate": 5.208145532998368e-05, "loss": 0.0526, "step": 1406 }, { "epoch": 10.05, "learning_rate": 5.2023669169303194e-05, "loss": 0.0471, "step": 1407 }, { "epoch": 10.06, "learning_rate": 5.1965880301052784e-05, "loss": 0.0257, "step": 1408 }, { "epoch": 10.06, "learning_rate": 5.19080888025511e-05, "loss": 0.114, "step": 1409 }, { "epoch": 10.07, "learning_rate": 5.1850294751120375e-05, "loss": 0.0815, "step": 1410 }, { "epoch": 10.08, "learning_rate": 5.1792498224086175e-05, "loss": 0.1447, "step": 1411 }, { "epoch": 10.09, "learning_rate": 5.17346992987774e-05, "loss": 0.0382, "step": 1412 }, { "epoch": 10.09, "learning_rate": 5.16768980525262e-05, "loss": 0.0477, "step": 1413 }, { "epoch": 10.1, "learning_rate": 5.1619094562667804e-05, "loss": 0.0477, "step": 1414 }, { "epoch": 10.11, "learning_rate": 5.156128890654042e-05, "loss": 0.0541, "step": 1415 }, { "epoch": 10.11, "learning_rate": 5.15034811614852e-05, "loss": 0.0332, "step": 1416 }, { "epoch": 10.12, "learning_rate": 5.144567140484605e-05, "loss": 0.0345, "step": 1417 }, { "epoch": 10.13, "learning_rate": 5.138785971396959e-05, "loss": 0.0341, "step": 1418 }, { "epoch": 10.14, "learning_rate": 5.133004616620501e-05, "loss": 0.039, "step": 1419 }, { "epoch": 10.14, "learning_rate": 5.1272230838904015e-05, "loss": 0.0767, "step": 1420 }, { "epoch": 10.15, "learning_rate": 5.121441380942066e-05, "loss": 0.0648, "step": 1421 }, { "epoch": 10.16, "learning_rate": 5.11565951551113e-05, "loss": 0.0534, "step": 1422 }, { "epoch": 10.16, "learning_rate": 5.109877495333444e-05, "loss": 0.0618, "step": 1423 }, { "epoch": 10.17, "learning_rate": 5.1040953281450684e-05, "loss": 0.0685, "step": 1424 }, { "epoch": 10.18, "learning_rate": 5.098313021682257e-05, "loss": 0.0297, "step": 1425 }, { "epoch": 10.19, "learning_rate": 5.0925305836814544e-05, "loss": 0.059, "step": 1426 }, { "epoch": 10.19, "learning_rate": 5.086748021879278e-05, "loss": 0.0442, "step": 1427 }, { "epoch": 10.2, "learning_rate": 5.080965344012508e-05, "loss": 0.0233, "step": 1428 }, { "epoch": 10.21, "learning_rate": 5.075182557818087e-05, "loss": 0.0613, "step": 1429 }, { "epoch": 10.21, "learning_rate": 5.069399671033096e-05, "loss": 0.0664, "step": 1430 }, { "epoch": 10.22, "learning_rate": 5.0636166913947556e-05, "loss": 0.0372, "step": 1431 }, { "epoch": 10.23, "learning_rate": 5.057833626640408e-05, "loss": 0.047, "step": 1432 }, { "epoch": 10.24, "learning_rate": 5.052050484507511e-05, "loss": 0.0468, "step": 1433 }, { "epoch": 10.24, "learning_rate": 5.046267272733621e-05, "loss": 0.0484, "step": 1434 }, { "epoch": 10.25, "learning_rate": 5.0404839990563936e-05, "loss": 0.0287, "step": 1435 }, { "epoch": 10.26, "learning_rate": 5.0347006712135646e-05, "loss": 0.0191, "step": 1436 }, { "epoch": 10.26, "learning_rate": 5.028917296942942e-05, "loss": 0.0415, "step": 1437 }, { "epoch": 10.27, "learning_rate": 5.023133883982397e-05, "loss": 0.0474, "step": 1438 }, { "epoch": 10.28, "learning_rate": 5.017350440069853e-05, "loss": 0.0549, "step": 1439 }, { "epoch": 10.29, "learning_rate": 5.011566972943272e-05, "loss": 0.0404, "step": 1440 }, { "epoch": 10.29, "learning_rate": 5.00578349034065e-05, "loss": 0.051, "step": 1441 }, { "epoch": 10.3, "learning_rate": 5e-05, "loss": 0.0817, "step": 1442 }, { "epoch": 10.31, "learning_rate": 4.994216509659352e-05, "loss": 0.0517, "step": 1443 }, { "epoch": 10.31, "learning_rate": 4.988433027056729e-05, "loss": 0.0383, "step": 1444 }, { "epoch": 10.32, "learning_rate": 4.982649559930147e-05, "loss": 0.0487, "step": 1445 }, { "epoch": 10.33, "learning_rate": 4.976866116017604e-05, "loss": 0.0533, "step": 1446 }, { "epoch": 10.34, "learning_rate": 4.971082703057059e-05, "loss": 0.0627, "step": 1447 }, { "epoch": 10.34, "learning_rate": 4.9652993287864365e-05, "loss": 0.0884, "step": 1448 }, { "epoch": 10.35, "learning_rate": 4.9595160009436075e-05, "loss": 0.0403, "step": 1449 }, { "epoch": 10.36, "learning_rate": 4.95373272726638e-05, "loss": 0.0535, "step": 1450 }, { "epoch": 10.36, "learning_rate": 4.947949515492491e-05, "loss": 0.0338, "step": 1451 }, { "epoch": 10.37, "learning_rate": 4.942166373359593e-05, "loss": 0.0827, "step": 1452 }, { "epoch": 10.38, "learning_rate": 4.936383308605245e-05, "loss": 0.0766, "step": 1453 }, { "epoch": 10.39, "learning_rate": 4.9306003289669046e-05, "loss": 0.0694, "step": 1454 }, { "epoch": 10.39, "learning_rate": 4.924817442181915e-05, "loss": 0.023, "step": 1455 }, { "epoch": 10.4, "learning_rate": 4.919034655987493e-05, "loss": 0.0243, "step": 1456 }, { "epoch": 10.41, "learning_rate": 4.9132519781207235e-05, "loss": 0.0482, "step": 1457 }, { "epoch": 10.41, "learning_rate": 4.907469416318547e-05, "loss": 0.0517, "step": 1458 }, { "epoch": 10.42, "learning_rate": 4.9016869783177434e-05, "loss": 0.0533, "step": 1459 }, { "epoch": 10.43, "learning_rate": 4.895904671854933e-05, "loss": 0.0326, "step": 1460 }, { "epoch": 10.44, "learning_rate": 4.890122504666557e-05, "loss": 0.0603, "step": 1461 }, { "epoch": 10.44, "learning_rate": 4.884340484488871e-05, "loss": 0.0558, "step": 1462 }, { "epoch": 10.45, "learning_rate": 4.8785586190579355e-05, "loss": 0.0378, "step": 1463 }, { "epoch": 10.46, "learning_rate": 4.872776916109601e-05, "loss": 0.0544, "step": 1464 }, { "epoch": 10.46, "learning_rate": 4.866995383379501e-05, "loss": 0.0514, "step": 1465 }, { "epoch": 10.47, "learning_rate": 4.861214028603044e-05, "loss": 0.0593, "step": 1466 }, { "epoch": 10.48, "learning_rate": 4.855432859515397e-05, "loss": 0.016, "step": 1467 }, { "epoch": 10.49, "learning_rate": 4.849651883851481e-05, "loss": 0.0716, "step": 1468 }, { "epoch": 10.49, "learning_rate": 4.843871109345959e-05, "loss": 0.0835, "step": 1469 }, { "epoch": 10.5, "learning_rate": 4.838090543733222e-05, "loss": 0.0474, "step": 1470 }, { "epoch": 10.51, "learning_rate": 4.832310194747382e-05, "loss": 0.0613, "step": 1471 }, { "epoch": 10.51, "learning_rate": 4.826530070122262e-05, "loss": 0.0423, "step": 1472 }, { "epoch": 10.52, "learning_rate": 4.820750177591386e-05, "loss": 0.0845, "step": 1473 }, { "epoch": 10.53, "learning_rate": 4.814970524887964e-05, "loss": 0.0436, "step": 1474 }, { "epoch": 10.54, "learning_rate": 4.8091911197448906e-05, "loss": 0.0637, "step": 1475 }, { "epoch": 10.54, "learning_rate": 4.803411969894724e-05, "loss": 0.0309, "step": 1476 }, { "epoch": 10.55, "learning_rate": 4.797633083069683e-05, "loss": 0.0645, "step": 1477 }, { "epoch": 10.56, "learning_rate": 4.7918544670016335e-05, "loss": 0.0744, "step": 1478 }, { "epoch": 10.56, "learning_rate": 4.786076129422081e-05, "loss": 0.0942, "step": 1479 }, { "epoch": 10.57, "learning_rate": 4.780298078062157e-05, "loss": 0.0399, "step": 1480 }, { "epoch": 10.58, "learning_rate": 4.77452032065261e-05, "loss": 0.0844, "step": 1481 }, { "epoch": 10.59, "learning_rate": 4.768742864923797e-05, "loss": 0.0594, "step": 1482 }, { "epoch": 10.59, "learning_rate": 4.7629657186056673e-05, "loss": 0.0971, "step": 1483 }, { "epoch": 10.6, "learning_rate": 4.7571888894277604e-05, "loss": 0.0614, "step": 1484 }, { "epoch": 10.61, "learning_rate": 4.7514123851191875e-05, "loss": 0.079, "step": 1485 }, { "epoch": 10.61, "learning_rate": 4.745636213408633e-05, "loss": 0.0211, "step": 1486 }, { "epoch": 10.62, "learning_rate": 4.739860382024327e-05, "loss": 0.0472, "step": 1487 }, { "epoch": 10.63, "learning_rate": 4.7340848986940487e-05, "loss": 0.0391, "step": 1488 }, { "epoch": 10.64, "learning_rate": 4.72830977114511e-05, "loss": 0.0659, "step": 1489 }, { "epoch": 10.64, "learning_rate": 4.72253500710435e-05, "loss": 0.0604, "step": 1490 }, { "epoch": 10.65, "learning_rate": 4.716760614298118e-05, "loss": 0.0394, "step": 1491 }, { "epoch": 10.66, "learning_rate": 4.710986600452269e-05, "loss": 0.0571, "step": 1492 }, { "epoch": 10.66, "learning_rate": 4.705212973292148e-05, "loss": 0.0322, "step": 1493 }, { "epoch": 10.67, "learning_rate": 4.699439740542585e-05, "loss": 0.0622, "step": 1494 }, { "epoch": 10.68, "learning_rate": 4.6936669099278823e-05, "loss": 0.0867, "step": 1495 }, { "epoch": 10.69, "learning_rate": 4.687894489171804e-05, "loss": 0.0762, "step": 1496 }, { "epoch": 10.69, "learning_rate": 4.682122485997564e-05, "loss": 0.0334, "step": 1497 }, { "epoch": 10.7, "learning_rate": 4.676350908127822e-05, "loss": 0.0362, "step": 1498 }, { "epoch": 10.71, "learning_rate": 4.6705797632846614e-05, "loss": 0.0665, "step": 1499 }, { "epoch": 10.71, "learning_rate": 4.6648090591895935e-05, "loss": 0.0361, "step": 1500 }, { "epoch": 10.72, "learning_rate": 4.6590388035635355e-05, "loss": 0.0339, "step": 1501 }, { "epoch": 10.73, "learning_rate": 4.653269004126806e-05, "loss": 0.0414, "step": 1502 }, { "epoch": 10.74, "learning_rate": 4.6474996685991134e-05, "loss": 0.1023, "step": 1503 }, { "epoch": 10.74, "learning_rate": 4.641730804699547e-05, "loss": 0.056, "step": 1504 }, { "epoch": 10.75, "learning_rate": 4.63596242014656e-05, "loss": 0.1044, "step": 1505 }, { "epoch": 10.76, "learning_rate": 4.6301945226579695e-05, "loss": 0.0457, "step": 1506 }, { "epoch": 10.76, "learning_rate": 4.624427119950938e-05, "loss": 0.0491, "step": 1507 }, { "epoch": 10.77, "learning_rate": 4.6186602197419685e-05, "loss": 0.0659, "step": 1508 }, { "epoch": 10.78, "learning_rate": 4.612893829746888e-05, "loss": 0.0753, "step": 1509 }, { "epoch": 10.79, "learning_rate": 4.6071279576808454e-05, "loss": 0.0354, "step": 1510 }, { "epoch": 10.79, "learning_rate": 4.6013626112582924e-05, "loss": 0.0331, "step": 1511 }, { "epoch": 10.8, "learning_rate": 4.59559779819298e-05, "loss": 0.0403, "step": 1512 }, { "epoch": 10.81, "learning_rate": 4.589833526197944e-05, "loss": 0.0994, "step": 1513 }, { "epoch": 10.81, "learning_rate": 4.5840698029854975e-05, "loss": 0.0448, "step": 1514 }, { "epoch": 10.82, "learning_rate": 4.57830663626722e-05, "loss": 0.0587, "step": 1515 }, { "epoch": 10.83, "learning_rate": 4.572544033753945e-05, "loss": 0.027, "step": 1516 }, { "epoch": 10.84, "learning_rate": 4.566782003155751e-05, "loss": 0.186, "step": 1517 }, { "epoch": 10.84, "learning_rate": 4.561020552181952e-05, "loss": 0.0428, "step": 1518 }, { "epoch": 10.85, "learning_rate": 4.555259688541086e-05, "loss": 0.0579, "step": 1519 }, { "epoch": 10.86, "learning_rate": 4.5494994199409067e-05, "loss": 0.058, "step": 1520 }, { "epoch": 10.86, "learning_rate": 4.543739754088369e-05, "loss": 0.1325, "step": 1521 }, { "epoch": 10.87, "learning_rate": 4.537980698689623e-05, "loss": 0.0455, "step": 1522 }, { "epoch": 10.88, "learning_rate": 4.532222261450004e-05, "loss": 0.0366, "step": 1523 }, { "epoch": 10.89, "learning_rate": 4.5264644500740156e-05, "loss": 0.0253, "step": 1524 }, { "epoch": 10.89, "learning_rate": 4.5207072722653274e-05, "loss": 0.0481, "step": 1525 }, { "epoch": 10.9, "learning_rate": 4.51495073572676e-05, "loss": 0.0718, "step": 1526 }, { "epoch": 10.91, "learning_rate": 4.5091948481602773e-05, "loss": 0.0479, "step": 1527 }, { "epoch": 10.91, "learning_rate": 4.503439617266973e-05, "loss": 0.0674, "step": 1528 }, { "epoch": 10.92, "learning_rate": 4.4976850507470655e-05, "loss": 0.0375, "step": 1529 }, { "epoch": 10.93, "learning_rate": 4.49193115629988e-05, "loss": 0.0811, "step": 1530 }, { "epoch": 10.94, "learning_rate": 4.486177941623845e-05, "loss": 0.0375, "step": 1531 }, { "epoch": 10.94, "learning_rate": 4.4804254144164785e-05, "loss": 0.0713, "step": 1532 }, { "epoch": 10.95, "learning_rate": 4.474673582374381e-05, "loss": 0.0407, "step": 1533 }, { "epoch": 10.96, "learning_rate": 4.468922453193219e-05, "loss": 0.0589, "step": 1534 }, { "epoch": 10.96, "learning_rate": 4.463172034567724e-05, "loss": 0.0594, "step": 1535 }, { "epoch": 10.97, "learning_rate": 4.4574223341916695e-05, "loss": 0.0151, "step": 1536 }, { "epoch": 10.98, "learning_rate": 4.451673359757874e-05, "loss": 0.0522, "step": 1537 }, { "epoch": 10.99, "learning_rate": 4.445925118958183e-05, "loss": 0.0345, "step": 1538 }, { "epoch": 10.99, "learning_rate": 4.4401776194834613e-05, "loss": 0.0956, "step": 1539 }, { "epoch": 11.0, "learning_rate": 4.434430869023579e-05, "loss": 0.0412, "step": 1540 }, { "epoch": 11.01, "learning_rate": 4.4286848752674075e-05, "loss": 0.029, "step": 1541 }, { "epoch": 11.01, "learning_rate": 4.422939645902803e-05, "loss": 0.0348, "step": 1542 }, { "epoch": 11.02, "learning_rate": 4.4171951886165994e-05, "loss": 0.0386, "step": 1543 }, { "epoch": 11.03, "learning_rate": 4.4114515110945995e-05, "loss": 0.0393, "step": 1544 }, { "epoch": 11.04, "learning_rate": 4.405708621021562e-05, "loss": 0.0308, "step": 1545 }, { "epoch": 11.04, "learning_rate": 4.39996652608119e-05, "loss": 0.0443, "step": 1546 }, { "epoch": 11.05, "learning_rate": 4.3942252339561275e-05, "loss": 0.0522, "step": 1547 }, { "epoch": 11.06, "learning_rate": 4.3884847523279376e-05, "loss": 0.032, "step": 1548 }, { "epoch": 11.06, "learning_rate": 4.382745088877104e-05, "loss": 0.0241, "step": 1549 }, { "epoch": 11.07, "learning_rate": 4.3770062512830154e-05, "loss": 0.0462, "step": 1550 }, { "epoch": 11.08, "learning_rate": 4.371268247223954e-05, "loss": 0.0375, "step": 1551 }, { "epoch": 11.09, "learning_rate": 4.365531084377087e-05, "loss": 0.0845, "step": 1552 }, { "epoch": 11.09, "learning_rate": 4.3597947704184574e-05, "loss": 0.0515, "step": 1553 }, { "epoch": 11.1, "learning_rate": 4.35405931302297e-05, "loss": 0.0433, "step": 1554 }, { "epoch": 11.11, "learning_rate": 4.348324719864385e-05, "loss": 0.0247, "step": 1555 }, { "epoch": 11.11, "learning_rate": 4.342590998615307e-05, "loss": 0.0465, "step": 1556 }, { "epoch": 11.12, "learning_rate": 4.336858156947174e-05, "loss": 0.0167, "step": 1557 }, { "epoch": 11.13, "learning_rate": 4.331126202530245e-05, "loss": 0.0488, "step": 1558 }, { "epoch": 11.14, "learning_rate": 4.3253951430335945e-05, "loss": 0.032, "step": 1559 }, { "epoch": 11.14, "learning_rate": 4.319664986125099e-05, "loss": 0.0459, "step": 1560 }, { "epoch": 11.15, "learning_rate": 4.3139357394714255e-05, "loss": 0.019, "step": 1561 }, { "epoch": 11.16, "learning_rate": 4.3082074107380245e-05, "loss": 0.0578, "step": 1562 }, { "epoch": 11.16, "learning_rate": 4.302480007589119e-05, "loss": 0.0212, "step": 1563 }, { "epoch": 11.17, "learning_rate": 4.2967535376876936e-05, "loss": 0.0582, "step": 1564 }, { "epoch": 11.18, "learning_rate": 4.291028008695483e-05, "loss": 0.0266, "step": 1565 }, { "epoch": 11.19, "learning_rate": 4.285303428272964e-05, "loss": 0.0365, "step": 1566 }, { "epoch": 11.19, "learning_rate": 4.279579804079344e-05, "loss": 0.043, "step": 1567 }, { "epoch": 11.2, "learning_rate": 4.27385714377255e-05, "loss": 0.0442, "step": 1568 }, { "epoch": 11.21, "learning_rate": 4.2681354550092214e-05, "loss": 0.0311, "step": 1569 }, { "epoch": 11.21, "learning_rate": 4.2624147454446945e-05, "loss": 0.0318, "step": 1570 }, { "epoch": 11.22, "learning_rate": 4.256695022733001e-05, "loss": 0.1141, "step": 1571 }, { "epoch": 11.23, "learning_rate": 4.2509762945268474e-05, "loss": 0.084, "step": 1572 }, { "epoch": 11.24, "learning_rate": 4.24525856847761e-05, "loss": 0.0493, "step": 1573 }, { "epoch": 11.24, "learning_rate": 4.239541852235327e-05, "loss": 0.0432, "step": 1574 }, { "epoch": 11.25, "learning_rate": 4.2338261534486844e-05, "loss": 0.0425, "step": 1575 }, { "epoch": 11.26, "learning_rate": 4.228111479765004e-05, "loss": 0.0338, "step": 1576 }, { "epoch": 11.26, "learning_rate": 4.2223978388302394e-05, "loss": 0.0646, "step": 1577 }, { "epoch": 11.27, "learning_rate": 4.2166852382889616e-05, "loss": 0.0676, "step": 1578 }, { "epoch": 11.28, "learning_rate": 4.210973685784349e-05, "loss": 0.063, "step": 1579 }, { "epoch": 11.29, "learning_rate": 4.205263188958179e-05, "loss": 0.0285, "step": 1580 }, { "epoch": 11.29, "learning_rate": 4.1995537554508156e-05, "loss": 0.0398, "step": 1581 }, { "epoch": 11.3, "learning_rate": 4.193845392901201e-05, "loss": 0.0432, "step": 1582 }, { "epoch": 11.31, "learning_rate": 4.188138108946842e-05, "loss": 0.0425, "step": 1583 }, { "epoch": 11.31, "learning_rate": 4.182431911223805e-05, "loss": 0.0264, "step": 1584 }, { "epoch": 11.32, "learning_rate": 4.1767268073667026e-05, "loss": 0.0892, "step": 1585 }, { "epoch": 11.33, "learning_rate": 4.171022805008683e-05, "loss": 0.0074, "step": 1586 }, { "epoch": 11.34, "learning_rate": 4.165319911781421e-05, "loss": 0.0611, "step": 1587 }, { "epoch": 11.34, "learning_rate": 4.159618135315109e-05, "loss": 0.0607, "step": 1588 }, { "epoch": 11.35, "learning_rate": 4.153917483238442e-05, "loss": 0.0585, "step": 1589 }, { "epoch": 11.36, "learning_rate": 4.1482179631786124e-05, "loss": 0.0213, "step": 1590 }, { "epoch": 11.36, "learning_rate": 4.1425195827612986e-05, "loss": 0.0184, "step": 1591 }, { "epoch": 11.37, "learning_rate": 4.136822349610654e-05, "loss": 0.0782, "step": 1592 }, { "epoch": 11.38, "learning_rate": 4.1311262713492966e-05, "loss": 0.0218, "step": 1593 }, { "epoch": 11.39, "learning_rate": 4.125431355598299e-05, "loss": 0.0293, "step": 1594 }, { "epoch": 11.39, "learning_rate": 4.119737609977178e-05, "loss": 0.0201, "step": 1595 }, { "epoch": 11.4, "learning_rate": 4.114045042103887e-05, "loss": 0.0622, "step": 1596 }, { "epoch": 11.41, "learning_rate": 4.1083536595948e-05, "loss": 0.0438, "step": 1597 }, { "epoch": 11.41, "learning_rate": 4.1026634700647084e-05, "loss": 0.0499, "step": 1598 }, { "epoch": 11.42, "learning_rate": 4.096974481126807e-05, "loss": 0.0534, "step": 1599 }, { "epoch": 11.43, "learning_rate": 4.0912867003926834e-05, "loss": 0.0697, "step": 1600 }, { "epoch": 11.44, "learning_rate": 4.085600135472307e-05, "loss": 0.0176, "step": 1601 }, { "epoch": 11.44, "learning_rate": 4.079914793974023e-05, "loss": 0.034, "step": 1602 }, { "epoch": 11.45, "learning_rate": 4.07423068350454e-05, "loss": 0.035, "step": 1603 }, { "epoch": 11.46, "learning_rate": 4.068547811668918e-05, "loss": 0.0396, "step": 1604 }, { "epoch": 11.46, "learning_rate": 4.06286618607056e-05, "loss": 0.0179, "step": 1605 }, { "epoch": 11.47, "learning_rate": 4.057185814311203e-05, "loss": 0.0358, "step": 1606 }, { "epoch": 11.48, "learning_rate": 4.0515067039909025e-05, "loss": 0.0383, "step": 1607 }, { "epoch": 11.49, "learning_rate": 4.045828862708032e-05, "loss": 0.0303, "step": 1608 }, { "epoch": 11.49, "learning_rate": 4.040152298059263e-05, "loss": 0.0487, "step": 1609 }, { "epoch": 11.5, "learning_rate": 4.0344770176395606e-05, "loss": 0.019, "step": 1610 }, { "epoch": 11.51, "learning_rate": 4.0288030290421705e-05, "loss": 0.0138, "step": 1611 }, { "epoch": 11.51, "learning_rate": 4.023130339858612e-05, "loss": 0.0721, "step": 1612 }, { "epoch": 11.52, "learning_rate": 4.017458957678663e-05, "loss": 0.0149, "step": 1613 }, { "epoch": 11.53, "learning_rate": 4.011788890090354e-05, "loss": 0.0208, "step": 1614 }, { "epoch": 11.54, "learning_rate": 4.0061201446799584e-05, "loss": 0.0375, "step": 1615 }, { "epoch": 11.54, "learning_rate": 4.0004527290319784e-05, "loss": 0.0548, "step": 1616 }, { "epoch": 11.55, "learning_rate": 3.9947866507291364e-05, "loss": 0.0168, "step": 1617 }, { "epoch": 11.56, "learning_rate": 3.98912191735237e-05, "loss": 0.0462, "step": 1618 }, { "epoch": 11.56, "learning_rate": 3.9834585364808104e-05, "loss": 0.062, "step": 1619 }, { "epoch": 11.57, "learning_rate": 3.977796515691785e-05, "loss": 0.0285, "step": 1620 }, { "epoch": 11.58, "learning_rate": 3.9721358625607985e-05, "loss": 0.0202, "step": 1621 }, { "epoch": 11.59, "learning_rate": 3.966476584661528e-05, "loss": 0.0081, "step": 1622 }, { "epoch": 11.59, "learning_rate": 3.960818689565808e-05, "loss": 0.0298, "step": 1623 }, { "epoch": 11.6, "learning_rate": 3.955162184843625e-05, "loss": 0.0445, "step": 1624 }, { "epoch": 11.61, "learning_rate": 3.949507078063105e-05, "loss": 0.1062, "step": 1625 }, { "epoch": 11.61, "learning_rate": 3.943853376790501e-05, "loss": 0.0234, "step": 1626 }, { "epoch": 11.62, "learning_rate": 3.9382010885901884e-05, "loss": 0.0443, "step": 1627 }, { "epoch": 11.63, "learning_rate": 3.9325502210246514e-05, "loss": 0.096, "step": 1628 }, { "epoch": 11.64, "learning_rate": 3.9269007816544734e-05, "loss": 0.0127, "step": 1629 }, { "epoch": 11.64, "learning_rate": 3.921252778038326e-05, "loss": 0.0419, "step": 1630 }, { "epoch": 11.65, "learning_rate": 3.915606217732962e-05, "loss": 0.0298, "step": 1631 }, { "epoch": 11.66, "learning_rate": 3.9099611082932e-05, "loss": 0.0525, "step": 1632 }, { "epoch": 11.66, "learning_rate": 3.9043174572719194e-05, "loss": 0.0269, "step": 1633 }, { "epoch": 11.67, "learning_rate": 3.8986752722200484e-05, "loss": 0.0571, "step": 1634 }, { "epoch": 11.68, "learning_rate": 3.893034560686553e-05, "loss": 0.0455, "step": 1635 }, { "epoch": 11.69, "learning_rate": 3.887395330218429e-05, "loss": 0.0062, "step": 1636 }, { "epoch": 11.69, "learning_rate": 3.881757588360689e-05, "loss": 0.0201, "step": 1637 }, { "epoch": 11.7, "learning_rate": 3.876121342656355e-05, "loss": 0.0476, "step": 1638 }, { "epoch": 11.71, "learning_rate": 3.870486600646445e-05, "loss": 0.0933, "step": 1639 }, { "epoch": 11.71, "learning_rate": 3.8648533698699695e-05, "loss": 0.035, "step": 1640 }, { "epoch": 11.72, "learning_rate": 3.859221657863913e-05, "loss": 0.0215, "step": 1641 }, { "epoch": 11.73, "learning_rate": 3.8535914721632304e-05, "loss": 0.0507, "step": 1642 }, { "epoch": 11.74, "learning_rate": 3.847962820300834e-05, "loss": 0.0557, "step": 1643 }, { "epoch": 11.74, "learning_rate": 3.8423357098075815e-05, "loss": 0.0314, "step": 1644 }, { "epoch": 11.75, "learning_rate": 3.836710148212271e-05, "loss": 0.0173, "step": 1645 }, { "epoch": 11.76, "learning_rate": 3.831086143041628e-05, "loss": 0.0437, "step": 1646 }, { "epoch": 11.76, "learning_rate": 3.825463701820295e-05, "loss": 0.0878, "step": 1647 }, { "epoch": 11.77, "learning_rate": 3.8198428320708216e-05, "loss": 0.0392, "step": 1648 }, { "epoch": 11.78, "learning_rate": 3.814223541313657e-05, "loss": 0.0662, "step": 1649 }, { "epoch": 11.79, "learning_rate": 3.8086058370671355e-05, "loss": 0.0297, "step": 1650 }, { "epoch": 11.79, "learning_rate": 3.802989726847467e-05, "loss": 0.0778, "step": 1651 }, { "epoch": 11.8, "learning_rate": 3.7973752181687335e-05, "loss": 0.0284, "step": 1652 }, { "epoch": 11.81, "learning_rate": 3.7917623185428706e-05, "loss": 0.0647, "step": 1653 }, { "epoch": 11.81, "learning_rate": 3.7861510354796634e-05, "loss": 0.0388, "step": 1654 }, { "epoch": 11.82, "learning_rate": 3.780541376486731e-05, "loss": 0.0531, "step": 1655 }, { "epoch": 11.83, "learning_rate": 3.774933349069524e-05, "loss": 0.044, "step": 1656 }, { "epoch": 11.84, "learning_rate": 3.7693269607313064e-05, "loss": 0.0255, "step": 1657 }, { "epoch": 11.84, "learning_rate": 3.7637222189731504e-05, "loss": 0.0926, "step": 1658 }, { "epoch": 11.85, "learning_rate": 3.758119131293926e-05, "loss": 0.0114, "step": 1659 }, { "epoch": 11.86, "learning_rate": 3.7525177051902874e-05, "loss": 0.0235, "step": 1660 }, { "epoch": 11.86, "learning_rate": 3.746917948156668e-05, "loss": 0.0487, "step": 1661 }, { "epoch": 11.87, "learning_rate": 3.741319867685268e-05, "loss": 0.0197, "step": 1662 }, { "epoch": 11.88, "learning_rate": 3.735723471266044e-05, "loss": 0.0349, "step": 1663 }, { "epoch": 11.89, "learning_rate": 3.7301287663867005e-05, "loss": 0.017, "step": 1664 }, { "epoch": 11.89, "learning_rate": 3.7245357605326766e-05, "loss": 0.0371, "step": 1665 }, { "epoch": 11.9, "learning_rate": 3.718944461187138e-05, "loss": 0.017, "step": 1666 }, { "epoch": 11.91, "learning_rate": 3.713354875830971e-05, "loss": 0.0399, "step": 1667 }, { "epoch": 11.91, "learning_rate": 3.7077670119427645e-05, "loss": 0.0407, "step": 1668 }, { "epoch": 11.92, "learning_rate": 3.7021808769988064e-05, "loss": 0.0226, "step": 1669 }, { "epoch": 11.93, "learning_rate": 3.696596478473072e-05, "loss": 0.0783, "step": 1670 }, { "epoch": 11.94, "learning_rate": 3.691013823837211e-05, "loss": 0.0376, "step": 1671 }, { "epoch": 11.94, "learning_rate": 3.68543292056054e-05, "loss": 0.0719, "step": 1672 }, { "epoch": 11.95, "learning_rate": 3.679853776110035e-05, "loss": 0.0702, "step": 1673 }, { "epoch": 11.96, "learning_rate": 3.6742763979503156e-05, "loss": 0.0321, "step": 1674 }, { "epoch": 11.96, "learning_rate": 3.6687007935436416e-05, "loss": 0.0098, "step": 1675 }, { "epoch": 11.97, "learning_rate": 3.663126970349897e-05, "loss": 0.0392, "step": 1676 }, { "epoch": 11.98, "learning_rate": 3.6575549358265836e-05, "loss": 0.0297, "step": 1677 }, { "epoch": 11.99, "learning_rate": 3.6519846974288095e-05, "loss": 0.048, "step": 1678 }, { "epoch": 11.99, "learning_rate": 3.646416262609279e-05, "loss": 0.0428, "step": 1679 }, { "epoch": 12.0, "learning_rate": 3.640849638818286e-05, "loss": 0.04, "step": 1680 }, { "epoch": 12.01, "learning_rate": 3.635284833503699e-05, "loss": 0.0575, "step": 1681 }, { "epoch": 12.01, "learning_rate": 3.6297218541109534e-05, "loss": 0.054, "step": 1682 }, { "epoch": 12.02, "learning_rate": 3.6241607080830445e-05, "loss": 0.0376, "step": 1683 }, { "epoch": 12.03, "learning_rate": 3.6186014028605096e-05, "loss": 0.0357, "step": 1684 }, { "epoch": 12.04, "learning_rate": 3.613043945881428e-05, "loss": 0.0312, "step": 1685 }, { "epoch": 12.04, "learning_rate": 3.607488344581402e-05, "loss": 0.0284, "step": 1686 }, { "epoch": 12.05, "learning_rate": 3.601934606393555e-05, "loss": 0.0404, "step": 1687 }, { "epoch": 12.06, "learning_rate": 3.596382738748516e-05, "loss": 0.0142, "step": 1688 }, { "epoch": 12.06, "learning_rate": 3.5908327490744114e-05, "loss": 0.0744, "step": 1689 }, { "epoch": 12.07, "learning_rate": 3.585284644796852e-05, "loss": 0.0253, "step": 1690 }, { "epoch": 12.08, "learning_rate": 3.5797384333389315e-05, "loss": 0.0321, "step": 1691 }, { "epoch": 12.09, "learning_rate": 3.574194122121207e-05, "loss": 0.0153, "step": 1692 }, { "epoch": 12.09, "learning_rate": 3.5686517185616955e-05, "loss": 0.015, "step": 1693 }, { "epoch": 12.1, "learning_rate": 3.5631112300758595e-05, "loss": 0.0228, "step": 1694 }, { "epoch": 12.11, "learning_rate": 3.557572664076601e-05, "loss": 0.0593, "step": 1695 }, { "epoch": 12.11, "learning_rate": 3.55203602797425e-05, "loss": 0.0852, "step": 1696 }, { "epoch": 12.12, "learning_rate": 3.5465013291765515e-05, "loss": 0.0335, "step": 1697 }, { "epoch": 12.13, "learning_rate": 3.5409685750886623e-05, "loss": 0.029, "step": 1698 }, { "epoch": 12.14, "learning_rate": 3.535437773113135e-05, "loss": 0.0219, "step": 1699 }, { "epoch": 12.14, "learning_rate": 3.52990893064991e-05, "loss": 0.0297, "step": 1700 }, { "epoch": 12.15, "learning_rate": 3.524382055096309e-05, "loss": 0.0116, "step": 1701 }, { "epoch": 12.16, "learning_rate": 3.5188571538470186e-05, "loss": 0.0292, "step": 1702 }, { "epoch": 12.16, "learning_rate": 3.513334234294085e-05, "loss": 0.0195, "step": 1703 }, { "epoch": 12.17, "learning_rate": 3.507813303826903e-05, "loss": 0.0523, "step": 1704 }, { "epoch": 12.18, "learning_rate": 3.502294369832208e-05, "loss": 0.0317, "step": 1705 }, { "epoch": 12.19, "learning_rate": 3.496777439694061e-05, "loss": 0.0138, "step": 1706 }, { "epoch": 12.19, "learning_rate": 3.4912625207938433e-05, "loss": 0.0297, "step": 1707 }, { "epoch": 12.2, "learning_rate": 3.4857496205102474e-05, "loss": 0.0147, "step": 1708 }, { "epoch": 12.21, "learning_rate": 3.48023874621926e-05, "loss": 0.0324, "step": 1709 }, { "epoch": 12.21, "learning_rate": 3.474729905294163e-05, "loss": 0.0865, "step": 1710 }, { "epoch": 12.22, "learning_rate": 3.4692231051055125e-05, "loss": 0.0415, "step": 1711 }, { "epoch": 12.23, "learning_rate": 3.463718353021138e-05, "loss": 0.0508, "step": 1712 }, { "epoch": 12.24, "learning_rate": 3.458215656406128e-05, "loss": 0.039, "step": 1713 }, { "epoch": 12.24, "learning_rate": 3.45271502262282e-05, "loss": 0.0144, "step": 1714 }, { "epoch": 12.25, "learning_rate": 3.447216459030789e-05, "loss": 0.0344, "step": 1715 }, { "epoch": 12.26, "learning_rate": 3.441719972986846e-05, "loss": 0.0592, "step": 1716 }, { "epoch": 12.26, "learning_rate": 3.436225571845017e-05, "loss": 0.0344, "step": 1717 }, { "epoch": 12.27, "learning_rate": 3.4307332629565434e-05, "loss": 0.0357, "step": 1718 }, { "epoch": 12.28, "learning_rate": 3.4252430536698635e-05, "loss": 0.0307, "step": 1719 }, { "epoch": 12.29, "learning_rate": 3.419754951330608e-05, "loss": 0.029, "step": 1720 }, { "epoch": 12.29, "learning_rate": 3.414268963281586e-05, "loss": 0.0568, "step": 1721 }, { "epoch": 12.3, "learning_rate": 3.408785096862782e-05, "loss": 0.0265, "step": 1722 }, { "epoch": 12.31, "learning_rate": 3.4033033594113396e-05, "loss": 0.0131, "step": 1723 }, { "epoch": 12.31, "learning_rate": 3.397823758261553e-05, "loss": 0.0557, "step": 1724 }, { "epoch": 12.32, "learning_rate": 3.392346300744861e-05, "loss": 0.0526, "step": 1725 }, { "epoch": 12.33, "learning_rate": 3.386870994189833e-05, "loss": 0.0397, "step": 1726 }, { "epoch": 12.34, "learning_rate": 3.3813978459221574e-05, "loss": 0.0423, "step": 1727 }, { "epoch": 12.34, "learning_rate": 3.37592686326464e-05, "loss": 0.0264, "step": 1728 }, { "epoch": 12.35, "learning_rate": 3.370458053537186e-05, "loss": 0.0312, "step": 1729 }, { "epoch": 12.36, "learning_rate": 3.3649914240567944e-05, "loss": 0.0123, "step": 1730 }, { "epoch": 12.36, "learning_rate": 3.359526982137546e-05, "loss": 0.0136, "step": 1731 }, { "epoch": 12.37, "learning_rate": 3.354064735090599e-05, "loss": 0.0149, "step": 1732 }, { "epoch": 12.38, "learning_rate": 3.3486046902241664e-05, "loss": 0.0431, "step": 1733 }, { "epoch": 12.39, "learning_rate": 3.343146854843523e-05, "loss": 0.0216, "step": 1734 }, { "epoch": 12.39, "learning_rate": 3.3376912362509835e-05, "loss": 0.0135, "step": 1735 }, { "epoch": 12.4, "learning_rate": 3.332237841745898e-05, "loss": 0.1102, "step": 1736 }, { "epoch": 12.41, "learning_rate": 3.32678667862464e-05, "loss": 0.128, "step": 1737 }, { "epoch": 12.41, "learning_rate": 3.3213377541805993e-05, "loss": 0.0473, "step": 1738 }, { "epoch": 12.42, "learning_rate": 3.3158910757041686e-05, "loss": 0.0191, "step": 1739 }, { "epoch": 12.43, "learning_rate": 3.310446650482732e-05, "loss": 0.0318, "step": 1740 }, { "epoch": 12.44, "learning_rate": 3.30500448580067e-05, "loss": 0.044, "step": 1741 }, { "epoch": 12.44, "learning_rate": 3.299564588939328e-05, "loss": 0.0244, "step": 1742 }, { "epoch": 12.45, "learning_rate": 3.294126967177019e-05, "loss": 0.0223, "step": 1743 }, { "epoch": 12.46, "learning_rate": 3.288691627789017e-05, "loss": 0.1216, "step": 1744 }, { "epoch": 12.46, "learning_rate": 3.283258578047537e-05, "loss": 0.0715, "step": 1745 }, { "epoch": 12.47, "learning_rate": 3.277827825221733e-05, "loss": 0.0196, "step": 1746 }, { "epoch": 12.48, "learning_rate": 3.272399376577686e-05, "loss": 0.0236, "step": 1747 }, { "epoch": 12.49, "learning_rate": 3.266973239378394e-05, "loss": 0.0344, "step": 1748 }, { "epoch": 12.49, "learning_rate": 3.26154942088376e-05, "loss": 0.0265, "step": 1749 }, { "epoch": 12.5, "learning_rate": 3.2561279283505883e-05, "loss": 0.0571, "step": 1750 }, { "epoch": 12.51, "learning_rate": 3.2507087690325696e-05, "loss": 0.0293, "step": 1751 }, { "epoch": 12.51, "learning_rate": 3.2452919501802715e-05, "loss": 0.0321, "step": 1752 }, { "epoch": 12.52, "learning_rate": 3.239877479041133e-05, "loss": 0.0203, "step": 1753 }, { "epoch": 12.53, "learning_rate": 3.234465362859451e-05, "loss": 0.0614, "step": 1754 }, { "epoch": 12.54, "learning_rate": 3.229055608876368e-05, "loss": 0.0183, "step": 1755 }, { "epoch": 12.54, "learning_rate": 3.2236482243298714e-05, "loss": 0.0178, "step": 1756 }, { "epoch": 12.55, "learning_rate": 3.218243216454775e-05, "loss": 0.062, "step": 1757 }, { "epoch": 12.56, "learning_rate": 3.212840592482715e-05, "loss": 0.0269, "step": 1758 }, { "epoch": 12.56, "learning_rate": 3.207440359642136e-05, "loss": 0.0315, "step": 1759 }, { "epoch": 12.57, "learning_rate": 3.2020425251582844e-05, "loss": 0.0632, "step": 1760 }, { "epoch": 12.58, "learning_rate": 3.196647096253198e-05, "loss": 0.0589, "step": 1761 }, { "epoch": 12.59, "learning_rate": 3.191254080145695e-05, "loss": 0.0274, "step": 1762 }, { "epoch": 12.59, "learning_rate": 3.185863484051365e-05, "loss": 0.0847, "step": 1763 }, { "epoch": 12.6, "learning_rate": 3.180475315182563e-05, "loss": 0.0503, "step": 1764 }, { "epoch": 12.61, "learning_rate": 3.1750895807483916e-05, "loss": 0.0357, "step": 1765 }, { "epoch": 12.61, "learning_rate": 3.169706287954701e-05, "loss": 0.0677, "step": 1766 }, { "epoch": 12.62, "learning_rate": 3.1643254440040734e-05, "loss": 0.048, "step": 1767 }, { "epoch": 12.63, "learning_rate": 3.1589470560958104e-05, "loss": 0.0195, "step": 1768 }, { "epoch": 12.64, "learning_rate": 3.153571131425932e-05, "loss": 0.0246, "step": 1769 }, { "epoch": 12.64, "learning_rate": 3.148197677187162e-05, "loss": 0.0105, "step": 1770 }, { "epoch": 12.65, "learning_rate": 3.142826700568918e-05, "loss": 0.0448, "step": 1771 }, { "epoch": 12.66, "learning_rate": 3.137458208757302e-05, "loss": 0.0234, "step": 1772 }, { "epoch": 12.66, "learning_rate": 3.1320922089350945e-05, "loss": 0.0218, "step": 1773 }, { "epoch": 12.67, "learning_rate": 3.1267287082817375e-05, "loss": 0.0227, "step": 1774 }, { "epoch": 12.68, "learning_rate": 3.121367713973331e-05, "loss": 0.0419, "step": 1775 }, { "epoch": 12.69, "learning_rate": 3.116009233182623e-05, "loss": 0.0234, "step": 1776 }, { "epoch": 12.69, "learning_rate": 3.1106532730789986e-05, "loss": 0.0117, "step": 1777 }, { "epoch": 12.7, "learning_rate": 3.105299840828466e-05, "loss": 0.0184, "step": 1778 }, { "epoch": 12.71, "learning_rate": 3.099948943593659e-05, "loss": 0.0315, "step": 1779 }, { "epoch": 12.71, "learning_rate": 3.0946005885338113e-05, "loss": 0.0199, "step": 1780 }, { "epoch": 12.72, "learning_rate": 3.0892547828047616e-05, "loss": 0.0193, "step": 1781 }, { "epoch": 12.73, "learning_rate": 3.083911533558933e-05, "loss": 0.0188, "step": 1782 }, { "epoch": 12.74, "learning_rate": 3.078570847945334e-05, "loss": 0.0525, "step": 1783 }, { "epoch": 12.74, "learning_rate": 3.073232733109536e-05, "loss": 0.02, "step": 1784 }, { "epoch": 12.75, "learning_rate": 3.0678971961936764e-05, "loss": 0.0381, "step": 1785 }, { "epoch": 12.76, "learning_rate": 3.06256424433644e-05, "loss": 0.0398, "step": 1786 }, { "epoch": 12.76, "learning_rate": 3.0572338846730566e-05, "loss": 0.0394, "step": 1787 }, { "epoch": 12.77, "learning_rate": 3.0519061243352834e-05, "loss": 0.0297, "step": 1788 }, { "epoch": 12.78, "learning_rate": 3.046580970451402e-05, "loss": 0.0304, "step": 1789 }, { "epoch": 12.79, "learning_rate": 3.0412584301462076e-05, "loss": 0.0342, "step": 1790 }, { "epoch": 12.79, "learning_rate": 3.0359385105409993e-05, "loss": 0.0167, "step": 1791 }, { "epoch": 12.8, "learning_rate": 3.0306212187535653e-05, "loss": 0.055, "step": 1792 }, { "epoch": 12.81, "learning_rate": 3.0253065618981812e-05, "loss": 0.0268, "step": 1793 }, { "epoch": 12.81, "learning_rate": 3.019994547085597e-05, "loss": 0.0235, "step": 1794 }, { "epoch": 12.82, "learning_rate": 3.0146851814230285e-05, "loss": 0.0354, "step": 1795 }, { "epoch": 12.83, "learning_rate": 3.0093784720141455e-05, "loss": 0.0436, "step": 1796 }, { "epoch": 12.84, "learning_rate": 3.0040744259590654e-05, "loss": 0.0304, "step": 1797 }, { "epoch": 12.84, "learning_rate": 2.9987730503543387e-05, "loss": 0.0267, "step": 1798 }, { "epoch": 12.85, "learning_rate": 2.9934743522929476e-05, "loss": 0.0345, "step": 1799 }, { "epoch": 12.86, "learning_rate": 2.9881783388642893e-05, "loss": 0.0213, "step": 1800 }, { "epoch": 12.86, "learning_rate": 2.982885017154169e-05, "loss": 0.0422, "step": 1801 }, { "epoch": 12.87, "learning_rate": 2.9775943942447914e-05, "loss": 0.046, "step": 1802 }, { "epoch": 12.88, "learning_rate": 2.972306477214751e-05, "loss": 0.0606, "step": 1803 }, { "epoch": 12.89, "learning_rate": 2.96702127313902e-05, "loss": 0.0163, "step": 1804 }, { "epoch": 12.89, "learning_rate": 2.9617387890889413e-05, "loss": 0.0147, "step": 1805 }, { "epoch": 12.9, "learning_rate": 2.9564590321322207e-05, "loss": 0.0325, "step": 1806 }, { "epoch": 12.91, "learning_rate": 2.951182009332911e-05, "loss": 0.0544, "step": 1807 }, { "epoch": 12.91, "learning_rate": 2.945907727751412e-05, "loss": 0.0452, "step": 1808 }, { "epoch": 12.92, "learning_rate": 2.9406361944444515e-05, "loss": 0.0539, "step": 1809 }, { "epoch": 12.93, "learning_rate": 2.9353674164650847e-05, "loss": 0.0364, "step": 1810 }, { "epoch": 12.94, "learning_rate": 2.9301014008626747e-05, "loss": 0.0529, "step": 1811 }, { "epoch": 12.94, "learning_rate": 2.924838154682893e-05, "loss": 0.073, "step": 1812 }, { "epoch": 12.95, "learning_rate": 2.9195776849677035e-05, "loss": 0.0375, "step": 1813 }, { "epoch": 12.96, "learning_rate": 2.914319998755357e-05, "loss": 0.051, "step": 1814 }, { "epoch": 12.96, "learning_rate": 2.9090651030803795e-05, "loss": 0.031, "step": 1815 }, { "epoch": 12.97, "learning_rate": 2.9038130049735634e-05, "loss": 0.0138, "step": 1816 }, { "epoch": 12.98, "learning_rate": 2.8985637114619567e-05, "loss": 0.0278, "step": 1817 }, { "epoch": 12.99, "learning_rate": 2.8933172295688576e-05, "loss": 0.0644, "step": 1818 }, { "epoch": 12.99, "learning_rate": 2.8880735663138024e-05, "loss": 0.0743, "step": 1819 }, { "epoch": 13.0, "learning_rate": 2.882832728712551e-05, "loss": 0.0143, "step": 1820 }, { "epoch": 13.01, "learning_rate": 2.8775947237770885e-05, "loss": 0.0228, "step": 1821 }, { "epoch": 13.01, "learning_rate": 2.8723595585156083e-05, "loss": 0.047, "step": 1822 }, { "epoch": 13.02, "learning_rate": 2.8671272399325044e-05, "loss": 0.0361, "step": 1823 }, { "epoch": 13.03, "learning_rate": 2.8618977750283603e-05, "loss": 0.0332, "step": 1824 }, { "epoch": 13.04, "learning_rate": 2.856671170799946e-05, "loss": 0.0399, "step": 1825 }, { "epoch": 13.04, "learning_rate": 2.8514474342402005e-05, "loss": 0.0699, "step": 1826 }, { "epoch": 13.05, "learning_rate": 2.8462265723382252e-05, "loss": 0.0087, "step": 1827 }, { "epoch": 13.06, "learning_rate": 2.8410085920792807e-05, "loss": 0.003, "step": 1828 }, { "epoch": 13.06, "learning_rate": 2.835793500444762e-05, "loss": 0.0287, "step": 1829 }, { "epoch": 13.07, "learning_rate": 2.8305813044122097e-05, "loss": 0.0192, "step": 1830 }, { "epoch": 13.08, "learning_rate": 2.825372010955285e-05, "loss": 0.0276, "step": 1831 }, { "epoch": 13.09, "learning_rate": 2.8201656270437658e-05, "loss": 0.0161, "step": 1832 }, { "epoch": 13.09, "learning_rate": 2.8149621596435393e-05, "loss": 0.0121, "step": 1833 }, { "epoch": 13.1, "learning_rate": 2.8097616157165883e-05, "loss": 0.0229, "step": 1834 }, { "epoch": 13.11, "learning_rate": 2.804564002220986e-05, "loss": 0.0517, "step": 1835 }, { "epoch": 13.11, "learning_rate": 2.7993693261108823e-05, "loss": 0.0066, "step": 1836 }, { "epoch": 13.12, "learning_rate": 2.7941775943365007e-05, "loss": 0.0227, "step": 1837 }, { "epoch": 13.13, "learning_rate": 2.7889888138441212e-05, "loss": 0.0315, "step": 1838 }, { "epoch": 13.14, "learning_rate": 2.7838029915760787e-05, "loss": 0.0228, "step": 1839 }, { "epoch": 13.14, "learning_rate": 2.7786201344707486e-05, "loss": 0.0438, "step": 1840 }, { "epoch": 13.15, "learning_rate": 2.7734402494625393e-05, "loss": 0.0177, "step": 1841 }, { "epoch": 13.16, "learning_rate": 2.7682633434818805e-05, "loss": 0.0378, "step": 1842 }, { "epoch": 13.16, "learning_rate": 2.7630894234552185e-05, "loss": 0.0409, "step": 1843 }, { "epoch": 13.17, "learning_rate": 2.7579184963050052e-05, "loss": 0.0325, "step": 1844 }, { "epoch": 13.18, "learning_rate": 2.7527505689496868e-05, "loss": 0.0217, "step": 1845 }, { "epoch": 13.19, "learning_rate": 2.7475856483036966e-05, "loss": 0.0147, "step": 1846 }, { "epoch": 13.19, "learning_rate": 2.7424237412774434e-05, "loss": 0.0193, "step": 1847 }, { "epoch": 13.2, "learning_rate": 2.737264854777306e-05, "loss": 0.016, "step": 1848 }, { "epoch": 13.21, "learning_rate": 2.7321089957056206e-05, "loss": 0.0456, "step": 1849 }, { "epoch": 13.21, "learning_rate": 2.7269561709606738e-05, "loss": 0.0579, "step": 1850 }, { "epoch": 13.22, "learning_rate": 2.721806387436693e-05, "loss": 0.0097, "step": 1851 }, { "epoch": 13.23, "learning_rate": 2.716659652023833e-05, "loss": 0.0153, "step": 1852 }, { "epoch": 13.24, "learning_rate": 2.7115159716081767e-05, "loss": 0.008, "step": 1853 }, { "epoch": 13.24, "learning_rate": 2.7063753530717117e-05, "loss": 0.0644, "step": 1854 }, { "epoch": 13.25, "learning_rate": 2.7012378032923346e-05, "loss": 0.011, "step": 1855 }, { "epoch": 13.26, "learning_rate": 2.6961033291438343e-05, "loss": 0.0576, "step": 1856 }, { "epoch": 13.26, "learning_rate": 2.6909719374958853e-05, "loss": 0.0237, "step": 1857 }, { "epoch": 13.27, "learning_rate": 2.685843635214038e-05, "loss": 0.0108, "step": 1858 }, { "epoch": 13.28, "learning_rate": 2.680718429159709e-05, "loss": 0.0243, "step": 1859 }, { "epoch": 13.29, "learning_rate": 2.6755963261901708e-05, "loss": 0.0563, "step": 1860 }, { "epoch": 13.29, "learning_rate": 2.6704773331585465e-05, "loss": 0.0127, "step": 1861 }, { "epoch": 13.3, "learning_rate": 2.6653614569137968e-05, "loss": 0.0229, "step": 1862 }, { "epoch": 13.31, "learning_rate": 2.6602487043007135e-05, "loss": 0.0316, "step": 1863 }, { "epoch": 13.31, "learning_rate": 2.6551390821599076e-05, "loss": 0.0107, "step": 1864 }, { "epoch": 13.32, "learning_rate": 2.650032597327805e-05, "loss": 0.0113, "step": 1865 }, { "epoch": 13.33, "learning_rate": 2.644929256636628e-05, "loss": 0.0247, "step": 1866 }, { "epoch": 13.34, "learning_rate": 2.6398290669143967e-05, "loss": 0.0101, "step": 1867 }, { "epoch": 13.34, "learning_rate": 2.6347320349849146e-05, "loss": 0.0092, "step": 1868 }, { "epoch": 13.35, "learning_rate": 2.6296381676677606e-05, "loss": 0.0126, "step": 1869 }, { "epoch": 13.36, "learning_rate": 2.624547471778278e-05, "loss": 0.0128, "step": 1870 }, { "epoch": 13.36, "learning_rate": 2.6194599541275682e-05, "loss": 0.063, "step": 1871 }, { "epoch": 13.37, "learning_rate": 2.6143756215224802e-05, "loss": 0.0069, "step": 1872 }, { "epoch": 13.38, "learning_rate": 2.6092944807656004e-05, "loss": 0.005, "step": 1873 }, { "epoch": 13.39, "learning_rate": 2.604216538655247e-05, "loss": 0.0508, "step": 1874 }, { "epoch": 13.39, "learning_rate": 2.5991418019854553e-05, "loss": 0.0254, "step": 1875 }, { "epoch": 13.4, "learning_rate": 2.5940702775459747e-05, "loss": 0.0146, "step": 1876 }, { "epoch": 13.41, "learning_rate": 2.5890019721222554e-05, "loss": 0.05, "step": 1877 }, { "epoch": 13.41, "learning_rate": 2.5839368924954433e-05, "loss": 0.027, "step": 1878 }, { "epoch": 13.42, "learning_rate": 2.578875045442361e-05, "loss": 0.0059, "step": 1879 }, { "epoch": 13.43, "learning_rate": 2.5738164377355145e-05, "loss": 0.015, "step": 1880 }, { "epoch": 13.44, "learning_rate": 2.5687610761430725e-05, "loss": 0.012, "step": 1881 }, { "epoch": 13.44, "learning_rate": 2.563708967428859e-05, "loss": 0.0431, "step": 1882 }, { "epoch": 13.45, "learning_rate": 2.5586601183523484e-05, "loss": 0.01, "step": 1883 }, { "epoch": 13.46, "learning_rate": 2.5536145356686524e-05, "loss": 0.0193, "step": 1884 }, { "epoch": 13.46, "learning_rate": 2.5485722261285126e-05, "loss": 0.0065, "step": 1885 }, { "epoch": 13.47, "learning_rate": 2.5435331964782916e-05, "loss": 0.0178, "step": 1886 }, { "epoch": 13.48, "learning_rate": 2.538497453459963e-05, "loss": 0.0427, "step": 1887 }, { "epoch": 13.49, "learning_rate": 2.5334650038111048e-05, "loss": 0.0502, "step": 1888 }, { "epoch": 13.49, "learning_rate": 2.5284358542648855e-05, "loss": 0.0168, "step": 1889 }, { "epoch": 13.5, "learning_rate": 2.5234100115500643e-05, "loss": 0.0104, "step": 1890 }, { "epoch": 13.51, "learning_rate": 2.518387482390966e-05, "loss": 0.0107, "step": 1891 }, { "epoch": 13.51, "learning_rate": 2.5133682735074904e-05, "loss": 0.0184, "step": 1892 }, { "epoch": 13.52, "learning_rate": 2.508352391615093e-05, "loss": 0.0094, "step": 1893 }, { "epoch": 13.53, "learning_rate": 2.503339843424777e-05, "loss": 0.0412, "step": 1894 }, { "epoch": 13.54, "learning_rate": 2.4983306356430846e-05, "loss": 0.0076, "step": 1895 }, { "epoch": 13.54, "learning_rate": 2.4933247749720912e-05, "loss": 0.0363, "step": 1896 }, { "epoch": 13.55, "learning_rate": 2.4883222681093914e-05, "loss": 0.0189, "step": 1897 }, { "epoch": 13.56, "learning_rate": 2.483323121748094e-05, "loss": 0.0083, "step": 1898 }, { "epoch": 13.56, "learning_rate": 2.4783273425768117e-05, "loss": 0.0251, "step": 1899 }, { "epoch": 13.57, "learning_rate": 2.4733349372796507e-05, "loss": 0.0414, "step": 1900 }, { "epoch": 13.58, "learning_rate": 2.4683459125362045e-05, "loss": 0.0157, "step": 1901 }, { "epoch": 13.59, "learning_rate": 2.4633602750215444e-05, "loss": 0.0531, "step": 1902 }, { "epoch": 13.59, "learning_rate": 2.458378031406206e-05, "loss": 0.0335, "step": 1903 }, { "epoch": 13.6, "learning_rate": 2.4533991883561868e-05, "loss": 0.0222, "step": 1904 }, { "epoch": 13.61, "learning_rate": 2.4484237525329352e-05, "loss": 0.0274, "step": 1905 }, { "epoch": 13.61, "learning_rate": 2.443451730593339e-05, "loss": 0.0405, "step": 1906 }, { "epoch": 13.62, "learning_rate": 2.4384831291897202e-05, "loss": 0.0397, "step": 1907 }, { "epoch": 13.63, "learning_rate": 2.4335179549698233e-05, "loss": 0.0179, "step": 1908 }, { "epoch": 13.64, "learning_rate": 2.4285562145768047e-05, "loss": 0.0177, "step": 1909 }, { "epoch": 13.64, "learning_rate": 2.4235979146492338e-05, "loss": 0.025, "step": 1910 }, { "epoch": 13.65, "learning_rate": 2.4186430618210705e-05, "loss": 0.0212, "step": 1911 }, { "epoch": 13.66, "learning_rate": 2.4136916627216655e-05, "loss": 0.0104, "step": 1912 }, { "epoch": 13.66, "learning_rate": 2.408743723975745e-05, "loss": 0.0118, "step": 1913 }, { "epoch": 13.67, "learning_rate": 2.403799252203408e-05, "loss": 0.0163, "step": 1914 }, { "epoch": 13.68, "learning_rate": 2.3988582540201164e-05, "loss": 0.0229, "step": 1915 }, { "epoch": 13.69, "learning_rate": 2.3939207360366832e-05, "loss": 0.0123, "step": 1916 }, { "epoch": 13.69, "learning_rate": 2.3889867048592633e-05, "loss": 0.0213, "step": 1917 }, { "epoch": 13.7, "learning_rate": 2.3840561670893496e-05, "loss": 0.0397, "step": 1918 }, { "epoch": 13.71, "learning_rate": 2.3791291293237584e-05, "loss": 0.014, "step": 1919 }, { "epoch": 13.71, "learning_rate": 2.374205598154624e-05, "loss": 0.0321, "step": 1920 }, { "epoch": 13.72, "learning_rate": 2.3692855801693904e-05, "loss": 0.0321, "step": 1921 }, { "epoch": 13.73, "learning_rate": 2.3643690819507986e-05, "loss": 0.0517, "step": 1922 }, { "epoch": 13.74, "learning_rate": 2.3594561100768826e-05, "loss": 0.0637, "step": 1923 }, { "epoch": 13.74, "learning_rate": 2.3545466711209585e-05, "loss": 0.0329, "step": 1924 }, { "epoch": 13.75, "learning_rate": 2.349640771651611e-05, "loss": 0.0708, "step": 1925 }, { "epoch": 13.76, "learning_rate": 2.3447384182326947e-05, "loss": 0.0081, "step": 1926 }, { "epoch": 13.76, "learning_rate": 2.3398396174233178e-05, "loss": 0.016, "step": 1927 }, { "epoch": 13.77, "learning_rate": 2.3349443757778343e-05, "loss": 0.054, "step": 1928 }, { "epoch": 13.78, "learning_rate": 2.3300526998458372e-05, "loss": 0.0895, "step": 1929 }, { "epoch": 13.79, "learning_rate": 2.3251645961721492e-05, "loss": 0.0277, "step": 1930 }, { "epoch": 13.79, "learning_rate": 2.320280071296812e-05, "loss": 0.0241, "step": 1931 }, { "epoch": 13.8, "learning_rate": 2.315399131755081e-05, "loss": 0.0215, "step": 1932 }, { "epoch": 13.81, "learning_rate": 2.3105217840774123e-05, "loss": 0.0121, "step": 1933 }, { "epoch": 13.81, "learning_rate": 2.3056480347894582e-05, "loss": 0.0156, "step": 1934 }, { "epoch": 13.82, "learning_rate": 2.3007778904120552e-05, "loss": 0.0149, "step": 1935 }, { "epoch": 13.83, "learning_rate": 2.29591135746122e-05, "loss": 0.034, "step": 1936 }, { "epoch": 13.84, "learning_rate": 2.2910484424481298e-05, "loss": 0.0104, "step": 1937 }, { "epoch": 13.84, "learning_rate": 2.2861891518791282e-05, "loss": 0.0169, "step": 1938 }, { "epoch": 13.85, "learning_rate": 2.281333492255708e-05, "loss": 0.0088, "step": 1939 }, { "epoch": 13.86, "learning_rate": 2.2764814700745025e-05, "loss": 0.0397, "step": 1940 }, { "epoch": 13.86, "learning_rate": 2.271633091827279e-05, "loss": 0.0619, "step": 1941 }, { "epoch": 13.87, "learning_rate": 2.266788364000929e-05, "loss": 0.035, "step": 1942 }, { "epoch": 13.88, "learning_rate": 2.261947293077461e-05, "loss": 0.033, "step": 1943 }, { "epoch": 13.89, "learning_rate": 2.25710988553399e-05, "loss": 0.0242, "step": 1944 }, { "epoch": 13.89, "learning_rate": 2.252276147842729e-05, "loss": 0.0215, "step": 1945 }, { "epoch": 13.9, "learning_rate": 2.2474460864709824e-05, "loss": 0.0583, "step": 1946 }, { "epoch": 13.91, "learning_rate": 2.242619707881134e-05, "loss": 0.015, "step": 1947 }, { "epoch": 13.91, "learning_rate": 2.2377970185306424e-05, "loss": 0.0291, "step": 1948 }, { "epoch": 13.92, "learning_rate": 2.23297802487203e-05, "loss": 0.0324, "step": 1949 }, { "epoch": 13.93, "learning_rate": 2.2281627333528697e-05, "loss": 0.018, "step": 1950 }, { "epoch": 13.94, "learning_rate": 2.223351150415788e-05, "loss": 0.0304, "step": 1951 }, { "epoch": 13.94, "learning_rate": 2.2185432824984453e-05, "loss": 0.0137, "step": 1952 }, { "epoch": 13.95, "learning_rate": 2.213739136033533e-05, "loss": 0.0171, "step": 1953 }, { "epoch": 13.96, "learning_rate": 2.2089387174487632e-05, "loss": 0.0206, "step": 1954 }, { "epoch": 13.96, "learning_rate": 2.20414203316686e-05, "loss": 0.0918, "step": 1955 }, { "epoch": 13.97, "learning_rate": 2.1993490896055512e-05, "loss": 0.0155, "step": 1956 }, { "epoch": 13.98, "learning_rate": 2.1945598931775595e-05, "loss": 0.0459, "step": 1957 }, { "epoch": 13.99, "learning_rate": 2.1897744502905954e-05, "loss": 0.059, "step": 1958 }, { "epoch": 13.99, "learning_rate": 2.184992767347346e-05, "loss": 0.0191, "step": 1959 }, { "epoch": 14.0, "learning_rate": 2.180214850745467e-05, "loss": 0.0206, "step": 1960 }, { "epoch": 14.01, "learning_rate": 2.1754407068775794e-05, "loss": 0.0112, "step": 1961 }, { "epoch": 14.01, "learning_rate": 2.1706703421312492e-05, "loss": 0.0286, "step": 1962 }, { "epoch": 14.02, "learning_rate": 2.165903762888991e-05, "loss": 0.031, "step": 1963 }, { "epoch": 14.03, "learning_rate": 2.161140975528254e-05, "loss": 0.006, "step": 1964 }, { "epoch": 14.04, "learning_rate": 2.1563819864214147e-05, "loss": 0.0034, "step": 1965 }, { "epoch": 14.04, "learning_rate": 2.1516268019357656e-05, "loss": 0.0145, "step": 1966 }, { "epoch": 14.05, "learning_rate": 2.1468754284335097e-05, "loss": 0.004, "step": 1967 }, { "epoch": 14.06, "learning_rate": 2.1421278722717524e-05, "loss": 0.0057, "step": 1968 }, { "epoch": 14.06, "learning_rate": 2.1373841398024896e-05, "loss": 0.0388, "step": 1969 }, { "epoch": 14.07, "learning_rate": 2.1326442373726028e-05, "loss": 0.0148, "step": 1970 }, { "epoch": 14.08, "learning_rate": 2.127908171323849e-05, "loss": 0.0043, "step": 1971 }, { "epoch": 14.09, "learning_rate": 2.123175947992851e-05, "loss": 0.0261, "step": 1972 }, { "epoch": 14.09, "learning_rate": 2.1184475737110932e-05, "loss": 0.0396, "step": 1973 }, { "epoch": 14.1, "learning_rate": 2.1137230548049043e-05, "loss": 0.014, "step": 1974 }, { "epoch": 14.11, "learning_rate": 2.109002397595461e-05, "loss": 0.0352, "step": 1975 }, { "epoch": 14.11, "learning_rate": 2.1042856083987695e-05, "loss": 0.0093, "step": 1976 }, { "epoch": 14.12, "learning_rate": 2.0995726935256634e-05, "loss": 0.0282, "step": 1977 }, { "epoch": 14.13, "learning_rate": 2.09486365928179e-05, "loss": 0.0248, "step": 1978 }, { "epoch": 14.14, "learning_rate": 2.0901585119676064e-05, "loss": 0.0338, "step": 1979 }, { "epoch": 14.14, "learning_rate": 2.0854572578783686e-05, "loss": 0.0321, "step": 1980 }, { "epoch": 14.15, "learning_rate": 2.0807599033041235e-05, "loss": 0.0377, "step": 1981 }, { "epoch": 14.16, "learning_rate": 2.076066454529701e-05, "loss": 0.0358, "step": 1982 }, { "epoch": 14.16, "learning_rate": 2.0713769178347043e-05, "loss": 0.0191, "step": 1983 }, { "epoch": 14.17, "learning_rate": 2.0666912994935034e-05, "loss": 0.0175, "step": 1984 }, { "epoch": 14.18, "learning_rate": 2.062009605775226e-05, "loss": 0.0704, "step": 1985 }, { "epoch": 14.19, "learning_rate": 2.057331842943749e-05, "loss": 0.0035, "step": 1986 }, { "epoch": 14.19, "learning_rate": 2.052658017257686e-05, "loss": 0.0303, "step": 1987 }, { "epoch": 14.2, "learning_rate": 2.0479881349703883e-05, "loss": 0.0331, "step": 1988 }, { "epoch": 14.21, "learning_rate": 2.0433222023299287e-05, "loss": 0.0377, "step": 1989 }, { "epoch": 14.21, "learning_rate": 2.038660225579096e-05, "loss": 0.0465, "step": 1990 }, { "epoch": 14.22, "learning_rate": 2.0340022109553864e-05, "loss": 0.0677, "step": 1991 }, { "epoch": 14.23, "learning_rate": 2.0293481646909934e-05, "loss": 0.0302, "step": 1992 }, { "epoch": 14.24, "learning_rate": 2.0246980930128034e-05, "loss": 0.0261, "step": 1993 }, { "epoch": 14.24, "learning_rate": 2.0200520021423813e-05, "loss": 0.0209, "step": 1994 }, { "epoch": 14.25, "learning_rate": 2.0154098982959745e-05, "loss": 0.0345, "step": 1995 }, { "epoch": 14.26, "learning_rate": 2.0107717876844838e-05, "loss": 0.0491, "step": 1996 }, { "epoch": 14.26, "learning_rate": 2.006137676513476e-05, "loss": 0.0151, "step": 1997 }, { "epoch": 14.27, "learning_rate": 2.0015075709831634e-05, "loss": 0.0173, "step": 1998 }, { "epoch": 14.28, "learning_rate": 1.9968814772883997e-05, "loss": 0.01, "step": 1999 }, { "epoch": 14.29, "learning_rate": 1.9922594016186713e-05, "loss": 0.0144, "step": 2000 }, { "epoch": 14.29, "learning_rate": 1.987641350158088e-05, "loss": 0.0073, "step": 2001 }, { "epoch": 14.3, "learning_rate": 1.983027329085377e-05, "loss": 0.0204, "step": 2002 }, { "epoch": 14.31, "learning_rate": 1.9784173445738697e-05, "loss": 0.0063, "step": 2003 }, { "epoch": 14.31, "learning_rate": 1.9738114027915006e-05, "loss": 0.0091, "step": 2004 }, { "epoch": 14.32, "learning_rate": 1.9692095099007923e-05, "loss": 0.009, "step": 2005 }, { "epoch": 14.33, "learning_rate": 1.9646116720588525e-05, "loss": 0.014, "step": 2006 }, { "epoch": 14.34, "learning_rate": 1.9600178954173635e-05, "loss": 0.0054, "step": 2007 }, { "epoch": 14.34, "learning_rate": 1.9554281861225694e-05, "loss": 0.0173, "step": 2008 }, { "epoch": 14.35, "learning_rate": 1.9508425503152773e-05, "loss": 0.0114, "step": 2009 }, { "epoch": 14.36, "learning_rate": 1.946260994130843e-05, "loss": 0.0685, "step": 2010 }, { "epoch": 14.36, "learning_rate": 1.9416835236991625e-05, "loss": 0.0364, "step": 2011 }, { "epoch": 14.37, "learning_rate": 1.937110145144668e-05, "loss": 0.0033, "step": 2012 }, { "epoch": 14.38, "learning_rate": 1.9325408645863142e-05, "loss": 0.0454, "step": 2013 }, { "epoch": 14.39, "learning_rate": 1.9279756881375743e-05, "loss": 0.0099, "step": 2014 }, { "epoch": 14.39, "learning_rate": 1.9234146219064298e-05, "loss": 0.0054, "step": 2015 }, { "epoch": 14.4, "learning_rate": 1.9188576719953633e-05, "loss": 0.0306, "step": 2016 }, { "epoch": 14.41, "learning_rate": 1.9143048445013507e-05, "loss": 0.049, "step": 2017 }, { "epoch": 14.41, "learning_rate": 1.9097561455158502e-05, "loss": 0.0115, "step": 2018 }, { "epoch": 14.42, "learning_rate": 1.9052115811247985e-05, "loss": 0.0101, "step": 2019 }, { "epoch": 14.43, "learning_rate": 1.9006711574086005e-05, "loss": 0.0346, "step": 2020 }, { "epoch": 14.44, "learning_rate": 1.8961348804421175e-05, "loss": 0.0231, "step": 2021 }, { "epoch": 14.44, "learning_rate": 1.891602756294666e-05, "loss": 0.0293, "step": 2022 }, { "epoch": 14.45, "learning_rate": 1.887074791030006e-05, "loss": 0.0192, "step": 2023 }, { "epoch": 14.46, "learning_rate": 1.8825509907063327e-05, "loss": 0.0069, "step": 2024 }, { "epoch": 14.46, "learning_rate": 1.8780313613762685e-05, "loss": 0.0104, "step": 2025 }, { "epoch": 14.47, "learning_rate": 1.8735159090868547e-05, "loss": 0.0321, "step": 2026 }, { "epoch": 14.48, "learning_rate": 1.869004639879546e-05, "loss": 0.0164, "step": 2027 }, { "epoch": 14.49, "learning_rate": 1.8644975597901977e-05, "loss": 0.0136, "step": 2028 }, { "epoch": 14.49, "learning_rate": 1.8599946748490615e-05, "loss": 0.0198, "step": 2029 }, { "epoch": 14.5, "learning_rate": 1.8554959910807775e-05, "loss": 0.0229, "step": 2030 }, { "epoch": 14.51, "learning_rate": 1.851001514504362e-05, "loss": 0.0051, "step": 2031 }, { "epoch": 14.51, "learning_rate": 1.8465112511332065e-05, "loss": 0.0085, "step": 2032 }, { "epoch": 14.52, "learning_rate": 1.8420252069750598e-05, "loss": 0.0258, "step": 2033 }, { "epoch": 14.53, "learning_rate": 1.8375433880320292e-05, "loss": 0.0208, "step": 2034 }, { "epoch": 14.54, "learning_rate": 1.8330658003005685e-05, "loss": 0.0214, "step": 2035 }, { "epoch": 14.54, "learning_rate": 1.8285924497714703e-05, "loss": 0.0235, "step": 2036 }, { "epoch": 14.55, "learning_rate": 1.824123342429858e-05, "loss": 0.012, "step": 2037 }, { "epoch": 14.56, "learning_rate": 1.819658484255177e-05, "loss": 0.0156, "step": 2038 }, { "epoch": 14.56, "learning_rate": 1.815197881221189e-05, "loss": 0.0081, "step": 2039 }, { "epoch": 14.57, "learning_rate": 1.8107415392959614e-05, "loss": 0.0097, "step": 2040 }, { "epoch": 14.58, "learning_rate": 1.8062894644418604e-05, "loss": 0.0308, "step": 2041 }, { "epoch": 14.59, "learning_rate": 1.8018416626155443e-05, "loss": 0.0596, "step": 2042 }, { "epoch": 14.59, "learning_rate": 1.7973981397679525e-05, "loss": 0.0252, "step": 2043 }, { "epoch": 14.6, "learning_rate": 1.7929589018443016e-05, "loss": 0.0297, "step": 2044 }, { "epoch": 14.61, "learning_rate": 1.7885239547840716e-05, "loss": 0.0257, "step": 2045 }, { "epoch": 14.61, "learning_rate": 1.7840933045210052e-05, "loss": 0.0235, "step": 2046 }, { "epoch": 14.62, "learning_rate": 1.7796669569830943e-05, "loss": 0.0189, "step": 2047 }, { "epoch": 14.63, "learning_rate": 1.7752449180925747e-05, "loss": 0.0197, "step": 2048 }, { "epoch": 14.64, "learning_rate": 1.7708271937659173e-05, "loss": 0.0689, "step": 2049 }, { "epoch": 14.64, "learning_rate": 1.7664137899138193e-05, "loss": 0.0449, "step": 2050 }, { "epoch": 14.65, "learning_rate": 1.7620047124412e-05, "loss": 0.0314, "step": 2051 }, { "epoch": 14.66, "learning_rate": 1.7575999672471867e-05, "loss": 0.0564, "step": 2052 }, { "epoch": 14.66, "learning_rate": 1.753199560225113e-05, "loss": 0.0308, "step": 2053 }, { "epoch": 14.67, "learning_rate": 1.7488034972625066e-05, "loss": 0.0478, "step": 2054 }, { "epoch": 14.68, "learning_rate": 1.7444117842410845e-05, "loss": 0.012, "step": 2055 }, { "epoch": 14.69, "learning_rate": 1.7400244270367428e-05, "loss": 0.0281, "step": 2056 }, { "epoch": 14.69, "learning_rate": 1.735641431519551e-05, "loss": 0.0121, "step": 2057 }, { "epoch": 14.7, "learning_rate": 1.7312628035537387e-05, "loss": 0.0368, "step": 2058 }, { "epoch": 14.71, "learning_rate": 1.726888548997697e-05, "loss": 0.0173, "step": 2059 }, { "epoch": 14.71, "learning_rate": 1.7225186737039638e-05, "loss": 0.0258, "step": 2060 }, { "epoch": 14.72, "learning_rate": 1.7181531835192165e-05, "loss": 0.013, "step": 2061 }, { "epoch": 14.73, "learning_rate": 1.7137920842842674e-05, "loss": 0.03, "step": 2062 }, { "epoch": 14.74, "learning_rate": 1.709435381834053e-05, "loss": 0.0061, "step": 2063 }, { "epoch": 14.74, "learning_rate": 1.7050830819976267e-05, "loss": 0.0245, "step": 2064 }, { "epoch": 14.75, "learning_rate": 1.7007351905981512e-05, "loss": 0.0188, "step": 2065 }, { "epoch": 14.76, "learning_rate": 1.696391713452893e-05, "loss": 0.0217, "step": 2066 }, { "epoch": 14.76, "learning_rate": 1.69205265637321e-05, "loss": 0.0089, "step": 2067 }, { "epoch": 14.77, "learning_rate": 1.6877180251645486e-05, "loss": 0.0238, "step": 2068 }, { "epoch": 14.78, "learning_rate": 1.6833878256264324e-05, "loss": 0.0055, "step": 2069 }, { "epoch": 14.79, "learning_rate": 1.679062063552454e-05, "loss": 0.0407, "step": 2070 }, { "epoch": 14.79, "learning_rate": 1.674740744730271e-05, "loss": 0.0041, "step": 2071 }, { "epoch": 14.8, "learning_rate": 1.6704238749415957e-05, "loss": 0.0289, "step": 2072 }, { "epoch": 14.81, "learning_rate": 1.6661114599621885e-05, "loss": 0.0124, "step": 2073 }, { "epoch": 14.81, "learning_rate": 1.6618035055618485e-05, "loss": 0.0075, "step": 2074 }, { "epoch": 14.82, "learning_rate": 1.657500017504407e-05, "loss": 0.0307, "step": 2075 }, { "epoch": 14.83, "learning_rate": 1.653201001547719e-05, "loss": 0.0109, "step": 2076 }, { "epoch": 14.84, "learning_rate": 1.648906463443658e-05, "loss": 0.0553, "step": 2077 }, { "epoch": 14.84, "learning_rate": 1.6446164089381034e-05, "loss": 0.0265, "step": 2078 }, { "epoch": 14.85, "learning_rate": 1.640330843770938e-05, "loss": 0.0069, "step": 2079 }, { "epoch": 14.86, "learning_rate": 1.6360497736760383e-05, "loss": 0.0076, "step": 2080 }, { "epoch": 14.86, "learning_rate": 1.6317732043812646e-05, "loss": 0.0235, "step": 2081 }, { "epoch": 14.87, "learning_rate": 1.6275011416084562e-05, "loss": 0.0215, "step": 2082 }, { "epoch": 14.88, "learning_rate": 1.623233591073425e-05, "loss": 0.0101, "step": 2083 }, { "epoch": 14.89, "learning_rate": 1.618970558485942e-05, "loss": 0.0109, "step": 2084 }, { "epoch": 14.89, "learning_rate": 1.6147120495497365e-05, "loss": 0.0236, "step": 2085 }, { "epoch": 14.9, "learning_rate": 1.6104580699624837e-05, "loss": 0.0171, "step": 2086 }, { "epoch": 14.91, "learning_rate": 1.6062086254157997e-05, "loss": 0.021, "step": 2087 }, { "epoch": 14.91, "learning_rate": 1.601963721595232e-05, "loss": 0.0082, "step": 2088 }, { "epoch": 14.92, "learning_rate": 1.5977233641802543e-05, "loss": 0.0424, "step": 2089 }, { "epoch": 14.93, "learning_rate": 1.593487558844255e-05, "loss": 0.031, "step": 2090 }, { "epoch": 14.94, "learning_rate": 1.5892563112545366e-05, "loss": 0.0144, "step": 2091 }, { "epoch": 14.94, "learning_rate": 1.5850296270722964e-05, "loss": 0.0127, "step": 2092 }, { "epoch": 14.95, "learning_rate": 1.5808075119526324e-05, "loss": 0.0114, "step": 2093 }, { "epoch": 14.96, "learning_rate": 1.576589971544526e-05, "loss": 0.014, "step": 2094 }, { "epoch": 14.96, "learning_rate": 1.5723770114908403e-05, "loss": 0.0091, "step": 2095 }, { "epoch": 14.97, "learning_rate": 1.5681686374283088e-05, "loss": 0.0113, "step": 2096 }, { "epoch": 14.98, "learning_rate": 1.5639648549875287e-05, "loss": 0.0442, "step": 2097 }, { "epoch": 14.99, "learning_rate": 1.5597656697929542e-05, "loss": 0.0019, "step": 2098 }, { "epoch": 14.99, "learning_rate": 1.5555710874628893e-05, "loss": 0.0298, "step": 2099 }, { "epoch": 15.0, "learning_rate": 1.5513811136094787e-05, "loss": 0.0019, "step": 2100 }, { "epoch": 15.01, "learning_rate": 1.5471957538387015e-05, "loss": 0.0136, "step": 2101 }, { "epoch": 15.01, "learning_rate": 1.543015013750364e-05, "loss": 0.0405, "step": 2102 }, { "epoch": 15.02, "learning_rate": 1.5388388989380924e-05, "loss": 0.0155, "step": 2103 }, { "epoch": 15.03, "learning_rate": 1.5346674149893202e-05, "loss": 0.0237, "step": 2104 }, { "epoch": 15.04, "learning_rate": 1.5305005674852884e-05, "loss": 0.0157, "step": 2105 }, { "epoch": 15.04, "learning_rate": 1.5263383620010357e-05, "loss": 0.0322, "step": 2106 }, { "epoch": 15.05, "learning_rate": 1.5221808041053875e-05, "loss": 0.0848, "step": 2107 }, { "epoch": 15.06, "learning_rate": 1.5180278993609526e-05, "loss": 0.0089, "step": 2108 }, { "epoch": 15.06, "learning_rate": 1.5138796533241134e-05, "loss": 0.017, "step": 2109 }, { "epoch": 15.07, "learning_rate": 1.5097360715450187e-05, "loss": 0.032, "step": 2110 }, { "epoch": 15.08, "learning_rate": 1.5055971595675783e-05, "loss": 0.0172, "step": 2111 }, { "epoch": 15.09, "learning_rate": 1.5014629229294524e-05, "loss": 0.0076, "step": 2112 }, { "epoch": 15.09, "learning_rate": 1.4973333671620465e-05, "loss": 0.0172, "step": 2113 }, { "epoch": 15.1, "learning_rate": 1.4932084977905042e-05, "loss": 0.0063, "step": 2114 }, { "epoch": 15.11, "learning_rate": 1.4890883203336981e-05, "loss": 0.0182, "step": 2115 }, { "epoch": 15.11, "learning_rate": 1.4849728403042213e-05, "loss": 0.026, "step": 2116 }, { "epoch": 15.12, "learning_rate": 1.4808620632083853e-05, "loss": 0.0115, "step": 2117 }, { "epoch": 15.13, "learning_rate": 1.4767559945462072e-05, "loss": 0.0102, "step": 2118 }, { "epoch": 15.14, "learning_rate": 1.472654639811406e-05, "loss": 0.0143, "step": 2119 }, { "epoch": 15.14, "learning_rate": 1.4685580044913921e-05, "loss": 0.0155, "step": 2120 }, { "epoch": 15.15, "learning_rate": 1.4644660940672627e-05, "loss": 0.0121, "step": 2121 }, { "epoch": 15.16, "learning_rate": 1.460378914013793e-05, "loss": 0.0176, "step": 2122 }, { "epoch": 15.16, "learning_rate": 1.4562964697994286e-05, "loss": 0.0188, "step": 2123 }, { "epoch": 15.17, "learning_rate": 1.4522187668862796e-05, "loss": 0.0096, "step": 2124 }, { "epoch": 15.18, "learning_rate": 1.4481458107301121e-05, "loss": 0.0312, "step": 2125 }, { "epoch": 15.19, "learning_rate": 1.4440776067803419e-05, "loss": 0.0101, "step": 2126 }, { "epoch": 15.19, "learning_rate": 1.4400141604800249e-05, "loss": 0.0225, "step": 2127 }, { "epoch": 15.2, "learning_rate": 1.4359554772658552e-05, "loss": 0.0204, "step": 2128 }, { "epoch": 15.21, "learning_rate": 1.4319015625681475e-05, "loss": 0.0044, "step": 2129 }, { "epoch": 15.21, "learning_rate": 1.4278524218108424e-05, "loss": 0.0211, "step": 2130 }, { "epoch": 15.22, "learning_rate": 1.4238080604114917e-05, "loss": 0.0304, "step": 2131 }, { "epoch": 15.23, "learning_rate": 1.419768483781252e-05, "loss": 0.0264, "step": 2132 }, { "epoch": 15.24, "learning_rate": 1.415733697324878e-05, "loss": 0.0132, "step": 2133 }, { "epoch": 15.24, "learning_rate": 1.4117037064407162e-05, "loss": 0.0156, "step": 2134 }, { "epoch": 15.25, "learning_rate": 1.4076785165206962e-05, "loss": 0.0265, "step": 2135 }, { "epoch": 15.26, "learning_rate": 1.4036581329503245e-05, "loss": 0.0272, "step": 2136 }, { "epoch": 15.26, "learning_rate": 1.3996425611086767e-05, "loss": 0.0065, "step": 2137 }, { "epoch": 15.27, "learning_rate": 1.3956318063683904e-05, "loss": 0.0069, "step": 2138 }, { "epoch": 15.28, "learning_rate": 1.3916258740956583e-05, "loss": 0.0367, "step": 2139 }, { "epoch": 15.29, "learning_rate": 1.3876247696502238e-05, "loss": 0.0314, "step": 2140 }, { "epoch": 15.29, "learning_rate": 1.3836284983853632e-05, "loss": 0.0262, "step": 2141 }, { "epoch": 15.3, "learning_rate": 1.3796370656478935e-05, "loss": 0.0517, "step": 2142 }, { "epoch": 15.31, "learning_rate": 1.3756504767781559e-05, "loss": 0.0055, "step": 2143 }, { "epoch": 15.31, "learning_rate": 1.3716687371100096e-05, "loss": 0.0212, "step": 2144 }, { "epoch": 15.32, "learning_rate": 1.3676918519708276e-05, "loss": 0.0092, "step": 2145 }, { "epoch": 15.33, "learning_rate": 1.363719826681486e-05, "loss": 0.0069, "step": 2146 }, { "epoch": 15.34, "learning_rate": 1.3597526665563598e-05, "loss": 0.0272, "step": 2147 }, { "epoch": 15.34, "learning_rate": 1.3557903769033148e-05, "loss": 0.0481, "step": 2148 }, { "epoch": 15.35, "learning_rate": 1.3518329630236987e-05, "loss": 0.0116, "step": 2149 }, { "epoch": 15.36, "learning_rate": 1.3478804302123382e-05, "loss": 0.0251, "step": 2150 }, { "epoch": 15.36, "learning_rate": 1.3439327837575271e-05, "loss": 0.0103, "step": 2151 }, { "epoch": 15.37, "learning_rate": 1.3399900289410245e-05, "loss": 0.0058, "step": 2152 }, { "epoch": 15.38, "learning_rate": 1.3360521710380391e-05, "loss": 0.0078, "step": 2153 }, { "epoch": 15.39, "learning_rate": 1.3321192153172329e-05, "loss": 0.0191, "step": 2154 }, { "epoch": 15.39, "learning_rate": 1.3281911670407076e-05, "loss": 0.0179, "step": 2155 }, { "epoch": 15.4, "learning_rate": 1.3242680314639993e-05, "loss": 0.0045, "step": 2156 }, { "epoch": 15.41, "learning_rate": 1.3203498138360693e-05, "loss": 0.02, "step": 2157 }, { "epoch": 15.41, "learning_rate": 1.3164365193993016e-05, "loss": 0.0149, "step": 2158 }, { "epoch": 15.42, "learning_rate": 1.3125281533894912e-05, "loss": 0.0392, "step": 2159 }, { "epoch": 15.43, "learning_rate": 1.3086247210358398e-05, "loss": 0.0654, "step": 2160 }, { "epoch": 15.44, "learning_rate": 1.3047262275609483e-05, "loss": 0.0075, "step": 2161 }, { "epoch": 15.44, "learning_rate": 1.30083267818081e-05, "loss": 0.0023, "step": 2162 }, { "epoch": 15.45, "learning_rate": 1.2969440781048014e-05, "loss": 0.0081, "step": 2163 }, { "epoch": 15.46, "learning_rate": 1.2930604325356794e-05, "loss": 0.0018, "step": 2164 }, { "epoch": 15.46, "learning_rate": 1.2891817466695705e-05, "loss": 0.0124, "step": 2165 }, { "epoch": 15.47, "learning_rate": 1.2853080256959655e-05, "loss": 0.0161, "step": 2166 }, { "epoch": 15.48, "learning_rate": 1.2814392747977139e-05, "loss": 0.0225, "step": 2167 }, { "epoch": 15.49, "learning_rate": 1.277575499151013e-05, "loss": 0.0038, "step": 2168 }, { "epoch": 15.49, "learning_rate": 1.2737167039254055e-05, "loss": 0.0241, "step": 2169 }, { "epoch": 15.5, "learning_rate": 1.2698628942837699e-05, "loss": 0.0094, "step": 2170 }, { "epoch": 15.51, "learning_rate": 1.2660140753823135e-05, "loss": 0.008, "step": 2171 }, { "epoch": 15.51, "learning_rate": 1.2621702523705676e-05, "loss": 0.0042, "step": 2172 }, { "epoch": 15.52, "learning_rate": 1.2583314303913778e-05, "loss": 0.0105, "step": 2173 }, { "epoch": 15.53, "learning_rate": 1.2544976145809018e-05, "loss": 0.0057, "step": 2174 }, { "epoch": 15.54, "learning_rate": 1.2506688100685931e-05, "loss": 0.0033, "step": 2175 }, { "epoch": 15.54, "learning_rate": 1.2468450219772054e-05, "loss": 0.0581, "step": 2176 }, { "epoch": 15.55, "learning_rate": 1.24302625542278e-05, "loss": 0.0267, "step": 2177 }, { "epoch": 15.56, "learning_rate": 1.2392125155146383e-05, "loss": 0.0208, "step": 2178 }, { "epoch": 15.56, "learning_rate": 1.2354038073553776e-05, "loss": 0.0071, "step": 2179 }, { "epoch": 15.57, "learning_rate": 1.2316001360408614e-05, "loss": 0.0096, "step": 2180 }, { "epoch": 15.58, "learning_rate": 1.227801506660215e-05, "loss": 0.0294, "step": 2181 }, { "epoch": 15.59, "learning_rate": 1.2240079242958191e-05, "loss": 0.0181, "step": 2182 }, { "epoch": 15.59, "learning_rate": 1.2202193940232993e-05, "loss": 0.0181, "step": 2183 }, { "epoch": 15.6, "learning_rate": 1.2164359209115234e-05, "loss": 0.0255, "step": 2184 }, { "epoch": 15.61, "learning_rate": 1.2126575100225929e-05, "loss": 0.0118, "step": 2185 }, { "epoch": 15.61, "learning_rate": 1.2088841664118367e-05, "loss": 0.0315, "step": 2186 }, { "epoch": 15.62, "learning_rate": 1.2051158951278008e-05, "loss": 0.0171, "step": 2187 }, { "epoch": 15.63, "learning_rate": 1.2013527012122478e-05, "loss": 0.03, "step": 2188 }, { "epoch": 15.64, "learning_rate": 1.1975945897001472e-05, "loss": 0.0301, "step": 2189 }, { "epoch": 15.64, "learning_rate": 1.1938415656196672e-05, "loss": 0.0143, "step": 2190 }, { "epoch": 15.65, "learning_rate": 1.1900936339921693e-05, "loss": 0.0064, "step": 2191 }, { "epoch": 15.66, "learning_rate": 1.186350799832202e-05, "loss": 0.0081, "step": 2192 }, { "epoch": 15.66, "learning_rate": 1.1826130681474939e-05, "loss": 0.0674, "step": 2193 }, { "epoch": 15.67, "learning_rate": 1.1788804439389457e-05, "loss": 0.0142, "step": 2194 }, { "epoch": 15.68, "learning_rate": 1.175152932200626e-05, "loss": 0.0066, "step": 2195 }, { "epoch": 15.69, "learning_rate": 1.1714305379197615e-05, "loss": 0.033, "step": 2196 }, { "epoch": 15.69, "learning_rate": 1.1677132660767337e-05, "loss": 0.0055, "step": 2197 }, { "epoch": 15.7, "learning_rate": 1.1640011216450691e-05, "loss": 0.005, "step": 2198 }, { "epoch": 15.71, "learning_rate": 1.1602941095914361e-05, "loss": 0.0086, "step": 2199 }, { "epoch": 15.71, "learning_rate": 1.1565922348756325e-05, "loss": 0.0209, "step": 2200 }, { "epoch": 15.72, "learning_rate": 1.152895502450585e-05, "loss": 0.0711, "step": 2201 }, { "epoch": 15.73, "learning_rate": 1.1492039172623408e-05, "loss": 0.0093, "step": 2202 }, { "epoch": 15.74, "learning_rate": 1.1455174842500593e-05, "loss": 0.0035, "step": 2203 }, { "epoch": 15.74, "learning_rate": 1.1418362083460066e-05, "loss": 0.0096, "step": 2204 }, { "epoch": 15.75, "learning_rate": 1.1381600944755494e-05, "loss": 0.0252, "step": 2205 }, { "epoch": 15.76, "learning_rate": 1.1344891475571474e-05, "loss": 0.0292, "step": 2206 }, { "epoch": 15.76, "learning_rate": 1.1308233725023465e-05, "loss": 0.0277, "step": 2207 }, { "epoch": 15.77, "learning_rate": 1.1271627742157742e-05, "loss": 0.0159, "step": 2208 }, { "epoch": 15.78, "learning_rate": 1.1235073575951316e-05, "loss": 0.0311, "step": 2209 }, { "epoch": 15.79, "learning_rate": 1.1198571275311858e-05, "loss": 0.0097, "step": 2210 }, { "epoch": 15.79, "learning_rate": 1.1162120889077665e-05, "loss": 0.0186, "step": 2211 }, { "epoch": 15.8, "learning_rate": 1.1125722466017547e-05, "loss": 0.0266, "step": 2212 }, { "epoch": 15.81, "learning_rate": 1.1089376054830803e-05, "loss": 0.0186, "step": 2213 }, { "epoch": 15.81, "learning_rate": 1.1053081704147162e-05, "loss": 0.023, "step": 2214 }, { "epoch": 15.82, "learning_rate": 1.1016839462526668e-05, "loss": 0.0369, "step": 2215 }, { "epoch": 15.83, "learning_rate": 1.0980649378459668e-05, "loss": 0.0037, "step": 2216 }, { "epoch": 15.84, "learning_rate": 1.0944511500366716e-05, "loss": 0.0057, "step": 2217 }, { "epoch": 15.84, "learning_rate": 1.090842587659851e-05, "loss": 0.0149, "step": 2218 }, { "epoch": 15.85, "learning_rate": 1.0872392555435856e-05, "loss": 0.0091, "step": 2219 }, { "epoch": 15.86, "learning_rate": 1.083641158508955e-05, "loss": 0.0074, "step": 2220 }, { "epoch": 15.86, "learning_rate": 1.080048301370038e-05, "loss": 0.025, "step": 2221 }, { "epoch": 15.87, "learning_rate": 1.0764606889338995e-05, "loss": 0.0009, "step": 2222 }, { "epoch": 15.88, "learning_rate": 1.0728783260005904e-05, "loss": 0.013, "step": 2223 }, { "epoch": 15.89, "learning_rate": 1.0693012173631344e-05, "loss": 0.0106, "step": 2224 }, { "epoch": 15.89, "learning_rate": 1.0657293678075275e-05, "loss": 0.0303, "step": 2225 }, { "epoch": 15.9, "learning_rate": 1.0621627821127289e-05, "loss": 0.012, "step": 2226 }, { "epoch": 15.91, "learning_rate": 1.058601465050656e-05, "loss": 0.008, "step": 2227 }, { "epoch": 15.91, "learning_rate": 1.055045421386175e-05, "loss": 0.0079, "step": 2228 }, { "epoch": 15.92, "learning_rate": 1.0514946558770977e-05, "loss": 0.0267, "step": 2229 }, { "epoch": 15.93, "learning_rate": 1.0479491732741747e-05, "loss": 0.0041, "step": 2230 }, { "epoch": 15.94, "learning_rate": 1.044408978321087e-05, "loss": 0.0163, "step": 2231 }, { "epoch": 15.94, "learning_rate": 1.0408740757544416e-05, "loss": 0.0151, "step": 2232 }, { "epoch": 15.95, "learning_rate": 1.0373444703037644e-05, "loss": 0.0014, "step": 2233 }, { "epoch": 15.96, "learning_rate": 1.0338201666914943e-05, "loss": 0.0349, "step": 2234 }, { "epoch": 15.96, "learning_rate": 1.0303011696329756e-05, "loss": 0.0241, "step": 2235 }, { "epoch": 15.97, "learning_rate": 1.026787483836456e-05, "loss": 0.0234, "step": 2236 }, { "epoch": 15.98, "learning_rate": 1.0232791140030713e-05, "loss": 0.0284, "step": 2237 }, { "epoch": 15.99, "learning_rate": 1.0197760648268484e-05, "loss": 0.0088, "step": 2238 }, { "epoch": 15.99, "learning_rate": 1.0162783409946964e-05, "loss": 0.012, "step": 2239 }, { "epoch": 16.0, "learning_rate": 1.012785947186397e-05, "loss": 0.0145, "step": 2240 }, { "epoch": 16.01, "learning_rate": 1.0092988880746018e-05, "loss": 0.0082, "step": 2241 }, { "epoch": 16.01, "learning_rate": 1.0058171683248246e-05, "loss": 0.0122, "step": 2242 }, { "epoch": 16.02, "learning_rate": 1.0023407925954353e-05, "loss": 0.0297, "step": 2243 }, { "epoch": 16.03, "learning_rate": 9.988697655376544e-06, "loss": 0.0257, "step": 2244 }, { "epoch": 16.04, "learning_rate": 9.954040917955449e-06, "loss": 0.0141, "step": 2245 }, { "epoch": 16.04, "learning_rate": 9.919437760060073e-06, "loss": 0.0029, "step": 2246 }, { "epoch": 16.05, "learning_rate": 9.88488822798776e-06, "loss": 0.0436, "step": 2247 }, { "epoch": 16.06, "learning_rate": 9.850392367964085e-06, "loss": 0.0257, "step": 2248 }, { "epoch": 16.06, "learning_rate": 9.815950226142778e-06, "loss": 0.0037, "step": 2249 }, { "epoch": 16.07, "learning_rate": 9.781561848605775e-06, "loss": 0.0187, "step": 2250 }, { "epoch": 16.08, "learning_rate": 9.74722728136302e-06, "loss": 0.0042, "step": 2251 }, { "epoch": 16.09, "learning_rate": 9.712946570352472e-06, "loss": 0.0126, "step": 2252 }, { "epoch": 16.09, "learning_rate": 9.678719761440047e-06, "loss": 0.0165, "step": 2253 }, { "epoch": 16.1, "learning_rate": 9.644546900419533e-06, "loss": 0.0271, "step": 2254 }, { "epoch": 16.11, "learning_rate": 9.61042803301253e-06, "loss": 0.0052, "step": 2255 }, { "epoch": 16.11, "learning_rate": 9.576363204868416e-06, "loss": 0.0063, "step": 2256 }, { "epoch": 16.12, "learning_rate": 9.54235246156427e-06, "loss": 0.0112, "step": 2257 }, { "epoch": 16.13, "learning_rate": 9.508395848604756e-06, "loss": 0.0074, "step": 2258 }, { "epoch": 16.14, "learning_rate": 9.474493411422186e-06, "loss": 0.0078, "step": 2259 }, { "epoch": 16.14, "learning_rate": 9.44064519537634e-06, "loss": 0.0135, "step": 2260 }, { "epoch": 16.15, "learning_rate": 9.406851245754477e-06, "loss": 0.0162, "step": 2261 }, { "epoch": 16.16, "learning_rate": 9.37311160777124e-06, "loss": 0.0052, "step": 2262 }, { "epoch": 16.16, "learning_rate": 9.339426326568606e-06, "loss": 0.0021, "step": 2263 }, { "epoch": 16.17, "learning_rate": 9.305795447215827e-06, "loss": 0.0024, "step": 2264 }, { "epoch": 16.18, "learning_rate": 9.27221901470936e-06, "loss": 0.0049, "step": 2265 }, { "epoch": 16.19, "learning_rate": 9.23869707397283e-06, "loss": 0.0049, "step": 2266 }, { "epoch": 16.19, "learning_rate": 9.205229669856947e-06, "loss": 0.0027, "step": 2267 }, { "epoch": 16.2, "learning_rate": 9.171816847139448e-06, "loss": 0.0087, "step": 2268 }, { "epoch": 16.21, "learning_rate": 9.138458650525055e-06, "loss": 0.0049, "step": 2269 }, { "epoch": 16.21, "learning_rate": 9.105155124645403e-06, "loss": 0.0283, "step": 2270 }, { "epoch": 16.22, "learning_rate": 9.071906314058948e-06, "loss": 0.034, "step": 2271 }, { "epoch": 16.23, "learning_rate": 9.03871226325098e-06, "loss": 0.0202, "step": 2272 }, { "epoch": 16.24, "learning_rate": 9.005573016633511e-06, "loss": 0.0183, "step": 2273 }, { "epoch": 16.24, "learning_rate": 8.972488618545222e-06, "loss": 0.0186, "step": 2274 }, { "epoch": 16.25, "learning_rate": 8.939459113251408e-06, "loss": 0.0274, "step": 2275 }, { "epoch": 16.26, "learning_rate": 8.906484544943932e-06, "loss": 0.0029, "step": 2276 }, { "epoch": 16.26, "learning_rate": 8.873564957741144e-06, "loss": 0.0175, "step": 2277 }, { "epoch": 16.27, "learning_rate": 8.840700395687823e-06, "loss": 0.004, "step": 2278 }, { "epoch": 16.28, "learning_rate": 8.807890902755156e-06, "loss": 0.0438, "step": 2279 }, { "epoch": 16.29, "learning_rate": 8.775136522840621e-06, "loss": 0.0133, "step": 2280 }, { "epoch": 16.29, "learning_rate": 8.742437299767975e-06, "loss": 0.003, "step": 2281 }, { "epoch": 16.3, "learning_rate": 8.70979327728718e-06, "loss": 0.0026, "step": 2282 }, { "epoch": 16.31, "learning_rate": 8.677204499074314e-06, "loss": 0.0408, "step": 2283 }, { "epoch": 16.31, "learning_rate": 8.644671008731569e-06, "loss": 0.0142, "step": 2284 }, { "epoch": 16.32, "learning_rate": 8.61219284978716e-06, "loss": 0.0059, "step": 2285 }, { "epoch": 16.33, "learning_rate": 8.579770065695264e-06, "loss": 0.0067, "step": 2286 }, { "epoch": 16.34, "learning_rate": 8.547402699835976e-06, "loss": 0.0175, "step": 2287 }, { "epoch": 16.34, "learning_rate": 8.515090795515247e-06, "loss": 0.0301, "step": 2288 }, { "epoch": 16.35, "learning_rate": 8.4828343959648e-06, "loss": 0.0099, "step": 2289 }, { "epoch": 16.36, "learning_rate": 8.450633544342134e-06, "loss": 0.0033, "step": 2290 }, { "epoch": 16.36, "learning_rate": 8.418488283730392e-06, "loss": 0.0311, "step": 2291 }, { "epoch": 16.37, "learning_rate": 8.386398657138356e-06, "loss": 0.0037, "step": 2292 }, { "epoch": 16.38, "learning_rate": 8.354364707500379e-06, "loss": 0.0084, "step": 2293 }, { "epoch": 16.39, "learning_rate": 8.322386477676308e-06, "loss": 0.0035, "step": 2294 }, { "epoch": 16.39, "learning_rate": 8.290464010451432e-06, "loss": 0.0015, "step": 2295 }, { "epoch": 16.4, "learning_rate": 8.25859734853645e-06, "loss": 0.0154, "step": 2296 }, { "epoch": 16.41, "learning_rate": 8.226786534567393e-06, "loss": 0.0192, "step": 2297 }, { "epoch": 16.41, "learning_rate": 8.195031611105564e-06, "loss": 0.0169, "step": 2298 }, { "epoch": 16.42, "learning_rate": 8.163332620637493e-06, "loss": 0.0573, "step": 2299 }, { "epoch": 16.43, "learning_rate": 8.131689605574866e-06, "loss": 0.0055, "step": 2300 }, { "epoch": 16.44, "learning_rate": 8.10010260825449e-06, "loss": 0.008, "step": 2301 }, { "epoch": 16.44, "learning_rate": 8.068571670938218e-06, "loss": 0.012, "step": 2302 }, { "epoch": 16.45, "learning_rate": 8.037096835812885e-06, "loss": 0.0164, "step": 2303 }, { "epoch": 16.46, "learning_rate": 8.00567814499028e-06, "loss": 0.0246, "step": 2304 }, { "epoch": 16.46, "learning_rate": 7.974315640507064e-06, "loss": 0.0039, "step": 2305 }, { "epoch": 16.47, "learning_rate": 7.943009364324733e-06, "loss": 0.0257, "step": 2306 }, { "epoch": 16.48, "learning_rate": 7.911759358329552e-06, "loss": 0.0107, "step": 2307 }, { "epoch": 16.49, "learning_rate": 7.880565664332474e-06, "loss": 0.0035, "step": 2308 }, { "epoch": 16.49, "learning_rate": 7.849428324069142e-06, "loss": 0.0151, "step": 2309 }, { "epoch": 16.5, "learning_rate": 7.81834737919978e-06, "loss": 0.0094, "step": 2310 }, { "epoch": 16.51, "learning_rate": 7.787322871309172e-06, "loss": 0.0237, "step": 2311 }, { "epoch": 16.51, "learning_rate": 7.756354841906582e-06, "loss": 0.0052, "step": 2312 }, { "epoch": 16.52, "learning_rate": 7.725443332425719e-06, "loss": 0.018, "step": 2313 }, { "epoch": 16.53, "learning_rate": 7.694588384224655e-06, "loss": 0.0235, "step": 2314 }, { "epoch": 16.54, "learning_rate": 7.663790038585793e-06, "loss": 0.0447, "step": 2315 }, { "epoch": 16.54, "learning_rate": 7.633048336715814e-06, "loss": 0.0105, "step": 2316 }, { "epoch": 16.55, "learning_rate": 7.602363319745609e-06, "loss": 0.0258, "step": 2317 }, { "epoch": 16.56, "learning_rate": 7.571735028730209e-06, "loss": 0.0113, "step": 2318 }, { "epoch": 16.56, "learning_rate": 7.5411635046487794e-06, "loss": 0.0303, "step": 2319 }, { "epoch": 16.57, "learning_rate": 7.510648788404501e-06, "loss": 0.018, "step": 2320 }, { "epoch": 16.58, "learning_rate": 7.4801909208245706e-06, "loss": 0.0076, "step": 2321 }, { "epoch": 16.59, "learning_rate": 7.449789942660118e-06, "loss": 0.0109, "step": 2322 }, { "epoch": 16.59, "learning_rate": 7.419445894586163e-06, "loss": 0.0056, "step": 2323 }, { "epoch": 16.6, "learning_rate": 7.389158817201542e-06, "loss": 0.0054, "step": 2324 }, { "epoch": 16.61, "learning_rate": 7.358928751028887e-06, "loss": 0.0205, "step": 2325 }, { "epoch": 16.61, "learning_rate": 7.3287557365145365e-06, "loss": 0.0123, "step": 2326 }, { "epoch": 16.62, "learning_rate": 7.2986398140285015e-06, "loss": 0.0091, "step": 2327 }, { "epoch": 16.63, "learning_rate": 7.268581023864402e-06, "loss": 0.0614, "step": 2328 }, { "epoch": 16.64, "learning_rate": 7.238579406239432e-06, "loss": 0.0101, "step": 2329 }, { "epoch": 16.64, "learning_rate": 7.208635001294278e-06, "loss": 0.013, "step": 2330 }, { "epoch": 16.65, "learning_rate": 7.178747849093093e-06, "loss": 0.0135, "step": 2331 }, { "epoch": 16.66, "learning_rate": 7.1489179896233885e-06, "loss": 0.0342, "step": 2332 }, { "epoch": 16.66, "learning_rate": 7.119145462796079e-06, "loss": 0.0059, "step": 2333 }, { "epoch": 16.67, "learning_rate": 7.089430308445316e-06, "loss": 0.0188, "step": 2334 }, { "epoch": 16.68, "learning_rate": 7.059772566328543e-06, "loss": 0.0069, "step": 2335 }, { "epoch": 16.69, "learning_rate": 7.030172276126351e-06, "loss": 0.021, "step": 2336 }, { "epoch": 16.69, "learning_rate": 7.000629477442478e-06, "loss": 0.0185, "step": 2337 }, { "epoch": 16.7, "learning_rate": 6.9711442098037375e-06, "loss": 0.0016, "step": 2338 }, { "epoch": 16.71, "learning_rate": 6.941716512659957e-06, "loss": 0.0136, "step": 2339 }, { "epoch": 16.71, "learning_rate": 6.912346425383964e-06, "loss": 0.0162, "step": 2340 }, { "epoch": 16.72, "learning_rate": 6.883033987271492e-06, "loss": 0.0538, "step": 2341 }, { "epoch": 16.73, "learning_rate": 6.85377923754113e-06, "loss": 0.041, "step": 2342 }, { "epoch": 16.74, "learning_rate": 6.824582215334302e-06, "loss": 0.0206, "step": 2343 }, { "epoch": 16.74, "learning_rate": 6.795442959715192e-06, "loss": 0.0021, "step": 2344 }, { "epoch": 16.75, "learning_rate": 6.766361509670688e-06, "loss": 0.0056, "step": 2345 }, { "epoch": 16.76, "learning_rate": 6.737337904110341e-06, "loss": 0.0107, "step": 2346 }, { "epoch": 16.76, "learning_rate": 6.708372181866318e-06, "loss": 0.017, "step": 2347 }, { "epoch": 16.77, "learning_rate": 6.679464381693323e-06, "loss": 0.004, "step": 2348 }, { "epoch": 16.78, "learning_rate": 6.650614542268574e-06, "loss": 0.0064, "step": 2349 }, { "epoch": 16.79, "learning_rate": 6.621822702191743e-06, "loss": 0.0117, "step": 2350 }, { "epoch": 16.79, "learning_rate": 6.593088899984895e-06, "loss": 0.0464, "step": 2351 }, { "epoch": 16.8, "learning_rate": 6.564413174092443e-06, "loss": 0.0319, "step": 2352 }, { "epoch": 16.81, "learning_rate": 6.535795562881114e-06, "loss": 0.0422, "step": 2353 }, { "epoch": 16.81, "learning_rate": 6.507236104639841e-06, "loss": 0.0138, "step": 2354 }, { "epoch": 16.82, "learning_rate": 6.478734837579792e-06, "loss": 0.0072, "step": 2355 }, { "epoch": 16.83, "learning_rate": 6.4502917998342575e-06, "loss": 0.0062, "step": 2356 }, { "epoch": 16.84, "learning_rate": 6.421907029458624e-06, "loss": 0.0041, "step": 2357 }, { "epoch": 16.84, "learning_rate": 6.3935805644303184e-06, "loss": 0.009, "step": 2358 }, { "epoch": 16.85, "learning_rate": 6.365312442648769e-06, "loss": 0.0026, "step": 2359 }, { "epoch": 16.86, "learning_rate": 6.337102701935321e-06, "loss": 0.0218, "step": 2360 }, { "epoch": 16.86, "learning_rate": 6.3089513800332286e-06, "loss": 0.0072, "step": 2361 }, { "epoch": 16.87, "learning_rate": 6.28085851460758e-06, "loss": 0.0255, "step": 2362 }, { "epoch": 16.88, "learning_rate": 6.2528241432452394e-06, "loss": 0.0112, "step": 2363 }, { "epoch": 16.89, "learning_rate": 6.2248483034548276e-06, "loss": 0.0401, "step": 2364 }, { "epoch": 16.89, "learning_rate": 6.196931032666648e-06, "loss": 0.0322, "step": 2365 }, { "epoch": 16.9, "learning_rate": 6.16907236823262e-06, "loss": 0.0248, "step": 2366 }, { "epoch": 16.91, "learning_rate": 6.14127234742628e-06, "loss": 0.0041, "step": 2367 }, { "epoch": 16.91, "learning_rate": 6.11353100744268e-06, "loss": 0.0218, "step": 2368 }, { "epoch": 16.92, "learning_rate": 6.085848385398385e-06, "loss": 0.0137, "step": 2369 }, { "epoch": 16.93, "learning_rate": 6.058224518331374e-06, "loss": 0.0083, "step": 2370 }, { "epoch": 16.94, "learning_rate": 6.03065944320102e-06, "loss": 0.0675, "step": 2371 }, { "epoch": 16.94, "learning_rate": 6.003153196888045e-06, "loss": 0.0064, "step": 2372 }, { "epoch": 16.95, "learning_rate": 5.97570581619446e-06, "loss": 0.0103, "step": 2373 }, { "epoch": 16.96, "learning_rate": 5.948317337843501e-06, "loss": 0.0259, "step": 2374 }, { "epoch": 16.96, "learning_rate": 5.920987798479616e-06, "loss": 0.0044, "step": 2375 }, { "epoch": 16.97, "learning_rate": 5.893717234668383e-06, "loss": 0.0358, "step": 2376 }, { "epoch": 16.98, "learning_rate": 5.866505682896472e-06, "loss": 0.033, "step": 2377 }, { "epoch": 16.99, "learning_rate": 5.839353179571616e-06, "loss": 0.0377, "step": 2378 }, { "epoch": 16.99, "learning_rate": 5.812259761022509e-06, "loss": 0.0099, "step": 2379 }, { "epoch": 17.0, "learning_rate": 5.785225463498828e-06, "loss": 0.0071, "step": 2380 }, { "epoch": 17.01, "learning_rate": 5.758250323171127e-06, "loss": 0.0058, "step": 2381 }, { "epoch": 17.01, "learning_rate": 5.731334376130826e-06, "loss": 0.0282, "step": 2382 }, { "epoch": 17.02, "learning_rate": 5.704477658390139e-06, "loss": 0.0229, "step": 2383 }, { "epoch": 17.03, "learning_rate": 5.67768020588203e-06, "loss": 0.0099, "step": 2384 }, { "epoch": 17.04, "learning_rate": 5.650942054460173e-06, "loss": 0.0054, "step": 2385 }, { "epoch": 17.04, "learning_rate": 5.6242632398989085e-06, "loss": 0.0009, "step": 2386 }, { "epoch": 17.05, "learning_rate": 5.597643797893176e-06, "loss": 0.0017, "step": 2387 }, { "epoch": 17.06, "learning_rate": 5.571083764058482e-06, "loss": 0.0029, "step": 2388 }, { "epoch": 17.06, "learning_rate": 5.544583173930839e-06, "loss": 0.037, "step": 2389 }, { "epoch": 17.07, "learning_rate": 5.518142062966758e-06, "loss": 0.0368, "step": 2390 }, { "epoch": 17.08, "learning_rate": 5.491760466543117e-06, "loss": 0.0029, "step": 2391 }, { "epoch": 17.09, "learning_rate": 5.465438419957208e-06, "loss": 0.0434, "step": 2392 }, { "epoch": 17.09, "learning_rate": 5.439175958426634e-06, "loss": 0.0025, "step": 2393 }, { "epoch": 17.1, "learning_rate": 5.412973117089287e-06, "loss": 0.0248, "step": 2394 }, { "epoch": 17.11, "learning_rate": 5.386829931003268e-06, "loss": 0.0047, "step": 2395 }, { "epoch": 17.11, "learning_rate": 5.360746435146885e-06, "loss": 0.0086, "step": 2396 }, { "epoch": 17.12, "learning_rate": 5.334722664418573e-06, "loss": 0.0121, "step": 2397 }, { "epoch": 17.13, "learning_rate": 5.3087586536368525e-06, "loss": 0.0073, "step": 2398 }, { "epoch": 17.14, "learning_rate": 5.2828544375403025e-06, "loss": 0.0041, "step": 2399 }, { "epoch": 17.14, "learning_rate": 5.257010050787486e-06, "loss": 0.0272, "step": 2400 }, { "epoch": 17.15, "learning_rate": 5.2312255279569236e-06, "loss": 0.0068, "step": 2401 }, { "epoch": 17.16, "learning_rate": 5.20550090354705e-06, "loss": 0.0207, "step": 2402 }, { "epoch": 17.16, "learning_rate": 5.179836211976135e-06, "loss": 0.0068, "step": 2403 }, { "epoch": 17.17, "learning_rate": 5.154231487582273e-06, "loss": 0.0016, "step": 2404 }, { "epoch": 17.18, "learning_rate": 5.128686764623336e-06, "loss": 0.0116, "step": 2405 }, { "epoch": 17.19, "learning_rate": 5.103202077276908e-06, "loss": 0.0256, "step": 2406 }, { "epoch": 17.19, "learning_rate": 5.077777459640248e-06, "loss": 0.033, "step": 2407 }, { "epoch": 17.2, "learning_rate": 5.05241294573024e-06, "loss": 0.0203, "step": 2408 }, { "epoch": 17.21, "learning_rate": 5.027108569483363e-06, "loss": 0.002, "step": 2409 }, { "epoch": 17.21, "learning_rate": 5.0018643647556265e-06, "loss": 0.0038, "step": 2410 }, { "epoch": 17.22, "learning_rate": 4.976680365322533e-06, "loss": 0.0046, "step": 2411 }, { "epoch": 17.23, "learning_rate": 4.951556604879048e-06, "loss": 0.0163, "step": 2412 }, { "epoch": 17.24, "learning_rate": 4.9264931170395155e-06, "loss": 0.0154, "step": 2413 }, { "epoch": 17.24, "learning_rate": 4.901489935337661e-06, "loss": 0.0031, "step": 2414 }, { "epoch": 17.25, "learning_rate": 4.876547093226513e-06, "loss": 0.0064, "step": 2415 }, { "epoch": 17.26, "learning_rate": 4.851664624078356e-06, "loss": 0.0051, "step": 2416 }, { "epoch": 17.26, "learning_rate": 4.826842561184719e-06, "loss": 0.0087, "step": 2417 }, { "epoch": 17.27, "learning_rate": 4.802080937756304e-06, "loss": 0.0045, "step": 2418 }, { "epoch": 17.28, "learning_rate": 4.777379786922937e-06, "loss": 0.0118, "step": 2419 }, { "epoch": 17.29, "learning_rate": 4.752739141733564e-06, "loss": 0.0065, "step": 2420 }, { "epoch": 17.29, "learning_rate": 4.728159035156143e-06, "loss": 0.0021, "step": 2421 }, { "epoch": 17.3, "learning_rate": 4.703639500077656e-06, "loss": 0.011, "step": 2422 }, { "epoch": 17.31, "learning_rate": 4.679180569304032e-06, "loss": 0.0078, "step": 2423 }, { "epoch": 17.31, "learning_rate": 4.654782275560127e-06, "loss": 0.0086, "step": 2424 }, { "epoch": 17.32, "learning_rate": 4.63044465148964e-06, "loss": 0.016, "step": 2425 }, { "epoch": 17.33, "learning_rate": 4.606167729655131e-06, "loss": 0.0018, "step": 2426 }, { "epoch": 17.34, "learning_rate": 4.581951542537921e-06, "loss": 0.0163, "step": 2427 }, { "epoch": 17.34, "learning_rate": 4.557796122538089e-06, "loss": 0.0135, "step": 2428 }, { "epoch": 17.35, "learning_rate": 4.533701501974391e-06, "loss": 0.0139, "step": 2429 }, { "epoch": 17.36, "learning_rate": 4.509667713084254e-06, "loss": 0.0245, "step": 2430 }, { "epoch": 17.36, "learning_rate": 4.485694788023698e-06, "loss": 0.0098, "step": 2431 }, { "epoch": 17.37, "learning_rate": 4.4617827588673166e-06, "loss": 0.0203, "step": 2432 }, { "epoch": 17.38, "learning_rate": 4.437931657608246e-06, "loss": 0.0152, "step": 2433 }, { "epoch": 17.39, "learning_rate": 4.414141516158072e-06, "loss": 0.0073, "step": 2434 }, { "epoch": 17.39, "learning_rate": 4.390412366346841e-06, "loss": 0.0338, "step": 2435 }, { "epoch": 17.4, "learning_rate": 4.366744239922998e-06, "loss": 0.0234, "step": 2436 }, { "epoch": 17.41, "learning_rate": 4.3431371685533125e-06, "loss": 0.0176, "step": 2437 }, { "epoch": 17.41, "learning_rate": 4.319591183822902e-06, "loss": 0.0161, "step": 2438 }, { "epoch": 17.42, "learning_rate": 4.296106317235138e-06, "loss": 0.0235, "step": 2439 }, { "epoch": 17.43, "learning_rate": 4.2726826002116085e-06, "loss": 0.0117, "step": 2440 }, { "epoch": 17.44, "learning_rate": 4.2493200640921e-06, "loss": 0.002, "step": 2441 }, { "epoch": 17.44, "learning_rate": 4.226018740134541e-06, "loss": 0.0125, "step": 2442 }, { "epoch": 17.45, "learning_rate": 4.202778659514956e-06, "loss": 0.0046, "step": 2443 }, { "epoch": 17.46, "learning_rate": 4.1795998533274265e-06, "loss": 0.0166, "step": 2444 }, { "epoch": 17.46, "learning_rate": 4.156482352584068e-06, "loss": 0.0266, "step": 2445 }, { "epoch": 17.47, "learning_rate": 4.1334261882149505e-06, "loss": 0.0169, "step": 2446 }, { "epoch": 17.48, "learning_rate": 4.110431391068093e-06, "loss": 0.011, "step": 2447 }, { "epoch": 17.49, "learning_rate": 4.0874979919094e-06, "loss": 0.0594, "step": 2448 }, { "epoch": 17.49, "learning_rate": 4.064626021422652e-06, "loss": 0.0018, "step": 2449 }, { "epoch": 17.5, "learning_rate": 4.041815510209396e-06, "loss": 0.0398, "step": 2450 }, { "epoch": 17.51, "learning_rate": 4.0190664887889805e-06, "loss": 0.0079, "step": 2451 }, { "epoch": 17.51, "learning_rate": 3.996378987598487e-06, "loss": 0.0096, "step": 2452 }, { "epoch": 17.52, "learning_rate": 3.973753036992678e-06, "loss": 0.0161, "step": 2453 }, { "epoch": 17.53, "learning_rate": 3.951188667243955e-06, "loss": 0.0218, "step": 2454 }, { "epoch": 17.54, "learning_rate": 3.92868590854234e-06, "loss": 0.0195, "step": 2455 }, { "epoch": 17.54, "learning_rate": 3.906244790995422e-06, "loss": 0.0181, "step": 2456 }, { "epoch": 17.55, "learning_rate": 3.883865344628307e-06, "loss": 0.006, "step": 2457 }, { "epoch": 17.56, "learning_rate": 3.861547599383602e-06, "loss": 0.009, "step": 2458 }, { "epoch": 17.56, "learning_rate": 3.839291585121352e-06, "loss": 0.0071, "step": 2459 }, { "epoch": 17.57, "learning_rate": 3.8170973316190074e-06, "loss": 0.0021, "step": 2460 }, { "epoch": 17.58, "learning_rate": 3.794964868571399e-06, "loss": 0.0012, "step": 2461 }, { "epoch": 17.59, "learning_rate": 3.7728942255906564e-06, "loss": 0.0455, "step": 2462 }, { "epoch": 17.59, "learning_rate": 3.7508854322062305e-06, "loss": 0.006, "step": 2463 }, { "epoch": 17.6, "learning_rate": 3.728938517864794e-06, "loss": 0.004, "step": 2464 }, { "epoch": 17.61, "learning_rate": 3.707053511930253e-06, "loss": 0.0155, "step": 2465 }, { "epoch": 17.61, "learning_rate": 3.685230443683657e-06, "loss": 0.0132, "step": 2466 }, { "epoch": 17.62, "learning_rate": 3.6634693423232114e-06, "loss": 0.012, "step": 2467 }, { "epoch": 17.63, "learning_rate": 3.6417702369641925e-06, "loss": 0.0059, "step": 2468 }, { "epoch": 17.64, "learning_rate": 3.620133156638944e-06, "loss": 0.0159, "step": 2469 }, { "epoch": 17.64, "learning_rate": 3.5985581302968085e-06, "loss": 0.0178, "step": 2470 }, { "epoch": 17.65, "learning_rate": 3.5770451868041176e-06, "loss": 0.0059, "step": 2471 }, { "epoch": 17.66, "learning_rate": 3.555594354944125e-06, "loss": 0.0122, "step": 2472 }, { "epoch": 17.66, "learning_rate": 3.534205663417006e-06, "loss": 0.0087, "step": 2473 }, { "epoch": 17.67, "learning_rate": 3.512879140839764e-06, "loss": 0.0097, "step": 2474 }, { "epoch": 17.68, "learning_rate": 3.491614815746236e-06, "loss": 0.023, "step": 2475 }, { "epoch": 17.69, "learning_rate": 3.4704127165870517e-06, "loss": 0.0051, "step": 2476 }, { "epoch": 17.69, "learning_rate": 3.449272871729581e-06, "loss": 0.048, "step": 2477 }, { "epoch": 17.7, "learning_rate": 3.4281953094578877e-06, "loss": 0.027, "step": 2478 }, { "epoch": 17.71, "learning_rate": 3.407180057972725e-06, "loss": 0.0021, "step": 2479 }, { "epoch": 17.71, "learning_rate": 3.386227145391463e-06, "loss": 0.0094, "step": 2480 }, { "epoch": 17.72, "learning_rate": 3.3653365997480644e-06, "loss": 0.0156, "step": 2481 }, { "epoch": 17.73, "learning_rate": 3.344508448993061e-06, "loss": 0.0274, "step": 2482 }, { "epoch": 17.74, "learning_rate": 3.323742720993489e-06, "loss": 0.0281, "step": 2483 }, { "epoch": 17.74, "learning_rate": 3.303039443532874e-06, "loss": 0.0022, "step": 2484 }, { "epoch": 17.75, "learning_rate": 3.2823986443111854e-06, "loss": 0.0095, "step": 2485 }, { "epoch": 17.76, "learning_rate": 3.2618203509448053e-06, "loss": 0.0025, "step": 2486 }, { "epoch": 17.76, "learning_rate": 3.2413045909664596e-06, "loss": 0.0051, "step": 2487 }, { "epoch": 17.77, "learning_rate": 3.220851391825247e-06, "loss": 0.0179, "step": 2488 }, { "epoch": 17.78, "learning_rate": 3.2004607808865273e-06, "loss": 0.0177, "step": 2489 }, { "epoch": 17.79, "learning_rate": 3.1801327854319496e-06, "loss": 0.0073, "step": 2490 }, { "epoch": 17.79, "learning_rate": 3.1598674326593635e-06, "loss": 0.0246, "step": 2491 }, { "epoch": 17.8, "learning_rate": 3.1396647496828247e-06, "loss": 0.0052, "step": 2492 }, { "epoch": 17.81, "learning_rate": 3.1195247635325166e-06, "loss": 0.0116, "step": 2493 }, { "epoch": 17.81, "learning_rate": 3.0994475011547675e-06, "loss": 0.015, "step": 2494 }, { "epoch": 17.82, "learning_rate": 3.079432989411957e-06, "loss": 0.0506, "step": 2495 }, { "epoch": 17.83, "learning_rate": 3.059481255082519e-06, "loss": 0.0095, "step": 2496 }, { "epoch": 17.84, "learning_rate": 3.039592324860896e-06, "loss": 0.018, "step": 2497 }, { "epoch": 17.84, "learning_rate": 3.019766225357512e-06, "loss": 0.0241, "step": 2498 }, { "epoch": 17.85, "learning_rate": 3.0000029830986933e-06, "loss": 0.0167, "step": 2499 }, { "epoch": 17.86, "learning_rate": 2.980302624526693e-06, "loss": 0.004, "step": 2500 }, { "epoch": 17.86, "learning_rate": 2.9606651759996275e-06, "loss": 0.0409, "step": 2501 }, { "epoch": 17.87, "learning_rate": 2.9410906637914348e-06, "loss": 0.0147, "step": 2502 }, { "epoch": 17.88, "learning_rate": 2.9215791140918437e-06, "loss": 0.0531, "step": 2503 }, { "epoch": 17.89, "learning_rate": 2.9021305530063658e-06, "loss": 0.0248, "step": 2504 }, { "epoch": 17.89, "learning_rate": 2.8827450065562088e-06, "loss": 0.0041, "step": 2505 }, { "epoch": 17.9, "learning_rate": 2.8634225006782865e-06, "loss": 0.005, "step": 2506 }, { "epoch": 17.91, "learning_rate": 2.8441630612251693e-06, "loss": 0.0043, "step": 2507 }, { "epoch": 17.91, "learning_rate": 2.8249667139650216e-06, "loss": 0.0168, "step": 2508 }, { "epoch": 17.92, "learning_rate": 2.8058334845816213e-06, "loss": 0.0126, "step": 2509 }, { "epoch": 17.93, "learning_rate": 2.786763398674297e-06, "loss": 0.0141, "step": 2510 }, { "epoch": 17.94, "learning_rate": 2.7677564817578772e-06, "loss": 0.0102, "step": 2511 }, { "epoch": 17.94, "learning_rate": 2.7488127592626866e-06, "loss": 0.0158, "step": 2512 }, { "epoch": 17.95, "learning_rate": 2.729932256534495e-06, "loss": 0.0082, "step": 2513 }, { "epoch": 17.96, "learning_rate": 2.711114998834485e-06, "loss": 0.0058, "step": 2514 }, { "epoch": 17.96, "learning_rate": 2.692361011339228e-06, "loss": 0.0042, "step": 2515 }, { "epoch": 17.97, "learning_rate": 2.6736703191406366e-06, "loss": 0.0016, "step": 2516 }, { "epoch": 17.98, "learning_rate": 2.6550429472459394e-06, "loss": 0.0236, "step": 2517 }, { "epoch": 17.99, "learning_rate": 2.636478920577634e-06, "loss": 0.0457, "step": 2518 }, { "epoch": 17.99, "learning_rate": 2.6179782639734918e-06, "loss": 0.0069, "step": 2519 }, { "epoch": 18.0, "learning_rate": 2.5995410021864787e-06, "loss": 0.0229, "step": 2520 }, { "epoch": 18.01, "learning_rate": 2.5811671598847343e-06, "loss": 0.0049, "step": 2521 }, { "epoch": 18.01, "learning_rate": 2.5628567616515663e-06, "loss": 0.0368, "step": 2522 }, { "epoch": 18.02, "learning_rate": 2.5446098319853894e-06, "loss": 0.0028, "step": 2523 }, { "epoch": 18.03, "learning_rate": 2.5264263952996915e-06, "loss": 0.0095, "step": 2524 }, { "epoch": 18.04, "learning_rate": 2.508306475923017e-06, "loss": 0.0013, "step": 2525 }, { "epoch": 18.04, "learning_rate": 2.4902500980989395e-06, "loss": 0.0253, "step": 2526 }, { "epoch": 18.05, "learning_rate": 2.4722572859859904e-06, "loss": 0.0176, "step": 2527 }, { "epoch": 18.06, "learning_rate": 2.4543280636576794e-06, "loss": 0.0042, "step": 2528 }, { "epoch": 18.06, "learning_rate": 2.436462455102423e-06, "loss": 0.0225, "step": 2529 }, { "epoch": 18.07, "learning_rate": 2.4186604842235282e-06, "loss": 0.0076, "step": 2530 }, { "epoch": 18.08, "learning_rate": 2.400922174839154e-06, "loss": 0.0437, "step": 2531 }, { "epoch": 18.09, "learning_rate": 2.3832475506822937e-06, "loss": 0.0506, "step": 2532 }, { "epoch": 18.09, "learning_rate": 2.365636635400714e-06, "loss": 0.0149, "step": 2533 }, { "epoch": 18.1, "learning_rate": 2.3480894525569562e-06, "loss": 0.0145, "step": 2534 }, { "epoch": 18.11, "learning_rate": 2.3306060256283014e-06, "loss": 0.0117, "step": 2535 }, { "epoch": 18.11, "learning_rate": 2.3131863780067043e-06, "loss": 0.0028, "step": 2536 }, { "epoch": 18.12, "learning_rate": 2.2958305329987996e-06, "loss": 0.0497, "step": 2537 }, { "epoch": 18.13, "learning_rate": 2.278538513825862e-06, "loss": 0.0127, "step": 2538 }, { "epoch": 18.14, "learning_rate": 2.261310343623757e-06, "loss": 0.0116, "step": 2539 }, { "epoch": 18.14, "learning_rate": 2.24414604544293e-06, "loss": 0.0089, "step": 2540 }, { "epoch": 18.15, "learning_rate": 2.2270456422483655e-06, "loss": 0.0123, "step": 2541 }, { "epoch": 18.16, "learning_rate": 2.2100091569195735e-06, "loss": 0.01, "step": 2542 }, { "epoch": 18.16, "learning_rate": 2.1930366122505263e-06, "loss": 0.0108, "step": 2543 }, { "epoch": 18.17, "learning_rate": 2.1761280309496646e-06, "loss": 0.0073, "step": 2544 }, { "epoch": 18.18, "learning_rate": 2.159283435639825e-06, "loss": 0.0018, "step": 2545 }, { "epoch": 18.19, "learning_rate": 2.1425028488582523e-06, "loss": 0.0017, "step": 2546 }, { "epoch": 18.19, "learning_rate": 2.1257862930565477e-06, "loss": 0.0697, "step": 2547 }, { "epoch": 18.2, "learning_rate": 2.1091337906006482e-06, "loss": 0.0321, "step": 2548 }, { "epoch": 18.21, "learning_rate": 2.092545363770776e-06, "loss": 0.0056, "step": 2549 }, { "epoch": 18.21, "learning_rate": 2.0760210347614385e-06, "loss": 0.0292, "step": 2550 }, { "epoch": 18.22, "learning_rate": 2.059560825681367e-06, "loss": 0.0358, "step": 2551 }, { "epoch": 18.23, "learning_rate": 2.043164758553523e-06, "loss": 0.0341, "step": 2552 }, { "epoch": 18.24, "learning_rate": 2.0268328553150307e-06, "loss": 0.0055, "step": 2553 }, { "epoch": 18.24, "learning_rate": 2.010565137817172e-06, "loss": 0.0219, "step": 2554 }, { "epoch": 18.25, "learning_rate": 1.994361627825364e-06, "loss": 0.0475, "step": 2555 }, { "epoch": 18.26, "learning_rate": 1.9782223470191042e-06, "loss": 0.0054, "step": 2556 }, { "epoch": 18.26, "learning_rate": 1.9621473169919526e-06, "loss": 0.0227, "step": 2557 }, { "epoch": 18.27, "learning_rate": 1.94613655925151e-06, "loss": 0.0072, "step": 2558 }, { "epoch": 18.28, "learning_rate": 1.930190095219386e-06, "loss": 0.0096, "step": 2559 }, { "epoch": 18.29, "learning_rate": 1.914307946231164e-06, "loss": 0.0211, "step": 2560 }, { "epoch": 18.29, "learning_rate": 1.8984901335363792e-06, "loss": 0.0057, "step": 2561 }, { "epoch": 18.3, "learning_rate": 1.8827366782984913e-06, "loss": 0.0012, "step": 2562 }, { "epoch": 18.31, "learning_rate": 1.8670476015948457e-06, "loss": 0.0059, "step": 2563 }, { "epoch": 18.31, "learning_rate": 1.8514229244166569e-06, "loss": 0.0204, "step": 2564 }, { "epoch": 18.32, "learning_rate": 1.835862667668975e-06, "loss": 0.0061, "step": 2565 }, { "epoch": 18.33, "learning_rate": 1.820366852170663e-06, "loss": 0.0059, "step": 2566 }, { "epoch": 18.34, "learning_rate": 1.8049354986543532e-06, "loss": 0.005, "step": 2567 }, { "epoch": 18.34, "learning_rate": 1.7895686277664469e-06, "loss": 0.0039, "step": 2568 }, { "epoch": 18.35, "learning_rate": 1.7742662600670646e-06, "loss": 0.0046, "step": 2569 }, { "epoch": 18.36, "learning_rate": 1.7590284160300063e-06, "loss": 0.0054, "step": 2570 }, { "epoch": 18.36, "learning_rate": 1.7438551160427696e-06, "loss": 0.0042, "step": 2571 }, { "epoch": 18.37, "learning_rate": 1.7287463804064875e-06, "loss": 0.0082, "step": 2572 }, { "epoch": 18.38, "learning_rate": 1.713702229335895e-06, "loss": 0.0047, "step": 2573 }, { "epoch": 18.39, "learning_rate": 1.6987226829593417e-06, "loss": 0.0274, "step": 2574 }, { "epoch": 18.39, "learning_rate": 1.683807761318712e-06, "loss": 0.024, "step": 2575 }, { "epoch": 18.4, "learning_rate": 1.6689574843694433e-06, "loss": 0.0122, "step": 2576 }, { "epoch": 18.41, "learning_rate": 1.6541718719804755e-06, "loss": 0.001, "step": 2577 }, { "epoch": 18.41, "learning_rate": 1.6394509439342343e-06, "loss": 0.0064, "step": 2578 }, { "epoch": 18.42, "learning_rate": 1.6247947199265923e-06, "loss": 0.0024, "step": 2579 }, { "epoch": 18.43, "learning_rate": 1.6102032195668637e-06, "loss": 0.0194, "step": 2580 }, { "epoch": 18.44, "learning_rate": 1.5956764623777655e-06, "loss": 0.0206, "step": 2581 }, { "epoch": 18.44, "learning_rate": 1.5812144677953665e-06, "loss": 0.0099, "step": 2582 }, { "epoch": 18.45, "learning_rate": 1.5668172551691173e-06, "loss": 0.0034, "step": 2583 }, { "epoch": 18.46, "learning_rate": 1.5524848437617756e-06, "loss": 0.0041, "step": 2584 }, { "epoch": 18.46, "learning_rate": 1.5382172527494076e-06, "loss": 0.0309, "step": 2585 }, { "epoch": 18.47, "learning_rate": 1.5240145012213436e-06, "loss": 0.003, "step": 2586 }, { "epoch": 18.48, "learning_rate": 1.5098766081801775e-06, "loss": 0.0278, "step": 2587 }, { "epoch": 18.49, "learning_rate": 1.4958035925417003e-06, "loss": 0.0112, "step": 2588 }, { "epoch": 18.49, "learning_rate": 1.4817954731349338e-06, "loss": 0.0029, "step": 2589 }, { "epoch": 18.5, "learning_rate": 1.4678522687020413e-06, "loss": 0.0013, "step": 2590 }, { "epoch": 18.51, "learning_rate": 1.4539739978983502e-06, "loss": 0.017, "step": 2591 }, { "epoch": 18.51, "learning_rate": 1.4401606792923017e-06, "loss": 0.0159, "step": 2592 }, { "epoch": 18.52, "learning_rate": 1.4264123313654398e-06, "loss": 0.0018, "step": 2593 }, { "epoch": 18.53, "learning_rate": 1.4127289725123782e-06, "loss": 0.0167, "step": 2594 }, { "epoch": 18.54, "learning_rate": 1.3991106210407778e-06, "loss": 0.0234, "step": 2595 }, { "epoch": 18.54, "learning_rate": 1.3855572951713248e-06, "loss": 0.0273, "step": 2596 }, { "epoch": 18.55, "learning_rate": 1.3720690130377024e-06, "loss": 0.0231, "step": 2597 }, { "epoch": 18.56, "learning_rate": 1.3586457926865692e-06, "loss": 0.0104, "step": 2598 }, { "epoch": 18.56, "learning_rate": 1.3452876520775371e-06, "loss": 0.0262, "step": 2599 }, { "epoch": 18.57, "learning_rate": 1.331994609083137e-06, "loss": 0.0109, "step": 2600 }, { "epoch": 18.58, "learning_rate": 1.3187666814888089e-06, "loss": 0.001, "step": 2601 }, { "epoch": 18.59, "learning_rate": 1.3056038869928733e-06, "loss": 0.0262, "step": 2602 }, { "epoch": 18.59, "learning_rate": 1.2925062432065039e-06, "loss": 0.0078, "step": 2603 }, { "epoch": 18.6, "learning_rate": 1.2794737676536994e-06, "loss": 0.0362, "step": 2604 }, { "epoch": 18.61, "learning_rate": 1.2665064777712732e-06, "loss": 0.0125, "step": 2605 }, { "epoch": 18.61, "learning_rate": 1.2536043909088191e-06, "loss": 0.0706, "step": 2606 }, { "epoch": 18.62, "learning_rate": 1.2407675243287009e-06, "loss": 0.0111, "step": 2607 }, { "epoch": 18.63, "learning_rate": 1.2279958952060134e-06, "loss": 0.0156, "step": 2608 }, { "epoch": 18.64, "learning_rate": 1.2152895206285708e-06, "loss": 0.002, "step": 2609 }, { "epoch": 18.64, "learning_rate": 1.2026484175968744e-06, "loss": 0.005, "step": 2610 }, { "epoch": 18.65, "learning_rate": 1.1900726030241005e-06, "loss": 0.0076, "step": 2611 }, { "epoch": 18.66, "learning_rate": 1.1775620937360676e-06, "loss": 0.0032, "step": 2612 }, { "epoch": 18.66, "learning_rate": 1.1651169064712309e-06, "loss": 0.0037, "step": 2613 }, { "epoch": 18.67, "learning_rate": 1.1527370578806318e-06, "loss": 0.003, "step": 2614 }, { "epoch": 18.68, "learning_rate": 1.14042256452791e-06, "loss": 0.0091, "step": 2615 }, { "epoch": 18.69, "learning_rate": 1.1281734428892409e-06, "loss": 0.0012, "step": 2616 }, { "epoch": 18.69, "learning_rate": 1.115989709353349e-06, "loss": 0.0208, "step": 2617 }, { "epoch": 18.7, "learning_rate": 1.1038713802214717e-06, "loss": 0.0069, "step": 2618 }, { "epoch": 18.71, "learning_rate": 1.09181847170734e-06, "loss": 0.0287, "step": 2619 }, { "epoch": 18.71, "learning_rate": 1.0798309999371536e-06, "loss": 0.0358, "step": 2620 }, { "epoch": 18.72, "learning_rate": 1.0679089809495614e-06, "loss": 0.0395, "step": 2621 }, { "epoch": 18.73, "learning_rate": 1.056052430695642e-06, "loss": 0.0044, "step": 2622 }, { "epoch": 18.74, "learning_rate": 1.0442613650388678e-06, "loss": 0.0119, "step": 2623 }, { "epoch": 18.74, "learning_rate": 1.0325357997551134e-06, "loss": 0.0124, "step": 2624 }, { "epoch": 18.75, "learning_rate": 1.0208757505326016e-06, "loss": 0.0085, "step": 2625 }, { "epoch": 18.76, "learning_rate": 1.0092812329719147e-06, "loss": 0.0069, "step": 2626 }, { "epoch": 18.76, "learning_rate": 9.97752262585938e-07, "loss": 0.0023, "step": 2627 }, { "epoch": 18.77, "learning_rate": 9.862888547998829e-07, "loss": 0.0041, "step": 2628 }, { "epoch": 18.78, "learning_rate": 9.748910249512079e-07, "loss": 0.0021, "step": 2629 }, { "epoch": 18.79, "learning_rate": 9.63558788289659e-07, "loss": 0.0008, "step": 2630 }, { "epoch": 18.79, "learning_rate": 9.522921599772028e-07, "loss": 0.0238, "step": 2631 }, { "epoch": 18.8, "learning_rate": 9.410911550880475e-07, "loss": 0.0018, "step": 2632 }, { "epoch": 18.81, "learning_rate": 9.29955788608583e-07, "loss": 0.0147, "step": 2633 }, { "epoch": 18.81, "learning_rate": 9.188860754373752e-07, "loss": 0.0067, "step": 2634 }, { "epoch": 18.82, "learning_rate": 9.078820303851654e-07, "loss": 0.0036, "step": 2635 }, { "epoch": 18.83, "learning_rate": 8.969436681748211e-07, "loss": 0.0068, "step": 2636 }, { "epoch": 18.84, "learning_rate": 8.860710034413355e-07, "loss": 0.0119, "step": 2637 }, { "epoch": 18.84, "learning_rate": 8.752640507317944e-07, "loss": 0.0274, "step": 2638 }, { "epoch": 18.85, "learning_rate": 8.64522824505376e-07, "loss": 0.01, "step": 2639 }, { "epoch": 18.86, "learning_rate": 8.53847339133318e-07, "loss": 0.0873, "step": 2640 }, { "epoch": 18.86, "learning_rate": 8.432376088988836e-07, "loss": 0.0039, "step": 2641 }, { "epoch": 18.87, "learning_rate": 8.326936479973735e-07, "loss": 0.0377, "step": 2642 }, { "epoch": 18.88, "learning_rate": 8.222154705360919e-07, "loss": 0.0041, "step": 2643 }, { "epoch": 18.89, "learning_rate": 8.118030905343244e-07, "loss": 0.0057, "step": 2644 }, { "epoch": 18.89, "learning_rate": 8.014565219233216e-07, "loss": 0.0162, "step": 2645 }, { "epoch": 18.9, "learning_rate": 7.911757785462881e-07, "loss": 0.0047, "step": 2646 }, { "epoch": 18.91, "learning_rate": 7.80960874158354e-07, "loss": 0.0039, "step": 2647 }, { "epoch": 18.91, "learning_rate": 7.708118224265537e-07, "loss": 0.0089, "step": 2648 }, { "epoch": 18.92, "learning_rate": 7.60728636929825e-07, "loss": 0.0113, "step": 2649 }, { "epoch": 18.93, "learning_rate": 7.507113311589764e-07, "loss": 0.003, "step": 2650 }, { "epoch": 18.94, "learning_rate": 7.407599185166647e-07, "loss": 0.0092, "step": 2651 }, { "epoch": 18.94, "learning_rate": 7.308744123174005e-07, "loss": 0.0014, "step": 2652 }, { "epoch": 18.95, "learning_rate": 7.210548257874928e-07, "loss": 0.0012, "step": 2653 }, { "epoch": 18.96, "learning_rate": 7.113011720650709e-07, "loss": 0.0029, "step": 2654 }, { "epoch": 18.96, "learning_rate": 7.01613464200046e-07, "loss": 0.0037, "step": 2655 }, { "epoch": 18.97, "learning_rate": 6.919917151540945e-07, "loss": 0.0203, "step": 2656 }, { "epoch": 18.98, "learning_rate": 6.824359378006407e-07, "loss": 0.0055, "step": 2657 }, { "epoch": 18.99, "learning_rate": 6.72946144924852e-07, "loss": 0.0061, "step": 2658 }, { "epoch": 18.99, "learning_rate": 6.635223492236053e-07, "loss": 0.0007, "step": 2659 }, { "epoch": 19.0, "learning_rate": 6.54164563305465e-07, "loss": 0.0173, "step": 2660 }, { "epoch": 19.01, "learning_rate": 6.448727996907045e-07, "loss": 0.0355, "step": 2661 }, { "epoch": 19.01, "learning_rate": 6.356470708112295e-07, "loss": 0.0027, "step": 2662 }, { "epoch": 19.02, "learning_rate": 6.264873890106271e-07, "loss": 0.0072, "step": 2663 }, { "epoch": 19.03, "learning_rate": 6.173937665440943e-07, "loss": 0.0026, "step": 2664 }, { "epoch": 19.04, "learning_rate": 6.083662155784598e-07, "loss": 0.0228, "step": 2665 }, { "epoch": 19.04, "learning_rate": 5.994047481921284e-07, "loss": 0.0168, "step": 2666 }, { "epoch": 19.05, "learning_rate": 5.905093763751091e-07, "loss": 0.0044, "step": 2667 }, { "epoch": 19.06, "learning_rate": 5.816801120289761e-07, "loss": 0.0182, "step": 2668 }, { "epoch": 19.06, "learning_rate": 5.729169669668522e-07, "loss": 0.002, "step": 2669 }, { "epoch": 19.07, "learning_rate": 5.642199529133918e-07, "loss": 0.0115, "step": 2670 }, { "epoch": 19.08, "learning_rate": 5.555890815047705e-07, "loss": 0.0296, "step": 2671 }, { "epoch": 19.09, "learning_rate": 5.470243642886729e-07, "loss": 0.0133, "step": 2672 }, { "epoch": 19.09, "learning_rate": 5.38525812724261e-07, "loss": 0.019, "step": 2673 }, { "epoch": 19.1, "learning_rate": 5.300934381821998e-07, "loss": 0.042, "step": 2674 }, { "epoch": 19.11, "learning_rate": 5.21727251944576e-07, "loss": 0.0489, "step": 2675 }, { "epoch": 19.11, "learning_rate": 5.13427265204941e-07, "loss": 0.025, "step": 2676 }, { "epoch": 19.12, "learning_rate": 5.051934890682619e-07, "loss": 0.0106, "step": 2677 }, { "epoch": 19.13, "learning_rate": 4.970259345509376e-07, "loss": 0.0041, "step": 2678 }, { "epoch": 19.14, "learning_rate": 4.88924612580749e-07, "loss": 0.0186, "step": 2679 }, { "epoch": 19.14, "learning_rate": 4.808895339968645e-07, "loss": 0.0244, "step": 2680 }, { "epoch": 19.15, "learning_rate": 4.7292070954983445e-07, "loss": 0.0259, "step": 2681 }, { "epoch": 19.16, "learning_rate": 4.6501814990154156e-07, "loss": 0.0184, "step": 2682 }, { "epoch": 19.16, "learning_rate": 4.57181865625228e-07, "loss": 0.0113, "step": 2683 }, { "epoch": 19.17, "learning_rate": 4.4941186720546255e-07, "loss": 0.0186, "step": 2684 }, { "epoch": 19.18, "learning_rate": 4.4170816503811274e-07, "loss": 0.0205, "step": 2685 }, { "epoch": 19.19, "learning_rate": 4.340707694303614e-07, "loss": 0.0026, "step": 2686 }, { "epoch": 19.19, "learning_rate": 4.264996906006624e-07, "loss": 0.0451, "step": 2687 }, { "epoch": 19.2, "learning_rate": 4.189949386787462e-07, "loss": 0.016, "step": 2688 }, { "epoch": 19.21, "learning_rate": 4.1155652370560296e-07, "loss": 0.0077, "step": 2689 }, { "epoch": 19.21, "learning_rate": 4.041844556334717e-07, "loss": 0.0177, "step": 2690 }, { "epoch": 19.22, "learning_rate": 3.968787443258182e-07, "loss": 0.0024, "step": 2691 }, { "epoch": 19.23, "learning_rate": 3.8963939955731775e-07, "loss": 0.0276, "step": 2692 }, { "epoch": 19.24, "learning_rate": 3.8246643101386146e-07, "loss": 0.0034, "step": 2693 }, { "epoch": 19.24, "learning_rate": 3.7535984829252804e-07, "loss": 0.008, "step": 2694 }, { "epoch": 19.25, "learning_rate": 3.683196609015782e-07, "loss": 0.0041, "step": 2695 }, { "epoch": 19.26, "learning_rate": 3.6134587826043285e-07, "loss": 0.0033, "step": 2696 }, { "epoch": 19.26, "learning_rate": 3.5443850969967275e-07, "loss": 0.0109, "step": 2697 }, { "epoch": 19.27, "learning_rate": 3.475975644610219e-07, "loss": 0.018, "step": 2698 }, { "epoch": 19.28, "learning_rate": 3.4082305169732544e-07, "loss": 0.0111, "step": 2699 }, { "epoch": 19.29, "learning_rate": 3.3411498047254965e-07, "loss": 0.0113, "step": 2700 }, { "epoch": 19.29, "learning_rate": 3.274733597617541e-07, "loss": 0.0086, "step": 2701 }, { "epoch": 19.3, "learning_rate": 3.208981984511195e-07, "loss": 0.0114, "step": 2702 }, { "epoch": 19.31, "learning_rate": 3.143895053378698e-07, "loss": 0.0022, "step": 2703 }, { "epoch": 19.31, "learning_rate": 3.0794728913033366e-07, "loss": 0.0079, "step": 2704 }, { "epoch": 19.32, "learning_rate": 3.0157155844786624e-07, "loss": 0.0101, "step": 2705 }, { "epoch": 19.33, "learning_rate": 2.9526232182088834e-07, "loss": 0.0119, "step": 2706 }, { "epoch": 19.34, "learning_rate": 2.890195876908475e-07, "loss": 0.0088, "step": 2707 }, { "epoch": 19.34, "learning_rate": 2.82843364410218e-07, "loss": 0.038, "step": 2708 }, { "epoch": 19.35, "learning_rate": 2.767336602424786e-07, "loss": 0.003, "step": 2709 }, { "epoch": 19.36, "learning_rate": 2.7069048336211823e-07, "loss": 0.0031, "step": 2710 }, { "epoch": 19.36, "learning_rate": 2.64713841854608e-07, "loss": 0.0075, "step": 2711 }, { "epoch": 19.37, "learning_rate": 2.5880374371639594e-07, "loss": 0.0493, "step": 2712 }, { "epoch": 19.38, "learning_rate": 2.529601968549067e-07, "loss": 0.0238, "step": 2713 }, { "epoch": 19.39, "learning_rate": 2.471832090885251e-07, "loss": 0.0151, "step": 2714 }, { "epoch": 19.39, "learning_rate": 2.4147278814656836e-07, "loss": 0.0105, "step": 2715 }, { "epoch": 19.4, "learning_rate": 2.3582894166930268e-07, "loss": 0.0063, "step": 2716 }, { "epoch": 19.41, "learning_rate": 2.3025167720791552e-07, "loss": 0.0011, "step": 2717 }, { "epoch": 19.41, "learning_rate": 2.2474100222451555e-07, "loss": 0.0043, "step": 2718 }, { "epoch": 19.42, "learning_rate": 2.1929692409211057e-07, "loss": 0.011, "step": 2719 }, { "epoch": 19.43, "learning_rate": 2.1391945009461844e-07, "loss": 0.0145, "step": 2720 }, { "epoch": 19.44, "learning_rate": 2.086085874268229e-07, "loss": 0.0071, "step": 2721 }, { "epoch": 19.44, "learning_rate": 2.0336434319440655e-07, "loss": 0.0178, "step": 2722 }, { "epoch": 19.45, "learning_rate": 1.981867244139124e-07, "loss": 0.0131, "step": 2723 }, { "epoch": 19.46, "learning_rate": 1.9307573801273236e-07, "loss": 0.0165, "step": 2724 }, { "epoch": 19.46, "learning_rate": 1.8803139082911869e-07, "loss": 0.0113, "step": 2725 }, { "epoch": 19.47, "learning_rate": 1.8305368961215595e-07, "loss": 0.005, "step": 2726 }, { "epoch": 19.48, "learning_rate": 1.7814264102177235e-07, "loss": 0.0212, "step": 2727 }, { "epoch": 19.49, "learning_rate": 1.7329825162870073e-07, "loss": 0.0038, "step": 2728 }, { "epoch": 19.49, "learning_rate": 1.685205279145008e-07, "loss": 0.0058, "step": 2729 }, { "epoch": 19.5, "learning_rate": 1.6380947627153143e-07, "loss": 0.0151, "step": 2730 }, { "epoch": 19.51, "learning_rate": 1.5916510300294507e-07, "loss": 0.009, "step": 2731 }, { "epoch": 19.51, "learning_rate": 1.545874143226933e-07, "loss": 0.0063, "step": 2732 }, { "epoch": 19.52, "learning_rate": 1.500764163554935e-07, "loss": 0.021, "step": 2733 }, { "epoch": 19.53, "learning_rate": 1.456321151368345e-07, "loss": 0.0086, "step": 2734 }, { "epoch": 19.54, "learning_rate": 1.4125451661298194e-07, "loss": 0.0034, "step": 2735 }, { "epoch": 19.54, "learning_rate": 1.3694362664094518e-07, "loss": 0.0047, "step": 2736 }, { "epoch": 19.55, "learning_rate": 1.3269945098847713e-07, "loss": 0.0048, "step": 2737 }, { "epoch": 19.56, "learning_rate": 1.2852199533407993e-07, "loss": 0.009, "step": 2738 }, { "epoch": 19.56, "learning_rate": 1.244112652669882e-07, "loss": 0.0167, "step": 2739 }, { "epoch": 19.57, "learning_rate": 1.2036726628715245e-07, "loss": 0.0087, "step": 2740 }, { "epoch": 19.58, "learning_rate": 1.1639000380524456e-07, "loss": 0.0296, "step": 2741 }, { "epoch": 19.59, "learning_rate": 1.1247948314264678e-07, "loss": 0.0025, "step": 2742 }, { "epoch": 19.59, "learning_rate": 1.0863570953144608e-07, "loss": 0.0025, "step": 2743 }, { "epoch": 19.6, "learning_rate": 1.0485868811441757e-07, "loss": 0.0022, "step": 2744 }, { "epoch": 19.61, "learning_rate": 1.0114842394503555e-07, "loss": 0.0046, "step": 2745 }, { "epoch": 19.61, "learning_rate": 9.750492198744576e-08, "loss": 0.0022, "step": 2746 }, { "epoch": 19.62, "learning_rate": 9.39281871164821e-08, "loss": 0.0029, "step": 2747 }, { "epoch": 19.63, "learning_rate": 9.041822411763324e-08, "loss": 0.005, "step": 2748 }, { "epoch": 19.64, "learning_rate": 8.697503768706483e-08, "loss": 0.0031, "step": 2749 }, { "epoch": 19.64, "learning_rate": 8.359863243158073e-08, "loss": 0.0033, "step": 2750 }, { "epoch": 19.65, "learning_rate": 8.02890128686562e-08, "loss": 0.0184, "step": 2751 }, { "epoch": 19.66, "learning_rate": 7.704618342638802e-08, "loss": 0.0221, "step": 2752 }, { "epoch": 19.66, "learning_rate": 7.387014844353335e-08, "loss": 0.064, "step": 2753 }, { "epoch": 19.67, "learning_rate": 7.076091216946523e-08, "loss": 0.0473, "step": 2754 }, { "epoch": 19.68, "learning_rate": 6.771847876418935e-08, "loss": 0.002, "step": 2755 }, { "epoch": 19.69, "learning_rate": 6.474285229833843e-08, "loss": 0.0067, "step": 2756 }, { "epoch": 19.69, "learning_rate": 6.183403675314447e-08, "loss": 0.0041, "step": 2757 }, { "epoch": 19.7, "learning_rate": 5.899203602046655e-08, "loss": 0.0333, "step": 2758 }, { "epoch": 19.71, "learning_rate": 5.621685390275744e-08, "loss": 0.0129, "step": 2759 }, { "epoch": 19.71, "learning_rate": 5.350849411307479e-08, "loss": 0.0015, "step": 2760 }, { "epoch": 19.72, "learning_rate": 5.0866960275081085e-08, "loss": 0.0199, "step": 2761 }, { "epoch": 19.73, "learning_rate": 4.829225592300479e-08, "loss": 0.0013, "step": 2762 }, { "epoch": 19.74, "learning_rate": 4.578438450167921e-08, "loss": 0.0069, "step": 2763 }, { "epoch": 19.74, "learning_rate": 4.334334936652029e-08, "loss": 0.003, "step": 2764 }, { "epoch": 19.75, "learning_rate": 4.096915378349886e-08, "loss": 0.019, "step": 2765 }, { "epoch": 19.76, "learning_rate": 3.866180092918503e-08, "loss": 0.0086, "step": 2766 }, { "epoch": 19.76, "learning_rate": 3.642129389069826e-08, "loss": 0.01, "step": 2767 }, { "epoch": 19.77, "learning_rate": 3.424763566572398e-08, "loss": 0.0078, "step": 2768 }, { "epoch": 19.78, "learning_rate": 3.21408291625136e-08, "loss": 0.0086, "step": 2769 }, { "epoch": 19.79, "learning_rate": 3.010087719986787e-08, "loss": 0.0334, "step": 2770 }, { "epoch": 19.79, "learning_rate": 2.8127782507147983e-08, "loss": 0.0501, "step": 2771 }, { "epoch": 19.8, "learning_rate": 2.6221547724253337e-08, "loss": 0.0229, "step": 2772 }, { "epoch": 19.81, "learning_rate": 2.438217540163268e-08, "loss": 0.0045, "step": 2773 }, { "epoch": 19.81, "learning_rate": 2.2609668000278527e-08, "loss": 0.0102, "step": 2774 }, { "epoch": 19.82, "learning_rate": 2.090402789171608e-08, "loss": 0.0056, "step": 2775 }, { "epoch": 19.83, "learning_rate": 1.9265257358008772e-08, "loss": 0.007, "step": 2776 }, { "epoch": 19.84, "learning_rate": 1.7693358591747143e-08, "loss": 0.0082, "step": 2777 }, { "epoch": 19.84, "learning_rate": 1.6188333696059988e-08, "loss": 0.0085, "step": 2778 }, { "epoch": 19.85, "learning_rate": 1.4750184684597656e-08, "loss": 0.0106, "step": 2779 }, { "epoch": 19.86, "learning_rate": 1.3378913481526533e-08, "loss": 0.0087, "step": 2780 }, { "epoch": 19.86, "learning_rate": 1.2074521921545678e-08, "loss": 0.0077, "step": 2781 }, { "epoch": 19.87, "learning_rate": 1.0837011749864623e-08, "loss": 0.0006, "step": 2782 }, { "epoch": 19.88, "learning_rate": 9.666384622214475e-09, "loss": 0.003, "step": 2783 }, { "epoch": 19.89, "learning_rate": 8.562642104831265e-09, "loss": 0.0196, "step": 2784 }, { "epoch": 19.89, "learning_rate": 7.525785674467046e-09, "loss": 0.0206, "step": 2785 }, { "epoch": 19.9, "learning_rate": 6.5558167183898955e-09, "loss": 0.0112, "step": 2786 }, { "epoch": 19.91, "learning_rate": 5.652736534372816e-09, "loss": 0.0165, "step": 2787 }, { "epoch": 19.91, "learning_rate": 4.816546330688176e-09, "loss": 0.0209, "step": 2788 }, { "epoch": 19.92, "learning_rate": 4.047247226118822e-09, "loss": 0.0078, "step": 2789 }, { "epoch": 19.93, "learning_rate": 3.3448402499469677e-09, "loss": 0.0022, "step": 2790 }, { "epoch": 19.94, "learning_rate": 2.70932634195975e-09, "loss": 0.0152, "step": 2791 }, { "epoch": 19.94, "learning_rate": 2.140706352443678e-09, "loss": 0.0291, "step": 2792 }, { "epoch": 19.95, "learning_rate": 1.6389810421846286e-09, "loss": 0.0177, "step": 2793 }, { "epoch": 19.96, "learning_rate": 1.2041510824678525e-09, "loss": 0.0131, "step": 2794 }, { "epoch": 19.96, "learning_rate": 8.362170550779702e-10, "loss": 0.001, "step": 2795 }, { "epoch": 19.97, "learning_rate": 5.351794522823195e-10, "loss": 0.0021, "step": 2796 }, { "epoch": 19.98, "learning_rate": 3.0103867685871146e-10, "loss": 0.0068, "step": 2797 }, { "epoch": 19.99, "learning_rate": 1.3379504207877702e-10, "loss": 0.0096, "step": 2798 }, { "epoch": 19.99, "learning_rate": 3.3448771707966787e-11, "loss": 0.0003, "step": 2799 }, { "epoch": 20.0, "learning_rate": 0.0, "loss": 0.0156, "step": 2800 }, { "epoch": 20.0, "step": 2800, "total_flos": 1.2328419668451656e+18, "train_loss": 0.34252217101199284, "train_runtime": 5823.5284, "train_samples_per_second": 61.43, "train_steps_per_second": 0.481 } ], "logging_steps": 1.0, "max_steps": 2800, "num_input_tokens_seen": 0, "num_train_epochs": 20, "save_steps": 500, "total_flos": 1.2328419668451656e+18, "train_batch_size": 32, "trial_name": null, "trial_params": null }