{ "best_metric": null, "best_model_checkpoint": null, "epoch": 1.0, "eval_steps": 500, "global_step": 5293, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0, "learning_rate": 1.2578616352201258e-07, "loss": 2.9003, "step": 1 }, { "epoch": 0.0, "learning_rate": 2.5157232704402517e-07, "loss": 2.8605, "step": 2 }, { "epoch": 0.0, "learning_rate": 3.773584905660378e-07, "loss": 2.8961, "step": 3 }, { "epoch": 0.0, "learning_rate": 5.031446540880503e-07, "loss": 3.0197, "step": 4 }, { "epoch": 0.0, "learning_rate": 6.28930817610063e-07, "loss": 3.0022, "step": 5 }, { "epoch": 0.0, "learning_rate": 7.547169811320755e-07, "loss": 3.3174, "step": 6 }, { "epoch": 0.0, "learning_rate": 8.805031446540881e-07, "loss": 2.6333, "step": 7 }, { "epoch": 0.0, "learning_rate": 1.0062893081761007e-06, "loss": 2.8719, "step": 8 }, { "epoch": 0.0, "learning_rate": 1.1320754716981133e-06, "loss": 2.6209, "step": 9 }, { "epoch": 0.0, "learning_rate": 1.257861635220126e-06, "loss": 2.3449, "step": 10 }, { "epoch": 0.0, "learning_rate": 1.3836477987421384e-06, "loss": 2.2187, "step": 11 }, { "epoch": 0.0, "learning_rate": 1.509433962264151e-06, "loss": 2.199, "step": 12 }, { "epoch": 0.0, "learning_rate": 1.6352201257861635e-06, "loss": 1.3869, "step": 13 }, { "epoch": 0.0, "learning_rate": 1.7610062893081762e-06, "loss": 1.4613, "step": 14 }, { "epoch": 0.0, "learning_rate": 1.8867924528301889e-06, "loss": 1.3137, "step": 15 }, { "epoch": 0.0, "learning_rate": 2.0125786163522013e-06, "loss": 1.3615, "step": 16 }, { "epoch": 0.0, "learning_rate": 2.138364779874214e-06, "loss": 1.2526, "step": 17 }, { "epoch": 0.0, "learning_rate": 2.2641509433962266e-06, "loss": 1.1311, "step": 18 }, { "epoch": 0.0, "learning_rate": 2.389937106918239e-06, "loss": 0.8397, "step": 19 }, { "epoch": 0.0, "learning_rate": 2.515723270440252e-06, "loss": 0.9525, "step": 20 }, { "epoch": 0.0, "learning_rate": 2.6415094339622644e-06, "loss": 0.9427, "step": 21 }, { "epoch": 0.0, "learning_rate": 2.767295597484277e-06, "loss": 0.9563, "step": 22 }, { "epoch": 0.0, "learning_rate": 2.8930817610062893e-06, "loss": 0.9302, "step": 23 }, { "epoch": 0.0, "learning_rate": 3.018867924528302e-06, "loss": 0.915, "step": 24 }, { "epoch": 0.0, "learning_rate": 3.1446540880503146e-06, "loss": 0.8127, "step": 25 }, { "epoch": 0.0, "learning_rate": 3.270440251572327e-06, "loss": 0.7934, "step": 26 }, { "epoch": 0.01, "learning_rate": 3.3962264150943395e-06, "loss": 0.7398, "step": 27 }, { "epoch": 0.01, "learning_rate": 3.5220125786163524e-06, "loss": 0.9085, "step": 28 }, { "epoch": 0.01, "learning_rate": 3.647798742138365e-06, "loss": 0.8116, "step": 29 }, { "epoch": 0.01, "learning_rate": 3.7735849056603777e-06, "loss": 0.7594, "step": 30 }, { "epoch": 0.01, "learning_rate": 3.89937106918239e-06, "loss": 0.7183, "step": 31 }, { "epoch": 0.01, "learning_rate": 4.025157232704403e-06, "loss": 0.7135, "step": 32 }, { "epoch": 0.01, "learning_rate": 4.150943396226416e-06, "loss": 0.5953, "step": 33 }, { "epoch": 0.01, "learning_rate": 4.276729559748428e-06, "loss": 0.7524, "step": 34 }, { "epoch": 0.01, "learning_rate": 4.402515723270441e-06, "loss": 0.7809, "step": 35 }, { "epoch": 0.01, "learning_rate": 4.528301886792453e-06, "loss": 0.6991, "step": 36 }, { "epoch": 0.01, "learning_rate": 4.654088050314466e-06, "loss": 0.775, "step": 37 }, { "epoch": 0.01, "learning_rate": 4.779874213836478e-06, "loss": 0.7897, "step": 38 }, { "epoch": 0.01, "learning_rate": 4.905660377358491e-06, "loss": 0.7646, "step": 39 }, { "epoch": 0.01, "learning_rate": 5.031446540880504e-06, "loss": 0.6718, "step": 40 }, { "epoch": 0.01, "learning_rate": 5.157232704402516e-06, "loss": 0.8805, "step": 41 }, { "epoch": 0.01, "learning_rate": 5.283018867924529e-06, "loss": 0.6674, "step": 42 }, { "epoch": 0.01, "learning_rate": 5.408805031446541e-06, "loss": 0.7801, "step": 43 }, { "epoch": 0.01, "learning_rate": 5.534591194968554e-06, "loss": 0.7097, "step": 44 }, { "epoch": 0.01, "learning_rate": 5.660377358490566e-06, "loss": 0.7525, "step": 45 }, { "epoch": 0.01, "learning_rate": 5.786163522012579e-06, "loss": 0.6345, "step": 46 }, { "epoch": 0.01, "learning_rate": 5.911949685534591e-06, "loss": 0.6604, "step": 47 }, { "epoch": 0.01, "learning_rate": 6.037735849056604e-06, "loss": 0.8861, "step": 48 }, { "epoch": 0.01, "learning_rate": 6.163522012578617e-06, "loss": 0.7344, "step": 49 }, { "epoch": 0.01, "learning_rate": 6.289308176100629e-06, "loss": 0.7242, "step": 50 }, { "epoch": 0.01, "learning_rate": 6.415094339622642e-06, "loss": 0.6609, "step": 51 }, { "epoch": 0.01, "learning_rate": 6.540880503144654e-06, "loss": 0.6181, "step": 52 }, { "epoch": 0.01, "learning_rate": 6.666666666666667e-06, "loss": 0.5307, "step": 53 }, { "epoch": 0.01, "learning_rate": 6.792452830188679e-06, "loss": 0.6782, "step": 54 }, { "epoch": 0.01, "learning_rate": 6.9182389937106915e-06, "loss": 0.6726, "step": 55 }, { "epoch": 0.01, "learning_rate": 7.044025157232705e-06, "loss": 0.7186, "step": 56 }, { "epoch": 0.01, "learning_rate": 7.169811320754717e-06, "loss": 0.7728, "step": 57 }, { "epoch": 0.01, "learning_rate": 7.29559748427673e-06, "loss": 0.6714, "step": 58 }, { "epoch": 0.01, "learning_rate": 7.421383647798742e-06, "loss": 0.59, "step": 59 }, { "epoch": 0.01, "learning_rate": 7.5471698113207555e-06, "loss": 0.6865, "step": 60 }, { "epoch": 0.01, "learning_rate": 7.672955974842768e-06, "loss": 0.726, "step": 61 }, { "epoch": 0.01, "learning_rate": 7.79874213836478e-06, "loss": 0.541, "step": 62 }, { "epoch": 0.01, "learning_rate": 7.924528301886793e-06, "loss": 0.614, "step": 63 }, { "epoch": 0.01, "learning_rate": 8.050314465408805e-06, "loss": 0.7961, "step": 64 }, { "epoch": 0.01, "learning_rate": 8.17610062893082e-06, "loss": 0.6302, "step": 65 }, { "epoch": 0.01, "learning_rate": 8.301886792452832e-06, "loss": 0.6586, "step": 66 }, { "epoch": 0.01, "learning_rate": 8.427672955974844e-06, "loss": 0.6428, "step": 67 }, { "epoch": 0.01, "learning_rate": 8.553459119496857e-06, "loss": 0.5777, "step": 68 }, { "epoch": 0.01, "learning_rate": 8.67924528301887e-06, "loss": 0.8179, "step": 69 }, { "epoch": 0.01, "learning_rate": 8.805031446540882e-06, "loss": 0.5903, "step": 70 }, { "epoch": 0.01, "learning_rate": 8.930817610062894e-06, "loss": 0.5953, "step": 71 }, { "epoch": 0.01, "learning_rate": 9.056603773584907e-06, "loss": 0.6665, "step": 72 }, { "epoch": 0.01, "learning_rate": 9.182389937106919e-06, "loss": 0.7219, "step": 73 }, { "epoch": 0.01, "learning_rate": 9.308176100628931e-06, "loss": 0.6492, "step": 74 }, { "epoch": 0.01, "learning_rate": 9.433962264150944e-06, "loss": 0.6834, "step": 75 }, { "epoch": 0.01, "learning_rate": 9.559748427672956e-06, "loss": 0.5328, "step": 76 }, { "epoch": 0.01, "learning_rate": 9.685534591194969e-06, "loss": 0.6929, "step": 77 }, { "epoch": 0.01, "learning_rate": 9.811320754716981e-06, "loss": 0.578, "step": 78 }, { "epoch": 0.01, "learning_rate": 9.937106918238994e-06, "loss": 0.5392, "step": 79 }, { "epoch": 0.02, "learning_rate": 1.0062893081761008e-05, "loss": 0.6924, "step": 80 }, { "epoch": 0.02, "learning_rate": 1.018867924528302e-05, "loss": 0.6315, "step": 81 }, { "epoch": 0.02, "learning_rate": 1.0314465408805033e-05, "loss": 0.5293, "step": 82 }, { "epoch": 0.02, "learning_rate": 1.0440251572327045e-05, "loss": 0.6656, "step": 83 }, { "epoch": 0.02, "learning_rate": 1.0566037735849058e-05, "loss": 0.813, "step": 84 }, { "epoch": 0.02, "learning_rate": 1.069182389937107e-05, "loss": 0.7101, "step": 85 }, { "epoch": 0.02, "learning_rate": 1.0817610062893083e-05, "loss": 0.6531, "step": 86 }, { "epoch": 0.02, "learning_rate": 1.0943396226415095e-05, "loss": 0.5837, "step": 87 }, { "epoch": 0.02, "learning_rate": 1.1069182389937107e-05, "loss": 0.6335, "step": 88 }, { "epoch": 0.02, "learning_rate": 1.119496855345912e-05, "loss": 0.6192, "step": 89 }, { "epoch": 0.02, "learning_rate": 1.1320754716981132e-05, "loss": 0.5517, "step": 90 }, { "epoch": 0.02, "learning_rate": 1.1446540880503145e-05, "loss": 0.6116, "step": 91 }, { "epoch": 0.02, "learning_rate": 1.1572327044025157e-05, "loss": 0.5987, "step": 92 }, { "epoch": 0.02, "learning_rate": 1.169811320754717e-05, "loss": 0.6827, "step": 93 }, { "epoch": 0.02, "learning_rate": 1.1823899371069182e-05, "loss": 0.5083, "step": 94 }, { "epoch": 0.02, "learning_rate": 1.1949685534591196e-05, "loss": 0.6555, "step": 95 }, { "epoch": 0.02, "learning_rate": 1.2075471698113209e-05, "loss": 0.614, "step": 96 }, { "epoch": 0.02, "learning_rate": 1.2201257861635221e-05, "loss": 0.7708, "step": 97 }, { "epoch": 0.02, "learning_rate": 1.2327044025157234e-05, "loss": 0.7318, "step": 98 }, { "epoch": 0.02, "learning_rate": 1.2452830188679246e-05, "loss": 0.623, "step": 99 }, { "epoch": 0.02, "learning_rate": 1.2578616352201259e-05, "loss": 0.6502, "step": 100 }, { "epoch": 0.02, "learning_rate": 1.2704402515723271e-05, "loss": 0.5998, "step": 101 }, { "epoch": 0.02, "learning_rate": 1.2830188679245283e-05, "loss": 0.5604, "step": 102 }, { "epoch": 0.02, "learning_rate": 1.2955974842767296e-05, "loss": 0.7929, "step": 103 }, { "epoch": 0.02, "learning_rate": 1.3081761006289308e-05, "loss": 0.7071, "step": 104 }, { "epoch": 0.02, "learning_rate": 1.320754716981132e-05, "loss": 0.7683, "step": 105 }, { "epoch": 0.02, "learning_rate": 1.3333333333333333e-05, "loss": 0.6646, "step": 106 }, { "epoch": 0.02, "learning_rate": 1.3459119496855346e-05, "loss": 0.6703, "step": 107 }, { "epoch": 0.02, "learning_rate": 1.3584905660377358e-05, "loss": 0.5489, "step": 108 }, { "epoch": 0.02, "learning_rate": 1.371069182389937e-05, "loss": 0.8126, "step": 109 }, { "epoch": 0.02, "learning_rate": 1.3836477987421383e-05, "loss": 0.8022, "step": 110 }, { "epoch": 0.02, "learning_rate": 1.3962264150943397e-05, "loss": 0.7625, "step": 111 }, { "epoch": 0.02, "learning_rate": 1.408805031446541e-05, "loss": 0.8567, "step": 112 }, { "epoch": 0.02, "learning_rate": 1.4213836477987422e-05, "loss": 0.5958, "step": 113 }, { "epoch": 0.02, "learning_rate": 1.4339622641509435e-05, "loss": 0.483, "step": 114 }, { "epoch": 0.02, "learning_rate": 1.4465408805031447e-05, "loss": 0.6908, "step": 115 }, { "epoch": 0.02, "learning_rate": 1.459119496855346e-05, "loss": 0.7446, "step": 116 }, { "epoch": 0.02, "learning_rate": 1.4716981132075472e-05, "loss": 0.8195, "step": 117 }, { "epoch": 0.02, "learning_rate": 1.4842767295597484e-05, "loss": 0.7398, "step": 118 }, { "epoch": 0.02, "learning_rate": 1.4968553459119497e-05, "loss": 0.8607, "step": 119 }, { "epoch": 0.02, "learning_rate": 1.5094339622641511e-05, "loss": 0.6992, "step": 120 }, { "epoch": 0.02, "learning_rate": 1.5220125786163525e-05, "loss": 0.6016, "step": 121 }, { "epoch": 0.02, "learning_rate": 1.5345911949685536e-05, "loss": 0.6876, "step": 122 }, { "epoch": 0.02, "learning_rate": 1.547169811320755e-05, "loss": 0.8756, "step": 123 }, { "epoch": 0.02, "learning_rate": 1.559748427672956e-05, "loss": 0.6193, "step": 124 }, { "epoch": 0.02, "learning_rate": 1.5723270440251575e-05, "loss": 0.6255, "step": 125 }, { "epoch": 0.02, "learning_rate": 1.5849056603773586e-05, "loss": 0.5405, "step": 126 }, { "epoch": 0.02, "learning_rate": 1.59748427672956e-05, "loss": 0.8002, "step": 127 }, { "epoch": 0.02, "learning_rate": 1.610062893081761e-05, "loss": 0.5789, "step": 128 }, { "epoch": 0.02, "learning_rate": 1.6226415094339625e-05, "loss": 0.7674, "step": 129 }, { "epoch": 0.02, "learning_rate": 1.635220125786164e-05, "loss": 0.7165, "step": 130 }, { "epoch": 0.02, "learning_rate": 1.647798742138365e-05, "loss": 0.6537, "step": 131 }, { "epoch": 0.02, "learning_rate": 1.6603773584905664e-05, "loss": 0.6347, "step": 132 }, { "epoch": 0.03, "learning_rate": 1.6729559748427675e-05, "loss": 0.6772, "step": 133 }, { "epoch": 0.03, "learning_rate": 1.685534591194969e-05, "loss": 0.7095, "step": 134 }, { "epoch": 0.03, "learning_rate": 1.69811320754717e-05, "loss": 0.6785, "step": 135 }, { "epoch": 0.03, "learning_rate": 1.7106918238993714e-05, "loss": 0.6601, "step": 136 }, { "epoch": 0.03, "learning_rate": 1.7232704402515724e-05, "loss": 0.7688, "step": 137 }, { "epoch": 0.03, "learning_rate": 1.735849056603774e-05, "loss": 0.6459, "step": 138 }, { "epoch": 0.03, "learning_rate": 1.748427672955975e-05, "loss": 0.5228, "step": 139 }, { "epoch": 0.03, "learning_rate": 1.7610062893081763e-05, "loss": 0.5642, "step": 140 }, { "epoch": 0.03, "learning_rate": 1.7735849056603774e-05, "loss": 0.8328, "step": 141 }, { "epoch": 0.03, "learning_rate": 1.7861635220125788e-05, "loss": 0.5998, "step": 142 }, { "epoch": 0.03, "learning_rate": 1.79874213836478e-05, "loss": 0.8785, "step": 143 }, { "epoch": 0.03, "learning_rate": 1.8113207547169813e-05, "loss": 0.6707, "step": 144 }, { "epoch": 0.03, "learning_rate": 1.8238993710691827e-05, "loss": 0.6566, "step": 145 }, { "epoch": 0.03, "learning_rate": 1.8364779874213838e-05, "loss": 0.6448, "step": 146 }, { "epoch": 0.03, "learning_rate": 1.8490566037735852e-05, "loss": 0.5819, "step": 147 }, { "epoch": 0.03, "learning_rate": 1.8616352201257863e-05, "loss": 0.6793, "step": 148 }, { "epoch": 0.03, "learning_rate": 1.8742138364779877e-05, "loss": 0.7071, "step": 149 }, { "epoch": 0.03, "learning_rate": 1.8867924528301888e-05, "loss": 0.7584, "step": 150 }, { "epoch": 0.03, "learning_rate": 1.8993710691823902e-05, "loss": 0.8256, "step": 151 }, { "epoch": 0.03, "learning_rate": 1.9119496855345913e-05, "loss": 0.6851, "step": 152 }, { "epoch": 0.03, "learning_rate": 1.9245283018867927e-05, "loss": 0.667, "step": 153 }, { "epoch": 0.03, "learning_rate": 1.9371069182389938e-05, "loss": 0.6073, "step": 154 }, { "epoch": 0.03, "learning_rate": 1.9496855345911952e-05, "loss": 0.7333, "step": 155 }, { "epoch": 0.03, "learning_rate": 1.9622641509433963e-05, "loss": 0.6845, "step": 156 }, { "epoch": 0.03, "learning_rate": 1.9748427672955977e-05, "loss": 0.5911, "step": 157 }, { "epoch": 0.03, "learning_rate": 1.9874213836477987e-05, "loss": 0.6814, "step": 158 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.5802, "step": 159 }, { "epoch": 0.03, "learning_rate": 1.9999998127775143e-05, "loss": 0.5972, "step": 160 }, { "epoch": 0.03, "learning_rate": 1.9999992511101266e-05, "loss": 0.7765, "step": 161 }, { "epoch": 0.03, "learning_rate": 1.9999983149980473e-05, "loss": 0.6315, "step": 162 }, { "epoch": 0.03, "learning_rate": 1.9999970044416273e-05, "loss": 0.5626, "step": 163 }, { "epoch": 0.03, "learning_rate": 1.999995319441357e-05, "loss": 0.7166, "step": 164 }, { "epoch": 0.03, "learning_rate": 1.9999932599978674e-05, "loss": 0.6474, "step": 165 }, { "epoch": 0.03, "learning_rate": 1.9999908261119295e-05, "loss": 0.7463, "step": 166 }, { "epoch": 0.03, "learning_rate": 1.9999880177844552e-05, "loss": 0.7002, "step": 167 }, { "epoch": 0.03, "learning_rate": 1.9999848350164958e-05, "loss": 0.7019, "step": 168 }, { "epoch": 0.03, "learning_rate": 1.9999812778092425e-05, "loss": 0.5973, "step": 169 }, { "epoch": 0.03, "learning_rate": 1.9999773461640282e-05, "loss": 0.6132, "step": 170 }, { "epoch": 0.03, "learning_rate": 1.9999730400823244e-05, "loss": 0.7176, "step": 171 }, { "epoch": 0.03, "learning_rate": 1.9999683595657436e-05, "loss": 0.6751, "step": 172 }, { "epoch": 0.03, "learning_rate": 1.9999633046160386e-05, "loss": 0.7415, "step": 173 }, { "epoch": 0.03, "learning_rate": 1.9999578752351022e-05, "loss": 0.6113, "step": 174 }, { "epoch": 0.03, "learning_rate": 1.9999520714249672e-05, "loss": 0.66, "step": 175 }, { "epoch": 0.03, "learning_rate": 1.999945893187807e-05, "loss": 0.5901, "step": 176 }, { "epoch": 0.03, "learning_rate": 1.9999393405259354e-05, "loss": 0.6363, "step": 177 }, { "epoch": 0.03, "learning_rate": 1.9999324134418053e-05, "loss": 0.6355, "step": 178 }, { "epoch": 0.03, "learning_rate": 1.9999251119380105e-05, "loss": 0.6915, "step": 179 }, { "epoch": 0.03, "learning_rate": 1.9999174360172855e-05, "loss": 0.6234, "step": 180 }, { "epoch": 0.03, "learning_rate": 1.999909385682504e-05, "loss": 0.6215, "step": 181 }, { "epoch": 0.03, "learning_rate": 1.999900960936681e-05, "loss": 0.8868, "step": 182 }, { "epoch": 0.03, "learning_rate": 1.9998921617829708e-05, "loss": 0.6218, "step": 183 }, { "epoch": 0.03, "learning_rate": 1.9998829882246684e-05, "loss": 0.5871, "step": 184 }, { "epoch": 0.03, "learning_rate": 1.999873440265208e-05, "loss": 0.7843, "step": 185 }, { "epoch": 0.04, "learning_rate": 1.9998635179081658e-05, "loss": 0.7821, "step": 186 }, { "epoch": 0.04, "learning_rate": 1.9998532211572566e-05, "loss": 0.5964, "step": 187 }, { "epoch": 0.04, "learning_rate": 1.9998425500163363e-05, "loss": 0.5649, "step": 188 }, { "epoch": 0.04, "learning_rate": 1.9998315044894006e-05, "loss": 0.6584, "step": 189 }, { "epoch": 0.04, "learning_rate": 1.999820084580585e-05, "loss": 0.8553, "step": 190 }, { "epoch": 0.04, "learning_rate": 1.999808290294166e-05, "loss": 0.6535, "step": 191 }, { "epoch": 0.04, "learning_rate": 1.99979612163456e-05, "loss": 0.6756, "step": 192 }, { "epoch": 0.04, "learning_rate": 1.999783578606323e-05, "loss": 0.7919, "step": 193 }, { "epoch": 0.04, "learning_rate": 1.9997706612141522e-05, "loss": 0.7333, "step": 194 }, { "epoch": 0.04, "learning_rate": 1.9997573694628846e-05, "loss": 0.5511, "step": 195 }, { "epoch": 0.04, "learning_rate": 1.9997437033574967e-05, "loss": 0.6066, "step": 196 }, { "epoch": 0.04, "learning_rate": 1.999729662903106e-05, "loss": 0.7922, "step": 197 }, { "epoch": 0.04, "learning_rate": 1.9997152481049697e-05, "loss": 0.6688, "step": 198 }, { "epoch": 0.04, "learning_rate": 1.9997004589684852e-05, "loss": 0.7331, "step": 199 }, { "epoch": 0.04, "learning_rate": 1.9996852954991905e-05, "loss": 0.6669, "step": 200 }, { "epoch": 0.04, "learning_rate": 1.9996697577027636e-05, "loss": 0.6727, "step": 201 }, { "epoch": 0.04, "learning_rate": 1.9996538455850226e-05, "loss": 0.7987, "step": 202 }, { "epoch": 0.04, "learning_rate": 1.9996375591519254e-05, "loss": 0.7364, "step": 203 }, { "epoch": 0.04, "learning_rate": 1.99962089840957e-05, "loss": 0.641, "step": 204 }, { "epoch": 0.04, "learning_rate": 1.999603863364196e-05, "loss": 0.5363, "step": 205 }, { "epoch": 0.04, "learning_rate": 1.9995864540221816e-05, "loss": 0.7245, "step": 206 }, { "epoch": 0.04, "learning_rate": 1.999568670390045e-05, "loss": 0.7697, "step": 207 }, { "epoch": 0.04, "learning_rate": 1.999550512474446e-05, "loss": 0.6599, "step": 208 }, { "epoch": 0.04, "learning_rate": 1.9995319802821832e-05, "loss": 0.6034, "step": 209 }, { "epoch": 0.04, "learning_rate": 1.9995130738201966e-05, "loss": 0.6008, "step": 210 }, { "epoch": 0.04, "learning_rate": 1.9994937930955652e-05, "loss": 0.6824, "step": 211 }, { "epoch": 0.04, "learning_rate": 1.9994741381155084e-05, "loss": 0.6103, "step": 212 }, { "epoch": 0.04, "learning_rate": 1.9994541088873856e-05, "loss": 0.631, "step": 213 }, { "epoch": 0.04, "learning_rate": 1.9994337054186975e-05, "loss": 0.6383, "step": 214 }, { "epoch": 0.04, "learning_rate": 1.999412927717084e-05, "loss": 0.51, "step": 215 }, { "epoch": 0.04, "learning_rate": 1.9993917757903243e-05, "loss": 0.6301, "step": 216 }, { "epoch": 0.04, "learning_rate": 1.9993702496463395e-05, "loss": 0.643, "step": 217 }, { "epoch": 0.04, "learning_rate": 1.9993483492931895e-05, "loss": 0.8141, "step": 218 }, { "epoch": 0.04, "learning_rate": 1.9993260747390754e-05, "loss": 0.7312, "step": 219 }, { "epoch": 0.04, "learning_rate": 1.999303425992337e-05, "loss": 0.5865, "step": 220 }, { "epoch": 0.04, "learning_rate": 1.9992804030614553e-05, "loss": 0.5673, "step": 221 }, { "epoch": 0.04, "learning_rate": 1.9992570059550513e-05, "loss": 0.6901, "step": 222 }, { "epoch": 0.04, "learning_rate": 1.999233234681886e-05, "loss": 0.811, "step": 223 }, { "epoch": 0.04, "learning_rate": 1.99920908925086e-05, "loss": 0.5411, "step": 224 }, { "epoch": 0.04, "learning_rate": 1.9991845696710146e-05, "loss": 0.5754, "step": 225 }, { "epoch": 0.04, "learning_rate": 1.9991596759515314e-05, "loss": 0.5265, "step": 226 }, { "epoch": 0.04, "learning_rate": 1.9991344081017312e-05, "loss": 0.7087, "step": 227 }, { "epoch": 0.04, "learning_rate": 1.999108766131076e-05, "loss": 0.7973, "step": 228 }, { "epoch": 0.04, "learning_rate": 1.9990827500491666e-05, "loss": 0.5511, "step": 229 }, { "epoch": 0.04, "learning_rate": 1.999056359865745e-05, "loss": 0.6778, "step": 230 }, { "epoch": 0.04, "learning_rate": 1.9990295955906927e-05, "loss": 0.5695, "step": 231 }, { "epoch": 0.04, "learning_rate": 1.9990024572340317e-05, "loss": 0.619, "step": 232 }, { "epoch": 0.04, "learning_rate": 1.998974944805924e-05, "loss": 0.6746, "step": 233 }, { "epoch": 0.04, "learning_rate": 1.998947058316671e-05, "loss": 0.6085, "step": 234 }, { "epoch": 0.04, "learning_rate": 1.998918797776715e-05, "loss": 0.732, "step": 235 }, { "epoch": 0.04, "learning_rate": 1.9988901631966373e-05, "loss": 0.6765, "step": 236 }, { "epoch": 0.04, "learning_rate": 1.9988611545871606e-05, "loss": 0.6403, "step": 237 }, { "epoch": 0.04, "learning_rate": 1.9988317719591473e-05, "loss": 0.5689, "step": 238 }, { "epoch": 0.05, "learning_rate": 1.9988020153235996e-05, "loss": 0.7555, "step": 239 }, { "epoch": 0.05, "learning_rate": 1.998771884691659e-05, "loss": 0.557, "step": 240 }, { "epoch": 0.05, "learning_rate": 1.998741380074608e-05, "loss": 0.702, "step": 241 }, { "epoch": 0.05, "learning_rate": 1.998710501483869e-05, "loss": 0.6374, "step": 242 }, { "epoch": 0.05, "learning_rate": 1.9986792489310044e-05, "loss": 0.6665, "step": 243 }, { "epoch": 0.05, "learning_rate": 1.9986476224277167e-05, "loss": 0.6836, "step": 244 }, { "epoch": 0.05, "learning_rate": 1.998615621985848e-05, "loss": 0.706, "step": 245 }, { "epoch": 0.05, "learning_rate": 1.9985832476173807e-05, "loss": 0.5046, "step": 246 }, { "epoch": 0.05, "learning_rate": 1.9985504993344375e-05, "loss": 0.6341, "step": 247 }, { "epoch": 0.05, "learning_rate": 1.9985173771492807e-05, "loss": 0.6224, "step": 248 }, { "epoch": 0.05, "learning_rate": 1.9984838810743124e-05, "loss": 0.6806, "step": 249 }, { "epoch": 0.05, "learning_rate": 1.9984500111220757e-05, "loss": 0.5569, "step": 250 }, { "epoch": 0.05, "learning_rate": 1.9984157673052525e-05, "loss": 0.7444, "step": 251 }, { "epoch": 0.05, "learning_rate": 1.9983811496366654e-05, "loss": 0.8125, "step": 252 }, { "epoch": 0.05, "learning_rate": 1.9983461581292768e-05, "loss": 0.6381, "step": 253 }, { "epoch": 0.05, "learning_rate": 1.9983107927961888e-05, "loss": 0.6368, "step": 254 }, { "epoch": 0.05, "learning_rate": 1.9982750536506445e-05, "loss": 0.7953, "step": 255 }, { "epoch": 0.05, "learning_rate": 1.998238940706026e-05, "loss": 0.6563, "step": 256 }, { "epoch": 0.05, "learning_rate": 1.9982024539758547e-05, "loss": 0.7502, "step": 257 }, { "epoch": 0.05, "learning_rate": 1.998165593473794e-05, "loss": 0.8238, "step": 258 }, { "epoch": 0.05, "learning_rate": 1.998128359213646e-05, "loss": 0.6128, "step": 259 }, { "epoch": 0.05, "learning_rate": 1.998090751209352e-05, "loss": 0.6802, "step": 260 }, { "epoch": 0.05, "learning_rate": 1.9980527694749952e-05, "loss": 0.8623, "step": 261 }, { "epoch": 0.05, "learning_rate": 1.998014414024797e-05, "loss": 0.679, "step": 262 }, { "epoch": 0.05, "learning_rate": 1.9979756848731195e-05, "loss": 0.7058, "step": 263 }, { "epoch": 0.05, "learning_rate": 1.9979365820344648e-05, "loss": 0.6038, "step": 264 }, { "epoch": 0.05, "learning_rate": 1.997897105523475e-05, "loss": 0.6477, "step": 265 }, { "epoch": 0.05, "learning_rate": 1.9978572553549315e-05, "loss": 0.5313, "step": 266 }, { "epoch": 0.05, "learning_rate": 1.997817031543756e-05, "loss": 0.5533, "step": 267 }, { "epoch": 0.05, "learning_rate": 1.99777643410501e-05, "loss": 0.6897, "step": 268 }, { "epoch": 0.05, "learning_rate": 1.9977354630538953e-05, "loss": 0.5639, "step": 269 }, { "epoch": 0.05, "learning_rate": 1.9976941184057536e-05, "loss": 0.4884, "step": 270 }, { "epoch": 0.05, "learning_rate": 1.9976524001760653e-05, "loss": 0.6924, "step": 271 }, { "epoch": 0.05, "learning_rate": 1.9976103083804523e-05, "loss": 0.6267, "step": 272 }, { "epoch": 0.05, "learning_rate": 1.9975678430346753e-05, "loss": 0.6701, "step": 273 }, { "epoch": 0.05, "learning_rate": 1.9975250041546354e-05, "loss": 0.5491, "step": 274 }, { "epoch": 0.05, "learning_rate": 1.9974817917563735e-05, "loss": 0.6715, "step": 275 }, { "epoch": 0.05, "learning_rate": 1.99743820585607e-05, "loss": 0.6484, "step": 276 }, { "epoch": 0.05, "learning_rate": 1.9973942464700456e-05, "loss": 0.7303, "step": 277 }, { "epoch": 0.05, "learning_rate": 1.997349913614761e-05, "loss": 0.5503, "step": 278 }, { "epoch": 0.05, "learning_rate": 1.9973052073068154e-05, "loss": 0.8026, "step": 279 }, { "epoch": 0.05, "learning_rate": 1.9972601275629497e-05, "loss": 0.675, "step": 280 }, { "epoch": 0.05, "learning_rate": 1.9972146744000436e-05, "loss": 0.6253, "step": 281 }, { "epoch": 0.05, "learning_rate": 1.997168847835117e-05, "loss": 0.6511, "step": 282 }, { "epoch": 0.05, "learning_rate": 1.997122647885329e-05, "loss": 0.5213, "step": 283 }, { "epoch": 0.05, "learning_rate": 1.9970760745679793e-05, "loss": 0.6078, "step": 284 }, { "epoch": 0.05, "learning_rate": 1.9970291279005066e-05, "loss": 0.764, "step": 285 }, { "epoch": 0.05, "learning_rate": 1.9969818079004907e-05, "loss": 0.5073, "step": 286 }, { "epoch": 0.05, "learning_rate": 1.9969341145856493e-05, "loss": 0.6617, "step": 287 }, { "epoch": 0.05, "learning_rate": 1.9968860479738416e-05, "loss": 0.5661, "step": 288 }, { "epoch": 0.05, "learning_rate": 1.9968376080830654e-05, "loss": 0.6483, "step": 289 }, { "epoch": 0.05, "learning_rate": 1.9967887949314595e-05, "loss": 0.6387, "step": 290 }, { "epoch": 0.05, "learning_rate": 1.996739608537301e-05, "loss": 0.5802, "step": 291 }, { "epoch": 0.06, "learning_rate": 1.9966900489190082e-05, "loss": 0.7015, "step": 292 }, { "epoch": 0.06, "learning_rate": 1.996640116095138e-05, "loss": 0.7117, "step": 293 }, { "epoch": 0.06, "learning_rate": 1.996589810084387e-05, "loss": 0.6152, "step": 294 }, { "epoch": 0.06, "learning_rate": 1.996539130905593e-05, "loss": 0.7553, "step": 295 }, { "epoch": 0.06, "learning_rate": 1.996488078577732e-05, "loss": 0.5157, "step": 296 }, { "epoch": 0.06, "learning_rate": 1.9964366531199205e-05, "loss": 0.6229, "step": 297 }, { "epoch": 0.06, "learning_rate": 1.996384854551415e-05, "loss": 0.7195, "step": 298 }, { "epoch": 0.06, "learning_rate": 1.99633268289161e-05, "loss": 0.6942, "step": 299 }, { "epoch": 0.06, "learning_rate": 1.996280138160042e-05, "loss": 0.6831, "step": 300 }, { "epoch": 0.06, "learning_rate": 1.9962272203763852e-05, "loss": 0.607, "step": 301 }, { "epoch": 0.06, "learning_rate": 1.9961739295604553e-05, "loss": 0.6172, "step": 302 }, { "epoch": 0.06, "learning_rate": 1.996120265732206e-05, "loss": 0.5852, "step": 303 }, { "epoch": 0.06, "learning_rate": 1.996066228911732e-05, "loss": 0.6676, "step": 304 }, { "epoch": 0.06, "learning_rate": 1.996011819119267e-05, "loss": 0.7202, "step": 305 }, { "epoch": 0.06, "learning_rate": 1.995957036375184e-05, "loss": 0.6109, "step": 306 }, { "epoch": 0.06, "learning_rate": 1.995901880699997e-05, "loss": 0.619, "step": 307 }, { "epoch": 0.06, "learning_rate": 1.9958463521143584e-05, "loss": 0.8135, "step": 308 }, { "epoch": 0.06, "learning_rate": 1.9957904506390602e-05, "loss": 0.5947, "step": 309 }, { "epoch": 0.06, "learning_rate": 1.9957341762950346e-05, "loss": 0.5531, "step": 310 }, { "epoch": 0.06, "learning_rate": 1.9956775291033534e-05, "loss": 0.7493, "step": 311 }, { "epoch": 0.06, "learning_rate": 1.995620509085228e-05, "loss": 0.6118, "step": 312 }, { "epoch": 0.06, "learning_rate": 1.9955631162620094e-05, "loss": 0.835, "step": 313 }, { "epoch": 0.06, "learning_rate": 1.9955053506551872e-05, "loss": 0.6711, "step": 314 }, { "epoch": 0.06, "learning_rate": 1.995447212286392e-05, "loss": 0.7215, "step": 315 }, { "epoch": 0.06, "learning_rate": 1.9953887011773934e-05, "loss": 0.7533, "step": 316 }, { "epoch": 0.06, "learning_rate": 1.9953298173501007e-05, "loss": 0.7127, "step": 317 }, { "epoch": 0.06, "learning_rate": 1.9952705608265628e-05, "loss": 0.7642, "step": 318 }, { "epoch": 0.06, "learning_rate": 1.995210931628967e-05, "loss": 0.4624, "step": 319 }, { "epoch": 0.06, "learning_rate": 1.9951509297796427e-05, "loss": 0.6976, "step": 320 }, { "epoch": 0.06, "learning_rate": 1.995090555301056e-05, "loss": 0.7235, "step": 321 }, { "epoch": 0.06, "learning_rate": 1.9950298082158146e-05, "loss": 0.6917, "step": 322 }, { "epoch": 0.06, "learning_rate": 1.9949686885466645e-05, "loss": 0.7465, "step": 323 }, { "epoch": 0.06, "learning_rate": 1.9949071963164918e-05, "loss": 0.5137, "step": 324 }, { "epoch": 0.06, "learning_rate": 1.994845331548322e-05, "loss": 0.5533, "step": 325 }, { "epoch": 0.06, "learning_rate": 1.99478309426532e-05, "loss": 0.6244, "step": 326 }, { "epoch": 0.06, "learning_rate": 1.9947204844907903e-05, "loss": 0.6671, "step": 327 }, { "epoch": 0.06, "learning_rate": 1.9946575022481764e-05, "loss": 0.5778, "step": 328 }, { "epoch": 0.06, "learning_rate": 1.9945941475610623e-05, "loss": 0.6731, "step": 329 }, { "epoch": 0.06, "learning_rate": 1.994530420453171e-05, "loss": 0.6014, "step": 330 }, { "epoch": 0.06, "learning_rate": 1.9944663209483636e-05, "loss": 0.6393, "step": 331 }, { "epoch": 0.06, "learning_rate": 1.994401849070643e-05, "loss": 0.5364, "step": 332 }, { "epoch": 0.06, "learning_rate": 1.99433700484415e-05, "loss": 0.6886, "step": 333 }, { "epoch": 0.06, "learning_rate": 1.994271788293165e-05, "loss": 0.7886, "step": 334 }, { "epoch": 0.06, "learning_rate": 1.9942061994421088e-05, "loss": 0.7636, "step": 335 }, { "epoch": 0.06, "learning_rate": 1.99414023831554e-05, "loss": 0.6779, "step": 336 }, { "epoch": 0.06, "learning_rate": 1.994073904938157e-05, "loss": 0.6993, "step": 337 }, { "epoch": 0.06, "learning_rate": 1.9940071993347992e-05, "loss": 0.7263, "step": 338 }, { "epoch": 0.06, "learning_rate": 1.9939401215304435e-05, "loss": 0.6105, "step": 339 }, { "epoch": 0.06, "learning_rate": 1.9938726715502067e-05, "loss": 0.6277, "step": 340 }, { "epoch": 0.06, "learning_rate": 1.9938048494193458e-05, "loss": 0.699, "step": 341 }, { "epoch": 0.06, "learning_rate": 1.9937366551632555e-05, "loss": 0.5902, "step": 342 }, { "epoch": 0.06, "learning_rate": 1.9936680888074716e-05, "loss": 0.6665, "step": 343 }, { "epoch": 0.06, "learning_rate": 1.993599150377668e-05, "loss": 0.5722, "step": 344 }, { "epoch": 0.07, "learning_rate": 1.9935298398996584e-05, "loss": 0.6369, "step": 345 }, { "epoch": 0.07, "learning_rate": 1.993460157399396e-05, "loss": 0.6279, "step": 346 }, { "epoch": 0.07, "learning_rate": 1.9933901029029732e-05, "loss": 0.7698, "step": 347 }, { "epoch": 0.07, "learning_rate": 1.993319676436621e-05, "loss": 0.6217, "step": 348 }, { "epoch": 0.07, "learning_rate": 1.9932488780267106e-05, "loss": 0.6714, "step": 349 }, { "epoch": 0.07, "learning_rate": 1.9931777076997516e-05, "loss": 0.5437, "step": 350 }, { "epoch": 0.07, "learning_rate": 1.9931061654823942e-05, "loss": 0.7429, "step": 351 }, { "epoch": 0.07, "learning_rate": 1.9930342514014263e-05, "loss": 0.6626, "step": 352 }, { "epoch": 0.07, "learning_rate": 1.9929619654837765e-05, "loss": 0.6474, "step": 353 }, { "epoch": 0.07, "learning_rate": 1.992889307756511e-05, "loss": 0.6279, "step": 354 }, { "epoch": 0.07, "learning_rate": 1.992816278246837e-05, "loss": 0.7801, "step": 355 }, { "epoch": 0.07, "learning_rate": 1.992742876982099e-05, "loss": 0.7405, "step": 356 }, { "epoch": 0.07, "learning_rate": 1.992669103989783e-05, "loss": 0.7276, "step": 357 }, { "epoch": 0.07, "learning_rate": 1.9925949592975117e-05, "loss": 0.6978, "step": 358 }, { "epoch": 0.07, "learning_rate": 1.9925204429330492e-05, "loss": 0.6032, "step": 359 }, { "epoch": 0.07, "learning_rate": 1.9924455549242972e-05, "loss": 0.5869, "step": 360 }, { "epoch": 0.07, "learning_rate": 1.9923702952992973e-05, "loss": 0.621, "step": 361 }, { "epoch": 0.07, "learning_rate": 1.99229466408623e-05, "loss": 0.6377, "step": 362 }, { "epoch": 0.07, "learning_rate": 1.9922186613134152e-05, "loss": 0.6187, "step": 363 }, { "epoch": 0.07, "learning_rate": 1.992142287009312e-05, "loss": 0.6818, "step": 364 }, { "epoch": 0.07, "learning_rate": 1.9920655412025175e-05, "loss": 0.8135, "step": 365 }, { "epoch": 0.07, "learning_rate": 1.9919884239217695e-05, "loss": 0.6521, "step": 366 }, { "epoch": 0.07, "learning_rate": 1.9919109351959444e-05, "loss": 0.4523, "step": 367 }, { "epoch": 0.07, "learning_rate": 1.9918330750540564e-05, "loss": 0.6957, "step": 368 }, { "epoch": 0.07, "learning_rate": 1.9917548435252608e-05, "loss": 0.8135, "step": 369 }, { "epoch": 0.07, "learning_rate": 1.9916762406388507e-05, "loss": 0.5928, "step": 370 }, { "epoch": 0.07, "learning_rate": 1.9915972664242587e-05, "loss": 0.6949, "step": 371 }, { "epoch": 0.07, "learning_rate": 1.991517920911056e-05, "loss": 0.5844, "step": 372 }, { "epoch": 0.07, "learning_rate": 1.9914382041289533e-05, "loss": 0.4758, "step": 373 }, { "epoch": 0.07, "learning_rate": 1.9913581161078002e-05, "loss": 0.7041, "step": 374 }, { "epoch": 0.07, "learning_rate": 1.9912776568775852e-05, "loss": 0.5166, "step": 375 }, { "epoch": 0.07, "learning_rate": 1.991196826468436e-05, "loss": 0.6685, "step": 376 }, { "epoch": 0.07, "learning_rate": 1.9911156249106186e-05, "loss": 0.7114, "step": 377 }, { "epoch": 0.07, "learning_rate": 1.9910340522345395e-05, "loss": 0.7476, "step": 378 }, { "epoch": 0.07, "learning_rate": 1.990952108470742e-05, "loss": 0.5799, "step": 379 }, { "epoch": 0.07, "learning_rate": 1.9908697936499105e-05, "loss": 0.6284, "step": 380 }, { "epoch": 0.07, "learning_rate": 1.990787107802867e-05, "loss": 0.5422, "step": 381 }, { "epoch": 0.07, "learning_rate": 1.9907040509605725e-05, "loss": 0.6316, "step": 382 }, { "epoch": 0.07, "learning_rate": 1.9906206231541273e-05, "loss": 0.6247, "step": 383 }, { "epoch": 0.07, "learning_rate": 1.9905368244147713e-05, "loss": 0.5781, "step": 384 }, { "epoch": 0.07, "learning_rate": 1.9904526547738812e-05, "loss": 0.6742, "step": 385 }, { "epoch": 0.07, "learning_rate": 1.9903681142629752e-05, "loss": 0.6599, "step": 386 }, { "epoch": 0.07, "learning_rate": 1.9902832029137086e-05, "loss": 0.5974, "step": 387 }, { "epoch": 0.07, "learning_rate": 1.9901979207578755e-05, "loss": 0.5805, "step": 388 }, { "epoch": 0.07, "learning_rate": 1.9901122678274106e-05, "loss": 0.6521, "step": 389 }, { "epoch": 0.07, "learning_rate": 1.9900262441543848e-05, "loss": 0.7784, "step": 390 }, { "epoch": 0.07, "learning_rate": 1.9899398497710098e-05, "loss": 0.888, "step": 391 }, { "epoch": 0.07, "learning_rate": 1.9898530847096354e-05, "loss": 0.5475, "step": 392 }, { "epoch": 0.07, "learning_rate": 1.9897659490027512e-05, "loss": 0.6568, "step": 393 }, { "epoch": 0.07, "learning_rate": 1.9896784426829838e-05, "loss": 1.0618, "step": 394 }, { "epoch": 0.07, "learning_rate": 1.9895905657830995e-05, "loss": 0.6606, "step": 395 }, { "epoch": 0.07, "learning_rate": 1.989502318336004e-05, "loss": 0.5988, "step": 396 }, { "epoch": 0.08, "learning_rate": 1.9894137003747404e-05, "loss": 0.6945, "step": 397 }, { "epoch": 0.08, "learning_rate": 1.9893247119324916e-05, "loss": 0.6397, "step": 398 }, { "epoch": 0.08, "learning_rate": 1.9892353530425792e-05, "loss": 0.7417, "step": 399 }, { "epoch": 0.08, "learning_rate": 1.9891456237384626e-05, "loss": 0.6551, "step": 400 }, { "epoch": 0.08, "learning_rate": 1.989055524053741e-05, "loss": 0.583, "step": 401 }, { "epoch": 0.08, "learning_rate": 1.9889650540221514e-05, "loss": 0.8043, "step": 402 }, { "epoch": 0.08, "learning_rate": 1.9888742136775695e-05, "loss": 0.5808, "step": 403 }, { "epoch": 0.08, "learning_rate": 1.988783003054011e-05, "loss": 0.4911, "step": 404 }, { "epoch": 0.08, "learning_rate": 1.9886914221856285e-05, "loss": 0.6328, "step": 405 }, { "epoch": 0.08, "learning_rate": 1.9885994711067143e-05, "loss": 0.5502, "step": 406 }, { "epoch": 0.08, "learning_rate": 1.988507149851699e-05, "loss": 0.6709, "step": 407 }, { "epoch": 0.08, "learning_rate": 1.9884144584551517e-05, "loss": 0.6058, "step": 408 }, { "epoch": 0.08, "learning_rate": 1.9883213969517806e-05, "loss": 0.598, "step": 409 }, { "epoch": 0.08, "learning_rate": 1.988227965376431e-05, "loss": 0.7248, "step": 410 }, { "epoch": 0.08, "learning_rate": 1.9881341637640895e-05, "loss": 0.6113, "step": 411 }, { "epoch": 0.08, "learning_rate": 1.9880399921498786e-05, "loss": 0.6465, "step": 412 }, { "epoch": 0.08, "learning_rate": 1.987945450569061e-05, "loss": 0.5249, "step": 413 }, { "epoch": 0.08, "learning_rate": 1.987850539057036e-05, "loss": 0.5956, "step": 414 }, { "epoch": 0.08, "learning_rate": 1.9877552576493442e-05, "loss": 0.6303, "step": 415 }, { "epoch": 0.08, "learning_rate": 1.987659606381663e-05, "loss": 0.6691, "step": 416 }, { "epoch": 0.08, "learning_rate": 1.987563585289808e-05, "loss": 0.6176, "step": 417 }, { "epoch": 0.08, "learning_rate": 1.9874671944097338e-05, "loss": 0.6232, "step": 418 }, { "epoch": 0.08, "learning_rate": 1.9873704337775343e-05, "loss": 0.6552, "step": 419 }, { "epoch": 0.08, "learning_rate": 1.9872733034294403e-05, "loss": 0.6103, "step": 420 }, { "epoch": 0.08, "learning_rate": 1.987175803401822e-05, "loss": 0.6751, "step": 421 }, { "epoch": 0.08, "learning_rate": 1.9870779337311876e-05, "loss": 0.5589, "step": 422 }, { "epoch": 0.08, "learning_rate": 1.9869796944541842e-05, "loss": 0.755, "step": 423 }, { "epoch": 0.08, "learning_rate": 1.9868810856075965e-05, "loss": 0.5386, "step": 424 }, { "epoch": 0.08, "learning_rate": 1.9867821072283492e-05, "loss": 0.6365, "step": 425 }, { "epoch": 0.08, "learning_rate": 1.986682759353503e-05, "loss": 0.5795, "step": 426 }, { "epoch": 0.08, "learning_rate": 1.9865830420202587e-05, "loss": 0.6278, "step": 427 }, { "epoch": 0.08, "learning_rate": 1.986482955265955e-05, "loss": 0.5914, "step": 428 }, { "epoch": 0.08, "learning_rate": 1.986382499128069e-05, "loss": 0.5987, "step": 429 }, { "epoch": 0.08, "learning_rate": 1.986281673644216e-05, "loss": 0.5812, "step": 430 }, { "epoch": 0.08, "learning_rate": 1.986180478852149e-05, "loss": 0.5928, "step": 431 }, { "epoch": 0.08, "learning_rate": 1.9860789147897605e-05, "loss": 0.4536, "step": 432 }, { "epoch": 0.08, "learning_rate": 1.985976981495081e-05, "loss": 0.5398, "step": 433 }, { "epoch": 0.08, "learning_rate": 1.9858746790062778e-05, "loss": 0.6403, "step": 434 }, { "epoch": 0.08, "learning_rate": 1.985772007361658e-05, "loss": 0.6704, "step": 435 }, { "epoch": 0.08, "learning_rate": 1.9856689665996672e-05, "loss": 0.6578, "step": 436 }, { "epoch": 0.08, "learning_rate": 1.9855655567588877e-05, "loss": 0.5833, "step": 437 }, { "epoch": 0.08, "learning_rate": 1.985461777878041e-05, "loss": 0.5668, "step": 438 }, { "epoch": 0.08, "learning_rate": 1.9853576299959866e-05, "loss": 0.643, "step": 439 }, { "epoch": 0.08, "learning_rate": 1.985253113151722e-05, "loss": 0.7148, "step": 440 }, { "epoch": 0.08, "learning_rate": 1.9851482273843833e-05, "loss": 0.5307, "step": 441 }, { "epoch": 0.08, "learning_rate": 1.985042972733244e-05, "loss": 0.6312, "step": 442 }, { "epoch": 0.08, "learning_rate": 1.9849373492377172e-05, "loss": 0.7208, "step": 443 }, { "epoch": 0.08, "learning_rate": 1.9848313569373518e-05, "loss": 0.6216, "step": 444 }, { "epoch": 0.08, "learning_rate": 1.9847249958718367e-05, "loss": 0.6125, "step": 445 }, { "epoch": 0.08, "learning_rate": 1.9846182660809983e-05, "loss": 0.8181, "step": 446 }, { "epoch": 0.08, "learning_rate": 1.984511167604801e-05, "loss": 0.8014, "step": 447 }, { "epoch": 0.08, "learning_rate": 1.984403700483347e-05, "loss": 0.7352, "step": 448 }, { "epoch": 0.08, "learning_rate": 1.984295864756877e-05, "loss": 0.7524, "step": 449 }, { "epoch": 0.09, "learning_rate": 1.9841876604657703e-05, "loss": 0.6212, "step": 450 }, { "epoch": 0.09, "learning_rate": 1.9840790876505422e-05, "loss": 0.6664, "step": 451 }, { "epoch": 0.09, "learning_rate": 1.983970146351848e-05, "loss": 0.7818, "step": 452 }, { "epoch": 0.09, "learning_rate": 1.9838608366104796e-05, "loss": 0.6461, "step": 453 }, { "epoch": 0.09, "learning_rate": 1.9837511584673685e-05, "loss": 0.6943, "step": 454 }, { "epoch": 0.09, "learning_rate": 1.983641111963582e-05, "loss": 0.7532, "step": 455 }, { "epoch": 0.09, "learning_rate": 1.9835306971403275e-05, "loss": 0.5039, "step": 456 }, { "epoch": 0.09, "learning_rate": 1.9834199140389485e-05, "loss": 0.6018, "step": 457 }, { "epoch": 0.09, "learning_rate": 1.9833087627009274e-05, "loss": 0.5084, "step": 458 }, { "epoch": 0.09, "learning_rate": 1.983197243167884e-05, "loss": 0.8088, "step": 459 }, { "epoch": 0.09, "learning_rate": 1.983085355481577e-05, "loss": 0.6736, "step": 460 }, { "epoch": 0.09, "learning_rate": 1.982973099683902e-05, "loss": 0.6053, "step": 461 }, { "epoch": 0.09, "learning_rate": 1.9828604758168918e-05, "loss": 0.6537, "step": 462 }, { "epoch": 0.09, "learning_rate": 1.9827474839227188e-05, "loss": 0.842, "step": 463 }, { "epoch": 0.09, "learning_rate": 1.9826341240436915e-05, "loss": 0.6273, "step": 464 }, { "epoch": 0.09, "learning_rate": 1.9825203962222573e-05, "loss": 0.6632, "step": 465 }, { "epoch": 0.09, "learning_rate": 1.9824063005010008e-05, "loss": 0.499, "step": 466 }, { "epoch": 0.09, "learning_rate": 1.982291836922645e-05, "loss": 0.7698, "step": 467 }, { "epoch": 0.09, "learning_rate": 1.98217700553005e-05, "loss": 0.6533, "step": 468 }, { "epoch": 0.09, "learning_rate": 1.9820618063662133e-05, "loss": 0.5688, "step": 469 }, { "epoch": 0.09, "learning_rate": 1.9819462394742715e-05, "loss": 0.6132, "step": 470 }, { "epoch": 0.09, "learning_rate": 1.981830304897498e-05, "loss": 0.5624, "step": 471 }, { "epoch": 0.09, "learning_rate": 1.981714002679303e-05, "loss": 0.7985, "step": 472 }, { "epoch": 0.09, "learning_rate": 1.9815973328632362e-05, "loss": 0.5627, "step": 473 }, { "epoch": 0.09, "learning_rate": 1.9814802954929838e-05, "loss": 0.7494, "step": 474 }, { "epoch": 0.09, "learning_rate": 1.9813628906123696e-05, "loss": 0.6231, "step": 475 }, { "epoch": 0.09, "learning_rate": 1.9812451182653554e-05, "loss": 0.5332, "step": 476 }, { "epoch": 0.09, "learning_rate": 1.9811269784960404e-05, "loss": 0.5872, "step": 477 }, { "epoch": 0.09, "learning_rate": 1.9810084713486617e-05, "loss": 0.5988, "step": 478 }, { "epoch": 0.09, "learning_rate": 1.9808895968675937e-05, "loss": 0.6907, "step": 479 }, { "epoch": 0.09, "learning_rate": 1.9807703550973477e-05, "loss": 0.59, "step": 480 }, { "epoch": 0.09, "learning_rate": 1.980650746082574e-05, "loss": 0.5273, "step": 481 }, { "epoch": 0.09, "learning_rate": 1.9805307698680592e-05, "loss": 0.5967, "step": 482 }, { "epoch": 0.09, "learning_rate": 1.9804104264987278e-05, "loss": 0.6169, "step": 483 }, { "epoch": 0.09, "learning_rate": 1.980289716019642e-05, "loss": 0.5768, "step": 484 }, { "epoch": 0.09, "learning_rate": 1.980168638476001e-05, "loss": 0.5397, "step": 485 }, { "epoch": 0.09, "learning_rate": 1.9800471939131418e-05, "loss": 0.5614, "step": 486 }, { "epoch": 0.09, "learning_rate": 1.9799253823765383e-05, "loss": 0.6865, "step": 487 }, { "epoch": 0.09, "learning_rate": 1.9798032039118028e-05, "loss": 0.7262, "step": 488 }, { "epoch": 0.09, "learning_rate": 1.979680658564684e-05, "loss": 0.5734, "step": 489 }, { "epoch": 0.09, "learning_rate": 1.9795577463810686e-05, "loss": 0.589, "step": 490 }, { "epoch": 0.09, "learning_rate": 1.9794344674069807e-05, "loss": 0.6562, "step": 491 }, { "epoch": 0.09, "learning_rate": 1.979310821688581e-05, "loss": 0.7511, "step": 492 }, { "epoch": 0.09, "learning_rate": 1.979186809272168e-05, "loss": 0.6116, "step": 493 }, { "epoch": 0.09, "learning_rate": 1.979062430204178e-05, "loss": 0.7909, "step": 494 }, { "epoch": 0.09, "learning_rate": 1.978937684531184e-05, "loss": 0.5213, "step": 495 }, { "epoch": 0.09, "learning_rate": 1.9788125722998958e-05, "loss": 0.4475, "step": 496 }, { "epoch": 0.09, "learning_rate": 1.9786870935571617e-05, "loss": 0.6951, "step": 497 }, { "epoch": 0.09, "learning_rate": 1.9785612483499666e-05, "loss": 0.5588, "step": 498 }, { "epoch": 0.09, "learning_rate": 1.9784350367254322e-05, "loss": 0.7768, "step": 499 }, { "epoch": 0.09, "learning_rate": 1.978308458730818e-05, "loss": 0.7764, "step": 500 }, { "epoch": 0.09, "learning_rate": 1.9781815144135204e-05, "loss": 0.7041, "step": 501 }, { "epoch": 0.09, "learning_rate": 1.9780542038210733e-05, "loss": 0.6452, "step": 502 }, { "epoch": 0.1, "learning_rate": 1.9779265270011474e-05, "loss": 0.6745, "step": 503 }, { "epoch": 0.1, "learning_rate": 1.9777984840015505e-05, "loss": 0.761, "step": 504 }, { "epoch": 0.1, "learning_rate": 1.9776700748702275e-05, "loss": 0.701, "step": 505 }, { "epoch": 0.1, "learning_rate": 1.9775412996552612e-05, "loss": 0.5742, "step": 506 }, { "epoch": 0.1, "learning_rate": 1.97741215840487e-05, "loss": 0.5596, "step": 507 }, { "epoch": 0.1, "learning_rate": 1.977282651167411e-05, "loss": 0.6739, "step": 508 }, { "epoch": 0.1, "learning_rate": 1.9771527779913774e-05, "loss": 0.7232, "step": 509 }, { "epoch": 0.1, "learning_rate": 1.977022538925399e-05, "loss": 0.6018, "step": 510 }, { "epoch": 0.1, "learning_rate": 1.9768919340182437e-05, "loss": 0.6422, "step": 511 }, { "epoch": 0.1, "learning_rate": 1.976760963318815e-05, "loss": 0.6974, "step": 512 }, { "epoch": 0.1, "learning_rate": 1.9766296268761555e-05, "loss": 0.5416, "step": 513 }, { "epoch": 0.1, "learning_rate": 1.9764979247394428e-05, "loss": 0.4937, "step": 514 }, { "epoch": 0.1, "learning_rate": 1.9763658569579916e-05, "loss": 0.6419, "step": 515 }, { "epoch": 0.1, "learning_rate": 1.976233423581255e-05, "loss": 0.8821, "step": 516 }, { "epoch": 0.1, "learning_rate": 1.9761006246588217e-05, "loss": 0.681, "step": 517 }, { "epoch": 0.1, "learning_rate": 1.9759674602404173e-05, "loss": 0.7926, "step": 518 }, { "epoch": 0.1, "learning_rate": 1.9758339303759046e-05, "loss": 0.7327, "step": 519 }, { "epoch": 0.1, "learning_rate": 1.9757000351152834e-05, "loss": 0.6987, "step": 520 }, { "epoch": 0.1, "learning_rate": 1.9755657745086898e-05, "loss": 0.834, "step": 521 }, { "epoch": 0.1, "learning_rate": 1.9754311486063973e-05, "loss": 0.5376, "step": 522 }, { "epoch": 0.1, "learning_rate": 1.975296157458816e-05, "loss": 0.6339, "step": 523 }, { "epoch": 0.1, "learning_rate": 1.9751608011164922e-05, "loss": 0.6089, "step": 524 }, { "epoch": 0.1, "learning_rate": 1.9750250796301098e-05, "loss": 0.6588, "step": 525 }, { "epoch": 0.1, "learning_rate": 1.9748889930504886e-05, "loss": 0.6911, "step": 526 }, { "epoch": 0.1, "learning_rate": 1.9747525414285863e-05, "loss": 0.8009, "step": 527 }, { "epoch": 0.1, "learning_rate": 1.9746157248154954e-05, "loss": 0.6687, "step": 528 }, { "epoch": 0.1, "learning_rate": 1.9744785432624473e-05, "loss": 0.5702, "step": 529 }, { "epoch": 0.1, "learning_rate": 1.9743409968208084e-05, "loss": 0.6122, "step": 530 }, { "epoch": 0.1, "learning_rate": 1.974203085542082e-05, "loss": 0.8507, "step": 531 }, { "epoch": 0.1, "learning_rate": 1.974064809477909e-05, "loss": 0.5668, "step": 532 }, { "epoch": 0.1, "learning_rate": 1.9739261686800662e-05, "loss": 0.8123, "step": 533 }, { "epoch": 0.1, "learning_rate": 1.973787163200466e-05, "loss": 0.5727, "step": 534 }, { "epoch": 0.1, "learning_rate": 1.9736477930911588e-05, "loss": 0.674, "step": 535 }, { "epoch": 0.1, "learning_rate": 1.9735080584043314e-05, "loss": 0.6002, "step": 536 }, { "epoch": 0.1, "learning_rate": 1.9733679591923062e-05, "loss": 0.6238, "step": 537 }, { "epoch": 0.1, "learning_rate": 1.973227495507543e-05, "loss": 0.7495, "step": 538 }, { "epoch": 0.1, "learning_rate": 1.9730866674026378e-05, "loss": 0.5555, "step": 539 }, { "epoch": 0.1, "learning_rate": 1.9729454749303227e-05, "loss": 0.6144, "step": 540 }, { "epoch": 0.1, "learning_rate": 1.9728039181434666e-05, "loss": 0.7028, "step": 541 }, { "epoch": 0.1, "learning_rate": 1.9726619970950744e-05, "loss": 0.7399, "step": 542 }, { "epoch": 0.1, "learning_rate": 1.9725197118382884e-05, "loss": 0.5762, "step": 543 }, { "epoch": 0.1, "learning_rate": 1.9723770624263862e-05, "loss": 0.7227, "step": 544 }, { "epoch": 0.1, "learning_rate": 1.9722340489127824e-05, "loss": 0.6282, "step": 545 }, { "epoch": 0.1, "learning_rate": 1.972090671351027e-05, "loss": 0.639, "step": 546 }, { "epoch": 0.1, "learning_rate": 1.9719469297948076e-05, "loss": 0.8125, "step": 547 }, { "epoch": 0.1, "learning_rate": 1.9718028242979478e-05, "loss": 0.624, "step": 548 }, { "epoch": 0.1, "learning_rate": 1.9716583549144067e-05, "loss": 0.7245, "step": 549 }, { "epoch": 0.1, "learning_rate": 1.97151352169828e-05, "loss": 0.5894, "step": 550 }, { "epoch": 0.1, "learning_rate": 1.9713683247038e-05, "loss": 0.6551, "step": 551 }, { "epoch": 0.1, "learning_rate": 1.9712227639853352e-05, "loss": 0.5875, "step": 552 }, { "epoch": 0.1, "learning_rate": 1.9710768395973897e-05, "loss": 0.5908, "step": 553 }, { "epoch": 0.1, "learning_rate": 1.9709305515946042e-05, "loss": 0.6481, "step": 554 }, { "epoch": 0.1, "learning_rate": 1.9707839000317557e-05, "loss": 0.7637, "step": 555 }, { "epoch": 0.11, "learning_rate": 1.970636884963757e-05, "loss": 0.4863, "step": 556 }, { "epoch": 0.11, "learning_rate": 1.9704895064456573e-05, "loss": 0.7046, "step": 557 }, { "epoch": 0.11, "learning_rate": 1.9703417645326418e-05, "loss": 0.6052, "step": 558 }, { "epoch": 0.11, "learning_rate": 1.970193659280031e-05, "loss": 0.7102, "step": 559 }, { "epoch": 0.11, "learning_rate": 1.9700451907432835e-05, "loss": 0.7297, "step": 560 }, { "epoch": 0.11, "learning_rate": 1.9698963589779915e-05, "loss": 0.7229, "step": 561 }, { "epoch": 0.11, "learning_rate": 1.969747164039885e-05, "loss": 0.6624, "step": 562 }, { "epoch": 0.11, "learning_rate": 1.9695976059848282e-05, "loss": 0.7479, "step": 563 }, { "epoch": 0.11, "learning_rate": 1.9694476848688236e-05, "loss": 0.5644, "step": 564 }, { "epoch": 0.11, "learning_rate": 1.969297400748008e-05, "loss": 0.7066, "step": 565 }, { "epoch": 0.11, "learning_rate": 1.969146753678654e-05, "loss": 0.5552, "step": 566 }, { "epoch": 0.11, "learning_rate": 1.968995743717171e-05, "loss": 0.65, "step": 567 }, { "epoch": 0.11, "learning_rate": 1.9688443709201043e-05, "loss": 0.7314, "step": 568 }, { "epoch": 0.11, "learning_rate": 1.9686926353441347e-05, "loss": 0.7631, "step": 569 }, { "epoch": 0.11, "learning_rate": 1.968540537046078e-05, "loss": 0.7732, "step": 570 }, { "epoch": 0.11, "learning_rate": 1.9683880760828874e-05, "loss": 0.5251, "step": 571 }, { "epoch": 0.11, "learning_rate": 1.968235252511651e-05, "loss": 0.6859, "step": 572 }, { "epoch": 0.11, "learning_rate": 1.968082066389592e-05, "loss": 0.7438, "step": 573 }, { "epoch": 0.11, "learning_rate": 1.9679285177740715e-05, "loss": 0.5103, "step": 574 }, { "epoch": 0.11, "learning_rate": 1.9677746067225843e-05, "loss": 0.7103, "step": 575 }, { "epoch": 0.11, "learning_rate": 1.9676203332927616e-05, "loss": 0.3896, "step": 576 }, { "epoch": 0.11, "learning_rate": 1.9674656975423704e-05, "loss": 0.6445, "step": 577 }, { "epoch": 0.11, "learning_rate": 1.9673106995293135e-05, "loss": 0.5784, "step": 578 }, { "epoch": 0.11, "learning_rate": 1.9671553393116286e-05, "loss": 0.5864, "step": 579 }, { "epoch": 0.11, "learning_rate": 1.9669996169474903e-05, "loss": 0.6886, "step": 580 }, { "epoch": 0.11, "learning_rate": 1.9668435324952076e-05, "loss": 0.7056, "step": 581 }, { "epoch": 0.11, "learning_rate": 1.966687086013225e-05, "loss": 0.6895, "step": 582 }, { "epoch": 0.11, "learning_rate": 1.966530277560124e-05, "loss": 0.6188, "step": 583 }, { "epoch": 0.11, "learning_rate": 1.9663731071946207e-05, "loss": 0.5383, "step": 584 }, { "epoch": 0.11, "learning_rate": 1.966215574975566e-05, "loss": 0.5419, "step": 585 }, { "epoch": 0.11, "learning_rate": 1.9660576809619475e-05, "loss": 0.7643, "step": 586 }, { "epoch": 0.11, "learning_rate": 1.9658994252128884e-05, "loss": 0.5683, "step": 587 }, { "epoch": 0.11, "learning_rate": 1.9657408077876458e-05, "loss": 0.5302, "step": 588 }, { "epoch": 0.11, "learning_rate": 1.9655818287456133e-05, "loss": 0.8155, "step": 589 }, { "epoch": 0.11, "learning_rate": 1.9654224881463205e-05, "loss": 0.6518, "step": 590 }, { "epoch": 0.11, "learning_rate": 1.9652627860494313e-05, "loss": 0.5929, "step": 591 }, { "epoch": 0.11, "learning_rate": 1.965102722514745e-05, "loss": 0.7371, "step": 592 }, { "epoch": 0.11, "learning_rate": 1.964942297602197e-05, "loss": 0.4912, "step": 593 }, { "epoch": 0.11, "learning_rate": 1.9647815113718577e-05, "loss": 0.8094, "step": 594 }, { "epoch": 0.11, "learning_rate": 1.9646203638839327e-05, "loss": 0.7346, "step": 595 }, { "epoch": 0.11, "learning_rate": 1.9644588551987623e-05, "loss": 0.8466, "step": 596 }, { "epoch": 0.11, "learning_rate": 1.964296985376823e-05, "loss": 0.647, "step": 597 }, { "epoch": 0.11, "learning_rate": 1.964134754478726e-05, "loss": 0.622, "step": 598 }, { "epoch": 0.11, "learning_rate": 1.9639721625652184e-05, "loss": 0.6798, "step": 599 }, { "epoch": 0.11, "learning_rate": 1.963809209697181e-05, "loss": 0.6253, "step": 600 }, { "epoch": 0.11, "learning_rate": 1.963645895935632e-05, "loss": 0.6333, "step": 601 }, { "epoch": 0.11, "learning_rate": 1.9634822213417216e-05, "loss": 0.6672, "step": 602 }, { "epoch": 0.11, "learning_rate": 1.9633181859767385e-05, "loss": 0.6169, "step": 603 }, { "epoch": 0.11, "learning_rate": 1.963153789902104e-05, "loss": 0.7079, "step": 604 }, { "epoch": 0.11, "learning_rate": 1.9629890331793762e-05, "loss": 0.6191, "step": 605 }, { "epoch": 0.11, "learning_rate": 1.9628239158702468e-05, "loss": 0.7398, "step": 606 }, { "epoch": 0.11, "learning_rate": 1.962658438036543e-05, "loss": 0.6511, "step": 607 }, { "epoch": 0.11, "learning_rate": 1.962492599740228e-05, "loss": 0.4896, "step": 608 }, { "epoch": 0.12, "learning_rate": 1.962326401043398e-05, "loss": 0.5733, "step": 609 }, { "epoch": 0.12, "learning_rate": 1.9621598420082863e-05, "loss": 0.6114, "step": 610 }, { "epoch": 0.12, "learning_rate": 1.9619929226972593e-05, "loss": 0.7648, "step": 611 }, { "epoch": 0.12, "learning_rate": 1.961825643172819e-05, "loss": 0.666, "step": 612 }, { "epoch": 0.12, "learning_rate": 1.9616580034976037e-05, "loss": 0.5617, "step": 613 }, { "epoch": 0.12, "learning_rate": 1.9614900037343837e-05, "loss": 0.636, "step": 614 }, { "epoch": 0.12, "learning_rate": 1.9613216439460664e-05, "loss": 0.6065, "step": 615 }, { "epoch": 0.12, "learning_rate": 1.9611529241956933e-05, "loss": 0.6504, "step": 616 }, { "epoch": 0.12, "learning_rate": 1.9609838445464406e-05, "loss": 0.6761, "step": 617 }, { "epoch": 0.12, "learning_rate": 1.9608144050616192e-05, "loss": 0.6545, "step": 618 }, { "epoch": 0.12, "learning_rate": 1.960644605804675e-05, "loss": 0.5833, "step": 619 }, { "epoch": 0.12, "learning_rate": 1.9604744468391882e-05, "loss": 0.855, "step": 620 }, { "epoch": 0.12, "learning_rate": 1.960303928228874e-05, "loss": 0.7429, "step": 621 }, { "epoch": 0.12, "learning_rate": 1.9601330500375827e-05, "loss": 0.6052, "step": 622 }, { "epoch": 0.12, "learning_rate": 1.9599618123292985e-05, "loss": 0.5438, "step": 623 }, { "epoch": 0.12, "learning_rate": 1.95979021516814e-05, "loss": 0.5654, "step": 624 }, { "epoch": 0.12, "learning_rate": 1.959618258618362e-05, "loss": 0.6089, "step": 625 }, { "epoch": 0.12, "learning_rate": 1.959445942744352e-05, "loss": 0.7795, "step": 626 }, { "epoch": 0.12, "learning_rate": 1.959273267610633e-05, "loss": 0.5702, "step": 627 }, { "epoch": 0.12, "learning_rate": 1.959100233281862e-05, "loss": 0.6607, "step": 628 }, { "epoch": 0.12, "learning_rate": 1.9589268398228313e-05, "loss": 0.6178, "step": 629 }, { "epoch": 0.12, "learning_rate": 1.9587530872984672e-05, "loss": 0.6211, "step": 630 }, { "epoch": 0.12, "learning_rate": 1.9585789757738298e-05, "loss": 0.5015, "step": 631 }, { "epoch": 0.12, "learning_rate": 1.958404505314115e-05, "loss": 0.6951, "step": 632 }, { "epoch": 0.12, "learning_rate": 1.958229675984652e-05, "loss": 0.7686, "step": 633 }, { "epoch": 0.12, "learning_rate": 1.9580544878509054e-05, "loss": 0.6494, "step": 634 }, { "epoch": 0.12, "learning_rate": 1.9578789409784727e-05, "loss": 0.4908, "step": 635 }, { "epoch": 0.12, "learning_rate": 1.957703035433087e-05, "loss": 0.7011, "step": 636 }, { "epoch": 0.12, "learning_rate": 1.9575267712806152e-05, "loss": 0.5187, "step": 637 }, { "epoch": 0.12, "learning_rate": 1.9573501485870585e-05, "loss": 0.6435, "step": 638 }, { "epoch": 0.12, "learning_rate": 1.9571731674185523e-05, "loss": 0.7211, "step": 639 }, { "epoch": 0.12, "learning_rate": 1.956995827841366e-05, "loss": 0.5494, "step": 640 }, { "epoch": 0.12, "learning_rate": 1.9568181299219042e-05, "loss": 0.6652, "step": 641 }, { "epoch": 0.12, "learning_rate": 1.9566400737267045e-05, "loss": 0.7224, "step": 642 }, { "epoch": 0.12, "learning_rate": 1.9564616593224396e-05, "loss": 0.6485, "step": 643 }, { "epoch": 0.12, "learning_rate": 1.9562828867759154e-05, "loss": 0.5622, "step": 644 }, { "epoch": 0.12, "learning_rate": 1.9561037561540722e-05, "loss": 0.4832, "step": 645 }, { "epoch": 0.12, "learning_rate": 1.9559242675239855e-05, "loss": 0.5593, "step": 646 }, { "epoch": 0.12, "learning_rate": 1.955744420952863e-05, "loss": 0.5924, "step": 647 }, { "epoch": 0.12, "learning_rate": 1.9555642165080475e-05, "loss": 0.6536, "step": 648 }, { "epoch": 0.12, "learning_rate": 1.955383654257016e-05, "loss": 0.5222, "step": 649 }, { "epoch": 0.12, "learning_rate": 1.955202734267379e-05, "loss": 0.7254, "step": 650 }, { "epoch": 0.12, "learning_rate": 1.955021456606881e-05, "loss": 0.5802, "step": 651 }, { "epoch": 0.12, "learning_rate": 1.954839821343401e-05, "loss": 0.6827, "step": 652 }, { "epoch": 0.12, "learning_rate": 1.9546578285449504e-05, "loss": 0.7597, "step": 653 }, { "epoch": 0.12, "learning_rate": 1.954475478279676e-05, "loss": 0.5999, "step": 654 }, { "epoch": 0.12, "learning_rate": 1.954292770615858e-05, "loss": 0.7118, "step": 655 }, { "epoch": 0.12, "learning_rate": 1.9541097056219108e-05, "loss": 0.5568, "step": 656 }, { "epoch": 0.12, "learning_rate": 1.9539262833663813e-05, "loss": 0.6132, "step": 657 }, { "epoch": 0.12, "learning_rate": 1.9537425039179518e-05, "loss": 0.5714, "step": 658 }, { "epoch": 0.12, "learning_rate": 1.9535583673454365e-05, "loss": 0.6229, "step": 659 }, { "epoch": 0.12, "learning_rate": 1.953373873717786e-05, "loss": 0.5683, "step": 660 }, { "epoch": 0.12, "learning_rate": 1.9531890231040814e-05, "loss": 0.5276, "step": 661 }, { "epoch": 0.13, "learning_rate": 1.9530038155735405e-05, "loss": 0.6613, "step": 662 }, { "epoch": 0.13, "learning_rate": 1.9528182511955124e-05, "loss": 0.7438, "step": 663 }, { "epoch": 0.13, "learning_rate": 1.9526323300394813e-05, "loss": 0.4759, "step": 664 }, { "epoch": 0.13, "learning_rate": 1.952446052175064e-05, "loss": 0.7326, "step": 665 }, { "epoch": 0.13, "learning_rate": 1.952259417672011e-05, "loss": 0.541, "step": 666 }, { "epoch": 0.13, "learning_rate": 1.9520724266002078e-05, "loss": 0.6963, "step": 667 }, { "epoch": 0.13, "learning_rate": 1.9518850790296712e-05, "loss": 0.5858, "step": 668 }, { "epoch": 0.13, "learning_rate": 1.951697375030553e-05, "loss": 0.6852, "step": 669 }, { "epoch": 0.13, "learning_rate": 1.951509314673138e-05, "loss": 0.6302, "step": 670 }, { "epoch": 0.13, "learning_rate": 1.951320898027845e-05, "loss": 0.5549, "step": 671 }, { "epoch": 0.13, "learning_rate": 1.9511321251652243e-05, "loss": 0.5985, "step": 672 }, { "epoch": 0.13, "learning_rate": 1.9509429961559624e-05, "loss": 0.7871, "step": 673 }, { "epoch": 0.13, "learning_rate": 1.9507535110708766e-05, "loss": 0.4962, "step": 674 }, { "epoch": 0.13, "learning_rate": 1.9505636699809192e-05, "loss": 0.6802, "step": 675 }, { "epoch": 0.13, "learning_rate": 1.9503734729571752e-05, "loss": 0.6865, "step": 676 }, { "epoch": 0.13, "learning_rate": 1.9501829200708627e-05, "loss": 0.7255, "step": 677 }, { "epoch": 0.13, "learning_rate": 1.9499920113933335e-05, "loss": 0.6856, "step": 678 }, { "epoch": 0.13, "learning_rate": 1.949800746996072e-05, "loss": 0.5618, "step": 679 }, { "epoch": 0.13, "learning_rate": 1.9496091269506968e-05, "loss": 0.6032, "step": 680 }, { "epoch": 0.13, "learning_rate": 1.9494171513289587e-05, "loss": 0.64, "step": 681 }, { "epoch": 0.13, "learning_rate": 1.9492248202027425e-05, "loss": 0.5501, "step": 682 }, { "epoch": 0.13, "learning_rate": 1.9490321336440646e-05, "loss": 0.7113, "step": 683 }, { "epoch": 0.13, "learning_rate": 1.9488390917250763e-05, "loss": 0.7672, "step": 684 }, { "epoch": 0.13, "learning_rate": 1.948645694518061e-05, "loss": 0.6695, "step": 685 }, { "epoch": 0.13, "learning_rate": 1.9484519420954356e-05, "loss": 0.6951, "step": 686 }, { "epoch": 0.13, "learning_rate": 1.948257834529749e-05, "loss": 0.7459, "step": 687 }, { "epoch": 0.13, "learning_rate": 1.9480633718936844e-05, "loss": 0.5781, "step": 688 }, { "epoch": 0.13, "learning_rate": 1.947868554260057e-05, "loss": 0.6469, "step": 689 }, { "epoch": 0.13, "learning_rate": 1.9476733817018155e-05, "loss": 0.7176, "step": 690 }, { "epoch": 0.13, "learning_rate": 1.9474778542920413e-05, "loss": 0.6698, "step": 691 }, { "epoch": 0.13, "learning_rate": 1.947281972103949e-05, "loss": 0.5304, "step": 692 }, { "epoch": 0.13, "learning_rate": 1.9470857352108848e-05, "loss": 0.5724, "step": 693 }, { "epoch": 0.13, "learning_rate": 1.946889143686329e-05, "loss": 0.5637, "step": 694 }, { "epoch": 0.13, "learning_rate": 1.9466921976038947e-05, "loss": 0.6861, "step": 695 }, { "epoch": 0.13, "learning_rate": 1.9464948970373272e-05, "loss": 0.5744, "step": 696 }, { "epoch": 0.13, "learning_rate": 1.9462972420605045e-05, "loss": 0.587, "step": 697 }, { "epoch": 0.13, "learning_rate": 1.9460992327474373e-05, "loss": 0.6041, "step": 698 }, { "epoch": 0.13, "learning_rate": 1.9459008691722697e-05, "loss": 0.6414, "step": 699 }, { "epoch": 0.13, "learning_rate": 1.945702151409278e-05, "loss": 0.7627, "step": 700 }, { "epoch": 0.13, "learning_rate": 1.9455030795328706e-05, "loss": 0.5796, "step": 701 }, { "epoch": 0.13, "learning_rate": 1.945303653617589e-05, "loss": 0.7071, "step": 702 }, { "epoch": 0.13, "learning_rate": 1.9451038737381078e-05, "loss": 0.5868, "step": 703 }, { "epoch": 0.13, "learning_rate": 1.944903739969233e-05, "loss": 0.5489, "step": 704 }, { "epoch": 0.13, "learning_rate": 1.944703252385904e-05, "loss": 0.6015, "step": 705 }, { "epoch": 0.13, "learning_rate": 1.9445024110631922e-05, "loss": 0.5639, "step": 706 }, { "epoch": 0.13, "learning_rate": 1.9443012160763014e-05, "loss": 0.5135, "step": 707 }, { "epoch": 0.13, "learning_rate": 1.9440996675005683e-05, "loss": 0.682, "step": 708 }, { "epoch": 0.13, "learning_rate": 1.9438977654114616e-05, "loss": 0.5945, "step": 709 }, { "epoch": 0.13, "learning_rate": 1.9436955098845832e-05, "loss": 0.6656, "step": 710 }, { "epoch": 0.13, "learning_rate": 1.9434929009956657e-05, "loss": 0.7992, "step": 711 }, { "epoch": 0.13, "learning_rate": 1.9432899388205756e-05, "loss": 0.7891, "step": 712 }, { "epoch": 0.13, "learning_rate": 1.9430866234353113e-05, "loss": 0.5022, "step": 713 }, { "epoch": 0.13, "learning_rate": 1.9428829549160024e-05, "loss": 0.6958, "step": 714 }, { "epoch": 0.14, "learning_rate": 1.942678933338912e-05, "loss": 0.666, "step": 715 }, { "epoch": 0.14, "learning_rate": 1.942474558780435e-05, "loss": 0.5324, "step": 716 }, { "epoch": 0.14, "learning_rate": 1.9422698313170982e-05, "loss": 0.6685, "step": 717 }, { "epoch": 0.14, "learning_rate": 1.942064751025561e-05, "loss": 0.6058, "step": 718 }, { "epoch": 0.14, "learning_rate": 1.9418593179826143e-05, "loss": 0.6526, "step": 719 }, { "epoch": 0.14, "learning_rate": 1.941653532265182e-05, "loss": 0.5976, "step": 720 }, { "epoch": 0.14, "learning_rate": 1.9414473939503193e-05, "loss": 0.6152, "step": 721 }, { "epoch": 0.14, "learning_rate": 1.9412409031152136e-05, "loss": 0.6327, "step": 722 }, { "epoch": 0.14, "learning_rate": 1.9410340598371845e-05, "loss": 0.6101, "step": 723 }, { "epoch": 0.14, "learning_rate": 1.9408268641936832e-05, "loss": 0.7224, "step": 724 }, { "epoch": 0.14, "learning_rate": 1.9406193162622934e-05, "loss": 0.7438, "step": 725 }, { "epoch": 0.14, "learning_rate": 1.94041141612073e-05, "loss": 0.6451, "step": 726 }, { "epoch": 0.14, "learning_rate": 1.9402031638468407e-05, "loss": 0.4848, "step": 727 }, { "epoch": 0.14, "learning_rate": 1.9399945595186037e-05, "loss": 0.6546, "step": 728 }, { "epoch": 0.14, "learning_rate": 1.9397856032141305e-05, "loss": 0.5252, "step": 729 }, { "epoch": 0.14, "learning_rate": 1.9395762950116634e-05, "loss": 0.6216, "step": 730 }, { "epoch": 0.14, "learning_rate": 1.9393666349895772e-05, "loss": 0.5868, "step": 731 }, { "epoch": 0.14, "learning_rate": 1.9391566232263776e-05, "loss": 0.7081, "step": 732 }, { "epoch": 0.14, "learning_rate": 1.9389462598007027e-05, "loss": 0.838, "step": 733 }, { "epoch": 0.14, "learning_rate": 1.938735544791322e-05, "loss": 0.6864, "step": 734 }, { "epoch": 0.14, "learning_rate": 1.9385244782771365e-05, "loss": 0.5882, "step": 735 }, { "epoch": 0.14, "learning_rate": 1.938313060337179e-05, "loss": 0.5637, "step": 736 }, { "epoch": 0.14, "learning_rate": 1.9381012910506146e-05, "loss": 0.5931, "step": 737 }, { "epoch": 0.14, "learning_rate": 1.937889170496738e-05, "loss": 0.455, "step": 738 }, { "epoch": 0.14, "learning_rate": 1.9376766987549775e-05, "loss": 0.5748, "step": 739 }, { "epoch": 0.14, "learning_rate": 1.937463875904892e-05, "loss": 0.5796, "step": 740 }, { "epoch": 0.14, "learning_rate": 1.9372507020261716e-05, "loss": 0.6434, "step": 741 }, { "epoch": 0.14, "learning_rate": 1.9370371771986388e-05, "loss": 0.5936, "step": 742 }, { "epoch": 0.14, "learning_rate": 1.9368233015022464e-05, "loss": 0.6566, "step": 743 }, { "epoch": 0.14, "learning_rate": 1.9366090750170793e-05, "loss": 0.4777, "step": 744 }, { "epoch": 0.14, "learning_rate": 1.9363944978233533e-05, "loss": 0.5978, "step": 745 }, { "epoch": 0.14, "learning_rate": 1.936179570001416e-05, "loss": 0.7114, "step": 746 }, { "epoch": 0.14, "learning_rate": 1.935964291631746e-05, "loss": 0.5794, "step": 747 }, { "epoch": 0.14, "learning_rate": 1.935748662794953e-05, "loss": 0.5913, "step": 748 }, { "epoch": 0.14, "learning_rate": 1.935532683571778e-05, "loss": 0.6191, "step": 749 }, { "epoch": 0.14, "learning_rate": 1.9353163540430942e-05, "loss": 0.5596, "step": 750 }, { "epoch": 0.14, "learning_rate": 1.9350996742899043e-05, "loss": 0.68, "step": 751 }, { "epoch": 0.14, "learning_rate": 1.934882644393343e-05, "loss": 0.6164, "step": 752 }, { "epoch": 0.14, "learning_rate": 1.9346652644346763e-05, "loss": 0.781, "step": 753 }, { "epoch": 0.14, "learning_rate": 1.934447534495301e-05, "loss": 0.6546, "step": 754 }, { "epoch": 0.14, "learning_rate": 1.934229454656745e-05, "loss": 0.66, "step": 755 }, { "epoch": 0.14, "learning_rate": 1.9340110250006672e-05, "loss": 0.9669, "step": 756 }, { "epoch": 0.14, "learning_rate": 1.933792245608857e-05, "loss": 0.6082, "step": 757 }, { "epoch": 0.14, "learning_rate": 1.933573116563236e-05, "loss": 0.5477, "step": 758 }, { "epoch": 0.14, "learning_rate": 1.933353637945856e-05, "loss": 0.6458, "step": 759 }, { "epoch": 0.14, "learning_rate": 1.9331338098388985e-05, "loss": 0.6161, "step": 760 }, { "epoch": 0.14, "learning_rate": 1.932913632324678e-05, "loss": 0.7132, "step": 761 }, { "epoch": 0.14, "learning_rate": 1.932693105485639e-05, "loss": 0.5627, "step": 762 }, { "epoch": 0.14, "learning_rate": 1.932472229404356e-05, "loss": 0.4438, "step": 763 }, { "epoch": 0.14, "learning_rate": 1.9322510041635353e-05, "loss": 0.8241, "step": 764 }, { "epoch": 0.14, "learning_rate": 1.932029429846014e-05, "loss": 0.5992, "step": 765 }, { "epoch": 0.14, "learning_rate": 1.9318075065347584e-05, "loss": 0.5083, "step": 766 }, { "epoch": 0.14, "learning_rate": 1.9315852343128677e-05, "loss": 0.6782, "step": 767 }, { "epoch": 0.15, "learning_rate": 1.9313626132635702e-05, "loss": 0.5897, "step": 768 }, { "epoch": 0.15, "learning_rate": 1.9311396434702247e-05, "loss": 0.6798, "step": 769 }, { "epoch": 0.15, "learning_rate": 1.9309163250163222e-05, "loss": 0.6842, "step": 770 }, { "epoch": 0.15, "learning_rate": 1.930692657985482e-05, "loss": 0.6186, "step": 771 }, { "epoch": 0.15, "learning_rate": 1.930468642461456e-05, "loss": 0.769, "step": 772 }, { "epoch": 0.15, "learning_rate": 1.9302442785281252e-05, "loss": 0.6532, "step": 773 }, { "epoch": 0.15, "learning_rate": 1.930019566269502e-05, "loss": 0.5093, "step": 774 }, { "epoch": 0.15, "learning_rate": 1.929794505769728e-05, "loss": 0.5877, "step": 775 }, { "epoch": 0.15, "learning_rate": 1.9295690971130763e-05, "loss": 0.5852, "step": 776 }, { "epoch": 0.15, "learning_rate": 1.9293433403839506e-05, "loss": 0.5103, "step": 777 }, { "epoch": 0.15, "learning_rate": 1.9291172356668843e-05, "loss": 0.6663, "step": 778 }, { "epoch": 0.15, "learning_rate": 1.9288907830465403e-05, "loss": 0.6642, "step": 779 }, { "epoch": 0.15, "learning_rate": 1.9286639826077128e-05, "loss": 0.4564, "step": 780 }, { "epoch": 0.15, "learning_rate": 1.9284368344353273e-05, "loss": 0.6327, "step": 781 }, { "epoch": 0.15, "learning_rate": 1.928209338614437e-05, "loss": 0.5594, "step": 782 }, { "epoch": 0.15, "learning_rate": 1.9279814952302266e-05, "loss": 0.6267, "step": 783 }, { "epoch": 0.15, "learning_rate": 1.927753304368012e-05, "loss": 0.4985, "step": 784 }, { "epoch": 0.15, "learning_rate": 1.9275247661132373e-05, "loss": 0.5941, "step": 785 }, { "epoch": 0.15, "learning_rate": 1.9272958805514775e-05, "loss": 0.6993, "step": 786 }, { "epoch": 0.15, "learning_rate": 1.9270666477684375e-05, "loss": 0.5463, "step": 787 }, { "epoch": 0.15, "learning_rate": 1.926837067849953e-05, "loss": 0.685, "step": 788 }, { "epoch": 0.15, "learning_rate": 1.926607140881989e-05, "loss": 0.5844, "step": 789 }, { "epoch": 0.15, "learning_rate": 1.9263768669506398e-05, "loss": 0.8152, "step": 790 }, { "epoch": 0.15, "learning_rate": 1.9261462461421308e-05, "loss": 0.5646, "step": 791 }, { "epoch": 0.15, "learning_rate": 1.9259152785428165e-05, "loss": 0.7259, "step": 792 }, { "epoch": 0.15, "learning_rate": 1.9256839642391822e-05, "loss": 0.5941, "step": 793 }, { "epoch": 0.15, "learning_rate": 1.9254523033178416e-05, "loss": 0.6839, "step": 794 }, { "epoch": 0.15, "learning_rate": 1.9252202958655393e-05, "loss": 0.4982, "step": 795 }, { "epoch": 0.15, "learning_rate": 1.9249879419691492e-05, "loss": 0.7201, "step": 796 }, { "epoch": 0.15, "learning_rate": 1.9247552417156758e-05, "loss": 0.7528, "step": 797 }, { "epoch": 0.15, "learning_rate": 1.9245221951922515e-05, "loss": 0.6746, "step": 798 }, { "epoch": 0.15, "learning_rate": 1.92428880248614e-05, "loss": 0.6949, "step": 799 }, { "epoch": 0.15, "learning_rate": 1.9240550636847335e-05, "loss": 0.6712, "step": 800 }, { "epoch": 0.15, "learning_rate": 1.923820978875555e-05, "loss": 0.5549, "step": 801 }, { "epoch": 0.15, "learning_rate": 1.9235865481462557e-05, "loss": 0.6585, "step": 802 }, { "epoch": 0.15, "learning_rate": 1.9233517715846176e-05, "loss": 0.5416, "step": 803 }, { "epoch": 0.15, "learning_rate": 1.9231166492785514e-05, "loss": 0.6663, "step": 804 }, { "epoch": 0.15, "learning_rate": 1.9228811813160972e-05, "loss": 0.5909, "step": 805 }, { "epoch": 0.15, "learning_rate": 1.9226453677854255e-05, "loss": 0.804, "step": 806 }, { "epoch": 0.15, "learning_rate": 1.9224092087748344e-05, "loss": 0.6092, "step": 807 }, { "epoch": 0.15, "learning_rate": 1.9221727043727534e-05, "loss": 0.6703, "step": 808 }, { "epoch": 0.15, "learning_rate": 1.92193585466774e-05, "loss": 0.6617, "step": 809 }, { "epoch": 0.15, "learning_rate": 1.9216986597484814e-05, "loss": 0.6041, "step": 810 }, { "epoch": 0.15, "learning_rate": 1.9214611197037943e-05, "loss": 0.6311, "step": 811 }, { "epoch": 0.15, "learning_rate": 1.9212232346226238e-05, "loss": 0.6619, "step": 812 }, { "epoch": 0.15, "learning_rate": 1.9209850045940452e-05, "loss": 0.6272, "step": 813 }, { "epoch": 0.15, "learning_rate": 1.9207464297072625e-05, "loss": 0.6699, "step": 814 }, { "epoch": 0.15, "learning_rate": 1.9205075100516087e-05, "loss": 0.5532, "step": 815 }, { "epoch": 0.15, "learning_rate": 1.9202682457165463e-05, "loss": 0.6767, "step": 816 }, { "epoch": 0.15, "learning_rate": 1.920028636791667e-05, "loss": 0.5421, "step": 817 }, { "epoch": 0.15, "learning_rate": 1.9197886833666903e-05, "loss": 0.5874, "step": 818 }, { "epoch": 0.15, "learning_rate": 1.9195483855314656e-05, "loss": 0.6839, "step": 819 }, { "epoch": 0.15, "learning_rate": 1.919307743375972e-05, "loss": 0.6249, "step": 820 }, { "epoch": 0.16, "learning_rate": 1.919066756990316e-05, "loss": 0.6728, "step": 821 }, { "epoch": 0.16, "learning_rate": 1.9188254264647338e-05, "loss": 0.7826, "step": 822 }, { "epoch": 0.16, "learning_rate": 1.918583751889591e-05, "loss": 0.5555, "step": 823 }, { "epoch": 0.16, "learning_rate": 1.9183417333553812e-05, "loss": 0.6756, "step": 824 }, { "epoch": 0.16, "learning_rate": 1.9180993709527268e-05, "loss": 0.6456, "step": 825 }, { "epoch": 0.16, "learning_rate": 1.9178566647723787e-05, "loss": 0.568, "step": 826 }, { "epoch": 0.16, "learning_rate": 1.9176136149052184e-05, "loss": 0.7508, "step": 827 }, { "epoch": 0.16, "learning_rate": 1.9173702214422533e-05, "loss": 0.813, "step": 828 }, { "epoch": 0.16, "learning_rate": 1.9171264844746215e-05, "loss": 0.7126, "step": 829 }, { "epoch": 0.16, "learning_rate": 1.916882404093589e-05, "loss": 0.6835, "step": 830 }, { "epoch": 0.16, "learning_rate": 1.916637980390551e-05, "loss": 0.6673, "step": 831 }, { "epoch": 0.16, "learning_rate": 1.9163932134570298e-05, "loss": 0.7244, "step": 832 }, { "epoch": 0.16, "learning_rate": 1.9161481033846773e-05, "loss": 0.6283, "step": 833 }, { "epoch": 0.16, "learning_rate": 1.915902650265274e-05, "loss": 0.6931, "step": 834 }, { "epoch": 0.16, "learning_rate": 1.9156568541907293e-05, "loss": 0.4884, "step": 835 }, { "epoch": 0.16, "learning_rate": 1.9154107152530792e-05, "loss": 0.6362, "step": 836 }, { "epoch": 0.16, "learning_rate": 1.9151642335444894e-05, "loss": 0.6063, "step": 837 }, { "epoch": 0.16, "learning_rate": 1.914917409157254e-05, "loss": 0.6472, "step": 838 }, { "epoch": 0.16, "learning_rate": 1.9146702421837952e-05, "loss": 0.4123, "step": 839 }, { "epoch": 0.16, "learning_rate": 1.914422732716663e-05, "loss": 0.6746, "step": 840 }, { "epoch": 0.16, "learning_rate": 1.9141748808485367e-05, "loss": 0.6176, "step": 841 }, { "epoch": 0.16, "learning_rate": 1.9139266866722227e-05, "loss": 0.5804, "step": 842 }, { "epoch": 0.16, "learning_rate": 1.913678150280656e-05, "loss": 0.6943, "step": 843 }, { "epoch": 0.16, "learning_rate": 1.9134292717669002e-05, "loss": 0.6927, "step": 844 }, { "epoch": 0.16, "learning_rate": 1.9131800512241466e-05, "loss": 0.8294, "step": 845 }, { "epoch": 0.16, "learning_rate": 1.912930488745714e-05, "loss": 0.5905, "step": 846 }, { "epoch": 0.16, "learning_rate": 1.9126805844250507e-05, "loss": 0.8265, "step": 847 }, { "epoch": 0.16, "learning_rate": 1.9124303383557316e-05, "loss": 0.5976, "step": 848 }, { "epoch": 0.16, "learning_rate": 1.91217975063146e-05, "loss": 0.6046, "step": 849 }, { "epoch": 0.16, "learning_rate": 1.911928821346067e-05, "loss": 0.6501, "step": 850 }, { "epoch": 0.16, "learning_rate": 1.9116775505935122e-05, "loss": 0.6571, "step": 851 }, { "epoch": 0.16, "learning_rate": 1.911425938467883e-05, "loss": 0.8515, "step": 852 }, { "epoch": 0.16, "learning_rate": 1.9111739850633934e-05, "loss": 0.5963, "step": 853 }, { "epoch": 0.16, "learning_rate": 1.910921690474387e-05, "loss": 0.5839, "step": 854 }, { "epoch": 0.16, "learning_rate": 1.9106690547953336e-05, "loss": 0.5777, "step": 855 }, { "epoch": 0.16, "learning_rate": 1.910416078120832e-05, "loss": 0.6018, "step": 856 }, { "epoch": 0.16, "learning_rate": 1.910162760545607e-05, "loss": 0.5631, "step": 857 }, { "epoch": 0.16, "learning_rate": 1.9099091021645134e-05, "loss": 0.7011, "step": 858 }, { "epoch": 0.16, "learning_rate": 1.9096551030725312e-05, "loss": 0.6195, "step": 859 }, { "epoch": 0.16, "learning_rate": 1.9094007633647695e-05, "loss": 0.6846, "step": 860 }, { "epoch": 0.16, "learning_rate": 1.909146083136465e-05, "loss": 0.6347, "step": 861 }, { "epoch": 0.16, "learning_rate": 1.9088910624829805e-05, "loss": 0.736, "step": 862 }, { "epoch": 0.16, "learning_rate": 1.9086357014998077e-05, "loss": 0.7483, "step": 863 }, { "epoch": 0.16, "learning_rate": 1.908380000282565e-05, "loss": 0.7445, "step": 864 }, { "epoch": 0.16, "learning_rate": 1.9081239589269992e-05, "loss": 0.7819, "step": 865 }, { "epoch": 0.16, "learning_rate": 1.9078675775289828e-05, "loss": 0.6087, "step": 866 }, { "epoch": 0.16, "learning_rate": 1.9076108561845167e-05, "loss": 0.6911, "step": 867 }, { "epoch": 0.16, "learning_rate": 1.9073537949897293e-05, "loss": 0.6111, "step": 868 }, { "epoch": 0.16, "learning_rate": 1.9070963940408758e-05, "loss": 0.5844, "step": 869 }, { "epoch": 0.16, "learning_rate": 1.9068386534343383e-05, "loss": 0.5167, "step": 870 }, { "epoch": 0.16, "learning_rate": 1.906580573266627e-05, "loss": 0.5836, "step": 871 }, { "epoch": 0.16, "learning_rate": 1.906322153634378e-05, "loss": 0.6087, "step": 872 }, { "epoch": 0.16, "learning_rate": 1.906063394634356e-05, "loss": 0.5723, "step": 873 }, { "epoch": 0.17, "learning_rate": 1.9058042963634518e-05, "loss": 0.4785, "step": 874 }, { "epoch": 0.17, "learning_rate": 1.905544858918683e-05, "loss": 0.6577, "step": 875 }, { "epoch": 0.17, "learning_rate": 1.9052850823971952e-05, "loss": 0.5798, "step": 876 }, { "epoch": 0.17, "learning_rate": 1.90502496689626e-05, "loss": 0.6885, "step": 877 }, { "epoch": 0.17, "learning_rate": 1.9047645125132767e-05, "loss": 0.6114, "step": 878 }, { "epoch": 0.17, "learning_rate": 1.904503719345771e-05, "loss": 0.7577, "step": 879 }, { "epoch": 0.17, "learning_rate": 1.9042425874913952e-05, "loss": 0.5028, "step": 880 }, { "epoch": 0.17, "learning_rate": 1.9039811170479296e-05, "loss": 0.5928, "step": 881 }, { "epoch": 0.17, "learning_rate": 1.9037193081132796e-05, "loss": 0.6201, "step": 882 }, { "epoch": 0.17, "learning_rate": 1.9034571607854793e-05, "loss": 0.6095, "step": 883 }, { "epoch": 0.17, "learning_rate": 1.9031946751626872e-05, "loss": 0.7417, "step": 884 }, { "epoch": 0.17, "learning_rate": 1.9029318513431908e-05, "loss": 0.5673, "step": 885 }, { "epoch": 0.17, "learning_rate": 1.9026686894254025e-05, "loss": 0.719, "step": 886 }, { "epoch": 0.17, "learning_rate": 1.902405189507862e-05, "loss": 0.6307, "step": 887 }, { "epoch": 0.17, "learning_rate": 1.902141351689236e-05, "loss": 0.746, "step": 888 }, { "epoch": 0.17, "learning_rate": 1.901877176068317e-05, "loss": 0.617, "step": 889 }, { "epoch": 0.17, "learning_rate": 1.901612662744024e-05, "loss": 0.5678, "step": 890 }, { "epoch": 0.17, "learning_rate": 1.901347811815403e-05, "loss": 0.7067, "step": 891 }, { "epoch": 0.17, "learning_rate": 1.9010826233816258e-05, "loss": 0.6585, "step": 892 }, { "epoch": 0.17, "learning_rate": 1.900817097541991e-05, "loss": 0.6748, "step": 893 }, { "epoch": 0.17, "learning_rate": 1.9005512343959235e-05, "loss": 0.6226, "step": 894 }, { "epoch": 0.17, "learning_rate": 1.9002850340429745e-05, "loss": 0.5234, "step": 895 }, { "epoch": 0.17, "learning_rate": 1.900018496582821e-05, "loss": 0.642, "step": 896 }, { "epoch": 0.17, "learning_rate": 1.899751622115267e-05, "loss": 0.7369, "step": 897 }, { "epoch": 0.17, "learning_rate": 1.8994844107402423e-05, "loss": 0.7871, "step": 898 }, { "epoch": 0.17, "learning_rate": 1.8992168625578023e-05, "loss": 0.494, "step": 899 }, { "epoch": 0.17, "learning_rate": 1.89894897766813e-05, "loss": 0.6086, "step": 900 }, { "epoch": 0.17, "learning_rate": 1.8986807561715325e-05, "loss": 0.6571, "step": 901 }, { "epoch": 0.17, "learning_rate": 1.8984121981684448e-05, "loss": 0.6841, "step": 902 }, { "epoch": 0.17, "learning_rate": 1.8981433037594268e-05, "loss": 0.6146, "step": 903 }, { "epoch": 0.17, "learning_rate": 1.8978740730451647e-05, "loss": 0.6822, "step": 904 }, { "epoch": 0.17, "learning_rate": 1.8976045061264706e-05, "loss": 0.657, "step": 905 }, { "epoch": 0.17, "learning_rate": 1.8973346031042825e-05, "loss": 0.6619, "step": 906 }, { "epoch": 0.17, "learning_rate": 1.8970643640796642e-05, "loss": 0.6548, "step": 907 }, { "epoch": 0.17, "learning_rate": 1.8967937891538054e-05, "loss": 0.8225, "step": 908 }, { "epoch": 0.17, "learning_rate": 1.8965228784280212e-05, "loss": 0.5337, "step": 909 }, { "epoch": 0.17, "learning_rate": 1.896251632003753e-05, "loss": 0.6117, "step": 910 }, { "epoch": 0.17, "learning_rate": 1.895980049982568e-05, "loss": 0.7221, "step": 911 }, { "epoch": 0.17, "learning_rate": 1.895708132466158e-05, "loss": 0.5542, "step": 912 }, { "epoch": 0.17, "learning_rate": 1.8954358795563418e-05, "loss": 0.6472, "step": 913 }, { "epoch": 0.17, "learning_rate": 1.8951632913550625e-05, "loss": 0.7083, "step": 914 }, { "epoch": 0.17, "learning_rate": 1.89489036796439e-05, "loss": 0.8085, "step": 915 }, { "epoch": 0.17, "learning_rate": 1.894617109486519e-05, "loss": 0.6214, "step": 916 }, { "epoch": 0.17, "learning_rate": 1.8943435160237693e-05, "loss": 0.4737, "step": 917 }, { "epoch": 0.17, "learning_rate": 1.894069587678587e-05, "loss": 0.8245, "step": 918 }, { "epoch": 0.17, "learning_rate": 1.893795324553543e-05, "loss": 0.5847, "step": 919 }, { "epoch": 0.17, "learning_rate": 1.893520726751334e-05, "loss": 0.8312, "step": 920 }, { "epoch": 0.17, "learning_rate": 1.8932457943747814e-05, "loss": 0.6219, "step": 921 }, { "epoch": 0.17, "learning_rate": 1.8929705275268326e-05, "loss": 0.6198, "step": 922 }, { "epoch": 0.17, "learning_rate": 1.8926949263105595e-05, "loss": 0.6145, "step": 923 }, { "epoch": 0.17, "learning_rate": 1.89241899082916e-05, "loss": 0.4266, "step": 924 }, { "epoch": 0.17, "learning_rate": 1.8921427211859568e-05, "loss": 0.7579, "step": 925 }, { "epoch": 0.17, "learning_rate": 1.8918661174843974e-05, "loss": 0.635, "step": 926 }, { "epoch": 0.18, "learning_rate": 1.8915891798280545e-05, "loss": 0.5516, "step": 927 }, { "epoch": 0.18, "learning_rate": 1.8913119083206264e-05, "loss": 0.737, "step": 928 }, { "epoch": 0.18, "learning_rate": 1.8910343030659355e-05, "loss": 0.6503, "step": 929 }, { "epoch": 0.18, "learning_rate": 1.8907563641679303e-05, "loss": 0.5896, "step": 930 }, { "epoch": 0.18, "learning_rate": 1.8904780917306837e-05, "loss": 0.5248, "step": 931 }, { "epoch": 0.18, "learning_rate": 1.890199485858393e-05, "loss": 0.5724, "step": 932 }, { "epoch": 0.18, "learning_rate": 1.88992054665538e-05, "loss": 0.6786, "step": 933 }, { "epoch": 0.18, "learning_rate": 1.8896412742260935e-05, "loss": 0.7046, "step": 934 }, { "epoch": 0.18, "learning_rate": 1.8893616686751053e-05, "loss": 0.6563, "step": 935 }, { "epoch": 0.18, "learning_rate": 1.8890817301071124e-05, "loss": 0.5025, "step": 936 }, { "epoch": 0.18, "learning_rate": 1.8888014586269353e-05, "loss": 0.6277, "step": 937 }, { "epoch": 0.18, "learning_rate": 1.8885208543395212e-05, "loss": 0.6108, "step": 938 }, { "epoch": 0.18, "learning_rate": 1.888239917349941e-05, "loss": 0.6856, "step": 939 }, { "epoch": 0.18, "learning_rate": 1.88795864776339e-05, "loss": 0.632, "step": 940 }, { "epoch": 0.18, "learning_rate": 1.887677045685188e-05, "loss": 0.9351, "step": 941 }, { "epoch": 0.18, "learning_rate": 1.8873951112207795e-05, "loss": 0.5587, "step": 942 }, { "epoch": 0.18, "learning_rate": 1.8871128444757335e-05, "loss": 0.783, "step": 943 }, { "epoch": 0.18, "learning_rate": 1.8868302455557435e-05, "loss": 0.4008, "step": 944 }, { "epoch": 0.18, "learning_rate": 1.8865473145666272e-05, "loss": 0.7884, "step": 945 }, { "epoch": 0.18, "learning_rate": 1.8862640516143264e-05, "loss": 0.5406, "step": 946 }, { "epoch": 0.18, "learning_rate": 1.8859804568049083e-05, "loss": 0.6987, "step": 947 }, { "epoch": 0.18, "learning_rate": 1.8856965302445623e-05, "loss": 0.6288, "step": 948 }, { "epoch": 0.18, "learning_rate": 1.8854122720396043e-05, "loss": 0.6703, "step": 949 }, { "epoch": 0.18, "learning_rate": 1.885127682296473e-05, "loss": 0.6301, "step": 950 }, { "epoch": 0.18, "learning_rate": 1.8848427611217316e-05, "loss": 0.6698, "step": 951 }, { "epoch": 0.18, "learning_rate": 1.8845575086220672e-05, "loss": 0.7547, "step": 952 }, { "epoch": 0.18, "learning_rate": 1.8842719249042913e-05, "loss": 0.6052, "step": 953 }, { "epoch": 0.18, "learning_rate": 1.8839860100753396e-05, "loss": 0.613, "step": 954 }, { "epoch": 0.18, "learning_rate": 1.883699764242271e-05, "loss": 0.6204, "step": 955 }, { "epoch": 0.18, "learning_rate": 1.8834131875122692e-05, "loss": 0.607, "step": 956 }, { "epoch": 0.18, "learning_rate": 1.8831262799926412e-05, "loss": 0.792, "step": 957 }, { "epoch": 0.18, "learning_rate": 1.882839041790818e-05, "loss": 0.559, "step": 958 }, { "epoch": 0.18, "learning_rate": 1.882551473014355e-05, "loss": 0.6348, "step": 959 }, { "epoch": 0.18, "learning_rate": 1.88226357377093e-05, "loss": 0.5667, "step": 960 }, { "epoch": 0.18, "learning_rate": 1.8819753441683464e-05, "loss": 0.5702, "step": 961 }, { "epoch": 0.18, "learning_rate": 1.8816867843145296e-05, "loss": 0.6723, "step": 962 }, { "epoch": 0.18, "learning_rate": 1.8813978943175296e-05, "loss": 0.6044, "step": 963 }, { "epoch": 0.18, "learning_rate": 1.88110867428552e-05, "loss": 0.7332, "step": 964 }, { "epoch": 0.18, "learning_rate": 1.8808191243267977e-05, "loss": 0.5585, "step": 965 }, { "epoch": 0.18, "learning_rate": 1.8805292445497828e-05, "loss": 0.6491, "step": 966 }, { "epoch": 0.18, "learning_rate": 1.88023903506302e-05, "loss": 0.5677, "step": 967 }, { "epoch": 0.18, "learning_rate": 1.8799484959751767e-05, "loss": 0.6494, "step": 968 }, { "epoch": 0.18, "learning_rate": 1.879657627395043e-05, "loss": 0.5456, "step": 969 }, { "epoch": 0.18, "learning_rate": 1.879366429431534e-05, "loss": 0.5685, "step": 970 }, { "epoch": 0.18, "learning_rate": 1.8790749021936873e-05, "loss": 0.5719, "step": 971 }, { "epoch": 0.18, "learning_rate": 1.8787830457906633e-05, "loss": 0.5967, "step": 972 }, { "epoch": 0.18, "learning_rate": 1.8784908603317467e-05, "loss": 0.4431, "step": 973 }, { "epoch": 0.18, "learning_rate": 1.8781983459263443e-05, "loss": 0.5898, "step": 974 }, { "epoch": 0.18, "learning_rate": 1.877905502683987e-05, "loss": 0.7605, "step": 975 }, { "epoch": 0.18, "learning_rate": 1.8776123307143285e-05, "loss": 0.5926, "step": 976 }, { "epoch": 0.18, "learning_rate": 1.8773188301271458e-05, "loss": 0.8481, "step": 977 }, { "epoch": 0.18, "learning_rate": 1.877025001032338e-05, "loss": 0.6704, "step": 978 }, { "epoch": 0.18, "learning_rate": 1.8767308435399286e-05, "loss": 0.6114, "step": 979 }, { "epoch": 0.19, "learning_rate": 1.8764363577600628e-05, "loss": 0.5793, "step": 980 }, { "epoch": 0.19, "learning_rate": 1.87614154380301e-05, "loss": 0.5804, "step": 981 }, { "epoch": 0.19, "learning_rate": 1.8758464017791614e-05, "loss": 0.7032, "step": 982 }, { "epoch": 0.19, "learning_rate": 1.8755509317990315e-05, "loss": 0.6574, "step": 983 }, { "epoch": 0.19, "learning_rate": 1.8752551339732576e-05, "loss": 0.7285, "step": 984 }, { "epoch": 0.19, "learning_rate": 1.8749590084125998e-05, "loss": 0.6979, "step": 985 }, { "epoch": 0.19, "learning_rate": 1.8746625552279403e-05, "loss": 0.4682, "step": 986 }, { "epoch": 0.19, "learning_rate": 1.874365774530285e-05, "loss": 0.5839, "step": 987 }, { "epoch": 0.19, "learning_rate": 1.8740686664307622e-05, "loss": 0.6532, "step": 988 }, { "epoch": 0.19, "learning_rate": 1.873771231040622e-05, "loss": 0.7945, "step": 989 }, { "epoch": 0.19, "learning_rate": 1.8734734684712376e-05, "loss": 0.5067, "step": 990 }, { "epoch": 0.19, "learning_rate": 1.8731753788341052e-05, "loss": 0.6928, "step": 991 }, { "epoch": 0.19, "learning_rate": 1.8728769622408423e-05, "loss": 0.7042, "step": 992 }, { "epoch": 0.19, "learning_rate": 1.87257821880319e-05, "loss": 0.6096, "step": 993 }, { "epoch": 0.19, "learning_rate": 1.872279148633011e-05, "loss": 0.579, "step": 994 }, { "epoch": 0.19, "learning_rate": 1.871979751842291e-05, "loss": 0.5624, "step": 995 }, { "epoch": 0.19, "learning_rate": 1.871680028543137e-05, "loss": 0.6938, "step": 996 }, { "epoch": 0.19, "learning_rate": 1.8713799788477794e-05, "loss": 0.7494, "step": 997 }, { "epoch": 0.19, "learning_rate": 1.8710796028685702e-05, "loss": 0.6804, "step": 998 }, { "epoch": 0.19, "learning_rate": 1.8707789007179835e-05, "loss": 0.5586, "step": 999 }, { "epoch": 0.19, "learning_rate": 1.8704778725086157e-05, "loss": 0.6211, "step": 1000 }, { "epoch": 0.19, "learning_rate": 1.8701765183531855e-05, "loss": 0.7616, "step": 1001 }, { "epoch": 0.19, "learning_rate": 1.8698748383645334e-05, "loss": 0.5837, "step": 1002 }, { "epoch": 0.19, "learning_rate": 1.8695728326556217e-05, "loss": 0.6087, "step": 1003 }, { "epoch": 0.19, "learning_rate": 1.8692705013395354e-05, "loss": 0.724, "step": 1004 }, { "epoch": 0.19, "learning_rate": 1.8689678445294807e-05, "loss": 0.6206, "step": 1005 }, { "epoch": 0.19, "learning_rate": 1.8686648623387856e-05, "loss": 0.5135, "step": 1006 }, { "epoch": 0.19, "learning_rate": 1.8683615548809007e-05, "loss": 0.5427, "step": 1007 }, { "epoch": 0.19, "learning_rate": 1.868057922269398e-05, "loss": 0.5491, "step": 1008 }, { "epoch": 0.19, "learning_rate": 1.8677539646179706e-05, "loss": 0.5754, "step": 1009 }, { "epoch": 0.19, "learning_rate": 1.8674496820404347e-05, "loss": 0.5521, "step": 1010 }, { "epoch": 0.19, "learning_rate": 1.867145074650727e-05, "loss": 0.7253, "step": 1011 }, { "epoch": 0.19, "learning_rate": 1.866840142562906e-05, "loss": 0.7106, "step": 1012 }, { "epoch": 0.19, "learning_rate": 1.8665348858911524e-05, "loss": 0.7113, "step": 1013 }, { "epoch": 0.19, "learning_rate": 1.8662293047497677e-05, "loss": 0.5261, "step": 1014 }, { "epoch": 0.19, "learning_rate": 1.8659233992531753e-05, "loss": 0.6985, "step": 1015 }, { "epoch": 0.19, "learning_rate": 1.86561716951592e-05, "loss": 0.7584, "step": 1016 }, { "epoch": 0.19, "learning_rate": 1.865310615652668e-05, "loss": 0.566, "step": 1017 }, { "epoch": 0.19, "learning_rate": 1.8650037377782072e-05, "loss": 0.7234, "step": 1018 }, { "epoch": 0.19, "learning_rate": 1.864696536007446e-05, "loss": 0.7392, "step": 1019 }, { "epoch": 0.19, "learning_rate": 1.8643890104554144e-05, "loss": 0.5283, "step": 1020 }, { "epoch": 0.19, "learning_rate": 1.8640811612372643e-05, "loss": 0.6838, "step": 1021 }, { "epoch": 0.19, "learning_rate": 1.863772988468268e-05, "loss": 0.6075, "step": 1022 }, { "epoch": 0.19, "learning_rate": 1.8634644922638193e-05, "loss": 0.6556, "step": 1023 }, { "epoch": 0.19, "learning_rate": 1.863155672739433e-05, "loss": 0.7148, "step": 1024 }, { "epoch": 0.19, "learning_rate": 1.8628465300107453e-05, "loss": 0.7118, "step": 1025 }, { "epoch": 0.19, "learning_rate": 1.862537064193513e-05, "loss": 0.5628, "step": 1026 }, { "epoch": 0.19, "learning_rate": 1.862227275403614e-05, "loss": 0.3998, "step": 1027 }, { "epoch": 0.19, "learning_rate": 1.8619171637570468e-05, "loss": 0.6068, "step": 1028 }, { "epoch": 0.19, "learning_rate": 1.8616067293699316e-05, "loss": 0.6896, "step": 1029 }, { "epoch": 0.19, "learning_rate": 1.861295972358509e-05, "loss": 0.6964, "step": 1030 }, { "epoch": 0.19, "learning_rate": 1.8609848928391398e-05, "loss": 0.646, "step": 1031 }, { "epoch": 0.19, "learning_rate": 1.8606734909283075e-05, "loss": 0.7398, "step": 1032 }, { "epoch": 0.2, "learning_rate": 1.8603617667426132e-05, "loss": 0.5592, "step": 1033 }, { "epoch": 0.2, "learning_rate": 1.860049720398782e-05, "loss": 0.6808, "step": 1034 }, { "epoch": 0.2, "learning_rate": 1.8597373520136565e-05, "loss": 0.6682, "step": 1035 }, { "epoch": 0.2, "learning_rate": 1.8594246617042033e-05, "loss": 0.6005, "step": 1036 }, { "epoch": 0.2, "learning_rate": 1.8591116495875065e-05, "loss": 0.6298, "step": 1037 }, { "epoch": 0.2, "learning_rate": 1.858798315780772e-05, "loss": 0.5783, "step": 1038 }, { "epoch": 0.2, "learning_rate": 1.858484660401327e-05, "loss": 0.6213, "step": 1039 }, { "epoch": 0.2, "learning_rate": 1.8581706835666167e-05, "loss": 0.6614, "step": 1040 }, { "epoch": 0.2, "learning_rate": 1.8578563853942094e-05, "loss": 0.5615, "step": 1041 }, { "epoch": 0.2, "learning_rate": 1.8575417660017917e-05, "loss": 0.7991, "step": 1042 }, { "epoch": 0.2, "learning_rate": 1.8572268255071718e-05, "loss": 0.5536, "step": 1043 }, { "epoch": 0.2, "learning_rate": 1.856911564028277e-05, "loss": 0.6453, "step": 1044 }, { "epoch": 0.2, "learning_rate": 1.8565959816831563e-05, "loss": 0.641, "step": 1045 }, { "epoch": 0.2, "learning_rate": 1.856280078589977e-05, "loss": 0.6731, "step": 1046 }, { "epoch": 0.2, "learning_rate": 1.8559638548670276e-05, "loss": 0.4693, "step": 1047 }, { "epoch": 0.2, "learning_rate": 1.855647310632717e-05, "loss": 0.838, "step": 1048 }, { "epoch": 0.2, "learning_rate": 1.855330446005573e-05, "loss": 0.662, "step": 1049 }, { "epoch": 0.2, "learning_rate": 1.855013261104244e-05, "loss": 0.5241, "step": 1050 }, { "epoch": 0.2, "learning_rate": 1.854695756047499e-05, "loss": 0.7165, "step": 1051 }, { "epoch": 0.2, "learning_rate": 1.854377930954225e-05, "loss": 0.6542, "step": 1052 }, { "epoch": 0.2, "learning_rate": 1.8540597859434314e-05, "loss": 0.6379, "step": 1053 }, { "epoch": 0.2, "learning_rate": 1.8537413211342447e-05, "loss": 0.5853, "step": 1054 }, { "epoch": 0.2, "learning_rate": 1.8534225366459133e-05, "loss": 0.6787, "step": 1055 }, { "epoch": 0.2, "learning_rate": 1.853103432597804e-05, "loss": 0.5619, "step": 1056 }, { "epoch": 0.2, "learning_rate": 1.8527840091094038e-05, "loss": 0.5366, "step": 1057 }, { "epoch": 0.2, "learning_rate": 1.8524642663003195e-05, "loss": 0.546, "step": 1058 }, { "epoch": 0.2, "learning_rate": 1.8521442042902775e-05, "loss": 0.6336, "step": 1059 }, { "epoch": 0.2, "learning_rate": 1.851823823199122e-05, "loss": 0.7357, "step": 1060 }, { "epoch": 0.2, "learning_rate": 1.851503123146819e-05, "loss": 0.6964, "step": 1061 }, { "epoch": 0.2, "learning_rate": 1.8511821042534537e-05, "loss": 0.5135, "step": 1062 }, { "epoch": 0.2, "learning_rate": 1.8508607666392285e-05, "loss": 0.5448, "step": 1063 }, { "epoch": 0.2, "learning_rate": 1.8505391104244683e-05, "loss": 0.5008, "step": 1064 }, { "epoch": 0.2, "learning_rate": 1.8502171357296144e-05, "loss": 0.6437, "step": 1065 }, { "epoch": 0.2, "learning_rate": 1.849894842675229e-05, "loss": 0.4832, "step": 1066 }, { "epoch": 0.2, "learning_rate": 1.849572231381993e-05, "loss": 0.7402, "step": 1067 }, { "epoch": 0.2, "learning_rate": 1.849249301970707e-05, "loss": 0.6081, "step": 1068 }, { "epoch": 0.2, "learning_rate": 1.84892605456229e-05, "loss": 0.6185, "step": 1069 }, { "epoch": 0.2, "learning_rate": 1.8486024892777803e-05, "loss": 0.7121, "step": 1070 }, { "epoch": 0.2, "learning_rate": 1.8482786062383353e-05, "loss": 0.7054, "step": 1071 }, { "epoch": 0.2, "learning_rate": 1.8479544055652313e-05, "loss": 0.507, "step": 1072 }, { "epoch": 0.2, "learning_rate": 1.847629887379864e-05, "loss": 0.6318, "step": 1073 }, { "epoch": 0.2, "learning_rate": 1.847305051803747e-05, "loss": 0.6549, "step": 1074 }, { "epoch": 0.2, "learning_rate": 1.8469798989585145e-05, "loss": 0.5394, "step": 1075 }, { "epoch": 0.2, "learning_rate": 1.8466544289659166e-05, "loss": 0.6242, "step": 1076 }, { "epoch": 0.2, "learning_rate": 1.8463286419478256e-05, "loss": 0.5879, "step": 1077 }, { "epoch": 0.2, "learning_rate": 1.84600253802623e-05, "loss": 0.6402, "step": 1078 }, { "epoch": 0.2, "learning_rate": 1.8456761173232376e-05, "loss": 0.7443, "step": 1079 }, { "epoch": 0.2, "learning_rate": 1.845349379961075e-05, "loss": 0.7271, "step": 1080 }, { "epoch": 0.2, "learning_rate": 1.845022326062088e-05, "loss": 0.6865, "step": 1081 }, { "epoch": 0.2, "learning_rate": 1.8446949557487398e-05, "loss": 0.6866, "step": 1082 }, { "epoch": 0.2, "learning_rate": 1.8443672691436127e-05, "loss": 0.605, "step": 1083 }, { "epoch": 0.2, "learning_rate": 1.844039266369407e-05, "loss": 0.5895, "step": 1084 }, { "epoch": 0.2, "learning_rate": 1.8437109475489417e-05, "loss": 0.6089, "step": 1085 }, { "epoch": 0.21, "learning_rate": 1.843382312805155e-05, "loss": 0.7216, "step": 1086 }, { "epoch": 0.21, "learning_rate": 1.843053362261102e-05, "loss": 0.527, "step": 1087 }, { "epoch": 0.21, "learning_rate": 1.842724096039956e-05, "loss": 0.6968, "step": 1088 }, { "epoch": 0.21, "learning_rate": 1.8423945142650094e-05, "loss": 0.6241, "step": 1089 }, { "epoch": 0.21, "learning_rate": 1.842064617059673e-05, "loss": 0.6152, "step": 1090 }, { "epoch": 0.21, "learning_rate": 1.8417344045474747e-05, "loss": 0.5978, "step": 1091 }, { "epoch": 0.21, "learning_rate": 1.841403876852061e-05, "loss": 0.5955, "step": 1092 }, { "epoch": 0.21, "learning_rate": 1.841073034097196e-05, "loss": 0.5824, "step": 1093 }, { "epoch": 0.21, "learning_rate": 1.8407418764067627e-05, "loss": 0.625, "step": 1094 }, { "epoch": 0.21, "learning_rate": 1.8404104039047612e-05, "loss": 0.5729, "step": 1095 }, { "epoch": 0.21, "learning_rate": 1.8400786167153092e-05, "loss": 0.7498, "step": 1096 }, { "epoch": 0.21, "learning_rate": 1.8397465149626438e-05, "loss": 0.8384, "step": 1097 }, { "epoch": 0.21, "learning_rate": 1.8394140987711178e-05, "loss": 0.5438, "step": 1098 }, { "epoch": 0.21, "learning_rate": 1.839081368265203e-05, "loss": 0.6708, "step": 1099 }, { "epoch": 0.21, "learning_rate": 1.8387483235694893e-05, "loss": 0.587, "step": 1100 }, { "epoch": 0.21, "learning_rate": 1.8384149648086832e-05, "loss": 0.6099, "step": 1101 }, { "epoch": 0.21, "learning_rate": 1.8380812921076086e-05, "loss": 0.5625, "step": 1102 }, { "epoch": 0.21, "learning_rate": 1.8377473055912087e-05, "loss": 0.6705, "step": 1103 }, { "epoch": 0.21, "learning_rate": 1.8374130053845423e-05, "loss": 0.5287, "step": 1104 }, { "epoch": 0.21, "learning_rate": 1.8370783916127868e-05, "loss": 0.5022, "step": 1105 }, { "epoch": 0.21, "learning_rate": 1.8367434644012366e-05, "loss": 0.5353, "step": 1106 }, { "epoch": 0.21, "learning_rate": 1.836408223875303e-05, "loss": 0.5345, "step": 1107 }, { "epoch": 0.21, "learning_rate": 1.836072670160516e-05, "loss": 0.8513, "step": 1108 }, { "epoch": 0.21, "learning_rate": 1.8357368033825214e-05, "loss": 0.6044, "step": 1109 }, { "epoch": 0.21, "learning_rate": 1.8354006236670825e-05, "loss": 0.497, "step": 1110 }, { "epoch": 0.21, "learning_rate": 1.8350641311400813e-05, "loss": 0.6852, "step": 1111 }, { "epoch": 0.21, "learning_rate": 1.8347273259275144e-05, "loss": 0.6819, "step": 1112 }, { "epoch": 0.21, "learning_rate": 1.8343902081554976e-05, "loss": 0.6192, "step": 1113 }, { "epoch": 0.21, "learning_rate": 1.8340527779502628e-05, "loss": 0.6261, "step": 1114 }, { "epoch": 0.21, "learning_rate": 1.833715035438159e-05, "loss": 0.6714, "step": 1115 }, { "epoch": 0.21, "learning_rate": 1.8333769807456524e-05, "loss": 0.6632, "step": 1116 }, { "epoch": 0.21, "learning_rate": 1.8330386139993253e-05, "loss": 0.6037, "step": 1117 }, { "epoch": 0.21, "learning_rate": 1.832699935325878e-05, "loss": 0.7395, "step": 1118 }, { "epoch": 0.21, "learning_rate": 1.832360944852127e-05, "loss": 0.6428, "step": 1119 }, { "epoch": 0.21, "learning_rate": 1.8320216427050053e-05, "loss": 0.5531, "step": 1120 }, { "epoch": 0.21, "learning_rate": 1.831682029011563e-05, "loss": 0.7085, "step": 1121 }, { "epoch": 0.21, "learning_rate": 1.8313421038989666e-05, "loss": 0.7669, "step": 1122 }, { "epoch": 0.21, "learning_rate": 1.8310018674944995e-05, "loss": 0.5925, "step": 1123 }, { "epoch": 0.21, "learning_rate": 1.8306613199255618e-05, "loss": 0.5693, "step": 1124 }, { "epoch": 0.21, "learning_rate": 1.8303204613196694e-05, "loss": 0.6354, "step": 1125 }, { "epoch": 0.21, "learning_rate": 1.829979291804455e-05, "loss": 0.7578, "step": 1126 }, { "epoch": 0.21, "learning_rate": 1.8296378115076683e-05, "loss": 0.6739, "step": 1127 }, { "epoch": 0.21, "learning_rate": 1.8292960205571742e-05, "loss": 0.6583, "step": 1128 }, { "epoch": 0.21, "learning_rate": 1.8289539190809552e-05, "loss": 0.5808, "step": 1129 }, { "epoch": 0.21, "learning_rate": 1.8286115072071095e-05, "loss": 0.5759, "step": 1130 }, { "epoch": 0.21, "learning_rate": 1.828268785063851e-05, "loss": 0.4506, "step": 1131 }, { "epoch": 0.21, "learning_rate": 1.827925752779511e-05, "loss": 0.5377, "step": 1132 }, { "epoch": 0.21, "learning_rate": 1.8275824104825353e-05, "loss": 0.5511, "step": 1133 }, { "epoch": 0.21, "learning_rate": 1.8272387583014874e-05, "loss": 0.8718, "step": 1134 }, { "epoch": 0.21, "learning_rate": 1.8268947963650457e-05, "loss": 0.9047, "step": 1135 }, { "epoch": 0.21, "learning_rate": 1.8265505248020056e-05, "loss": 0.7269, "step": 1136 }, { "epoch": 0.21, "learning_rate": 1.826205943741277e-05, "loss": 0.6187, "step": 1137 }, { "epoch": 0.22, "learning_rate": 1.8258610533118877e-05, "loss": 0.5083, "step": 1138 }, { "epoch": 0.22, "learning_rate": 1.825515853642979e-05, "loss": 0.6838, "step": 1139 }, { "epoch": 0.22, "learning_rate": 1.82517034486381e-05, "loss": 0.4996, "step": 1140 }, { "epoch": 0.22, "learning_rate": 1.8248245271037542e-05, "loss": 0.5337, "step": 1141 }, { "epoch": 0.22, "learning_rate": 1.8244784004923018e-05, "loss": 0.5244, "step": 1142 }, { "epoch": 0.22, "learning_rate": 1.8241319651590577e-05, "loss": 0.4718, "step": 1143 }, { "epoch": 0.22, "learning_rate": 1.823785221233743e-05, "loss": 0.567, "step": 1144 }, { "epoch": 0.22, "learning_rate": 1.8234381688461943e-05, "loss": 0.6693, "step": 1145 }, { "epoch": 0.22, "learning_rate": 1.823090808126364e-05, "loss": 0.5314, "step": 1146 }, { "epoch": 0.22, "learning_rate": 1.8227431392043188e-05, "loss": 0.6462, "step": 1147 }, { "epoch": 0.22, "learning_rate": 1.822395162210242e-05, "loss": 0.5107, "step": 1148 }, { "epoch": 0.22, "learning_rate": 1.822046877274432e-05, "loss": 0.5104, "step": 1149 }, { "epoch": 0.22, "learning_rate": 1.8216982845273022e-05, "loss": 0.5169, "step": 1150 }, { "epoch": 0.22, "learning_rate": 1.821349384099381e-05, "loss": 0.6941, "step": 1151 }, { "epoch": 0.22, "learning_rate": 1.8210001761213134e-05, "loss": 0.7258, "step": 1152 }, { "epoch": 0.22, "learning_rate": 1.8206506607238573e-05, "loss": 0.6194, "step": 1153 }, { "epoch": 0.22, "learning_rate": 1.820300838037888e-05, "loss": 0.619, "step": 1154 }, { "epoch": 0.22, "learning_rate": 1.8199507081943943e-05, "loss": 0.5923, "step": 1155 }, { "epoch": 0.22, "learning_rate": 1.819600271324481e-05, "loss": 0.5896, "step": 1156 }, { "epoch": 0.22, "learning_rate": 1.8192495275593667e-05, "loss": 0.549, "step": 1157 }, { "epoch": 0.22, "learning_rate": 1.8188984770303866e-05, "loss": 0.5684, "step": 1158 }, { "epoch": 0.22, "learning_rate": 1.818547119868989e-05, "loss": 0.6516, "step": 1159 }, { "epoch": 0.22, "learning_rate": 1.818195456206738e-05, "loss": 0.6088, "step": 1160 }, { "epoch": 0.22, "learning_rate": 1.8178434861753128e-05, "loss": 0.7894, "step": 1161 }, { "epoch": 0.22, "learning_rate": 1.817491209906506e-05, "loss": 0.4696, "step": 1162 }, { "epoch": 0.22, "learning_rate": 1.8171386275322264e-05, "loss": 0.5161, "step": 1163 }, { "epoch": 0.22, "learning_rate": 1.816785739184496e-05, "loss": 0.4949, "step": 1164 }, { "epoch": 0.22, "learning_rate": 1.8164325449954533e-05, "loss": 0.6396, "step": 1165 }, { "epoch": 0.22, "learning_rate": 1.8160790450973484e-05, "loss": 0.588, "step": 1166 }, { "epoch": 0.22, "learning_rate": 1.8157252396225487e-05, "loss": 0.657, "step": 1167 }, { "epoch": 0.22, "learning_rate": 1.8153711287035348e-05, "loss": 0.4273, "step": 1168 }, { "epoch": 0.22, "learning_rate": 1.815016712472901e-05, "loss": 0.7047, "step": 1169 }, { "epoch": 0.22, "learning_rate": 1.8146619910633577e-05, "loss": 0.6328, "step": 1170 }, { "epoch": 0.22, "learning_rate": 1.8143069646077282e-05, "loss": 0.6533, "step": 1171 }, { "epoch": 0.22, "learning_rate": 1.81395163323895e-05, "loss": 0.524, "step": 1172 }, { "epoch": 0.22, "learning_rate": 1.8135959970900753e-05, "loss": 0.6528, "step": 1173 }, { "epoch": 0.22, "learning_rate": 1.8132400562942704e-05, "loss": 0.7485, "step": 1174 }, { "epoch": 0.22, "learning_rate": 1.8128838109848158e-05, "loss": 0.6489, "step": 1175 }, { "epoch": 0.22, "learning_rate": 1.812527261295105e-05, "loss": 0.6087, "step": 1176 }, { "epoch": 0.22, "learning_rate": 1.812170407358647e-05, "loss": 0.6178, "step": 1177 }, { "epoch": 0.22, "learning_rate": 1.811813249309063e-05, "loss": 0.7929, "step": 1178 }, { "epoch": 0.22, "learning_rate": 1.8114557872800906e-05, "loss": 0.5257, "step": 1179 }, { "epoch": 0.22, "learning_rate": 1.811098021405578e-05, "loss": 0.5456, "step": 1180 }, { "epoch": 0.22, "learning_rate": 1.8107399518194902e-05, "loss": 0.5629, "step": 1181 }, { "epoch": 0.22, "learning_rate": 1.810381578655904e-05, "loss": 0.6053, "step": 1182 }, { "epoch": 0.22, "learning_rate": 1.81002290204901e-05, "loss": 0.4679, "step": 1183 }, { "epoch": 0.22, "learning_rate": 1.809663922133113e-05, "loss": 0.5863, "step": 1184 }, { "epoch": 0.22, "learning_rate": 1.809304639042632e-05, "loss": 0.7561, "step": 1185 }, { "epoch": 0.22, "learning_rate": 1.808945052912098e-05, "loss": 0.78, "step": 1186 }, { "epoch": 0.22, "learning_rate": 1.8085851638761564e-05, "loss": 0.5826, "step": 1187 }, { "epoch": 0.22, "learning_rate": 1.8082249720695657e-05, "loss": 0.5977, "step": 1188 }, { "epoch": 0.22, "learning_rate": 1.8078644776271985e-05, "loss": 0.6756, "step": 1189 }, { "epoch": 0.22, "learning_rate": 1.8075036806840388e-05, "loss": 0.557, "step": 1190 }, { "epoch": 0.23, "learning_rate": 1.8071425813751867e-05, "loss": 0.6103, "step": 1191 }, { "epoch": 0.23, "learning_rate": 1.8067811798358535e-05, "loss": 0.6093, "step": 1192 }, { "epoch": 0.23, "learning_rate": 1.806419476201364e-05, "loss": 0.4291, "step": 1193 }, { "epoch": 0.23, "learning_rate": 1.8060574706071564e-05, "loss": 0.5303, "step": 1194 }, { "epoch": 0.23, "learning_rate": 1.805695163188782e-05, "loss": 0.6524, "step": 1195 }, { "epoch": 0.23, "learning_rate": 1.8053325540819048e-05, "loss": 0.5429, "step": 1196 }, { "epoch": 0.23, "learning_rate": 1.8049696434223018e-05, "loss": 0.7551, "step": 1197 }, { "epoch": 0.23, "learning_rate": 1.8046064313458635e-05, "loss": 0.5625, "step": 1198 }, { "epoch": 0.23, "learning_rate": 1.8042429179885926e-05, "loss": 0.4973, "step": 1199 }, { "epoch": 0.23, "learning_rate": 1.803879103486605e-05, "loss": 0.683, "step": 1200 }, { "epoch": 0.23, "learning_rate": 1.8035149879761294e-05, "loss": 0.5581, "step": 1201 }, { "epoch": 0.23, "learning_rate": 1.803150571593506e-05, "loss": 0.575, "step": 1202 }, { "epoch": 0.23, "learning_rate": 1.80278585447519e-05, "loss": 0.6254, "step": 1203 }, { "epoch": 0.23, "learning_rate": 1.802420836757747e-05, "loss": 0.5094, "step": 1204 }, { "epoch": 0.23, "learning_rate": 1.8020555185778566e-05, "loss": 0.6753, "step": 1205 }, { "epoch": 0.23, "learning_rate": 1.80168990007231e-05, "loss": 0.7235, "step": 1206 }, { "epoch": 0.23, "learning_rate": 1.801323981378011e-05, "loss": 0.6079, "step": 1207 }, { "epoch": 0.23, "learning_rate": 1.8009577626319768e-05, "loss": 0.6635, "step": 1208 }, { "epoch": 0.23, "learning_rate": 1.8005912439713352e-05, "loss": 0.6443, "step": 1209 }, { "epoch": 0.23, "learning_rate": 1.800224425533328e-05, "loss": 0.5312, "step": 1210 }, { "epoch": 0.23, "learning_rate": 1.7998573074553086e-05, "loss": 0.7175, "step": 1211 }, { "epoch": 0.23, "learning_rate": 1.799489889874742e-05, "loss": 0.6706, "step": 1212 }, { "epoch": 0.23, "learning_rate": 1.799122172929206e-05, "loss": 0.6261, "step": 1213 }, { "epoch": 0.23, "learning_rate": 1.7987541567563905e-05, "loss": 0.7684, "step": 1214 }, { "epoch": 0.23, "learning_rate": 1.798385841494097e-05, "loss": 0.5973, "step": 1215 }, { "epoch": 0.23, "learning_rate": 1.7980172272802398e-05, "loss": 0.514, "step": 1216 }, { "epoch": 0.23, "learning_rate": 1.797648314252844e-05, "loss": 0.7001, "step": 1217 }, { "epoch": 0.23, "learning_rate": 1.797279102550048e-05, "loss": 0.5902, "step": 1218 }, { "epoch": 0.23, "learning_rate": 1.796909592310101e-05, "loss": 0.6479, "step": 1219 }, { "epoch": 0.23, "learning_rate": 1.7965397836713637e-05, "loss": 0.7434, "step": 1220 }, { "epoch": 0.23, "learning_rate": 1.7961696767723098e-05, "loss": 0.6319, "step": 1221 }, { "epoch": 0.23, "learning_rate": 1.7957992717515234e-05, "loss": 0.4695, "step": 1222 }, { "epoch": 0.23, "learning_rate": 1.7954285687477013e-05, "loss": 0.5784, "step": 1223 }, { "epoch": 0.23, "learning_rate": 1.795057567899651e-05, "loss": 0.5587, "step": 1224 }, { "epoch": 0.23, "learning_rate": 1.794686269346292e-05, "loss": 0.6629, "step": 1225 }, { "epoch": 0.23, "learning_rate": 1.7943146732266553e-05, "loss": 0.6403, "step": 1226 }, { "epoch": 0.23, "learning_rate": 1.7939427796798835e-05, "loss": 0.7139, "step": 1227 }, { "epoch": 0.23, "learning_rate": 1.7935705888452295e-05, "loss": 0.6358, "step": 1228 }, { "epoch": 0.23, "learning_rate": 1.7931981008620588e-05, "loss": 0.4881, "step": 1229 }, { "epoch": 0.23, "learning_rate": 1.7928253158698474e-05, "loss": 0.8216, "step": 1230 }, { "epoch": 0.23, "learning_rate": 1.792452234008183e-05, "loss": 0.5825, "step": 1231 }, { "epoch": 0.23, "learning_rate": 1.792078855416764e-05, "loss": 0.6072, "step": 1232 }, { "epoch": 0.23, "learning_rate": 1.7917051802354003e-05, "loss": 0.6582, "step": 1233 }, { "epoch": 0.23, "learning_rate": 1.791331208604013e-05, "loss": 0.727, "step": 1234 }, { "epoch": 0.23, "learning_rate": 1.7909569406626332e-05, "loss": 0.502, "step": 1235 }, { "epoch": 0.23, "learning_rate": 1.790582376551404e-05, "loss": 0.5612, "step": 1236 }, { "epoch": 0.23, "learning_rate": 1.790207516410579e-05, "loss": 0.6181, "step": 1237 }, { "epoch": 0.23, "learning_rate": 1.7898323603805228e-05, "loss": 0.6488, "step": 1238 }, { "epoch": 0.23, "learning_rate": 1.789456908601711e-05, "loss": 0.5395, "step": 1239 }, { "epoch": 0.23, "learning_rate": 1.7890811612147287e-05, "loss": 0.6086, "step": 1240 }, { "epoch": 0.23, "learning_rate": 1.7887051183602736e-05, "loss": 0.6238, "step": 1241 }, { "epoch": 0.23, "learning_rate": 1.788328780179152e-05, "loss": 0.6923, "step": 1242 }, { "epoch": 0.23, "learning_rate": 1.7879521468122833e-05, "loss": 0.7086, "step": 1243 }, { "epoch": 0.24, "learning_rate": 1.7875752184006945e-05, "loss": 0.6572, "step": 1244 }, { "epoch": 0.24, "learning_rate": 1.7871979950855256e-05, "loss": 0.5991, "step": 1245 }, { "epoch": 0.24, "learning_rate": 1.7868204770080252e-05, "loss": 0.6048, "step": 1246 }, { "epoch": 0.24, "learning_rate": 1.7864426643095537e-05, "loss": 0.6229, "step": 1247 }, { "epoch": 0.24, "learning_rate": 1.786064557131581e-05, "loss": 0.73, "step": 1248 }, { "epoch": 0.24, "learning_rate": 1.785686155615687e-05, "loss": 0.475, "step": 1249 }, { "epoch": 0.24, "learning_rate": 1.7853074599035628e-05, "loss": 0.5951, "step": 1250 }, { "epoch": 0.24, "learning_rate": 1.784928470137009e-05, "loss": 0.5214, "step": 1251 }, { "epoch": 0.24, "learning_rate": 1.784549186457936e-05, "loss": 0.5244, "step": 1252 }, { "epoch": 0.24, "learning_rate": 1.784169609008365e-05, "loss": 0.6403, "step": 1253 }, { "epoch": 0.24, "learning_rate": 1.7837897379304267e-05, "loss": 0.7071, "step": 1254 }, { "epoch": 0.24, "learning_rate": 1.7834095733663624e-05, "loss": 0.5415, "step": 1255 }, { "epoch": 0.24, "learning_rate": 1.783029115458522e-05, "loss": 0.6182, "step": 1256 }, { "epoch": 0.24, "learning_rate": 1.7826483643493664e-05, "loss": 0.6117, "step": 1257 }, { "epoch": 0.24, "learning_rate": 1.7822673201814664e-05, "loss": 0.5071, "step": 1258 }, { "epoch": 0.24, "learning_rate": 1.7818859830975016e-05, "loss": 0.5654, "step": 1259 }, { "epoch": 0.24, "learning_rate": 1.7815043532402616e-05, "loss": 0.6708, "step": 1260 }, { "epoch": 0.24, "learning_rate": 1.7811224307526464e-05, "loss": 0.6146, "step": 1261 }, { "epoch": 0.24, "learning_rate": 1.7807402157776645e-05, "loss": 0.5851, "step": 1262 }, { "epoch": 0.24, "learning_rate": 1.780357708458434e-05, "loss": 0.5154, "step": 1263 }, { "epoch": 0.24, "learning_rate": 1.7799749089381843e-05, "loss": 0.5821, "step": 1264 }, { "epoch": 0.24, "learning_rate": 1.779591817360251e-05, "loss": 0.6596, "step": 1265 }, { "epoch": 0.24, "learning_rate": 1.7792084338680816e-05, "loss": 0.4192, "step": 1266 }, { "epoch": 0.24, "learning_rate": 1.7788247586052324e-05, "loss": 0.519, "step": 1267 }, { "epoch": 0.24, "learning_rate": 1.778440791715368e-05, "loss": 0.5634, "step": 1268 }, { "epoch": 0.24, "learning_rate": 1.7780565333422636e-05, "loss": 0.5947, "step": 1269 }, { "epoch": 0.24, "learning_rate": 1.777671983629802e-05, "loss": 0.523, "step": 1270 }, { "epoch": 0.24, "learning_rate": 1.7772871427219767e-05, "loss": 0.7015, "step": 1271 }, { "epoch": 0.24, "learning_rate": 1.7769020107628892e-05, "loss": 0.5818, "step": 1272 }, { "epoch": 0.24, "learning_rate": 1.7765165878967497e-05, "loss": 0.5754, "step": 1273 }, { "epoch": 0.24, "learning_rate": 1.7761308742678787e-05, "loss": 0.5656, "step": 1274 }, { "epoch": 0.24, "learning_rate": 1.775744870020704e-05, "loss": 0.7581, "step": 1275 }, { "epoch": 0.24, "learning_rate": 1.7753585752997638e-05, "loss": 0.6464, "step": 1276 }, { "epoch": 0.24, "learning_rate": 1.774971990249703e-05, "loss": 0.7446, "step": 1277 }, { "epoch": 0.24, "learning_rate": 1.7745851150152777e-05, "loss": 0.5266, "step": 1278 }, { "epoch": 0.24, "learning_rate": 1.7741979497413505e-05, "loss": 0.5522, "step": 1279 }, { "epoch": 0.24, "learning_rate": 1.7738104945728935e-05, "loss": 0.6225, "step": 1280 }, { "epoch": 0.24, "learning_rate": 1.773422749654988e-05, "loss": 0.8063, "step": 1281 }, { "epoch": 0.24, "learning_rate": 1.773034715132823e-05, "loss": 0.6506, "step": 1282 }, { "epoch": 0.24, "learning_rate": 1.772646391151695e-05, "loss": 0.6625, "step": 1283 }, { "epoch": 0.24, "learning_rate": 1.7722577778570116e-05, "loss": 0.5325, "step": 1284 }, { "epoch": 0.24, "learning_rate": 1.771868875394286e-05, "loss": 0.7432, "step": 1285 }, { "epoch": 0.24, "learning_rate": 1.7714796839091408e-05, "loss": 0.6533, "step": 1286 }, { "epoch": 0.24, "learning_rate": 1.7710902035473075e-05, "loss": 0.7234, "step": 1287 }, { "epoch": 0.24, "learning_rate": 1.7707004344546246e-05, "loss": 0.554, "step": 1288 }, { "epoch": 0.24, "learning_rate": 1.7703103767770387e-05, "loss": 0.4693, "step": 1289 }, { "epoch": 0.24, "learning_rate": 1.769920030660606e-05, "loss": 0.5559, "step": 1290 }, { "epoch": 0.24, "learning_rate": 1.7695293962514886e-05, "loss": 0.5746, "step": 1291 }, { "epoch": 0.24, "learning_rate": 1.7691384736959584e-05, "loss": 0.5855, "step": 1292 }, { "epoch": 0.24, "learning_rate": 1.768747263140394e-05, "loss": 0.4356, "step": 1293 }, { "epoch": 0.24, "learning_rate": 1.768355764731282e-05, "loss": 0.6465, "step": 1294 }, { "epoch": 0.24, "learning_rate": 1.767963978615218e-05, "loss": 0.6538, "step": 1295 }, { "epoch": 0.24, "learning_rate": 1.7675719049389027e-05, "loss": 0.6491, "step": 1296 }, { "epoch": 0.25, "learning_rate": 1.7671795438491476e-05, "loss": 0.6001, "step": 1297 }, { "epoch": 0.25, "learning_rate": 1.7667868954928695e-05, "loss": 0.5571, "step": 1298 }, { "epoch": 0.25, "learning_rate": 1.766393960017094e-05, "loss": 0.4638, "step": 1299 }, { "epoch": 0.25, "learning_rate": 1.7660007375689537e-05, "loss": 0.7924, "step": 1300 }, { "epoch": 0.25, "learning_rate": 1.7656072282956887e-05, "loss": 0.5272, "step": 1301 }, { "epoch": 0.25, "learning_rate": 1.765213432344647e-05, "loss": 0.5429, "step": 1302 }, { "epoch": 0.25, "learning_rate": 1.7648193498632828e-05, "loss": 0.5592, "step": 1303 }, { "epoch": 0.25, "learning_rate": 1.764424980999159e-05, "loss": 0.6348, "step": 1304 }, { "epoch": 0.25, "learning_rate": 1.7640303258999442e-05, "loss": 0.6176, "step": 1305 }, { "epoch": 0.25, "learning_rate": 1.763635384713416e-05, "loss": 0.5416, "step": 1306 }, { "epoch": 0.25, "learning_rate": 1.763240157587457e-05, "loss": 0.7743, "step": 1307 }, { "epoch": 0.25, "learning_rate": 1.762844644670059e-05, "loss": 0.6068, "step": 1308 }, { "epoch": 0.25, "learning_rate": 1.7624488461093197e-05, "loss": 0.4343, "step": 1309 }, { "epoch": 0.25, "learning_rate": 1.7620527620534432e-05, "loss": 0.597, "step": 1310 }, { "epoch": 0.25, "learning_rate": 1.761656392650742e-05, "loss": 0.6763, "step": 1311 }, { "epoch": 0.25, "learning_rate": 1.7612597380496344e-05, "loss": 0.6208, "step": 1312 }, { "epoch": 0.25, "learning_rate": 1.7608627983986452e-05, "loss": 0.5105, "step": 1313 }, { "epoch": 0.25, "learning_rate": 1.760465573846407e-05, "loss": 0.782, "step": 1314 }, { "epoch": 0.25, "learning_rate": 1.7600680645416583e-05, "loss": 0.7729, "step": 1315 }, { "epoch": 0.25, "learning_rate": 1.7596702706332446e-05, "loss": 0.5772, "step": 1316 }, { "epoch": 0.25, "learning_rate": 1.759272192270118e-05, "loss": 0.6116, "step": 1317 }, { "epoch": 0.25, "learning_rate": 1.7588738296013367e-05, "loss": 0.5852, "step": 1318 }, { "epoch": 0.25, "learning_rate": 1.7584751827760655e-05, "loss": 0.4363, "step": 1319 }, { "epoch": 0.25, "learning_rate": 1.7580762519435757e-05, "loss": 0.6878, "step": 1320 }, { "epoch": 0.25, "learning_rate": 1.757677037253245e-05, "loss": 0.5244, "step": 1321 }, { "epoch": 0.25, "learning_rate": 1.7572775388545577e-05, "loss": 0.6731, "step": 1322 }, { "epoch": 0.25, "learning_rate": 1.7568777568971035e-05, "loss": 0.6155, "step": 1323 }, { "epoch": 0.25, "learning_rate": 1.756477691530579e-05, "loss": 0.5429, "step": 1324 }, { "epoch": 0.25, "learning_rate": 1.7560773429047862e-05, "loss": 0.637, "step": 1325 }, { "epoch": 0.25, "learning_rate": 1.755676711169635e-05, "loss": 0.4831, "step": 1326 }, { "epoch": 0.25, "learning_rate": 1.7552757964751375e-05, "loss": 0.8057, "step": 1327 }, { "epoch": 0.25, "learning_rate": 1.7548745989714167e-05, "loss": 0.7047, "step": 1328 }, { "epoch": 0.25, "learning_rate": 1.7544731188086976e-05, "loss": 0.5902, "step": 1329 }, { "epoch": 0.25, "learning_rate": 1.7540713561373127e-05, "loss": 0.6175, "step": 1330 }, { "epoch": 0.25, "learning_rate": 1.7536693111077002e-05, "loss": 0.4928, "step": 1331 }, { "epoch": 0.25, "learning_rate": 1.7532669838704036e-05, "loss": 0.7044, "step": 1332 }, { "epoch": 0.25, "learning_rate": 1.7528643745760724e-05, "loss": 0.6148, "step": 1333 }, { "epoch": 0.25, "learning_rate": 1.7524614833754615e-05, "loss": 0.5079, "step": 1334 }, { "epoch": 0.25, "learning_rate": 1.752058310419432e-05, "loss": 0.5868, "step": 1335 }, { "epoch": 0.25, "learning_rate": 1.751654855858949e-05, "loss": 0.6933, "step": 1336 }, { "epoch": 0.25, "learning_rate": 1.751251119845085e-05, "loss": 0.4903, "step": 1337 }, { "epoch": 0.25, "learning_rate": 1.750847102529017e-05, "loss": 0.7062, "step": 1338 }, { "epoch": 0.25, "learning_rate": 1.750442804062026e-05, "loss": 0.357, "step": 1339 }, { "epoch": 0.25, "learning_rate": 1.7500382245955007e-05, "loss": 0.6088, "step": 1340 }, { "epoch": 0.25, "learning_rate": 1.7496333642809337e-05, "loss": 0.7182, "step": 1341 }, { "epoch": 0.25, "learning_rate": 1.749228223269922e-05, "loss": 0.6369, "step": 1342 }, { "epoch": 0.25, "learning_rate": 1.74882280171417e-05, "loss": 0.5228, "step": 1343 }, { "epoch": 0.25, "learning_rate": 1.748417099765485e-05, "loss": 0.6296, "step": 1344 }, { "epoch": 0.25, "learning_rate": 1.74801111757578e-05, "loss": 0.567, "step": 1345 }, { "epoch": 0.25, "learning_rate": 1.747604855297073e-05, "loss": 0.4983, "step": 1346 }, { "epoch": 0.25, "learning_rate": 1.7471983130814872e-05, "loss": 0.7389, "step": 1347 }, { "epoch": 0.25, "learning_rate": 1.74679149108125e-05, "loss": 0.5638, "step": 1348 }, { "epoch": 0.25, "learning_rate": 1.746384389448694e-05, "loss": 0.6807, "step": 1349 }, { "epoch": 0.26, "learning_rate": 1.745977008336256e-05, "loss": 0.6734, "step": 1350 }, { "epoch": 0.26, "learning_rate": 1.7455693478964782e-05, "loss": 0.6912, "step": 1351 }, { "epoch": 0.26, "learning_rate": 1.745161408282007e-05, "loss": 0.5636, "step": 1352 }, { "epoch": 0.26, "learning_rate": 1.744753189645593e-05, "loss": 0.4027, "step": 1353 }, { "epoch": 0.26, "learning_rate": 1.7443446921400924e-05, "loss": 0.5479, "step": 1354 }, { "epoch": 0.26, "learning_rate": 1.743935915918464e-05, "loss": 0.679, "step": 1355 }, { "epoch": 0.26, "learning_rate": 1.7435268611337727e-05, "loss": 0.5915, "step": 1356 }, { "epoch": 0.26, "learning_rate": 1.7431175279391864e-05, "loss": 0.5409, "step": 1357 }, { "epoch": 0.26, "learning_rate": 1.742707916487979e-05, "loss": 0.6069, "step": 1358 }, { "epoch": 0.26, "learning_rate": 1.7422980269335262e-05, "loss": 0.5858, "step": 1359 }, { "epoch": 0.26, "learning_rate": 1.7418878594293095e-05, "loss": 0.6636, "step": 1360 }, { "epoch": 0.26, "learning_rate": 1.741477414128914e-05, "loss": 0.6452, "step": 1361 }, { "epoch": 0.26, "learning_rate": 1.7410666911860298e-05, "loss": 0.684, "step": 1362 }, { "epoch": 0.26, "learning_rate": 1.7406556907544486e-05, "loss": 0.3533, "step": 1363 }, { "epoch": 0.26, "learning_rate": 1.7402444129880683e-05, "loss": 0.5252, "step": 1364 }, { "epoch": 0.26, "learning_rate": 1.7398328580408896e-05, "loss": 0.5956, "step": 1365 }, { "epoch": 0.26, "learning_rate": 1.739421026067017e-05, "loss": 0.5251, "step": 1366 }, { "epoch": 0.26, "learning_rate": 1.7390089172206594e-05, "loss": 0.4391, "step": 1367 }, { "epoch": 0.26, "learning_rate": 1.7385965316561285e-05, "loss": 0.6689, "step": 1368 }, { "epoch": 0.26, "learning_rate": 1.73818386952784e-05, "loss": 0.5935, "step": 1369 }, { "epoch": 0.26, "learning_rate": 1.737770930990313e-05, "loss": 0.681, "step": 1370 }, { "epoch": 0.26, "learning_rate": 1.7373577161981708e-05, "loss": 0.6445, "step": 1371 }, { "epoch": 0.26, "learning_rate": 1.736944225306139e-05, "loss": 0.5348, "step": 1372 }, { "epoch": 0.26, "learning_rate": 1.7365304584690478e-05, "loss": 0.7073, "step": 1373 }, { "epoch": 0.26, "learning_rate": 1.7361164158418296e-05, "loss": 0.6518, "step": 1374 }, { "epoch": 0.26, "learning_rate": 1.7357020975795208e-05, "loss": 0.6345, "step": 1375 }, { "epoch": 0.26, "learning_rate": 1.735287503837261e-05, "loss": 0.6038, "step": 1376 }, { "epoch": 0.26, "learning_rate": 1.7348726347702922e-05, "loss": 0.6215, "step": 1377 }, { "epoch": 0.26, "learning_rate": 1.7344574905339608e-05, "loss": 0.5527, "step": 1378 }, { "epoch": 0.26, "learning_rate": 1.7340420712837144e-05, "loss": 0.6355, "step": 1379 }, { "epoch": 0.26, "learning_rate": 1.7336263771751055e-05, "loss": 0.4508, "step": 1380 }, { "epoch": 0.26, "learning_rate": 1.7332104083637887e-05, "loss": 0.529, "step": 1381 }, { "epoch": 0.26, "learning_rate": 1.7327941650055208e-05, "loss": 0.6833, "step": 1382 }, { "epoch": 0.26, "learning_rate": 1.7323776472561625e-05, "loss": 0.4829, "step": 1383 }, { "epoch": 0.26, "learning_rate": 1.731960855271677e-05, "loss": 0.5169, "step": 1384 }, { "epoch": 0.26, "learning_rate": 1.731543789208129e-05, "loss": 0.589, "step": 1385 }, { "epoch": 0.26, "learning_rate": 1.731126449221688e-05, "loss": 0.5681, "step": 1386 }, { "epoch": 0.26, "learning_rate": 1.730708835468624e-05, "loss": 0.4832, "step": 1387 }, { "epoch": 0.26, "learning_rate": 1.7302909481053106e-05, "loss": 0.6413, "step": 1388 }, { "epoch": 0.26, "learning_rate": 1.729872787288224e-05, "loss": 0.7852, "step": 1389 }, { "epoch": 0.26, "learning_rate": 1.729454353173942e-05, "loss": 0.6015, "step": 1390 }, { "epoch": 0.26, "learning_rate": 1.7290356459191447e-05, "loss": 0.7533, "step": 1391 }, { "epoch": 0.26, "learning_rate": 1.7286166656806157e-05, "loss": 0.5159, "step": 1392 }, { "epoch": 0.26, "learning_rate": 1.7281974126152402e-05, "loss": 0.6834, "step": 1393 }, { "epoch": 0.26, "learning_rate": 1.727777886880005e-05, "loss": 0.7594, "step": 1394 }, { "epoch": 0.26, "learning_rate": 1.727358088631999e-05, "loss": 0.7001, "step": 1395 }, { "epoch": 0.26, "learning_rate": 1.7269380180284142e-05, "loss": 0.493, "step": 1396 }, { "epoch": 0.26, "learning_rate": 1.7265176752265437e-05, "loss": 0.5517, "step": 1397 }, { "epoch": 0.26, "learning_rate": 1.726097060383783e-05, "loss": 0.5341, "step": 1398 }, { "epoch": 0.26, "learning_rate": 1.7256761736576284e-05, "loss": 0.5668, "step": 1399 }, { "epoch": 0.26, "learning_rate": 1.7252550152056795e-05, "loss": 0.6706, "step": 1400 }, { "epoch": 0.26, "learning_rate": 1.7248335851856367e-05, "loss": 0.5381, "step": 1401 }, { "epoch": 0.26, "learning_rate": 1.7244118837553028e-05, "loss": 0.6714, "step": 1402 }, { "epoch": 0.27, "learning_rate": 1.7239899110725815e-05, "loss": 0.5611, "step": 1403 }, { "epoch": 0.27, "learning_rate": 1.723567667295478e-05, "loss": 0.6804, "step": 1404 }, { "epoch": 0.27, "learning_rate": 1.7231451525821e-05, "loss": 0.6177, "step": 1405 }, { "epoch": 0.27, "learning_rate": 1.7227223670906554e-05, "loss": 0.641, "step": 1406 }, { "epoch": 0.27, "learning_rate": 1.7222993109794547e-05, "loss": 0.5489, "step": 1407 }, { "epoch": 0.27, "learning_rate": 1.7218759844069084e-05, "loss": 0.4403, "step": 1408 }, { "epoch": 0.27, "learning_rate": 1.7214523875315298e-05, "loss": 0.688, "step": 1409 }, { "epoch": 0.27, "learning_rate": 1.721028520511932e-05, "loss": 0.5057, "step": 1410 }, { "epoch": 0.27, "learning_rate": 1.7206043835068304e-05, "loss": 0.4988, "step": 1411 }, { "epoch": 0.27, "learning_rate": 1.7201799766750405e-05, "loss": 0.8098, "step": 1412 }, { "epoch": 0.27, "learning_rate": 1.7197553001754796e-05, "loss": 0.7501, "step": 1413 }, { "epoch": 0.27, "learning_rate": 1.7193303541671652e-05, "loss": 0.7061, "step": 1414 }, { "epoch": 0.27, "learning_rate": 1.7189051388092167e-05, "loss": 0.6106, "step": 1415 }, { "epoch": 0.27, "learning_rate": 1.7184796542608538e-05, "loss": 0.5511, "step": 1416 }, { "epoch": 0.27, "learning_rate": 1.7180539006813973e-05, "loss": 0.5549, "step": 1417 }, { "epoch": 0.27, "learning_rate": 1.717627878230268e-05, "loss": 0.5948, "step": 1418 }, { "epoch": 0.27, "learning_rate": 1.7172015870669875e-05, "loss": 0.583, "step": 1419 }, { "epoch": 0.27, "learning_rate": 1.7167750273511796e-05, "loss": 0.5884, "step": 1420 }, { "epoch": 0.27, "learning_rate": 1.7163481992425662e-05, "loss": 0.5537, "step": 1421 }, { "epoch": 0.27, "learning_rate": 1.7159211029009716e-05, "loss": 0.5934, "step": 1422 }, { "epoch": 0.27, "learning_rate": 1.71549373848632e-05, "loss": 0.5111, "step": 1423 }, { "epoch": 0.27, "learning_rate": 1.7150661061586354e-05, "loss": 0.5578, "step": 1424 }, { "epoch": 0.27, "learning_rate": 1.7146382060780428e-05, "loss": 0.6121, "step": 1425 }, { "epoch": 0.27, "learning_rate": 1.7142100384047672e-05, "loss": 0.6706, "step": 1426 }, { "epoch": 0.27, "learning_rate": 1.7137816032991338e-05, "loss": 0.7424, "step": 1427 }, { "epoch": 0.27, "learning_rate": 1.7133529009215685e-05, "loss": 0.723, "step": 1428 }, { "epoch": 0.27, "learning_rate": 1.712923931432596e-05, "loss": 0.5684, "step": 1429 }, { "epoch": 0.27, "learning_rate": 1.7124946949928418e-05, "loss": 0.552, "step": 1430 }, { "epoch": 0.27, "learning_rate": 1.7120651917630317e-05, "loss": 0.4514, "step": 1431 }, { "epoch": 0.27, "learning_rate": 1.711635421903991e-05, "loss": 0.5139, "step": 1432 }, { "epoch": 0.27, "learning_rate": 1.7112053855766446e-05, "loss": 0.5619, "step": 1433 }, { "epoch": 0.27, "learning_rate": 1.7107750829420177e-05, "loss": 0.6745, "step": 1434 }, { "epoch": 0.27, "learning_rate": 1.7103445141612348e-05, "loss": 0.6543, "step": 1435 }, { "epoch": 0.27, "learning_rate": 1.7099136793955205e-05, "loss": 0.6353, "step": 1436 }, { "epoch": 0.27, "learning_rate": 1.7094825788061984e-05, "loss": 0.5393, "step": 1437 }, { "epoch": 0.27, "learning_rate": 1.709051212554692e-05, "loss": 0.774, "step": 1438 }, { "epoch": 0.27, "learning_rate": 1.7086195808025242e-05, "loss": 0.5575, "step": 1439 }, { "epoch": 0.27, "learning_rate": 1.7081876837113173e-05, "loss": 0.5133, "step": 1440 }, { "epoch": 0.27, "learning_rate": 1.7077555214427933e-05, "loss": 0.6224, "step": 1441 }, { "epoch": 0.27, "learning_rate": 1.707323094158773e-05, "loss": 0.5575, "step": 1442 }, { "epoch": 0.27, "learning_rate": 1.7068904020211764e-05, "loss": 0.641, "step": 1443 }, { "epoch": 0.27, "learning_rate": 1.706457445192023e-05, "loss": 0.5687, "step": 1444 }, { "epoch": 0.27, "learning_rate": 1.7060242238334317e-05, "loss": 0.6552, "step": 1445 }, { "epoch": 0.27, "learning_rate": 1.7055907381076196e-05, "loss": 0.6392, "step": 1446 }, { "epoch": 0.27, "learning_rate": 1.7051569881769033e-05, "loss": 0.6018, "step": 1447 }, { "epoch": 0.27, "learning_rate": 1.7047229742036987e-05, "loss": 0.5588, "step": 1448 }, { "epoch": 0.27, "learning_rate": 1.7042886963505196e-05, "loss": 0.655, "step": 1449 }, { "epoch": 0.27, "learning_rate": 1.7038541547799797e-05, "loss": 0.6132, "step": 1450 }, { "epoch": 0.27, "learning_rate": 1.7034193496547903e-05, "loss": 0.5211, "step": 1451 }, { "epoch": 0.27, "learning_rate": 1.7029842811377623e-05, "loss": 0.5417, "step": 1452 }, { "epoch": 0.27, "learning_rate": 1.702548949391805e-05, "loss": 0.5748, "step": 1453 }, { "epoch": 0.27, "learning_rate": 1.702113354579926e-05, "loss": 0.5473, "step": 1454 }, { "epoch": 0.27, "learning_rate": 1.701677496865232e-05, "loss": 0.6001, "step": 1455 }, { "epoch": 0.28, "learning_rate": 1.7012413764109274e-05, "loss": 0.5881, "step": 1456 }, { "epoch": 0.28, "learning_rate": 1.7008049933803153e-05, "loss": 0.5174, "step": 1457 }, { "epoch": 0.28, "learning_rate": 1.700368347936797e-05, "loss": 0.5429, "step": 1458 }, { "epoch": 0.28, "learning_rate": 1.6999314402438726e-05, "loss": 0.6169, "step": 1459 }, { "epoch": 0.28, "learning_rate": 1.699494270465139e-05, "loss": 0.561, "step": 1460 }, { "epoch": 0.28, "learning_rate": 1.6990568387642936e-05, "loss": 0.4459, "step": 1461 }, { "epoch": 0.28, "learning_rate": 1.69861914530513e-05, "loss": 0.589, "step": 1462 }, { "epoch": 0.28, "learning_rate": 1.69818119025154e-05, "loss": 0.6018, "step": 1463 }, { "epoch": 0.28, "learning_rate": 1.6977429737675136e-05, "loss": 0.6237, "step": 1464 }, { "epoch": 0.28, "learning_rate": 1.6973044960171394e-05, "loss": 0.544, "step": 1465 }, { "epoch": 0.28, "learning_rate": 1.6968657571646027e-05, "loss": 0.5871, "step": 1466 }, { "epoch": 0.28, "learning_rate": 1.696426757374187e-05, "loss": 0.519, "step": 1467 }, { "epoch": 0.28, "learning_rate": 1.6959874968102736e-05, "loss": 0.583, "step": 1468 }, { "epoch": 0.28, "learning_rate": 1.6955479756373415e-05, "loss": 0.6231, "step": 1469 }, { "epoch": 0.28, "learning_rate": 1.6951081940199676e-05, "loss": 0.6945, "step": 1470 }, { "epoch": 0.28, "learning_rate": 1.6946681521228256e-05, "loss": 0.5252, "step": 1471 }, { "epoch": 0.28, "learning_rate": 1.6942278501106865e-05, "loss": 0.7087, "step": 1472 }, { "epoch": 0.28, "learning_rate": 1.69378728814842e-05, "loss": 0.5507, "step": 1473 }, { "epoch": 0.28, "learning_rate": 1.6933464664009915e-05, "loss": 0.5233, "step": 1474 }, { "epoch": 0.28, "learning_rate": 1.6929053850334653e-05, "loss": 0.5241, "step": 1475 }, { "epoch": 0.28, "learning_rate": 1.6924640442110016e-05, "loss": 0.6595, "step": 1476 }, { "epoch": 0.28, "learning_rate": 1.6920224440988578e-05, "loss": 0.5011, "step": 1477 }, { "epoch": 0.28, "learning_rate": 1.69158058486239e-05, "loss": 0.5046, "step": 1478 }, { "epoch": 0.28, "learning_rate": 1.6911384666670493e-05, "loss": 0.7385, "step": 1479 }, { "epoch": 0.28, "learning_rate": 1.6906960896783847e-05, "loss": 0.6148, "step": 1480 }, { "epoch": 0.28, "learning_rate": 1.690253454062043e-05, "loss": 0.7022, "step": 1481 }, { "epoch": 0.28, "learning_rate": 1.6898105599837654e-05, "loss": 0.7872, "step": 1482 }, { "epoch": 0.28, "learning_rate": 1.6893674076093923e-05, "loss": 0.5984, "step": 1483 }, { "epoch": 0.28, "learning_rate": 1.6889239971048594e-05, "loss": 0.508, "step": 1484 }, { "epoch": 0.28, "learning_rate": 1.6884803286362e-05, "loss": 0.735, "step": 1485 }, { "epoch": 0.28, "learning_rate": 1.6880364023695435e-05, "loss": 0.6937, "step": 1486 }, { "epoch": 0.28, "learning_rate": 1.6875922184711152e-05, "loss": 0.6188, "step": 1487 }, { "epoch": 0.28, "learning_rate": 1.687147777107238e-05, "loss": 0.614, "step": 1488 }, { "epoch": 0.28, "learning_rate": 1.6867030784443313e-05, "loss": 0.5874, "step": 1489 }, { "epoch": 0.28, "learning_rate": 1.6862581226489093e-05, "loss": 0.5484, "step": 1490 }, { "epoch": 0.28, "learning_rate": 1.685812909887584e-05, "loss": 0.5786, "step": 1491 }, { "epoch": 0.28, "learning_rate": 1.6853674403270628e-05, "loss": 0.6042, "step": 1492 }, { "epoch": 0.28, "learning_rate": 1.68492171413415e-05, "loss": 0.6611, "step": 1493 }, { "epoch": 0.28, "learning_rate": 1.684475731475745e-05, "loss": 0.5814, "step": 1494 }, { "epoch": 0.28, "learning_rate": 1.6840294925188436e-05, "loss": 0.7758, "step": 1495 }, { "epoch": 0.28, "learning_rate": 1.6835829974305385e-05, "loss": 0.5991, "step": 1496 }, { "epoch": 0.28, "learning_rate": 1.6831362463780173e-05, "loss": 0.557, "step": 1497 }, { "epoch": 0.28, "learning_rate": 1.6826892395285632e-05, "loss": 0.5493, "step": 1498 }, { "epoch": 0.28, "learning_rate": 1.682241977049556e-05, "loss": 0.5846, "step": 1499 }, { "epoch": 0.28, "learning_rate": 1.681794459108471e-05, "loss": 0.633, "step": 1500 }, { "epoch": 0.28, "learning_rate": 1.681346685872879e-05, "loss": 0.6315, "step": 1501 }, { "epoch": 0.28, "learning_rate": 1.6808986575104464e-05, "loss": 0.6774, "step": 1502 }, { "epoch": 0.28, "learning_rate": 1.680450374188935e-05, "loss": 0.6875, "step": 1503 }, { "epoch": 0.28, "learning_rate": 1.6800018360762026e-05, "loss": 0.6968, "step": 1504 }, { "epoch": 0.28, "learning_rate": 1.6795530433402016e-05, "loss": 0.6012, "step": 1505 }, { "epoch": 0.28, "learning_rate": 1.6791039961489805e-05, "loss": 0.4998, "step": 1506 }, { "epoch": 0.28, "learning_rate": 1.6786546946706826e-05, "loss": 0.5965, "step": 1507 }, { "epoch": 0.28, "learning_rate": 1.6782051390735468e-05, "loss": 0.6145, "step": 1508 }, { "epoch": 0.29, "learning_rate": 1.6777553295259065e-05, "loss": 0.6323, "step": 1509 }, { "epoch": 0.29, "learning_rate": 1.6773052661961912e-05, "loss": 0.5479, "step": 1510 }, { "epoch": 0.29, "learning_rate": 1.6768549492529243e-05, "loss": 0.6911, "step": 1511 }, { "epoch": 0.29, "learning_rate": 1.6764043788647253e-05, "loss": 0.7565, "step": 1512 }, { "epoch": 0.29, "learning_rate": 1.675953555200307e-05, "loss": 0.8648, "step": 1513 }, { "epoch": 0.29, "learning_rate": 1.6755024784284794e-05, "loss": 0.4231, "step": 1514 }, { "epoch": 0.29, "learning_rate": 1.675051148718145e-05, "loss": 0.4991, "step": 1515 }, { "epoch": 0.29, "learning_rate": 1.6745995662383022e-05, "loss": 0.6622, "step": 1516 }, { "epoch": 0.29, "learning_rate": 1.6741477311580442e-05, "loss": 0.5921, "step": 1517 }, { "epoch": 0.29, "learning_rate": 1.6736956436465573e-05, "loss": 0.5665, "step": 1518 }, { "epoch": 0.29, "learning_rate": 1.6732433038731245e-05, "loss": 0.4669, "step": 1519 }, { "epoch": 0.29, "learning_rate": 1.6727907120071215e-05, "loss": 0.548, "step": 1520 }, { "epoch": 0.29, "learning_rate": 1.6723378682180193e-05, "loss": 0.6628, "step": 1521 }, { "epoch": 0.29, "learning_rate": 1.671884772675383e-05, "loss": 0.6679, "step": 1522 }, { "epoch": 0.29, "learning_rate": 1.671431425548872e-05, "loss": 0.4911, "step": 1523 }, { "epoch": 0.29, "learning_rate": 1.6709778270082395e-05, "loss": 0.7051, "step": 1524 }, { "epoch": 0.29, "learning_rate": 1.670523977223334e-05, "loss": 0.5582, "step": 1525 }, { "epoch": 0.29, "learning_rate": 1.670069876364096e-05, "loss": 0.4785, "step": 1526 }, { "epoch": 0.29, "learning_rate": 1.669615524600562e-05, "loss": 0.5972, "step": 1527 }, { "epoch": 0.29, "learning_rate": 1.669160922102862e-05, "loss": 0.7963, "step": 1528 }, { "epoch": 0.29, "learning_rate": 1.6687060690412194e-05, "loss": 0.5964, "step": 1529 }, { "epoch": 0.29, "learning_rate": 1.6682509655859515e-05, "loss": 0.5174, "step": 1530 }, { "epoch": 0.29, "learning_rate": 1.6677956119074697e-05, "loss": 0.7178, "step": 1531 }, { "epoch": 0.29, "learning_rate": 1.6673400081762785e-05, "loss": 0.4297, "step": 1532 }, { "epoch": 0.29, "learning_rate": 1.666884154562977e-05, "loss": 0.4984, "step": 1533 }, { "epoch": 0.29, "learning_rate": 1.6664280512382567e-05, "loss": 0.4618, "step": 1534 }, { "epoch": 0.29, "learning_rate": 1.6659716983729038e-05, "loss": 0.5059, "step": 1535 }, { "epoch": 0.29, "learning_rate": 1.665515096137797e-05, "loss": 0.4335, "step": 1536 }, { "epoch": 0.29, "learning_rate": 1.6650582447039087e-05, "loss": 0.5871, "step": 1537 }, { "epoch": 0.29, "learning_rate": 1.6646011442423045e-05, "loss": 0.6818, "step": 1538 }, { "epoch": 0.29, "learning_rate": 1.6641437949241436e-05, "loss": 0.4751, "step": 1539 }, { "epoch": 0.29, "learning_rate": 1.663686196920678e-05, "loss": 0.6192, "step": 1540 }, { "epoch": 0.29, "learning_rate": 1.6632283504032532e-05, "loss": 0.6461, "step": 1541 }, { "epoch": 0.29, "learning_rate": 1.6627702555433075e-05, "loss": 0.6176, "step": 1542 }, { "epoch": 0.29, "learning_rate": 1.662311912512372e-05, "loss": 0.5292, "step": 1543 }, { "epoch": 0.29, "learning_rate": 1.6618533214820712e-05, "loss": 0.6219, "step": 1544 }, { "epoch": 0.29, "learning_rate": 1.6613944826241216e-05, "loss": 0.5155, "step": 1545 }, { "epoch": 0.29, "learning_rate": 1.660935396110334e-05, "loss": 0.6983, "step": 1546 }, { "epoch": 0.29, "learning_rate": 1.6604760621126104e-05, "loss": 0.5451, "step": 1547 }, { "epoch": 0.29, "learning_rate": 1.660016480802946e-05, "loss": 0.7285, "step": 1548 }, { "epoch": 0.29, "learning_rate": 1.6595566523534298e-05, "loss": 0.5851, "step": 1549 }, { "epoch": 0.29, "learning_rate": 1.659096576936241e-05, "loss": 0.5391, "step": 1550 }, { "epoch": 0.29, "learning_rate": 1.658636254723653e-05, "loss": 0.5732, "step": 1551 }, { "epoch": 0.29, "learning_rate": 1.658175685888031e-05, "loss": 0.6, "step": 1552 }, { "epoch": 0.29, "learning_rate": 1.657714870601833e-05, "loss": 0.4738, "step": 1553 }, { "epoch": 0.29, "learning_rate": 1.6572538090376086e-05, "loss": 0.5701, "step": 1554 }, { "epoch": 0.29, "learning_rate": 1.6567925013680005e-05, "loss": 0.7767, "step": 1555 }, { "epoch": 0.29, "learning_rate": 1.656330947765742e-05, "loss": 0.5152, "step": 1556 }, { "epoch": 0.29, "learning_rate": 1.655869148403661e-05, "loss": 0.5495, "step": 1557 }, { "epoch": 0.29, "learning_rate": 1.6554071034546746e-05, "loss": 0.4465, "step": 1558 }, { "epoch": 0.29, "learning_rate": 1.654944813091794e-05, "loss": 0.625, "step": 1559 }, { "epoch": 0.29, "learning_rate": 1.6544822774881212e-05, "loss": 0.588, "step": 1560 }, { "epoch": 0.29, "learning_rate": 1.6540194968168506e-05, "loss": 0.5481, "step": 1561 }, { "epoch": 0.3, "learning_rate": 1.653556471251268e-05, "loss": 0.4862, "step": 1562 }, { "epoch": 0.3, "learning_rate": 1.6530932009647508e-05, "loss": 0.5021, "step": 1563 }, { "epoch": 0.3, "learning_rate": 1.652629686130768e-05, "loss": 0.6206, "step": 1564 }, { "epoch": 0.3, "learning_rate": 1.6521659269228815e-05, "loss": 0.4767, "step": 1565 }, { "epoch": 0.3, "learning_rate": 1.651701923514742e-05, "loss": 0.6299, "step": 1566 }, { "epoch": 0.3, "learning_rate": 1.6512376760800943e-05, "loss": 0.6486, "step": 1567 }, { "epoch": 0.3, "learning_rate": 1.6507731847927735e-05, "loss": 0.5918, "step": 1568 }, { "epoch": 0.3, "learning_rate": 1.6503084498267058e-05, "loss": 0.7229, "step": 1569 }, { "epoch": 0.3, "learning_rate": 1.649843471355909e-05, "loss": 0.6286, "step": 1570 }, { "epoch": 0.3, "learning_rate": 1.6493782495544915e-05, "loss": 0.6434, "step": 1571 }, { "epoch": 0.3, "learning_rate": 1.6489127845966538e-05, "loss": 0.6164, "step": 1572 }, { "epoch": 0.3, "learning_rate": 1.6484470766566866e-05, "loss": 0.6057, "step": 1573 }, { "epoch": 0.3, "learning_rate": 1.647981125908972e-05, "loss": 0.4154, "step": 1574 }, { "epoch": 0.3, "learning_rate": 1.647514932527983e-05, "loss": 0.6895, "step": 1575 }, { "epoch": 0.3, "learning_rate": 1.6470484966882834e-05, "loss": 0.6217, "step": 1576 }, { "epoch": 0.3, "learning_rate": 1.646581818564528e-05, "loss": 0.4864, "step": 1577 }, { "epoch": 0.3, "learning_rate": 1.646114898331461e-05, "loss": 0.5457, "step": 1578 }, { "epoch": 0.3, "learning_rate": 1.6456477361639193e-05, "loss": 0.508, "step": 1579 }, { "epoch": 0.3, "learning_rate": 1.645180332236829e-05, "loss": 0.6799, "step": 1580 }, { "epoch": 0.3, "learning_rate": 1.6447126867252083e-05, "loss": 0.6916, "step": 1581 }, { "epoch": 0.3, "learning_rate": 1.6442447998041628e-05, "loss": 0.5367, "step": 1582 }, { "epoch": 0.3, "learning_rate": 1.6437766716488918e-05, "loss": 0.5703, "step": 1583 }, { "epoch": 0.3, "learning_rate": 1.6433083024346827e-05, "loss": 0.6, "step": 1584 }, { "epoch": 0.3, "learning_rate": 1.6428396923369145e-05, "loss": 0.6006, "step": 1585 }, { "epoch": 0.3, "learning_rate": 1.6423708415310563e-05, "loss": 0.6146, "step": 1586 }, { "epoch": 0.3, "learning_rate": 1.641901750192666e-05, "loss": 0.5474, "step": 1587 }, { "epoch": 0.3, "learning_rate": 1.6414324184973925e-05, "loss": 0.5987, "step": 1588 }, { "epoch": 0.3, "learning_rate": 1.6409628466209756e-05, "loss": 0.5569, "step": 1589 }, { "epoch": 0.3, "learning_rate": 1.640493034739244e-05, "loss": 0.6291, "step": 1590 }, { "epoch": 0.3, "learning_rate": 1.640022983028115e-05, "loss": 0.6844, "step": 1591 }, { "epoch": 0.3, "learning_rate": 1.639552691663599e-05, "loss": 0.6538, "step": 1592 }, { "epoch": 0.3, "learning_rate": 1.639082160821793e-05, "loss": 0.6291, "step": 1593 }, { "epoch": 0.3, "learning_rate": 1.6386113906788853e-05, "loss": 0.6467, "step": 1594 }, { "epoch": 0.3, "learning_rate": 1.638140381411153e-05, "loss": 0.6628, "step": 1595 }, { "epoch": 0.3, "learning_rate": 1.6376691331949638e-05, "loss": 0.6086, "step": 1596 }, { "epoch": 0.3, "learning_rate": 1.6371976462067744e-05, "loss": 0.707, "step": 1597 }, { "epoch": 0.3, "learning_rate": 1.6367259206231302e-05, "loss": 0.5518, "step": 1598 }, { "epoch": 0.3, "learning_rate": 1.636253956620666e-05, "loss": 0.5365, "step": 1599 }, { "epoch": 0.3, "learning_rate": 1.6357817543761072e-05, "loss": 0.564, "step": 1600 }, { "epoch": 0.3, "learning_rate": 1.6353093140662674e-05, "loss": 0.5614, "step": 1601 }, { "epoch": 0.3, "learning_rate": 1.634836635868049e-05, "loss": 0.8269, "step": 1602 }, { "epoch": 0.3, "learning_rate": 1.6343637199584447e-05, "loss": 0.4994, "step": 1603 }, { "epoch": 0.3, "learning_rate": 1.6338905665145352e-05, "loss": 0.5822, "step": 1604 }, { "epoch": 0.3, "learning_rate": 1.63341717571349e-05, "loss": 0.5145, "step": 1605 }, { "epoch": 0.3, "learning_rate": 1.6329435477325684e-05, "loss": 0.7312, "step": 1606 }, { "epoch": 0.3, "learning_rate": 1.6324696827491178e-05, "loss": 0.5878, "step": 1607 }, { "epoch": 0.3, "learning_rate": 1.6319955809405748e-05, "loss": 0.5483, "step": 1608 }, { "epoch": 0.3, "learning_rate": 1.631521242484464e-05, "loss": 0.4618, "step": 1609 }, { "epoch": 0.3, "learning_rate": 1.6310466675583995e-05, "loss": 0.5631, "step": 1610 }, { "epoch": 0.3, "learning_rate": 1.6305718563400836e-05, "loss": 0.6718, "step": 1611 }, { "epoch": 0.3, "learning_rate": 1.630096809007306e-05, "loss": 0.5174, "step": 1612 }, { "epoch": 0.3, "learning_rate": 1.629621525737947e-05, "loss": 0.573, "step": 1613 }, { "epoch": 0.3, "learning_rate": 1.6291460067099733e-05, "loss": 0.6085, "step": 1614 }, { "epoch": 0.31, "learning_rate": 1.6286702521014408e-05, "loss": 0.5033, "step": 1615 }, { "epoch": 0.31, "learning_rate": 1.6281942620904934e-05, "loss": 0.519, "step": 1616 }, { "epoch": 0.31, "learning_rate": 1.6277180368553637e-05, "loss": 0.6358, "step": 1617 }, { "epoch": 0.31, "learning_rate": 1.6272415765743708e-05, "loss": 0.6607, "step": 1618 }, { "epoch": 0.31, "learning_rate": 1.6267648814259235e-05, "loss": 0.5621, "step": 1619 }, { "epoch": 0.31, "learning_rate": 1.626287951588518e-05, "loss": 0.7471, "step": 1620 }, { "epoch": 0.31, "learning_rate": 1.6258107872407376e-05, "loss": 0.6529, "step": 1621 }, { "epoch": 0.31, "learning_rate": 1.6253333885612553e-05, "loss": 0.5798, "step": 1622 }, { "epoch": 0.31, "learning_rate": 1.6248557557288293e-05, "loss": 0.8362, "step": 1623 }, { "epoch": 0.31, "learning_rate": 1.6243778889223074e-05, "loss": 0.5901, "step": 1624 }, { "epoch": 0.31, "learning_rate": 1.6238997883206248e-05, "loss": 0.6155, "step": 1625 }, { "epoch": 0.31, "learning_rate": 1.6234214541028034e-05, "loss": 0.569, "step": 1626 }, { "epoch": 0.31, "learning_rate": 1.622942886447953e-05, "loss": 0.6978, "step": 1627 }, { "epoch": 0.31, "learning_rate": 1.6224640855352715e-05, "loss": 0.5421, "step": 1628 }, { "epoch": 0.31, "learning_rate": 1.6219850515440426e-05, "loss": 0.5015, "step": 1629 }, { "epoch": 0.31, "learning_rate": 1.6215057846536385e-05, "loss": 0.5267, "step": 1630 }, { "epoch": 0.31, "learning_rate": 1.6210262850435183e-05, "loss": 0.5465, "step": 1631 }, { "epoch": 0.31, "learning_rate": 1.6205465528932286e-05, "loss": 0.5682, "step": 1632 }, { "epoch": 0.31, "learning_rate": 1.620066588382402e-05, "loss": 0.5096, "step": 1633 }, { "epoch": 0.31, "learning_rate": 1.6195863916907596e-05, "loss": 0.5753, "step": 1634 }, { "epoch": 0.31, "learning_rate": 1.619105962998108e-05, "loss": 0.7451, "step": 1635 }, { "epoch": 0.31, "learning_rate": 1.618625302484341e-05, "loss": 0.4534, "step": 1636 }, { "epoch": 0.31, "learning_rate": 1.6181444103294405e-05, "loss": 0.6759, "step": 1637 }, { "epoch": 0.31, "learning_rate": 1.6176632867134738e-05, "loss": 0.5178, "step": 1638 }, { "epoch": 0.31, "learning_rate": 1.6171819318165946e-05, "loss": 0.6596, "step": 1639 }, { "epoch": 0.31, "learning_rate": 1.6167003458190446e-05, "loss": 0.5649, "step": 1640 }, { "epoch": 0.31, "learning_rate": 1.616218528901151e-05, "loss": 0.4778, "step": 1641 }, { "epoch": 0.31, "learning_rate": 1.6157364812433275e-05, "loss": 0.7073, "step": 1642 }, { "epoch": 0.31, "learning_rate": 1.6152542030260747e-05, "loss": 0.5753, "step": 1643 }, { "epoch": 0.31, "learning_rate": 1.6147716944299792e-05, "loss": 0.5643, "step": 1644 }, { "epoch": 0.31, "learning_rate": 1.6142889556357136e-05, "loss": 0.508, "step": 1645 }, { "epoch": 0.31, "learning_rate": 1.6138059868240377e-05, "loss": 0.6897, "step": 1646 }, { "epoch": 0.31, "learning_rate": 1.613322788175796e-05, "loss": 0.6888, "step": 1647 }, { "epoch": 0.31, "learning_rate": 1.61283935987192e-05, "loss": 0.8056, "step": 1648 }, { "epoch": 0.31, "learning_rate": 1.6123557020934275e-05, "loss": 0.4933, "step": 1649 }, { "epoch": 0.31, "learning_rate": 1.6118718150214213e-05, "loss": 0.6334, "step": 1650 }, { "epoch": 0.31, "learning_rate": 1.6113876988370907e-05, "loss": 0.6112, "step": 1651 }, { "epoch": 0.31, "learning_rate": 1.61090335372171e-05, "loss": 0.5884, "step": 1652 }, { "epoch": 0.31, "learning_rate": 1.6104187798566405e-05, "loss": 0.7712, "step": 1653 }, { "epoch": 0.31, "learning_rate": 1.6099339774233274e-05, "loss": 0.7958, "step": 1654 }, { "epoch": 0.31, "learning_rate": 1.609448946603304e-05, "loss": 0.559, "step": 1655 }, { "epoch": 0.31, "learning_rate": 1.608963687578187e-05, "loss": 0.6058, "step": 1656 }, { "epoch": 0.31, "learning_rate": 1.608478200529679e-05, "loss": 0.4981, "step": 1657 }, { "epoch": 0.31, "learning_rate": 1.607992485639568e-05, "loss": 0.619, "step": 1658 }, { "epoch": 0.31, "learning_rate": 1.6075065430897284e-05, "loss": 0.506, "step": 1659 }, { "epoch": 0.31, "learning_rate": 1.607020373062118e-05, "loss": 0.4286, "step": 1660 }, { "epoch": 0.31, "learning_rate": 1.6065339757387812e-05, "loss": 0.5259, "step": 1661 }, { "epoch": 0.31, "learning_rate": 1.6060473513018467e-05, "loss": 0.5578, "step": 1662 }, { "epoch": 0.31, "learning_rate": 1.605560499933529e-05, "loss": 0.6804, "step": 1663 }, { "epoch": 0.31, "learning_rate": 1.6050734218161265e-05, "loss": 0.4614, "step": 1664 }, { "epoch": 0.31, "learning_rate": 1.6045861171320242e-05, "loss": 0.4903, "step": 1665 }, { "epoch": 0.31, "learning_rate": 1.6040985860636896e-05, "loss": 0.7114, "step": 1666 }, { "epoch": 0.31, "learning_rate": 1.6036108287936774e-05, "loss": 0.6392, "step": 1667 }, { "epoch": 0.32, "learning_rate": 1.6031228455046255e-05, "loss": 0.5792, "step": 1668 }, { "epoch": 0.32, "learning_rate": 1.6026346363792565e-05, "loss": 0.5584, "step": 1669 }, { "epoch": 0.32, "learning_rate": 1.6021462016003778e-05, "loss": 0.6968, "step": 1670 }, { "epoch": 0.32, "learning_rate": 1.601657541350882e-05, "loss": 0.4564, "step": 1671 }, { "epoch": 0.32, "learning_rate": 1.601168655813745e-05, "loss": 0.5841, "step": 1672 }, { "epoch": 0.32, "learning_rate": 1.6006795451720272e-05, "loss": 0.5598, "step": 1673 }, { "epoch": 0.32, "learning_rate": 1.6001902096088745e-05, "loss": 0.743, "step": 1674 }, { "epoch": 0.32, "learning_rate": 1.5997006493075154e-05, "loss": 0.4822, "step": 1675 }, { "epoch": 0.32, "learning_rate": 1.5992108644512636e-05, "loss": 0.5429, "step": 1676 }, { "epoch": 0.32, "learning_rate": 1.598720855223516e-05, "loss": 0.5175, "step": 1677 }, { "epoch": 0.32, "learning_rate": 1.598230621807755e-05, "loss": 0.62, "step": 1678 }, { "epoch": 0.32, "learning_rate": 1.597740164387546e-05, "loss": 0.7945, "step": 1679 }, { "epoch": 0.32, "learning_rate": 1.5972494831465376e-05, "loss": 0.5644, "step": 1680 }, { "epoch": 0.32, "learning_rate": 1.5967585782684635e-05, "loss": 0.6724, "step": 1681 }, { "epoch": 0.32, "learning_rate": 1.59626744993714e-05, "loss": 0.4584, "step": 1682 }, { "epoch": 0.32, "learning_rate": 1.595776098336468e-05, "loss": 0.5387, "step": 1683 }, { "epoch": 0.32, "learning_rate": 1.595284523650432e-05, "loss": 0.5537, "step": 1684 }, { "epoch": 0.32, "learning_rate": 1.5947927260630985e-05, "loss": 0.7468, "step": 1685 }, { "epoch": 0.32, "learning_rate": 1.59430070575862e-05, "loss": 0.5811, "step": 1686 }, { "epoch": 0.32, "learning_rate": 1.5938084629212308e-05, "loss": 0.6037, "step": 1687 }, { "epoch": 0.32, "learning_rate": 1.593315997735248e-05, "loss": 0.8431, "step": 1688 }, { "epoch": 0.32, "learning_rate": 1.592823310385073e-05, "loss": 0.5807, "step": 1689 }, { "epoch": 0.32, "learning_rate": 1.5923304010551904e-05, "loss": 0.5783, "step": 1690 }, { "epoch": 0.32, "learning_rate": 1.5918372699301673e-05, "loss": 0.6102, "step": 1691 }, { "epoch": 0.32, "learning_rate": 1.5913439171946542e-05, "loss": 0.6449, "step": 1692 }, { "epoch": 0.32, "learning_rate": 1.590850343033385e-05, "loss": 0.5137, "step": 1693 }, { "epoch": 0.32, "learning_rate": 1.5903565476311755e-05, "loss": 0.563, "step": 1694 }, { "epoch": 0.32, "learning_rate": 1.589862531172925e-05, "loss": 0.601, "step": 1695 }, { "epoch": 0.32, "learning_rate": 1.5893682938436158e-05, "loss": 0.552, "step": 1696 }, { "epoch": 0.32, "learning_rate": 1.5888738358283125e-05, "loss": 0.6442, "step": 1697 }, { "epoch": 0.32, "learning_rate": 1.5883791573121622e-05, "loss": 0.6054, "step": 1698 }, { "epoch": 0.32, "learning_rate": 1.587884258480395e-05, "loss": 0.3972, "step": 1699 }, { "epoch": 0.32, "learning_rate": 1.5873891395183228e-05, "loss": 0.5666, "step": 1700 }, { "epoch": 0.32, "learning_rate": 1.5868938006113407e-05, "loss": 0.647, "step": 1701 }, { "epoch": 0.32, "learning_rate": 1.5863982419449265e-05, "loss": 0.6769, "step": 1702 }, { "epoch": 0.32, "learning_rate": 1.585902463704639e-05, "loss": 0.591, "step": 1703 }, { "epoch": 0.32, "learning_rate": 1.5854064660761196e-05, "loss": 0.7816, "step": 1704 }, { "epoch": 0.32, "learning_rate": 1.5849102492450924e-05, "loss": 0.4772, "step": 1705 }, { "epoch": 0.32, "learning_rate": 1.584413813397364e-05, "loss": 0.5074, "step": 1706 }, { "epoch": 0.32, "learning_rate": 1.5839171587188213e-05, "loss": 0.5609, "step": 1707 }, { "epoch": 0.32, "learning_rate": 1.5834202853954342e-05, "loss": 0.6974, "step": 1708 }, { "epoch": 0.32, "learning_rate": 1.5829231936132555e-05, "loss": 0.5999, "step": 1709 }, { "epoch": 0.32, "learning_rate": 1.5824258835584174e-05, "loss": 0.4979, "step": 1710 }, { "epoch": 0.32, "learning_rate": 1.5819283554171356e-05, "loss": 0.6459, "step": 1711 }, { "epoch": 0.32, "learning_rate": 1.5814306093757076e-05, "loss": 0.6185, "step": 1712 }, { "epoch": 0.32, "learning_rate": 1.580932645620511e-05, "loss": 0.589, "step": 1713 }, { "epoch": 0.32, "learning_rate": 1.5804344643380064e-05, "loss": 0.5937, "step": 1714 }, { "epoch": 0.32, "learning_rate": 1.5799360657147355e-05, "loss": 0.4549, "step": 1715 }, { "epoch": 0.32, "learning_rate": 1.5794374499373202e-05, "loss": 0.638, "step": 1716 }, { "epoch": 0.32, "learning_rate": 1.5789386171924656e-05, "loss": 0.4639, "step": 1717 }, { "epoch": 0.32, "learning_rate": 1.5784395676669567e-05, "loss": 0.4877, "step": 1718 }, { "epoch": 0.32, "learning_rate": 1.57794030154766e-05, "loss": 0.6558, "step": 1719 }, { "epoch": 0.32, "learning_rate": 1.5774408190215232e-05, "loss": 0.5548, "step": 1720 }, { "epoch": 0.33, "learning_rate": 1.5769411202755756e-05, "loss": 0.6746, "step": 1721 }, { "epoch": 0.33, "learning_rate": 1.5764412054969264e-05, "loss": 0.54, "step": 1722 }, { "epoch": 0.33, "learning_rate": 1.5759410748727663e-05, "loss": 0.6056, "step": 1723 }, { "epoch": 0.33, "learning_rate": 1.5754407285903657e-05, "loss": 0.7075, "step": 1724 }, { "epoch": 0.33, "learning_rate": 1.5749401668370786e-05, "loss": 0.5612, "step": 1725 }, { "epoch": 0.33, "learning_rate": 1.574439389800337e-05, "loss": 0.5069, "step": 1726 }, { "epoch": 0.33, "learning_rate": 1.5739383976676538e-05, "loss": 0.531, "step": 1727 }, { "epoch": 0.33, "learning_rate": 1.5734371906266236e-05, "loss": 0.5308, "step": 1728 }, { "epoch": 0.33, "learning_rate": 1.5729357688649205e-05, "loss": 0.6665, "step": 1729 }, { "epoch": 0.33, "learning_rate": 1.5724341325702996e-05, "loss": 0.7032, "step": 1730 }, { "epoch": 0.33, "learning_rate": 1.571932281930596e-05, "loss": 0.5157, "step": 1731 }, { "epoch": 0.33, "learning_rate": 1.5714302171337254e-05, "loss": 0.6679, "step": 1732 }, { "epoch": 0.33, "learning_rate": 1.5709279383676833e-05, "loss": 0.6558, "step": 1733 }, { "epoch": 0.33, "learning_rate": 1.5704254458205446e-05, "loss": 0.67, "step": 1734 }, { "epoch": 0.33, "learning_rate": 1.5699227396804667e-05, "loss": 0.5117, "step": 1735 }, { "epoch": 0.33, "learning_rate": 1.5694198201356842e-05, "loss": 0.6767, "step": 1736 }, { "epoch": 0.33, "learning_rate": 1.5689166873745133e-05, "loss": 0.5885, "step": 1737 }, { "epoch": 0.33, "learning_rate": 1.5684133415853497e-05, "loss": 0.4319, "step": 1738 }, { "epoch": 0.33, "learning_rate": 1.567909782956668e-05, "loss": 0.5947, "step": 1739 }, { "epoch": 0.33, "learning_rate": 1.5674060116770234e-05, "loss": 0.6532, "step": 1740 }, { "epoch": 0.33, "learning_rate": 1.5669020279350514e-05, "loss": 0.6943, "step": 1741 }, { "epoch": 0.33, "learning_rate": 1.5663978319194647e-05, "loss": 0.7376, "step": 1742 }, { "epoch": 0.33, "learning_rate": 1.565893423819058e-05, "loss": 0.6424, "step": 1743 }, { "epoch": 0.33, "learning_rate": 1.5653888038227044e-05, "loss": 0.5472, "step": 1744 }, { "epoch": 0.33, "learning_rate": 1.564883972119356e-05, "loss": 0.6489, "step": 1745 }, { "epoch": 0.33, "learning_rate": 1.5643789288980446e-05, "loss": 0.4147, "step": 1746 }, { "epoch": 0.33, "learning_rate": 1.5638736743478807e-05, "loss": 0.5784, "step": 1747 }, { "epoch": 0.33, "learning_rate": 1.563368208658055e-05, "loss": 0.6114, "step": 1748 }, { "epoch": 0.33, "learning_rate": 1.5628625320178362e-05, "loss": 0.6054, "step": 1749 }, { "epoch": 0.33, "learning_rate": 1.5623566446165722e-05, "loss": 0.5766, "step": 1750 }, { "epoch": 0.33, "learning_rate": 1.5618505466436907e-05, "loss": 0.4565, "step": 1751 }, { "epoch": 0.33, "learning_rate": 1.5613442382886966e-05, "loss": 0.5759, "step": 1752 }, { "epoch": 0.33, "learning_rate": 1.560837719741175e-05, "loss": 0.6293, "step": 1753 }, { "epoch": 0.33, "learning_rate": 1.5603309911907897e-05, "loss": 0.6898, "step": 1754 }, { "epoch": 0.33, "learning_rate": 1.5598240528272815e-05, "loss": 0.6254, "step": 1755 }, { "epoch": 0.33, "learning_rate": 1.5593169048404718e-05, "loss": 0.4497, "step": 1756 }, { "epoch": 0.33, "learning_rate": 1.5588095474202597e-05, "loss": 0.6484, "step": 1757 }, { "epoch": 0.33, "learning_rate": 1.558301980756622e-05, "loss": 0.4421, "step": 1758 }, { "epoch": 0.33, "learning_rate": 1.5577942050396147e-05, "loss": 0.4923, "step": 1759 }, { "epoch": 0.33, "learning_rate": 1.557286220459372e-05, "loss": 0.5483, "step": 1760 }, { "epoch": 0.33, "learning_rate": 1.556778027206107e-05, "loss": 0.4994, "step": 1761 }, { "epoch": 0.33, "learning_rate": 1.556269625470108e-05, "loss": 0.5308, "step": 1762 }, { "epoch": 0.33, "learning_rate": 1.5557610154417456e-05, "loss": 0.5752, "step": 1763 }, { "epoch": 0.33, "learning_rate": 1.555252197311465e-05, "loss": 0.5097, "step": 1764 }, { "epoch": 0.33, "learning_rate": 1.554743171269791e-05, "loss": 0.6006, "step": 1765 }, { "epoch": 0.33, "learning_rate": 1.5542339375073262e-05, "loss": 0.5182, "step": 1766 }, { "epoch": 0.33, "learning_rate": 1.55372449621475e-05, "loss": 0.4459, "step": 1767 }, { "epoch": 0.33, "learning_rate": 1.5532148475828206e-05, "loss": 0.5107, "step": 1768 }, { "epoch": 0.33, "learning_rate": 1.552704991802373e-05, "loss": 0.6753, "step": 1769 }, { "epoch": 0.33, "learning_rate": 1.5521949290643203e-05, "loss": 0.6457, "step": 1770 }, { "epoch": 0.33, "learning_rate": 1.5516846595596525e-05, "loss": 0.6172, "step": 1771 }, { "epoch": 0.33, "learning_rate": 1.5511741834794386e-05, "loss": 0.4617, "step": 1772 }, { "epoch": 0.33, "learning_rate": 1.5506635010148228e-05, "loss": 0.6032, "step": 1773 }, { "epoch": 0.34, "learning_rate": 1.5501526123570277e-05, "loss": 0.8055, "step": 1774 }, { "epoch": 0.34, "learning_rate": 1.5496415176973532e-05, "loss": 0.5206, "step": 1775 }, { "epoch": 0.34, "learning_rate": 1.5491302172271764e-05, "loss": 0.5727, "step": 1776 }, { "epoch": 0.34, "learning_rate": 1.54861871113795e-05, "loss": 0.6873, "step": 1777 }, { "epoch": 0.34, "learning_rate": 1.5481069996212063e-05, "loss": 0.6008, "step": 1778 }, { "epoch": 0.34, "learning_rate": 1.5475950828685528e-05, "loss": 0.6257, "step": 1779 }, { "epoch": 0.34, "learning_rate": 1.5470829610716735e-05, "loss": 0.5878, "step": 1780 }, { "epoch": 0.34, "learning_rate": 1.5465706344223303e-05, "loss": 0.5188, "step": 1781 }, { "epoch": 0.34, "learning_rate": 1.5460581031123613e-05, "loss": 0.5916, "step": 1782 }, { "epoch": 0.34, "learning_rate": 1.5455453673336813e-05, "loss": 0.5638, "step": 1783 }, { "epoch": 0.34, "learning_rate": 1.5450324272782813e-05, "loss": 0.6927, "step": 1784 }, { "epoch": 0.34, "learning_rate": 1.5445192831382296e-05, "loss": 0.6456, "step": 1785 }, { "epoch": 0.34, "learning_rate": 1.5440059351056704e-05, "loss": 0.5446, "step": 1786 }, { "epoch": 0.34, "learning_rate": 1.5434923833728238e-05, "loss": 0.6715, "step": 1787 }, { "epoch": 0.34, "learning_rate": 1.5429786281319873e-05, "loss": 0.6587, "step": 1788 }, { "epoch": 0.34, "learning_rate": 1.5424646695755337e-05, "loss": 0.5771, "step": 1789 }, { "epoch": 0.34, "learning_rate": 1.5419505078959118e-05, "loss": 0.501, "step": 1790 }, { "epoch": 0.34, "learning_rate": 1.5414361432856475e-05, "loss": 0.7051, "step": 1791 }, { "epoch": 0.34, "learning_rate": 1.5409215759373415e-05, "loss": 0.5519, "step": 1792 }, { "epoch": 0.34, "learning_rate": 1.5404068060436715e-05, "loss": 0.5305, "step": 1793 }, { "epoch": 0.34, "learning_rate": 1.53989183379739e-05, "loss": 0.5829, "step": 1794 }, { "epoch": 0.34, "learning_rate": 1.539376659391326e-05, "loss": 0.6817, "step": 1795 }, { "epoch": 0.34, "learning_rate": 1.5388612830183842e-05, "loss": 0.6226, "step": 1796 }, { "epoch": 0.34, "learning_rate": 1.538345704871544e-05, "loss": 0.6398, "step": 1797 }, { "epoch": 0.34, "learning_rate": 1.537829925143862e-05, "loss": 0.6571, "step": 1798 }, { "epoch": 0.34, "learning_rate": 1.5373139440284687e-05, "loss": 0.602, "step": 1799 }, { "epoch": 0.34, "learning_rate": 1.53679776171857e-05, "loss": 0.5631, "step": 1800 }, { "epoch": 0.34, "learning_rate": 1.5362813784074493e-05, "loss": 0.84, "step": 1801 }, { "epoch": 0.34, "learning_rate": 1.5357647942884627e-05, "loss": 0.5393, "step": 1802 }, { "epoch": 0.34, "learning_rate": 1.5352480095550427e-05, "loss": 0.5971, "step": 1803 }, { "epoch": 0.34, "learning_rate": 1.534731024400697e-05, "loss": 0.688, "step": 1804 }, { "epoch": 0.34, "learning_rate": 1.5342138390190078e-05, "loss": 0.6871, "step": 1805 }, { "epoch": 0.34, "learning_rate": 1.5336964536036328e-05, "loss": 0.6208, "step": 1806 }, { "epoch": 0.34, "learning_rate": 1.533178868348304e-05, "loss": 0.7018, "step": 1807 }, { "epoch": 0.34, "learning_rate": 1.532661083446829e-05, "loss": 0.5821, "step": 1808 }, { "epoch": 0.34, "learning_rate": 1.5321430990930893e-05, "loss": 0.5127, "step": 1809 }, { "epoch": 0.34, "learning_rate": 1.531624915481042e-05, "loss": 0.6247, "step": 1810 }, { "epoch": 0.34, "learning_rate": 1.5311065328047184e-05, "loss": 0.5795, "step": 1811 }, { "epoch": 0.34, "learning_rate": 1.530587951258224e-05, "loss": 0.5779, "step": 1812 }, { "epoch": 0.34, "learning_rate": 1.530069171035739e-05, "loss": 0.5333, "step": 1813 }, { "epoch": 0.34, "learning_rate": 1.5295501923315178e-05, "loss": 0.6923, "step": 1814 }, { "epoch": 0.34, "learning_rate": 1.5290310153398906e-05, "loss": 0.4923, "step": 1815 }, { "epoch": 0.34, "learning_rate": 1.5285116402552592e-05, "loss": 0.6078, "step": 1816 }, { "epoch": 0.34, "learning_rate": 1.5279920672721014e-05, "loss": 0.7185, "step": 1817 }, { "epoch": 0.34, "learning_rate": 1.5274722965849692e-05, "loss": 0.5658, "step": 1818 }, { "epoch": 0.34, "learning_rate": 1.5269523283884874e-05, "loss": 0.6284, "step": 1819 }, { "epoch": 0.34, "learning_rate": 1.526432162877356e-05, "loss": 0.4198, "step": 1820 }, { "epoch": 0.34, "learning_rate": 1.5259118002463484e-05, "loss": 0.5104, "step": 1821 }, { "epoch": 0.34, "learning_rate": 1.5253912406903109e-05, "loss": 0.5732, "step": 1822 }, { "epoch": 0.34, "learning_rate": 1.5248704844041655e-05, "loss": 0.6834, "step": 1823 }, { "epoch": 0.34, "learning_rate": 1.5243495315829062e-05, "loss": 0.6216, "step": 1824 }, { "epoch": 0.34, "learning_rate": 1.5238283824216015e-05, "loss": 0.6011, "step": 1825 }, { "epoch": 0.34, "learning_rate": 1.5233070371153926e-05, "loss": 0.5288, "step": 1826 }, { "epoch": 0.35, "learning_rate": 1.522785495859495e-05, "loss": 0.5025, "step": 1827 }, { "epoch": 0.35, "learning_rate": 1.5222637588491971e-05, "loss": 0.7065, "step": 1828 }, { "epoch": 0.35, "learning_rate": 1.521741826279861e-05, "loss": 0.6381, "step": 1829 }, { "epoch": 0.35, "learning_rate": 1.5212196983469212e-05, "loss": 0.6338, "step": 1830 }, { "epoch": 0.35, "learning_rate": 1.5206973752458857e-05, "loss": 0.4671, "step": 1831 }, { "epoch": 0.35, "learning_rate": 1.5201748571723367e-05, "loss": 0.6506, "step": 1832 }, { "epoch": 0.35, "learning_rate": 1.5196521443219277e-05, "loss": 0.5925, "step": 1833 }, { "epoch": 0.35, "learning_rate": 1.5191292368903865e-05, "loss": 0.6347, "step": 1834 }, { "epoch": 0.35, "learning_rate": 1.5186061350735124e-05, "loss": 0.7759, "step": 1835 }, { "epoch": 0.35, "learning_rate": 1.5180828390671787e-05, "loss": 0.7019, "step": 1836 }, { "epoch": 0.35, "learning_rate": 1.517559349067331e-05, "loss": 0.4756, "step": 1837 }, { "epoch": 0.35, "learning_rate": 1.5170356652699873e-05, "loss": 0.5113, "step": 1838 }, { "epoch": 0.35, "learning_rate": 1.5165117878712386e-05, "loss": 0.6124, "step": 1839 }, { "epoch": 0.35, "learning_rate": 1.515987717067248e-05, "loss": 0.5485, "step": 1840 }, { "epoch": 0.35, "learning_rate": 1.5154634530542512e-05, "loss": 0.502, "step": 1841 }, { "epoch": 0.35, "learning_rate": 1.514938996028556e-05, "loss": 0.5099, "step": 1842 }, { "epoch": 0.35, "learning_rate": 1.514414346186543e-05, "loss": 0.7417, "step": 1843 }, { "epoch": 0.35, "learning_rate": 1.5138895037246646e-05, "loss": 0.4921, "step": 1844 }, { "epoch": 0.35, "learning_rate": 1.5133644688394456e-05, "loss": 0.6719, "step": 1845 }, { "epoch": 0.35, "learning_rate": 1.5128392417274822e-05, "loss": 0.5741, "step": 1846 }, { "epoch": 0.35, "learning_rate": 1.5123138225854437e-05, "loss": 0.6025, "step": 1847 }, { "epoch": 0.35, "learning_rate": 1.51178821161007e-05, "loss": 0.6541, "step": 1848 }, { "epoch": 0.35, "learning_rate": 1.511262408998174e-05, "loss": 0.5056, "step": 1849 }, { "epoch": 0.35, "learning_rate": 1.5107364149466399e-05, "loss": 0.5652, "step": 1850 }, { "epoch": 0.35, "learning_rate": 1.5102102296524228e-05, "loss": 0.6504, "step": 1851 }, { "epoch": 0.35, "learning_rate": 1.5096838533125508e-05, "loss": 0.4422, "step": 1852 }, { "epoch": 0.35, "learning_rate": 1.5091572861241228e-05, "loss": 0.5864, "step": 1853 }, { "epoch": 0.35, "learning_rate": 1.5086305282843089e-05, "loss": 0.5793, "step": 1854 }, { "epoch": 0.35, "learning_rate": 1.5081035799903511e-05, "loss": 0.7601, "step": 1855 }, { "epoch": 0.35, "learning_rate": 1.5075764414395627e-05, "loss": 0.6921, "step": 1856 }, { "epoch": 0.35, "learning_rate": 1.507049112829328e-05, "loss": 0.4504, "step": 1857 }, { "epoch": 0.35, "learning_rate": 1.5065215943571026e-05, "loss": 0.5741, "step": 1858 }, { "epoch": 0.35, "learning_rate": 1.5059938862204126e-05, "loss": 0.631, "step": 1859 }, { "epoch": 0.35, "learning_rate": 1.5054659886168566e-05, "loss": 0.6853, "step": 1860 }, { "epoch": 0.35, "learning_rate": 1.5049379017441026e-05, "loss": 0.4414, "step": 1861 }, { "epoch": 0.35, "learning_rate": 1.5044096257998902e-05, "loss": 0.47, "step": 1862 }, { "epoch": 0.35, "learning_rate": 1.5038811609820294e-05, "loss": 0.5189, "step": 1863 }, { "epoch": 0.35, "learning_rate": 1.5033525074884017e-05, "loss": 0.6342, "step": 1864 }, { "epoch": 0.35, "learning_rate": 1.5028236655169583e-05, "loss": 0.5489, "step": 1865 }, { "epoch": 0.35, "learning_rate": 1.5022946352657216e-05, "loss": 0.6361, "step": 1866 }, { "epoch": 0.35, "learning_rate": 1.5017654169327847e-05, "loss": 0.5531, "step": 1867 }, { "epoch": 0.35, "learning_rate": 1.5012360107163098e-05, "loss": 0.7704, "step": 1868 }, { "epoch": 0.35, "learning_rate": 1.5007064168145316e-05, "loss": 0.581, "step": 1869 }, { "epoch": 0.35, "learning_rate": 1.5001766354257528e-05, "loss": 0.4632, "step": 1870 }, { "epoch": 0.35, "learning_rate": 1.4996466667483479e-05, "loss": 0.6607, "step": 1871 }, { "epoch": 0.35, "learning_rate": 1.499116510980761e-05, "loss": 0.6787, "step": 1872 }, { "epoch": 0.35, "learning_rate": 1.4985861683215059e-05, "loss": 0.8264, "step": 1873 }, { "epoch": 0.35, "learning_rate": 1.4980556389691675e-05, "loss": 0.4967, "step": 1874 }, { "epoch": 0.35, "learning_rate": 1.4975249231223987e-05, "loss": 0.6395, "step": 1875 }, { "epoch": 0.35, "learning_rate": 1.4969940209799248e-05, "loss": 0.57, "step": 1876 }, { "epoch": 0.35, "learning_rate": 1.4964629327405385e-05, "loss": 0.4951, "step": 1877 }, { "epoch": 0.35, "learning_rate": 1.495931658603103e-05, "loss": 0.5687, "step": 1878 }, { "epoch": 0.35, "learning_rate": 1.4954001987665518e-05, "loss": 0.6539, "step": 1879 }, { "epoch": 0.36, "learning_rate": 1.4948685534298868e-05, "loss": 0.5703, "step": 1880 }, { "epoch": 0.36, "learning_rate": 1.4943367227921805e-05, "loss": 0.6911, "step": 1881 }, { "epoch": 0.36, "learning_rate": 1.4938047070525739e-05, "loss": 0.5946, "step": 1882 }, { "epoch": 0.36, "learning_rate": 1.4932725064102778e-05, "loss": 0.5287, "step": 1883 }, { "epoch": 0.36, "learning_rate": 1.4927401210645718e-05, "loss": 0.5784, "step": 1884 }, { "epoch": 0.36, "learning_rate": 1.4922075512148049e-05, "loss": 0.698, "step": 1885 }, { "epoch": 0.36, "learning_rate": 1.4916747970603952e-05, "loss": 0.5247, "step": 1886 }, { "epoch": 0.36, "learning_rate": 1.4911418588008302e-05, "loss": 0.6944, "step": 1887 }, { "epoch": 0.36, "learning_rate": 1.4906087366356658e-05, "loss": 0.8495, "step": 1888 }, { "epoch": 0.36, "learning_rate": 1.4900754307645264e-05, "loss": 0.6692, "step": 1889 }, { "epoch": 0.36, "learning_rate": 1.4895419413871066e-05, "loss": 0.4697, "step": 1890 }, { "epoch": 0.36, "learning_rate": 1.489008268703168e-05, "loss": 0.5792, "step": 1891 }, { "epoch": 0.36, "learning_rate": 1.4884744129125421e-05, "loss": 0.6068, "step": 1892 }, { "epoch": 0.36, "learning_rate": 1.4879403742151283e-05, "loss": 0.5292, "step": 1893 }, { "epoch": 0.36, "learning_rate": 1.4874061528108948e-05, "loss": 0.6115, "step": 1894 }, { "epoch": 0.36, "learning_rate": 1.4868717488998785e-05, "loss": 0.6788, "step": 1895 }, { "epoch": 0.36, "learning_rate": 1.4863371626821831e-05, "loss": 0.6662, "step": 1896 }, { "epoch": 0.36, "learning_rate": 1.4858023943579831e-05, "loss": 0.6338, "step": 1897 }, { "epoch": 0.36, "learning_rate": 1.4852674441275192e-05, "loss": 0.6752, "step": 1898 }, { "epoch": 0.36, "learning_rate": 1.4847323121911008e-05, "loss": 0.4482, "step": 1899 }, { "epoch": 0.36, "learning_rate": 1.4841969987491052e-05, "loss": 0.5345, "step": 1900 }, { "epoch": 0.36, "learning_rate": 1.4836615040019781e-05, "loss": 0.7101, "step": 1901 }, { "epoch": 0.36, "learning_rate": 1.4831258281502328e-05, "loss": 0.5502, "step": 1902 }, { "epoch": 0.36, "learning_rate": 1.4825899713944503e-05, "loss": 0.4552, "step": 1903 }, { "epoch": 0.36, "learning_rate": 1.4820539339352795e-05, "loss": 0.6086, "step": 1904 }, { "epoch": 0.36, "learning_rate": 1.4815177159734373e-05, "loss": 0.6878, "step": 1905 }, { "epoch": 0.36, "learning_rate": 1.480981317709707e-05, "loss": 0.5485, "step": 1906 }, { "epoch": 0.36, "learning_rate": 1.4804447393449408e-05, "loss": 0.5138, "step": 1907 }, { "epoch": 0.36, "learning_rate": 1.4799079810800579e-05, "loss": 0.736, "step": 1908 }, { "epoch": 0.36, "learning_rate": 1.4793710431160444e-05, "loss": 0.4627, "step": 1909 }, { "epoch": 0.36, "learning_rate": 1.4788339256539543e-05, "loss": 0.5551, "step": 1910 }, { "epoch": 0.36, "learning_rate": 1.478296628894908e-05, "loss": 0.6607, "step": 1911 }, { "epoch": 0.36, "learning_rate": 1.4777591530400943e-05, "loss": 0.5358, "step": 1912 }, { "epoch": 0.36, "learning_rate": 1.4772214982907678e-05, "loss": 0.5681, "step": 1913 }, { "epoch": 0.36, "learning_rate": 1.476683664848251e-05, "loss": 0.4296, "step": 1914 }, { "epoch": 0.36, "learning_rate": 1.4761456529139324e-05, "loss": 0.4882, "step": 1915 }, { "epoch": 0.36, "learning_rate": 1.4756074626892684e-05, "loss": 0.6184, "step": 1916 }, { "epoch": 0.36, "learning_rate": 1.4750690943757815e-05, "loss": 0.5742, "step": 1917 }, { "epoch": 0.36, "learning_rate": 1.4745305481750607e-05, "loss": 0.4511, "step": 1918 }, { "epoch": 0.36, "learning_rate": 1.4739918242887623e-05, "loss": 0.585, "step": 1919 }, { "epoch": 0.36, "learning_rate": 1.4734529229186082e-05, "loss": 0.5163, "step": 1920 }, { "epoch": 0.36, "learning_rate": 1.4729138442663884e-05, "loss": 0.5508, "step": 1921 }, { "epoch": 0.36, "learning_rate": 1.472374588533957e-05, "loss": 0.546, "step": 1922 }, { "epoch": 0.36, "learning_rate": 1.4718351559232362e-05, "loss": 0.6106, "step": 1923 }, { "epoch": 0.36, "learning_rate": 1.4712955466362136e-05, "loss": 0.621, "step": 1924 }, { "epoch": 0.36, "learning_rate": 1.4707557608749434e-05, "loss": 0.6339, "step": 1925 }, { "epoch": 0.36, "learning_rate": 1.4702157988415455e-05, "loss": 0.7364, "step": 1926 }, { "epoch": 0.36, "learning_rate": 1.469675660738206e-05, "loss": 0.6693, "step": 1927 }, { "epoch": 0.36, "learning_rate": 1.4691353467671772e-05, "loss": 0.6322, "step": 1928 }, { "epoch": 0.36, "learning_rate": 1.4685948571307762e-05, "loss": 0.5154, "step": 1929 }, { "epoch": 0.36, "learning_rate": 1.4680541920313876e-05, "loss": 0.6501, "step": 1930 }, { "epoch": 0.36, "learning_rate": 1.4675133516714599e-05, "loss": 0.5716, "step": 1931 }, { "epoch": 0.37, "learning_rate": 1.4669723362535084e-05, "loss": 0.6371, "step": 1932 }, { "epoch": 0.37, "learning_rate": 1.4664311459801139e-05, "loss": 0.4324, "step": 1933 }, { "epoch": 0.37, "learning_rate": 1.4658897810539217e-05, "loss": 0.6323, "step": 1934 }, { "epoch": 0.37, "learning_rate": 1.4653482416776436e-05, "loss": 0.5545, "step": 1935 }, { "epoch": 0.37, "learning_rate": 1.4648065280540562e-05, "loss": 0.54, "step": 1936 }, { "epoch": 0.37, "learning_rate": 1.4642646403860017e-05, "loss": 0.4739, "step": 1937 }, { "epoch": 0.37, "learning_rate": 1.463722578876387e-05, "loss": 0.6942, "step": 1938 }, { "epoch": 0.37, "learning_rate": 1.4631803437281837e-05, "loss": 0.5857, "step": 1939 }, { "epoch": 0.37, "learning_rate": 1.4626379351444304e-05, "loss": 0.5119, "step": 1940 }, { "epoch": 0.37, "learning_rate": 1.462095353328228e-05, "loss": 0.5066, "step": 1941 }, { "epoch": 0.37, "learning_rate": 1.4615525984827441e-05, "loss": 0.4576, "step": 1942 }, { "epoch": 0.37, "learning_rate": 1.4610096708112107e-05, "loss": 0.5459, "step": 1943 }, { "epoch": 0.37, "learning_rate": 1.4604665705169239e-05, "loss": 0.5528, "step": 1944 }, { "epoch": 0.37, "learning_rate": 1.4599232978032452e-05, "loss": 0.7566, "step": 1945 }, { "epoch": 0.37, "learning_rate": 1.4593798528735998e-05, "loss": 0.8539, "step": 1946 }, { "epoch": 0.37, "learning_rate": 1.4588362359314787e-05, "loss": 0.5186, "step": 1947 }, { "epoch": 0.37, "learning_rate": 1.4582924471804358e-05, "loss": 0.6615, "step": 1948 }, { "epoch": 0.37, "learning_rate": 1.4577484868240908e-05, "loss": 0.5027, "step": 1949 }, { "epoch": 0.37, "learning_rate": 1.4572043550661262e-05, "loss": 0.5968, "step": 1950 }, { "epoch": 0.37, "learning_rate": 1.4566600521102898e-05, "loss": 0.619, "step": 1951 }, { "epoch": 0.37, "learning_rate": 1.456115578160393e-05, "loss": 0.553, "step": 1952 }, { "epoch": 0.37, "learning_rate": 1.4555709334203116e-05, "loss": 0.4782, "step": 1953 }, { "epoch": 0.37, "learning_rate": 1.4550261180939846e-05, "loss": 0.7012, "step": 1954 }, { "epoch": 0.37, "learning_rate": 1.4544811323854155e-05, "loss": 0.5721, "step": 1955 }, { "epoch": 0.37, "learning_rate": 1.453935976498672e-05, "loss": 0.5037, "step": 1956 }, { "epoch": 0.37, "learning_rate": 1.453390650637884e-05, "loss": 0.467, "step": 1957 }, { "epoch": 0.37, "learning_rate": 1.4528451550072472e-05, "loss": 0.6087, "step": 1958 }, { "epoch": 0.37, "learning_rate": 1.4522994898110188e-05, "loss": 0.5373, "step": 1959 }, { "epoch": 0.37, "learning_rate": 1.4517536552535205e-05, "loss": 0.5095, "step": 1960 }, { "epoch": 0.37, "learning_rate": 1.4512076515391375e-05, "loss": 0.4573, "step": 1961 }, { "epoch": 0.37, "learning_rate": 1.4506614788723181e-05, "loss": 0.5063, "step": 1962 }, { "epoch": 0.37, "learning_rate": 1.4501151374575742e-05, "loss": 0.5337, "step": 1963 }, { "epoch": 0.37, "learning_rate": 1.4495686274994799e-05, "loss": 0.4332, "step": 1964 }, { "epoch": 0.37, "learning_rate": 1.449021949202674e-05, "loss": 0.7649, "step": 1965 }, { "epoch": 0.37, "learning_rate": 1.4484751027718567e-05, "loss": 0.3724, "step": 1966 }, { "epoch": 0.37, "learning_rate": 1.4479280884117919e-05, "loss": 0.609, "step": 1967 }, { "epoch": 0.37, "learning_rate": 1.447380906327307e-05, "loss": 0.4315, "step": 1968 }, { "epoch": 0.37, "learning_rate": 1.446833556723291e-05, "loss": 0.42, "step": 1969 }, { "epoch": 0.37, "learning_rate": 1.4462860398046964e-05, "loss": 0.6879, "step": 1970 }, { "epoch": 0.37, "learning_rate": 1.4457383557765385e-05, "loss": 0.5968, "step": 1971 }, { "epoch": 0.37, "learning_rate": 1.4451905048438943e-05, "loss": 0.4947, "step": 1972 }, { "epoch": 0.37, "learning_rate": 1.4446424872119041e-05, "loss": 0.6004, "step": 1973 }, { "epoch": 0.37, "learning_rate": 1.4440943030857698e-05, "loss": 0.5348, "step": 1974 }, { "epoch": 0.37, "learning_rate": 1.4435459526707574e-05, "loss": 0.6951, "step": 1975 }, { "epoch": 0.37, "learning_rate": 1.4429974361721928e-05, "loss": 0.6712, "step": 1976 }, { "epoch": 0.37, "learning_rate": 1.4424487537954658e-05, "loss": 0.6103, "step": 1977 }, { "epoch": 0.37, "learning_rate": 1.4418999057460277e-05, "loss": 0.6401, "step": 1978 }, { "epoch": 0.37, "learning_rate": 1.4413508922293917e-05, "loss": 0.5248, "step": 1979 }, { "epoch": 0.37, "learning_rate": 1.4408017134511334e-05, "loss": 0.6691, "step": 1980 }, { "epoch": 0.37, "learning_rate": 1.4402523696168898e-05, "loss": 0.5254, "step": 1981 }, { "epoch": 0.37, "learning_rate": 1.43970286093236e-05, "loss": 0.5766, "step": 1982 }, { "epoch": 0.37, "learning_rate": 1.4391531876033047e-05, "loss": 0.563, "step": 1983 }, { "epoch": 0.37, "learning_rate": 1.4386033498355464e-05, "loss": 0.5582, "step": 1984 }, { "epoch": 0.38, "learning_rate": 1.4380533478349694e-05, "loss": 0.6873, "step": 1985 }, { "epoch": 0.38, "learning_rate": 1.437503181807519e-05, "loss": 0.4901, "step": 1986 }, { "epoch": 0.38, "learning_rate": 1.4369528519592016e-05, "loss": 0.6593, "step": 1987 }, { "epoch": 0.38, "learning_rate": 1.436402358496086e-05, "loss": 0.6982, "step": 1988 }, { "epoch": 0.38, "learning_rate": 1.4358517016243013e-05, "loss": 0.5235, "step": 1989 }, { "epoch": 0.38, "learning_rate": 1.4353008815500387e-05, "loss": 0.6964, "step": 1990 }, { "epoch": 0.38, "learning_rate": 1.4347498984795501e-05, "loss": 0.456, "step": 1991 }, { "epoch": 0.38, "learning_rate": 1.4341987526191475e-05, "loss": 0.6528, "step": 1992 }, { "epoch": 0.38, "learning_rate": 1.4336474441752052e-05, "loss": 0.583, "step": 1993 }, { "epoch": 0.38, "learning_rate": 1.4330959733541581e-05, "loss": 0.5862, "step": 1994 }, { "epoch": 0.38, "learning_rate": 1.4325443403625012e-05, "loss": 0.5675, "step": 1995 }, { "epoch": 0.38, "learning_rate": 1.4319925454067912e-05, "loss": 0.5143, "step": 1996 }, { "epoch": 0.38, "learning_rate": 1.4314405886936444e-05, "loss": 0.7891, "step": 1997 }, { "epoch": 0.38, "learning_rate": 1.4308884704297388e-05, "loss": 0.6855, "step": 1998 }, { "epoch": 0.38, "learning_rate": 1.4303361908218119e-05, "loss": 0.5469, "step": 1999 }, { "epoch": 0.38, "learning_rate": 1.4297837500766619e-05, "loss": 0.5859, "step": 2000 }, { "epoch": 0.38, "learning_rate": 1.429231148401148e-05, "loss": 0.5573, "step": 2001 }, { "epoch": 0.38, "learning_rate": 1.4286783860021884e-05, "loss": 0.6411, "step": 2002 }, { "epoch": 0.38, "learning_rate": 1.428125463086763e-05, "loss": 0.5588, "step": 2003 }, { "epoch": 0.38, "learning_rate": 1.4275723798619106e-05, "loss": 0.6823, "step": 2004 }, { "epoch": 0.38, "learning_rate": 1.4270191365347304e-05, "loss": 0.5778, "step": 2005 }, { "epoch": 0.38, "learning_rate": 1.4264657333123814e-05, "loss": 0.6493, "step": 2006 }, { "epoch": 0.38, "learning_rate": 1.425912170402083e-05, "loss": 0.5854, "step": 2007 }, { "epoch": 0.38, "learning_rate": 1.4253584480111142e-05, "loss": 0.5067, "step": 2008 }, { "epoch": 0.38, "learning_rate": 1.4248045663468129e-05, "loss": 0.7334, "step": 2009 }, { "epoch": 0.38, "learning_rate": 1.4242505256165779e-05, "loss": 0.5397, "step": 2010 }, { "epoch": 0.38, "learning_rate": 1.4236963260278667e-05, "loss": 0.7243, "step": 2011 }, { "epoch": 0.38, "learning_rate": 1.4231419677881966e-05, "loss": 0.7491, "step": 2012 }, { "epoch": 0.38, "learning_rate": 1.4225874511051445e-05, "loss": 0.6077, "step": 2013 }, { "epoch": 0.38, "learning_rate": 1.422032776186346e-05, "loss": 0.5184, "step": 2014 }, { "epoch": 0.38, "learning_rate": 1.4214779432394965e-05, "loss": 0.5572, "step": 2015 }, { "epoch": 0.38, "learning_rate": 1.4209229524723503e-05, "loss": 0.5067, "step": 2016 }, { "epoch": 0.38, "learning_rate": 1.4203678040927211e-05, "loss": 0.525, "step": 2017 }, { "epoch": 0.38, "learning_rate": 1.4198124983084812e-05, "loss": 0.5396, "step": 2018 }, { "epoch": 0.38, "learning_rate": 1.4192570353275623e-05, "loss": 0.5606, "step": 2019 }, { "epoch": 0.38, "learning_rate": 1.4187014153579548e-05, "loss": 0.5509, "step": 2020 }, { "epoch": 0.38, "learning_rate": 1.4181456386077073e-05, "loss": 0.4435, "step": 2021 }, { "epoch": 0.38, "learning_rate": 1.4175897052849278e-05, "loss": 0.6465, "step": 2022 }, { "epoch": 0.38, "learning_rate": 1.4170336155977833e-05, "loss": 0.7233, "step": 2023 }, { "epoch": 0.38, "learning_rate": 1.416477369754498e-05, "loss": 0.5652, "step": 2024 }, { "epoch": 0.38, "learning_rate": 1.415920967963356e-05, "loss": 0.6913, "step": 2025 }, { "epoch": 0.38, "learning_rate": 1.4153644104326985e-05, "loss": 0.5659, "step": 2026 }, { "epoch": 0.38, "learning_rate": 1.414807697370926e-05, "loss": 0.4572, "step": 2027 }, { "epoch": 0.38, "learning_rate": 1.4142508289864972e-05, "loss": 0.5511, "step": 2028 }, { "epoch": 0.38, "learning_rate": 1.4136938054879284e-05, "loss": 0.5156, "step": 2029 }, { "epoch": 0.38, "learning_rate": 1.4131366270837938e-05, "loss": 0.669, "step": 2030 }, { "epoch": 0.38, "learning_rate": 1.4125792939827268e-05, "loss": 0.4658, "step": 2031 }, { "epoch": 0.38, "learning_rate": 1.4120218063934177e-05, "loss": 0.7922, "step": 2032 }, { "epoch": 0.38, "learning_rate": 1.411464164524615e-05, "loss": 0.5772, "step": 2033 }, { "epoch": 0.38, "learning_rate": 1.4109063685851245e-05, "loss": 0.5447, "step": 2034 }, { "epoch": 0.38, "learning_rate": 1.4103484187838104e-05, "loss": 0.5551, "step": 2035 }, { "epoch": 0.38, "learning_rate": 1.4097903153295944e-05, "loss": 0.5805, "step": 2036 }, { "epoch": 0.38, "learning_rate": 1.4092320584314552e-05, "loss": 0.4206, "step": 2037 }, { "epoch": 0.39, "learning_rate": 1.4086736482984294e-05, "loss": 0.6699, "step": 2038 }, { "epoch": 0.39, "learning_rate": 1.4081150851396109e-05, "loss": 0.4838, "step": 2039 }, { "epoch": 0.39, "learning_rate": 1.4075563691641506e-05, "loss": 0.4567, "step": 2040 }, { "epoch": 0.39, "learning_rate": 1.4069975005812573e-05, "loss": 0.5208, "step": 2041 }, { "epoch": 0.39, "learning_rate": 1.4064384796001962e-05, "loss": 0.762, "step": 2042 }, { "epoch": 0.39, "learning_rate": 1.4058793064302902e-05, "loss": 0.66, "step": 2043 }, { "epoch": 0.39, "learning_rate": 1.4053199812809186e-05, "loss": 0.5597, "step": 2044 }, { "epoch": 0.39, "learning_rate": 1.4047605043615183e-05, "loss": 0.5755, "step": 2045 }, { "epoch": 0.39, "learning_rate": 1.404200875881582e-05, "loss": 0.5111, "step": 2046 }, { "epoch": 0.39, "learning_rate": 1.4036410960506601e-05, "loss": 0.603, "step": 2047 }, { "epoch": 0.39, "learning_rate": 1.4030811650783594e-05, "loss": 0.4649, "step": 2048 }, { "epoch": 0.39, "learning_rate": 1.4025210831743431e-05, "loss": 0.5139, "step": 2049 }, { "epoch": 0.39, "learning_rate": 1.4019608505483314e-05, "loss": 0.5652, "step": 2050 }, { "epoch": 0.39, "learning_rate": 1.4014004674101001e-05, "loss": 0.5222, "step": 2051 }, { "epoch": 0.39, "learning_rate": 1.4008399339694822e-05, "loss": 0.6241, "step": 2052 }, { "epoch": 0.39, "learning_rate": 1.4002792504363665e-05, "loss": 0.5817, "step": 2053 }, { "epoch": 0.39, "learning_rate": 1.399718417020698e-05, "loss": 0.6278, "step": 2054 }, { "epoch": 0.39, "learning_rate": 1.3991574339324782e-05, "loss": 0.4877, "step": 2055 }, { "epoch": 0.39, "learning_rate": 1.398596301381764e-05, "loss": 0.6347, "step": 2056 }, { "epoch": 0.39, "learning_rate": 1.3980350195786691e-05, "loss": 0.6038, "step": 2057 }, { "epoch": 0.39, "learning_rate": 1.3974735887333627e-05, "loss": 0.5174, "step": 2058 }, { "epoch": 0.39, "learning_rate": 1.3969120090560695e-05, "loss": 0.639, "step": 2059 }, { "epoch": 0.39, "learning_rate": 1.3963502807570702e-05, "loss": 0.6459, "step": 2060 }, { "epoch": 0.39, "learning_rate": 1.3957884040467006e-05, "loss": 0.5329, "step": 2061 }, { "epoch": 0.39, "learning_rate": 1.395226379135354e-05, "loss": 0.4267, "step": 2062 }, { "epoch": 0.39, "learning_rate": 1.3946642062334765e-05, "loss": 0.5191, "step": 2063 }, { "epoch": 0.39, "learning_rate": 1.3941018855515716e-05, "loss": 0.4508, "step": 2064 }, { "epoch": 0.39, "learning_rate": 1.3935394173001972e-05, "loss": 0.5123, "step": 2065 }, { "epoch": 0.39, "learning_rate": 1.3929768016899667e-05, "loss": 0.7707, "step": 2066 }, { "epoch": 0.39, "learning_rate": 1.3924140389315488e-05, "loss": 0.6802, "step": 2067 }, { "epoch": 0.39, "learning_rate": 1.3918511292356673e-05, "loss": 0.6645, "step": 2068 }, { "epoch": 0.39, "learning_rate": 1.3912880728131004e-05, "loss": 0.5383, "step": 2069 }, { "epoch": 0.39, "learning_rate": 1.3907248698746822e-05, "loss": 0.7546, "step": 2070 }, { "epoch": 0.39, "learning_rate": 1.3901615206313013e-05, "loss": 0.6405, "step": 2071 }, { "epoch": 0.39, "learning_rate": 1.3895980252939006e-05, "loss": 0.6276, "step": 2072 }, { "epoch": 0.39, "learning_rate": 1.3890343840734782e-05, "loss": 0.4313, "step": 2073 }, { "epoch": 0.39, "learning_rate": 1.388470597181087e-05, "loss": 0.6365, "step": 2074 }, { "epoch": 0.39, "learning_rate": 1.387906664827834e-05, "loss": 0.5469, "step": 2075 }, { "epoch": 0.39, "learning_rate": 1.3873425872248805e-05, "loss": 0.5573, "step": 2076 }, { "epoch": 0.39, "learning_rate": 1.3867783645834428e-05, "loss": 0.5712, "step": 2077 }, { "epoch": 0.39, "learning_rate": 1.3862139971147915e-05, "loss": 0.5401, "step": 2078 }, { "epoch": 0.39, "learning_rate": 1.3856494850302508e-05, "loss": 0.5148, "step": 2079 }, { "epoch": 0.39, "learning_rate": 1.3850848285411994e-05, "loss": 0.6839, "step": 2080 }, { "epoch": 0.39, "learning_rate": 1.3845200278590708e-05, "loss": 0.6191, "step": 2081 }, { "epoch": 0.39, "learning_rate": 1.3839550831953503e-05, "loss": 0.5655, "step": 2082 }, { "epoch": 0.39, "learning_rate": 1.3833899947615801e-05, "loss": 0.5445, "step": 2083 }, { "epoch": 0.39, "learning_rate": 1.3828247627693538e-05, "loss": 0.648, "step": 2084 }, { "epoch": 0.39, "learning_rate": 1.3822593874303201e-05, "loss": 0.5886, "step": 2085 }, { "epoch": 0.39, "learning_rate": 1.3816938689561808e-05, "loss": 0.6148, "step": 2086 }, { "epoch": 0.39, "learning_rate": 1.3811282075586916e-05, "loss": 0.5431, "step": 2087 }, { "epoch": 0.39, "learning_rate": 1.3805624034496612e-05, "loss": 0.5258, "step": 2088 }, { "epoch": 0.39, "learning_rate": 1.3799964568409523e-05, "loss": 0.6313, "step": 2089 }, { "epoch": 0.39, "learning_rate": 1.3794303679444813e-05, "loss": 0.4623, "step": 2090 }, { "epoch": 0.4, "learning_rate": 1.3788641369722163e-05, "loss": 0.5208, "step": 2091 }, { "epoch": 0.4, "learning_rate": 1.3782977641361803e-05, "loss": 0.4881, "step": 2092 }, { "epoch": 0.4, "learning_rate": 1.377731249648449e-05, "loss": 0.5885, "step": 2093 }, { "epoch": 0.4, "learning_rate": 1.3771645937211502e-05, "loss": 0.5007, "step": 2094 }, { "epoch": 0.4, "learning_rate": 1.3765977965664659e-05, "loss": 0.4677, "step": 2095 }, { "epoch": 0.4, "learning_rate": 1.3760308583966299e-05, "loss": 0.6411, "step": 2096 }, { "epoch": 0.4, "learning_rate": 1.3754637794239303e-05, "loss": 0.6303, "step": 2097 }, { "epoch": 0.4, "learning_rate": 1.3748965598607057e-05, "loss": 0.5626, "step": 2098 }, { "epoch": 0.4, "learning_rate": 1.3743291999193495e-05, "loss": 0.6903, "step": 2099 }, { "epoch": 0.4, "learning_rate": 1.3737616998123068e-05, "loss": 0.5027, "step": 2100 }, { "epoch": 0.4, "learning_rate": 1.373194059752075e-05, "loss": 0.4535, "step": 2101 }, { "epoch": 0.4, "learning_rate": 1.3726262799512038e-05, "loss": 0.7366, "step": 2102 }, { "epoch": 0.4, "learning_rate": 1.3720583606222956e-05, "loss": 0.7491, "step": 2103 }, { "epoch": 0.4, "learning_rate": 1.371490301978005e-05, "loss": 0.6673, "step": 2104 }, { "epoch": 0.4, "learning_rate": 1.3709221042310388e-05, "loss": 0.6727, "step": 2105 }, { "epoch": 0.4, "learning_rate": 1.370353767594156e-05, "loss": 0.781, "step": 2106 }, { "epoch": 0.4, "learning_rate": 1.3697852922801669e-05, "loss": 0.6918, "step": 2107 }, { "epoch": 0.4, "learning_rate": 1.369216678501934e-05, "loss": 0.6525, "step": 2108 }, { "epoch": 0.4, "learning_rate": 1.3686479264723728e-05, "loss": 0.4852, "step": 2109 }, { "epoch": 0.4, "learning_rate": 1.3680790364044492e-05, "loss": 0.6414, "step": 2110 }, { "epoch": 0.4, "learning_rate": 1.3675100085111809e-05, "loss": 0.5319, "step": 2111 }, { "epoch": 0.4, "learning_rate": 1.366940843005638e-05, "loss": 0.5301, "step": 2112 }, { "epoch": 0.4, "learning_rate": 1.3663715401009414e-05, "loss": 0.4729, "step": 2113 }, { "epoch": 0.4, "learning_rate": 1.3658021000102638e-05, "loss": 0.5805, "step": 2114 }, { "epoch": 0.4, "learning_rate": 1.3652325229468288e-05, "loss": 0.5481, "step": 2115 }, { "epoch": 0.4, "learning_rate": 1.3646628091239125e-05, "loss": 0.5088, "step": 2116 }, { "epoch": 0.4, "learning_rate": 1.3640929587548403e-05, "loss": 0.5655, "step": 2117 }, { "epoch": 0.4, "learning_rate": 1.363522972052991e-05, "loss": 0.545, "step": 2118 }, { "epoch": 0.4, "learning_rate": 1.3629528492317925e-05, "loss": 0.5359, "step": 2119 }, { "epoch": 0.4, "learning_rate": 1.3623825905047245e-05, "loss": 0.5717, "step": 2120 }, { "epoch": 0.4, "learning_rate": 1.3618121960853177e-05, "loss": 0.618, "step": 2121 }, { "epoch": 0.4, "learning_rate": 1.3612416661871532e-05, "loss": 0.5926, "step": 2122 }, { "epoch": 0.4, "learning_rate": 1.3606710010238633e-05, "loss": 0.514, "step": 2123 }, { "epoch": 0.4, "learning_rate": 1.3601002008091302e-05, "loss": 0.6786, "step": 2124 }, { "epoch": 0.4, "learning_rate": 1.3595292657566876e-05, "loss": 0.5365, "step": 2125 }, { "epoch": 0.4, "learning_rate": 1.3589581960803195e-05, "loss": 0.4894, "step": 2126 }, { "epoch": 0.4, "learning_rate": 1.3583869919938597e-05, "loss": 0.7511, "step": 2127 }, { "epoch": 0.4, "learning_rate": 1.3578156537111928e-05, "loss": 0.5222, "step": 2128 }, { "epoch": 0.4, "learning_rate": 1.3572441814462534e-05, "loss": 0.6847, "step": 2129 }, { "epoch": 0.4, "learning_rate": 1.3566725754130265e-05, "loss": 0.7435, "step": 2130 }, { "epoch": 0.4, "learning_rate": 1.356100835825547e-05, "loss": 0.6757, "step": 2131 }, { "epoch": 0.4, "learning_rate": 1.3555289628979005e-05, "loss": 0.657, "step": 2132 }, { "epoch": 0.4, "learning_rate": 1.3549569568442208e-05, "loss": 0.4837, "step": 2133 }, { "epoch": 0.4, "learning_rate": 1.3543848178786941e-05, "loss": 0.4819, "step": 2134 }, { "epoch": 0.4, "learning_rate": 1.353812546215554e-05, "loss": 0.4873, "step": 2135 }, { "epoch": 0.4, "learning_rate": 1.3532401420690847e-05, "loss": 0.6954, "step": 2136 }, { "epoch": 0.4, "learning_rate": 1.3526676056536205e-05, "loss": 0.5893, "step": 2137 }, { "epoch": 0.4, "learning_rate": 1.3520949371835448e-05, "loss": 0.6295, "step": 2138 }, { "epoch": 0.4, "learning_rate": 1.35152213687329e-05, "loss": 0.4769, "step": 2139 }, { "epoch": 0.4, "learning_rate": 1.3509492049373388e-05, "loss": 0.4812, "step": 2140 }, { "epoch": 0.4, "learning_rate": 1.3503761415902221e-05, "loss": 0.4917, "step": 2141 }, { "epoch": 0.4, "learning_rate": 1.3498029470465212e-05, "loss": 0.5155, "step": 2142 }, { "epoch": 0.4, "learning_rate": 1.3492296215208653e-05, "loss": 0.4701, "step": 2143 }, { "epoch": 0.41, "learning_rate": 1.348656165227934e-05, "loss": 0.7118, "step": 2144 }, { "epoch": 0.41, "learning_rate": 1.3480825783824545e-05, "loss": 0.5449, "step": 2145 }, { "epoch": 0.41, "learning_rate": 1.3475088611992038e-05, "loss": 0.5267, "step": 2146 }, { "epoch": 0.41, "learning_rate": 1.3469350138930073e-05, "loss": 0.6651, "step": 2147 }, { "epoch": 0.41, "learning_rate": 1.3463610366787392e-05, "loss": 0.5834, "step": 2148 }, { "epoch": 0.41, "learning_rate": 1.3457869297713227e-05, "loss": 0.4862, "step": 2149 }, { "epoch": 0.41, "learning_rate": 1.3452126933857286e-05, "loss": 0.5247, "step": 2150 }, { "epoch": 0.41, "learning_rate": 1.3446383277369777e-05, "loss": 0.6938, "step": 2151 }, { "epoch": 0.41, "learning_rate": 1.3440638330401375e-05, "loss": 0.6272, "step": 2152 }, { "epoch": 0.41, "learning_rate": 1.343489209510325e-05, "loss": 0.6388, "step": 2153 }, { "epoch": 0.41, "learning_rate": 1.3429144573627055e-05, "loss": 0.3972, "step": 2154 }, { "epoch": 0.41, "learning_rate": 1.3423395768124914e-05, "loss": 0.4017, "step": 2155 }, { "epoch": 0.41, "learning_rate": 1.3417645680749441e-05, "loss": 0.4779, "step": 2156 }, { "epoch": 0.41, "learning_rate": 1.3411894313653727e-05, "loss": 0.6229, "step": 2157 }, { "epoch": 0.41, "learning_rate": 1.3406141668991344e-05, "loss": 0.4434, "step": 2158 }, { "epoch": 0.41, "learning_rate": 1.3400387748916338e-05, "loss": 0.5871, "step": 2159 }, { "epoch": 0.41, "learning_rate": 1.3394632555583238e-05, "loss": 0.5689, "step": 2160 }, { "epoch": 0.41, "learning_rate": 1.3388876091147048e-05, "loss": 0.7287, "step": 2161 }, { "epoch": 0.41, "learning_rate": 1.338311835776324e-05, "loss": 0.7309, "step": 2162 }, { "epoch": 0.41, "learning_rate": 1.3377359357587776e-05, "loss": 0.5997, "step": 2163 }, { "epoch": 0.41, "learning_rate": 1.3371599092777084e-05, "loss": 0.4905, "step": 2164 }, { "epoch": 0.41, "learning_rate": 1.3365837565488065e-05, "loss": 0.574, "step": 2165 }, { "epoch": 0.41, "learning_rate": 1.3360074777878089e-05, "loss": 0.7338, "step": 2166 }, { "epoch": 0.41, "learning_rate": 1.3354310732105014e-05, "loss": 0.5053, "step": 2167 }, { "epoch": 0.41, "learning_rate": 1.3348545430327146e-05, "loss": 0.6964, "step": 2168 }, { "epoch": 0.41, "learning_rate": 1.334277887470328e-05, "loss": 0.6457, "step": 2169 }, { "epoch": 0.41, "learning_rate": 1.3337011067392673e-05, "loss": 0.5051, "step": 2170 }, { "epoch": 0.41, "learning_rate": 1.333124201055505e-05, "loss": 0.4775, "step": 2171 }, { "epoch": 0.41, "learning_rate": 1.3325471706350606e-05, "loss": 0.5111, "step": 2172 }, { "epoch": 0.41, "learning_rate": 1.3319700156940003e-05, "loss": 0.5008, "step": 2173 }, { "epoch": 0.41, "learning_rate": 1.3313927364484368e-05, "loss": 0.581, "step": 2174 }, { "epoch": 0.41, "learning_rate": 1.3308153331145293e-05, "loss": 0.5461, "step": 2175 }, { "epoch": 0.41, "learning_rate": 1.3302378059084835e-05, "loss": 0.5714, "step": 2176 }, { "epoch": 0.41, "learning_rate": 1.3296601550465525e-05, "loss": 0.5187, "step": 2177 }, { "epoch": 0.41, "learning_rate": 1.3290823807450333e-05, "loss": 0.6369, "step": 2178 }, { "epoch": 0.41, "learning_rate": 1.3285044832202716e-05, "loss": 0.6136, "step": 2179 }, { "epoch": 0.41, "learning_rate": 1.3279264626886579e-05, "loss": 0.5102, "step": 2180 }, { "epoch": 0.41, "learning_rate": 1.327348319366629e-05, "loss": 0.6006, "step": 2181 }, { "epoch": 0.41, "learning_rate": 1.326770053470668e-05, "loss": 0.5622, "step": 2182 }, { "epoch": 0.41, "learning_rate": 1.3261916652173034e-05, "loss": 0.5852, "step": 2183 }, { "epoch": 0.41, "learning_rate": 1.32561315482311e-05, "loss": 0.6932, "step": 2184 }, { "epoch": 0.41, "learning_rate": 1.3250345225047078e-05, "loss": 0.6941, "step": 2185 }, { "epoch": 0.41, "learning_rate": 1.3244557684787631e-05, "loss": 0.6159, "step": 2186 }, { "epoch": 0.41, "learning_rate": 1.3238768929619874e-05, "loss": 0.4833, "step": 2187 }, { "epoch": 0.41, "learning_rate": 1.3232978961711376e-05, "loss": 0.5224, "step": 2188 }, { "epoch": 0.41, "learning_rate": 1.322718778323016e-05, "loss": 0.7408, "step": 2189 }, { "epoch": 0.41, "learning_rate": 1.3221395396344707e-05, "loss": 0.5099, "step": 2190 }, { "epoch": 0.41, "learning_rate": 1.3215601803223945e-05, "loss": 0.5155, "step": 2191 }, { "epoch": 0.41, "learning_rate": 1.3209807006037254e-05, "loss": 0.4841, "step": 2192 }, { "epoch": 0.41, "learning_rate": 1.3204011006954474e-05, "loss": 0.5624, "step": 2193 }, { "epoch": 0.41, "learning_rate": 1.3198213808145879e-05, "loss": 0.552, "step": 2194 }, { "epoch": 0.41, "learning_rate": 1.3192415411782203e-05, "loss": 0.5521, "step": 2195 }, { "epoch": 0.41, "learning_rate": 1.3186615820034632e-05, "loss": 0.4757, "step": 2196 }, { "epoch": 0.42, "learning_rate": 1.3180815035074786e-05, "loss": 0.717, "step": 2197 }, { "epoch": 0.42, "learning_rate": 1.3175013059074747e-05, "loss": 0.7794, "step": 2198 }, { "epoch": 0.42, "learning_rate": 1.316920989420703e-05, "loss": 0.5903, "step": 2199 }, { "epoch": 0.42, "learning_rate": 1.3163405542644602e-05, "loss": 0.5964, "step": 2200 }, { "epoch": 0.42, "learning_rate": 1.3157600006560878e-05, "loss": 0.6212, "step": 2201 }, { "epoch": 0.42, "learning_rate": 1.3151793288129705e-05, "loss": 0.5085, "step": 2202 }, { "epoch": 0.42, "learning_rate": 1.3145985389525387e-05, "loss": 0.6312, "step": 2203 }, { "epoch": 0.42, "learning_rate": 1.3140176312922654e-05, "loss": 0.5963, "step": 2204 }, { "epoch": 0.42, "learning_rate": 1.3134366060496693e-05, "loss": 0.7328, "step": 2205 }, { "epoch": 0.42, "learning_rate": 1.3128554634423118e-05, "loss": 0.597, "step": 2206 }, { "epoch": 0.42, "learning_rate": 1.3122742036877994e-05, "loss": 0.5104, "step": 2207 }, { "epoch": 0.42, "learning_rate": 1.3116928270037815e-05, "loss": 0.5216, "step": 2208 }, { "epoch": 0.42, "learning_rate": 1.3111113336079517e-05, "loss": 0.6339, "step": 2209 }, { "epoch": 0.42, "learning_rate": 1.3105297237180472e-05, "loss": 0.4872, "step": 2210 }, { "epoch": 0.42, "learning_rate": 1.309947997551849e-05, "loss": 0.6051, "step": 2211 }, { "epoch": 0.42, "learning_rate": 1.309366155327182e-05, "loss": 0.5744, "step": 2212 }, { "epoch": 0.42, "learning_rate": 1.3087841972619133e-05, "loss": 0.5541, "step": 2213 }, { "epoch": 0.42, "learning_rate": 1.3082021235739546e-05, "loss": 0.5593, "step": 2214 }, { "epoch": 0.42, "learning_rate": 1.3076199344812606e-05, "loss": 0.5775, "step": 2215 }, { "epoch": 0.42, "learning_rate": 1.3070376302018287e-05, "loss": 0.5996, "step": 2216 }, { "epoch": 0.42, "learning_rate": 1.3064552109537e-05, "loss": 0.5791, "step": 2217 }, { "epoch": 0.42, "learning_rate": 1.3058726769549584e-05, "loss": 0.4657, "step": 2218 }, { "epoch": 0.42, "learning_rate": 1.3052900284237311e-05, "loss": 0.5166, "step": 2219 }, { "epoch": 0.42, "learning_rate": 1.3047072655781874e-05, "loss": 0.7104, "step": 2220 }, { "epoch": 0.42, "learning_rate": 1.3041243886365402e-05, "loss": 0.5763, "step": 2221 }, { "epoch": 0.42, "learning_rate": 1.3035413978170455e-05, "loss": 0.5086, "step": 2222 }, { "epoch": 0.42, "learning_rate": 1.3029582933379998e-05, "loss": 0.6203, "step": 2223 }, { "epoch": 0.42, "learning_rate": 1.3023750754177449e-05, "loss": 0.4986, "step": 2224 }, { "epoch": 0.42, "learning_rate": 1.3017917442746633e-05, "loss": 0.5373, "step": 2225 }, { "epoch": 0.42, "learning_rate": 1.3012083001271805e-05, "loss": 0.5584, "step": 2226 }, { "epoch": 0.42, "learning_rate": 1.3006247431937644e-05, "loss": 0.6486, "step": 2227 }, { "epoch": 0.42, "learning_rate": 1.3000410736929246e-05, "loss": 0.7272, "step": 2228 }, { "epoch": 0.42, "learning_rate": 1.2994572918432134e-05, "loss": 0.6536, "step": 2229 }, { "epoch": 0.42, "learning_rate": 1.2988733978632248e-05, "loss": 0.5865, "step": 2230 }, { "epoch": 0.42, "learning_rate": 1.2982893919715958e-05, "loss": 0.6606, "step": 2231 }, { "epoch": 0.42, "learning_rate": 1.2977052743870032e-05, "loss": 0.5464, "step": 2232 }, { "epoch": 0.42, "learning_rate": 1.2971210453281675e-05, "loss": 0.6519, "step": 2233 }, { "epoch": 0.42, "learning_rate": 1.2965367050138507e-05, "loss": 0.4899, "step": 2234 }, { "epoch": 0.42, "learning_rate": 1.2959522536628554e-05, "loss": 0.6431, "step": 2235 }, { "epoch": 0.42, "learning_rate": 1.295367691494027e-05, "loss": 0.6226, "step": 2236 }, { "epoch": 0.42, "learning_rate": 1.2947830187262514e-05, "loss": 0.5144, "step": 2237 }, { "epoch": 0.42, "learning_rate": 1.294198235578457e-05, "loss": 0.6407, "step": 2238 }, { "epoch": 0.42, "learning_rate": 1.2936133422696122e-05, "loss": 0.4638, "step": 2239 }, { "epoch": 0.42, "learning_rate": 1.293028339018728e-05, "loss": 0.6761, "step": 2240 }, { "epoch": 0.42, "learning_rate": 1.2924432260448556e-05, "loss": 0.5601, "step": 2241 }, { "epoch": 0.42, "learning_rate": 1.2918580035670875e-05, "loss": 0.7177, "step": 2242 }, { "epoch": 0.42, "learning_rate": 1.2912726718045576e-05, "loss": 0.6005, "step": 2243 }, { "epoch": 0.42, "learning_rate": 1.2906872309764404e-05, "loss": 0.4866, "step": 2244 }, { "epoch": 0.42, "learning_rate": 1.2901016813019509e-05, "loss": 0.699, "step": 2245 }, { "epoch": 0.42, "learning_rate": 1.2895160230003456e-05, "loss": 0.6273, "step": 2246 }, { "epoch": 0.42, "learning_rate": 1.2889302562909214e-05, "loss": 0.5978, "step": 2247 }, { "epoch": 0.42, "learning_rate": 1.2883443813930153e-05, "loss": 0.4585, "step": 2248 }, { "epoch": 0.42, "learning_rate": 1.2877583985260054e-05, "loss": 0.5506, "step": 2249 }, { "epoch": 0.43, "learning_rate": 1.2871723079093101e-05, "loss": 0.511, "step": 2250 }, { "epoch": 0.43, "learning_rate": 1.2865861097623881e-05, "loss": 0.513, "step": 2251 }, { "epoch": 0.43, "learning_rate": 1.2859998043047383e-05, "loss": 0.4954, "step": 2252 }, { "epoch": 0.43, "learning_rate": 1.2854133917558996e-05, "loss": 0.603, "step": 2253 }, { "epoch": 0.43, "learning_rate": 1.2848268723354518e-05, "loss": 0.6588, "step": 2254 }, { "epoch": 0.43, "learning_rate": 1.2842402462630137e-05, "loss": 0.6231, "step": 2255 }, { "epoch": 0.43, "learning_rate": 1.2836535137582442e-05, "loss": 0.5122, "step": 2256 }, { "epoch": 0.43, "learning_rate": 1.2830666750408434e-05, "loss": 0.4174, "step": 2257 }, { "epoch": 0.43, "learning_rate": 1.282479730330549e-05, "loss": 0.6223, "step": 2258 }, { "epoch": 0.43, "learning_rate": 1.2818926798471398e-05, "loss": 0.626, "step": 2259 }, { "epoch": 0.43, "learning_rate": 1.2813055238104344e-05, "loss": 0.6053, "step": 2260 }, { "epoch": 0.43, "learning_rate": 1.28071826244029e-05, "loss": 0.579, "step": 2261 }, { "epoch": 0.43, "learning_rate": 1.2801308959566034e-05, "loss": 0.6077, "step": 2262 }, { "epoch": 0.43, "learning_rate": 1.2795434245793114e-05, "loss": 0.7164, "step": 2263 }, { "epoch": 0.43, "learning_rate": 1.2789558485283897e-05, "loss": 0.734, "step": 2264 }, { "epoch": 0.43, "learning_rate": 1.2783681680238532e-05, "loss": 0.4297, "step": 2265 }, { "epoch": 0.43, "learning_rate": 1.2777803832857558e-05, "loss": 0.6674, "step": 2266 }, { "epoch": 0.43, "learning_rate": 1.2771924945341906e-05, "loss": 0.5654, "step": 2267 }, { "epoch": 0.43, "learning_rate": 1.2766045019892897e-05, "loss": 0.516, "step": 2268 }, { "epoch": 0.43, "learning_rate": 1.276016405871224e-05, "loss": 0.4901, "step": 2269 }, { "epoch": 0.43, "learning_rate": 1.2754282064002026e-05, "loss": 0.5794, "step": 2270 }, { "epoch": 0.43, "learning_rate": 1.2748399037964742e-05, "loss": 0.5879, "step": 2271 }, { "epoch": 0.43, "learning_rate": 1.2742514982803257e-05, "loss": 0.5863, "step": 2272 }, { "epoch": 0.43, "learning_rate": 1.2736629900720832e-05, "loss": 0.5113, "step": 2273 }, { "epoch": 0.43, "learning_rate": 1.2730743793921094e-05, "loss": 0.6055, "step": 2274 }, { "epoch": 0.43, "learning_rate": 1.2724856664608077e-05, "loss": 0.5831, "step": 2275 }, { "epoch": 0.43, "learning_rate": 1.2718968514986183e-05, "loss": 0.557, "step": 2276 }, { "epoch": 0.43, "learning_rate": 1.2713079347260198e-05, "loss": 0.7647, "step": 2277 }, { "epoch": 0.43, "learning_rate": 1.2707189163635292e-05, "loss": 0.6873, "step": 2278 }, { "epoch": 0.43, "learning_rate": 1.270129796631702e-05, "loss": 0.5424, "step": 2279 }, { "epoch": 0.43, "learning_rate": 1.2695405757511303e-05, "loss": 0.6672, "step": 2280 }, { "epoch": 0.43, "learning_rate": 1.2689512539424454e-05, "loss": 0.5454, "step": 2281 }, { "epoch": 0.43, "learning_rate": 1.2683618314263158e-05, "loss": 0.546, "step": 2282 }, { "epoch": 0.43, "learning_rate": 1.2677723084234479e-05, "loss": 0.5113, "step": 2283 }, { "epoch": 0.43, "learning_rate": 1.2671826851545851e-05, "loss": 0.502, "step": 2284 }, { "epoch": 0.43, "learning_rate": 1.2665929618405097e-05, "loss": 0.6416, "step": 2285 }, { "epoch": 0.43, "learning_rate": 1.2660031387020401e-05, "loss": 0.5054, "step": 2286 }, { "epoch": 0.43, "learning_rate": 1.2654132159600327e-05, "loss": 0.5501, "step": 2287 }, { "epoch": 0.43, "learning_rate": 1.264823193835381e-05, "loss": 0.6436, "step": 2288 }, { "epoch": 0.43, "learning_rate": 1.2642330725490161e-05, "loss": 0.4724, "step": 2289 }, { "epoch": 0.43, "learning_rate": 1.2636428523219058e-05, "loss": 0.5366, "step": 2290 }, { "epoch": 0.43, "learning_rate": 1.2630525333750548e-05, "loss": 0.4538, "step": 2291 }, { "epoch": 0.43, "learning_rate": 1.262462115929506e-05, "loss": 0.5906, "step": 2292 }, { "epoch": 0.43, "learning_rate": 1.2618716002063368e-05, "loss": 0.3925, "step": 2293 }, { "epoch": 0.43, "learning_rate": 1.2612809864266637e-05, "loss": 0.5561, "step": 2294 }, { "epoch": 0.43, "learning_rate": 1.2606902748116394e-05, "loss": 0.6806, "step": 2295 }, { "epoch": 0.43, "learning_rate": 1.2600994655824523e-05, "loss": 0.6468, "step": 2296 }, { "epoch": 0.43, "learning_rate": 1.2595085589603281e-05, "loss": 0.48, "step": 2297 }, { "epoch": 0.43, "learning_rate": 1.2589175551665287e-05, "loss": 0.6967, "step": 2298 }, { "epoch": 0.43, "learning_rate": 1.258326454422353e-05, "loss": 0.6273, "step": 2299 }, { "epoch": 0.43, "learning_rate": 1.257735256949135e-05, "loss": 0.5908, "step": 2300 }, { "epoch": 0.43, "learning_rate": 1.257143962968246e-05, "loss": 0.6058, "step": 2301 }, { "epoch": 0.43, "learning_rate": 1.2565525727010932e-05, "loss": 0.4691, "step": 2302 }, { "epoch": 0.44, "learning_rate": 1.2559610863691193e-05, "loss": 0.7054, "step": 2303 }, { "epoch": 0.44, "learning_rate": 1.2553695041938039e-05, "loss": 0.4603, "step": 2304 }, { "epoch": 0.44, "learning_rate": 1.2547778263966617e-05, "loss": 0.486, "step": 2305 }, { "epoch": 0.44, "learning_rate": 1.254186053199243e-05, "loss": 0.5668, "step": 2306 }, { "epoch": 0.44, "learning_rate": 1.2535941848231352e-05, "loss": 0.6506, "step": 2307 }, { "epoch": 0.44, "learning_rate": 1.25300222148996e-05, "loss": 0.7455, "step": 2308 }, { "epoch": 0.44, "learning_rate": 1.252410163421375e-05, "loss": 0.3753, "step": 2309 }, { "epoch": 0.44, "learning_rate": 1.2518180108390732e-05, "loss": 0.608, "step": 2310 }, { "epoch": 0.44, "learning_rate": 1.2512257639647838e-05, "loss": 0.5122, "step": 2311 }, { "epoch": 0.44, "learning_rate": 1.2506334230202698e-05, "loss": 0.4807, "step": 2312 }, { "epoch": 0.44, "learning_rate": 1.2500409882273312e-05, "loss": 0.3758, "step": 2313 }, { "epoch": 0.44, "learning_rate": 1.2494484598078016e-05, "loss": 0.6126, "step": 2314 }, { "epoch": 0.44, "learning_rate": 1.2488558379835507e-05, "loss": 0.5271, "step": 2315 }, { "epoch": 0.44, "learning_rate": 1.2482631229764824e-05, "loss": 0.781, "step": 2316 }, { "epoch": 0.44, "learning_rate": 1.2476703150085356e-05, "loss": 0.57, "step": 2317 }, { "epoch": 0.44, "learning_rate": 1.2470774143016854e-05, "loss": 0.6225, "step": 2318 }, { "epoch": 0.44, "learning_rate": 1.2464844210779392e-05, "loss": 0.4379, "step": 2319 }, { "epoch": 0.44, "learning_rate": 1.2458913355593412e-05, "loss": 0.7705, "step": 2320 }, { "epoch": 0.44, "learning_rate": 1.2452981579679688e-05, "loss": 0.4015, "step": 2321 }, { "epoch": 0.44, "learning_rate": 1.2447048885259347e-05, "loss": 0.6594, "step": 2322 }, { "epoch": 0.44, "learning_rate": 1.2441115274553854e-05, "loss": 0.5562, "step": 2323 }, { "epoch": 0.44, "learning_rate": 1.2435180749785021e-05, "loss": 0.46, "step": 2324 }, { "epoch": 0.44, "learning_rate": 1.2429245313175e-05, "loss": 0.5096, "step": 2325 }, { "epoch": 0.44, "learning_rate": 1.2423308966946284e-05, "loss": 0.6311, "step": 2326 }, { "epoch": 0.44, "learning_rate": 1.2417371713321713e-05, "loss": 0.5128, "step": 2327 }, { "epoch": 0.44, "learning_rate": 1.2411433554524457e-05, "loss": 0.4396, "step": 2328 }, { "epoch": 0.44, "learning_rate": 1.2405494492778034e-05, "loss": 0.5601, "step": 2329 }, { "epoch": 0.44, "learning_rate": 1.2399554530306293e-05, "loss": 0.5578, "step": 2330 }, { "epoch": 0.44, "learning_rate": 1.2393613669333422e-05, "loss": 0.5396, "step": 2331 }, { "epoch": 0.44, "learning_rate": 1.2387671912083947e-05, "loss": 0.5198, "step": 2332 }, { "epoch": 0.44, "learning_rate": 1.2381729260782728e-05, "loss": 0.4854, "step": 2333 }, { "epoch": 0.44, "learning_rate": 1.2375785717654968e-05, "loss": 0.599, "step": 2334 }, { "epoch": 0.44, "learning_rate": 1.236984128492619e-05, "loss": 0.5882, "step": 2335 }, { "epoch": 0.44, "learning_rate": 1.2363895964822259e-05, "loss": 0.6558, "step": 2336 }, { "epoch": 0.44, "learning_rate": 1.2357949759569372e-05, "loss": 0.4994, "step": 2337 }, { "epoch": 0.44, "learning_rate": 1.2352002671394047e-05, "loss": 0.6359, "step": 2338 }, { "epoch": 0.44, "learning_rate": 1.2346054702523154e-05, "loss": 0.5457, "step": 2339 }, { "epoch": 0.44, "learning_rate": 1.2340105855183872e-05, "loss": 0.5162, "step": 2340 }, { "epoch": 0.44, "learning_rate": 1.2334156131603718e-05, "loss": 0.7098, "step": 2341 }, { "epoch": 0.44, "learning_rate": 1.2328205534010538e-05, "loss": 0.5118, "step": 2342 }, { "epoch": 0.44, "learning_rate": 1.23222540646325e-05, "loss": 0.6384, "step": 2343 }, { "epoch": 0.44, "learning_rate": 1.231630172569811e-05, "loss": 0.6398, "step": 2344 }, { "epoch": 0.44, "learning_rate": 1.231034851943618e-05, "loss": 0.4625, "step": 2345 }, { "epoch": 0.44, "learning_rate": 1.2304394448075864e-05, "loss": 0.5919, "step": 2346 }, { "epoch": 0.44, "learning_rate": 1.2298439513846634e-05, "loss": 0.6371, "step": 2347 }, { "epoch": 0.44, "learning_rate": 1.2292483718978288e-05, "loss": 0.5887, "step": 2348 }, { "epoch": 0.44, "learning_rate": 1.2286527065700938e-05, "loss": 0.681, "step": 2349 }, { "epoch": 0.44, "learning_rate": 1.2280569556245026e-05, "loss": 0.4429, "step": 2350 }, { "epoch": 0.44, "learning_rate": 1.2274611192841309e-05, "loss": 0.5462, "step": 2351 }, { "epoch": 0.44, "learning_rate": 1.2268651977720867e-05, "loss": 0.5795, "step": 2352 }, { "epoch": 0.44, "learning_rate": 1.2262691913115103e-05, "loss": 0.6146, "step": 2353 }, { "epoch": 0.44, "learning_rate": 1.2256731001255723e-05, "loss": 0.4699, "step": 2354 }, { "epoch": 0.44, "learning_rate": 1.2250769244374772e-05, "loss": 0.6616, "step": 2355 }, { "epoch": 0.45, "learning_rate": 1.2244806644704594e-05, "loss": 0.7125, "step": 2356 }, { "epoch": 0.45, "learning_rate": 1.2238843204477855e-05, "loss": 0.6338, "step": 2357 }, { "epoch": 0.45, "learning_rate": 1.2232878925927533e-05, "loss": 0.7599, "step": 2358 }, { "epoch": 0.45, "learning_rate": 1.2226913811286924e-05, "loss": 0.5509, "step": 2359 }, { "epoch": 0.45, "learning_rate": 1.2220947862789641e-05, "loss": 0.5225, "step": 2360 }, { "epoch": 0.45, "learning_rate": 1.2214981082669593e-05, "loss": 0.5384, "step": 2361 }, { "epoch": 0.45, "learning_rate": 1.2209013473161017e-05, "loss": 0.495, "step": 2362 }, { "epoch": 0.45, "learning_rate": 1.2203045036498456e-05, "loss": 0.3722, "step": 2363 }, { "epoch": 0.45, "learning_rate": 1.2197075774916754e-05, "loss": 0.5464, "step": 2364 }, { "epoch": 0.45, "learning_rate": 1.2191105690651078e-05, "loss": 0.5805, "step": 2365 }, { "epoch": 0.45, "learning_rate": 1.2185134785936892e-05, "loss": 0.5471, "step": 2366 }, { "epoch": 0.45, "learning_rate": 1.2179163063009974e-05, "loss": 0.7299, "step": 2367 }, { "epoch": 0.45, "learning_rate": 1.21731905241064e-05, "loss": 0.5153, "step": 2368 }, { "epoch": 0.45, "learning_rate": 1.2167217171462566e-05, "loss": 0.439, "step": 2369 }, { "epoch": 0.45, "learning_rate": 1.2161243007315158e-05, "loss": 0.5231, "step": 2370 }, { "epoch": 0.45, "learning_rate": 1.215526803390117e-05, "loss": 0.6858, "step": 2371 }, { "epoch": 0.45, "learning_rate": 1.214929225345791e-05, "loss": 0.4786, "step": 2372 }, { "epoch": 0.45, "learning_rate": 1.2143315668222966e-05, "loss": 0.5186, "step": 2373 }, { "epoch": 0.45, "learning_rate": 1.213733828043425e-05, "loss": 0.6726, "step": 2374 }, { "epoch": 0.45, "learning_rate": 1.2131360092329962e-05, "loss": 0.5036, "step": 2375 }, { "epoch": 0.45, "learning_rate": 1.2125381106148604e-05, "loss": 0.5348, "step": 2376 }, { "epoch": 0.45, "learning_rate": 1.2119401324128976e-05, "loss": 0.6589, "step": 2377 }, { "epoch": 0.45, "learning_rate": 1.211342074851018e-05, "loss": 0.4281, "step": 2378 }, { "epoch": 0.45, "learning_rate": 1.2107439381531612e-05, "loss": 0.4938, "step": 2379 }, { "epoch": 0.45, "learning_rate": 1.210145722543296e-05, "loss": 0.5383, "step": 2380 }, { "epoch": 0.45, "learning_rate": 1.209547428245422e-05, "loss": 0.6894, "step": 2381 }, { "epoch": 0.45, "learning_rate": 1.208949055483567e-05, "loss": 0.7322, "step": 2382 }, { "epoch": 0.45, "learning_rate": 1.2083506044817889e-05, "loss": 0.6169, "step": 2383 }, { "epoch": 0.45, "learning_rate": 1.2077520754641745e-05, "loss": 0.8171, "step": 2384 }, { "epoch": 0.45, "learning_rate": 1.20715346865484e-05, "loss": 0.5123, "step": 2385 }, { "epoch": 0.45, "learning_rate": 1.206554784277931e-05, "loss": 0.4449, "step": 2386 }, { "epoch": 0.45, "learning_rate": 1.2059560225576212e-05, "loss": 0.4905, "step": 2387 }, { "epoch": 0.45, "learning_rate": 1.2053571837181148e-05, "loss": 0.5048, "step": 2388 }, { "epoch": 0.45, "learning_rate": 1.2047582679836432e-05, "loss": 0.7448, "step": 2389 }, { "epoch": 0.45, "learning_rate": 1.2041592755784676e-05, "loss": 0.5102, "step": 2390 }, { "epoch": 0.45, "learning_rate": 1.2035602067268781e-05, "loss": 0.5747, "step": 2391 }, { "epoch": 0.45, "learning_rate": 1.2029610616531926e-05, "loss": 0.4384, "step": 2392 }, { "epoch": 0.45, "learning_rate": 1.202361840581758e-05, "loss": 0.661, "step": 2393 }, { "epoch": 0.45, "learning_rate": 1.2017625437369498e-05, "loss": 0.4807, "step": 2394 }, { "epoch": 0.45, "learning_rate": 1.2011631713431718e-05, "loss": 0.6163, "step": 2395 }, { "epoch": 0.45, "learning_rate": 1.2005637236248555e-05, "loss": 0.613, "step": 2396 }, { "epoch": 0.45, "learning_rate": 1.1999642008064612e-05, "loss": 0.3455, "step": 2397 }, { "epoch": 0.45, "learning_rate": 1.1993646031124775e-05, "loss": 0.5827, "step": 2398 }, { "epoch": 0.45, "learning_rate": 1.1987649307674206e-05, "loss": 0.5537, "step": 2399 }, { "epoch": 0.45, "learning_rate": 1.1981651839958349e-05, "loss": 0.5313, "step": 2400 }, { "epoch": 0.45, "learning_rate": 1.1975653630222924e-05, "loss": 0.5858, "step": 2401 }, { "epoch": 0.45, "learning_rate": 1.196965468071393e-05, "loss": 0.6632, "step": 2402 }, { "epoch": 0.45, "learning_rate": 1.1963654993677645e-05, "loss": 0.5366, "step": 2403 }, { "epoch": 0.45, "learning_rate": 1.195765457136062e-05, "loss": 0.4409, "step": 2404 }, { "epoch": 0.45, "learning_rate": 1.1951653416009686e-05, "loss": 0.4915, "step": 2405 }, { "epoch": 0.45, "learning_rate": 1.194565152987194e-05, "loss": 0.5773, "step": 2406 }, { "epoch": 0.45, "learning_rate": 1.1939648915194766e-05, "loss": 0.6235, "step": 2407 }, { "epoch": 0.45, "learning_rate": 1.1933645574225805e-05, "loss": 0.6017, "step": 2408 }, { "epoch": 0.46, "learning_rate": 1.1927641509212984e-05, "loss": 0.3602, "step": 2409 }, { "epoch": 0.46, "learning_rate": 1.1921636722404493e-05, "loss": 0.434, "step": 2410 }, { "epoch": 0.46, "learning_rate": 1.191563121604879e-05, "loss": 0.54, "step": 2411 }, { "epoch": 0.46, "learning_rate": 1.1909624992394613e-05, "loss": 0.4384, "step": 2412 }, { "epoch": 0.46, "learning_rate": 1.1903618053690957e-05, "loss": 0.7469, "step": 2413 }, { "epoch": 0.46, "learning_rate": 1.1897610402187097e-05, "loss": 0.5281, "step": 2414 }, { "epoch": 0.46, "learning_rate": 1.1891602040132557e-05, "loss": 0.484, "step": 2415 }, { "epoch": 0.46, "learning_rate": 1.1885592969777149e-05, "loss": 0.5625, "step": 2416 }, { "epoch": 0.46, "learning_rate": 1.1879583193370934e-05, "loss": 0.5393, "step": 2417 }, { "epoch": 0.46, "learning_rate": 1.1873572713164241e-05, "loss": 0.6545, "step": 2418 }, { "epoch": 0.46, "learning_rate": 1.1867561531407668e-05, "loss": 0.5308, "step": 2419 }, { "epoch": 0.46, "learning_rate": 1.1861549650352069e-05, "loss": 0.6603, "step": 2420 }, { "epoch": 0.46, "learning_rate": 1.1855537072248564e-05, "loss": 0.4867, "step": 2421 }, { "epoch": 0.46, "learning_rate": 1.184952379934853e-05, "loss": 0.5423, "step": 2422 }, { "epoch": 0.46, "learning_rate": 1.184350983390361e-05, "loss": 0.649, "step": 2423 }, { "epoch": 0.46, "learning_rate": 1.1837495178165706e-05, "loss": 0.5333, "step": 2424 }, { "epoch": 0.46, "learning_rate": 1.1831479834386967e-05, "loss": 0.5127, "step": 2425 }, { "epoch": 0.46, "learning_rate": 1.1825463804819814e-05, "loss": 0.5891, "step": 2426 }, { "epoch": 0.46, "learning_rate": 1.1819447091716918e-05, "loss": 0.6252, "step": 2427 }, { "epoch": 0.46, "learning_rate": 1.181342969733121e-05, "loss": 0.654, "step": 2428 }, { "epoch": 0.46, "learning_rate": 1.1807411623915865e-05, "loss": 0.5431, "step": 2429 }, { "epoch": 0.46, "learning_rate": 1.180139287372433e-05, "loss": 0.5815, "step": 2430 }, { "epoch": 0.46, "learning_rate": 1.1795373449010288e-05, "loss": 0.6465, "step": 2431 }, { "epoch": 0.46, "learning_rate": 1.1789353352027683e-05, "loss": 0.5623, "step": 2432 }, { "epoch": 0.46, "learning_rate": 1.1783332585030717e-05, "loss": 0.5684, "step": 2433 }, { "epoch": 0.46, "learning_rate": 1.177731115027383e-05, "loss": 0.5758, "step": 2434 }, { "epoch": 0.46, "learning_rate": 1.177128905001172e-05, "loss": 0.5583, "step": 2435 }, { "epoch": 0.46, "learning_rate": 1.176526628649933e-05, "loss": 0.6128, "step": 2436 }, { "epoch": 0.46, "learning_rate": 1.1759242861991855e-05, "loss": 0.5591, "step": 2437 }, { "epoch": 0.46, "learning_rate": 1.1753218778744736e-05, "loss": 0.6615, "step": 2438 }, { "epoch": 0.46, "learning_rate": 1.174719403901366e-05, "loss": 0.59, "step": 2439 }, { "epoch": 0.46, "learning_rate": 1.1741168645054566e-05, "loss": 0.6926, "step": 2440 }, { "epoch": 0.46, "learning_rate": 1.1735142599123624e-05, "loss": 0.4956, "step": 2441 }, { "epoch": 0.46, "learning_rate": 1.172911590347726e-05, "loss": 0.58, "step": 2442 }, { "epoch": 0.46, "learning_rate": 1.172308856037214e-05, "loss": 0.6934, "step": 2443 }, { "epoch": 0.46, "learning_rate": 1.1717060572065174e-05, "loss": 0.4797, "step": 2444 }, { "epoch": 0.46, "learning_rate": 1.171103194081351e-05, "loss": 0.5762, "step": 2445 }, { "epoch": 0.46, "learning_rate": 1.1705002668874538e-05, "loss": 0.4984, "step": 2446 }, { "epoch": 0.46, "learning_rate": 1.1698972758505891e-05, "loss": 0.6153, "step": 2447 }, { "epoch": 0.46, "learning_rate": 1.1692942211965434e-05, "loss": 0.5156, "step": 2448 }, { "epoch": 0.46, "learning_rate": 1.1686911031511284e-05, "loss": 0.6096, "step": 2449 }, { "epoch": 0.46, "learning_rate": 1.1680879219401774e-05, "loss": 0.5142, "step": 2450 }, { "epoch": 0.46, "learning_rate": 1.1674846777895494e-05, "loss": 0.5321, "step": 2451 }, { "epoch": 0.46, "learning_rate": 1.1668813709251263e-05, "loss": 0.5817, "step": 2452 }, { "epoch": 0.46, "learning_rate": 1.1662780015728123e-05, "loss": 0.4443, "step": 2453 }, { "epoch": 0.46, "learning_rate": 1.1656745699585373e-05, "loss": 0.584, "step": 2454 }, { "epoch": 0.46, "learning_rate": 1.165071076308252e-05, "loss": 0.6176, "step": 2455 }, { "epoch": 0.46, "learning_rate": 1.1644675208479329e-05, "loss": 0.6359, "step": 2456 }, { "epoch": 0.46, "learning_rate": 1.1638639038035771e-05, "loss": 0.5163, "step": 2457 }, { "epoch": 0.46, "learning_rate": 1.1632602254012064e-05, "loss": 0.6711, "step": 2458 }, { "epoch": 0.46, "learning_rate": 1.1626564858668657e-05, "loss": 0.4363, "step": 2459 }, { "epoch": 0.46, "learning_rate": 1.162052685426621e-05, "loss": 0.566, "step": 2460 }, { "epoch": 0.46, "learning_rate": 1.1614488243065636e-05, "loss": 0.4326, "step": 2461 }, { "epoch": 0.47, "learning_rate": 1.1608449027328056e-05, "loss": 0.4422, "step": 2462 }, { "epoch": 0.47, "learning_rate": 1.1602409209314825e-05, "loss": 0.6701, "step": 2463 }, { "epoch": 0.47, "learning_rate": 1.1596368791287522e-05, "loss": 0.617, "step": 2464 }, { "epoch": 0.47, "learning_rate": 1.1590327775507952e-05, "loss": 0.5051, "step": 2465 }, { "epoch": 0.47, "learning_rate": 1.1584286164238144e-05, "loss": 0.5861, "step": 2466 }, { "epoch": 0.47, "learning_rate": 1.1578243959740345e-05, "loss": 0.4784, "step": 2467 }, { "epoch": 0.47, "learning_rate": 1.1572201164277033e-05, "loss": 0.4607, "step": 2468 }, { "epoch": 0.47, "learning_rate": 1.15661577801109e-05, "loss": 0.5439, "step": 2469 }, { "epoch": 0.47, "learning_rate": 1.156011380950486e-05, "loss": 0.5271, "step": 2470 }, { "epoch": 0.47, "learning_rate": 1.155406925472205e-05, "loss": 0.5937, "step": 2471 }, { "epoch": 0.47, "learning_rate": 1.1548024118025822e-05, "loss": 0.8131, "step": 2472 }, { "epoch": 0.47, "learning_rate": 1.1541978401679746e-05, "loss": 0.708, "step": 2473 }, { "epoch": 0.47, "learning_rate": 1.153593210794761e-05, "loss": 0.5336, "step": 2474 }, { "epoch": 0.47, "learning_rate": 1.1529885239093422e-05, "loss": 0.5452, "step": 2475 }, { "epoch": 0.47, "learning_rate": 1.1523837797381395e-05, "loss": 0.5366, "step": 2476 }, { "epoch": 0.47, "learning_rate": 1.1517789785075965e-05, "loss": 0.6255, "step": 2477 }, { "epoch": 0.47, "learning_rate": 1.1511741204441789e-05, "loss": 0.5804, "step": 2478 }, { "epoch": 0.47, "learning_rate": 1.1505692057743714e-05, "loss": 0.6302, "step": 2479 }, { "epoch": 0.47, "learning_rate": 1.149964234724682e-05, "loss": 0.7076, "step": 2480 }, { "epoch": 0.47, "learning_rate": 1.1493592075216391e-05, "loss": 0.5908, "step": 2481 }, { "epoch": 0.47, "learning_rate": 1.148754124391792e-05, "loss": 0.4432, "step": 2482 }, { "epoch": 0.47, "learning_rate": 1.1481489855617108e-05, "loss": 0.5953, "step": 2483 }, { "epoch": 0.47, "learning_rate": 1.1475437912579868e-05, "loss": 0.6158, "step": 2484 }, { "epoch": 0.47, "learning_rate": 1.1469385417072323e-05, "loss": 0.5615, "step": 2485 }, { "epoch": 0.47, "learning_rate": 1.1463332371360794e-05, "loss": 0.4911, "step": 2486 }, { "epoch": 0.47, "learning_rate": 1.1457278777711816e-05, "loss": 0.3982, "step": 2487 }, { "epoch": 0.47, "learning_rate": 1.145122463839213e-05, "loss": 0.5525, "step": 2488 }, { "epoch": 0.47, "learning_rate": 1.1445169955668674e-05, "loss": 0.5322, "step": 2489 }, { "epoch": 0.47, "learning_rate": 1.1439114731808594e-05, "loss": 0.6147, "step": 2490 }, { "epoch": 0.47, "learning_rate": 1.1433058969079239e-05, "loss": 0.5692, "step": 2491 }, { "epoch": 0.47, "learning_rate": 1.142700266974816e-05, "loss": 0.6542, "step": 2492 }, { "epoch": 0.47, "learning_rate": 1.14209458360831e-05, "loss": 0.4109, "step": 2493 }, { "epoch": 0.47, "learning_rate": 1.1414888470352026e-05, "loss": 0.5844, "step": 2494 }, { "epoch": 0.47, "learning_rate": 1.1408830574823074e-05, "loss": 0.4066, "step": 2495 }, { "epoch": 0.47, "learning_rate": 1.1402772151764596e-05, "loss": 0.6047, "step": 2496 }, { "epoch": 0.47, "learning_rate": 1.139671320344514e-05, "loss": 0.6142, "step": 2497 }, { "epoch": 0.47, "learning_rate": 1.1390653732133447e-05, "loss": 0.4415, "step": 2498 }, { "epoch": 0.47, "learning_rate": 1.138459374009846e-05, "loss": 0.6104, "step": 2499 }, { "epoch": 0.47, "learning_rate": 1.1378533229609304e-05, "loss": 0.5457, "step": 2500 }, { "epoch": 0.47, "learning_rate": 1.1372472202935315e-05, "loss": 0.7463, "step": 2501 }, { "epoch": 0.47, "learning_rate": 1.1366410662346008e-05, "loss": 0.4772, "step": 2502 }, { "epoch": 0.47, "learning_rate": 1.13603486101111e-05, "loss": 0.5744, "step": 2503 }, { "epoch": 0.47, "learning_rate": 1.1354286048500496e-05, "loss": 0.4232, "step": 2504 }, { "epoch": 0.47, "learning_rate": 1.1348222979784289e-05, "loss": 0.536, "step": 2505 }, { "epoch": 0.47, "learning_rate": 1.1342159406232769e-05, "loss": 0.5588, "step": 2506 }, { "epoch": 0.47, "learning_rate": 1.1336095330116406e-05, "loss": 0.6071, "step": 2507 }, { "epoch": 0.47, "learning_rate": 1.1330030753705865e-05, "loss": 0.6406, "step": 2508 }, { "epoch": 0.47, "learning_rate": 1.1323965679271993e-05, "loss": 0.5207, "step": 2509 }, { "epoch": 0.47, "learning_rate": 1.1317900109085835e-05, "loss": 0.6309, "step": 2510 }, { "epoch": 0.47, "learning_rate": 1.1311834045418607e-05, "loss": 0.5369, "step": 2511 }, { "epoch": 0.47, "learning_rate": 1.130576749054171e-05, "loss": 0.5741, "step": 2512 }, { "epoch": 0.47, "learning_rate": 1.129970044672675e-05, "loss": 0.5945, "step": 2513 }, { "epoch": 0.47, "learning_rate": 1.1293632916245487e-05, "loss": 0.6166, "step": 2514 }, { "epoch": 0.48, "learning_rate": 1.1287564901369886e-05, "loss": 0.606, "step": 2515 }, { "epoch": 0.48, "learning_rate": 1.1281496404372082e-05, "loss": 0.5601, "step": 2516 }, { "epoch": 0.48, "learning_rate": 1.127542742752439e-05, "loss": 0.5089, "step": 2517 }, { "epoch": 0.48, "learning_rate": 1.1269357973099314e-05, "loss": 0.512, "step": 2518 }, { "epoch": 0.48, "learning_rate": 1.1263288043369522e-05, "loss": 0.5219, "step": 2519 }, { "epoch": 0.48, "learning_rate": 1.1257217640607883e-05, "loss": 0.4488, "step": 2520 }, { "epoch": 0.48, "learning_rate": 1.125114676708741e-05, "loss": 0.509, "step": 2521 }, { "epoch": 0.48, "learning_rate": 1.1245075425081328e-05, "loss": 0.5134, "step": 2522 }, { "epoch": 0.48, "learning_rate": 1.1239003616863011e-05, "loss": 0.4998, "step": 2523 }, { "epoch": 0.48, "learning_rate": 1.1232931344706021e-05, "loss": 0.4653, "step": 2524 }, { "epoch": 0.48, "learning_rate": 1.1226858610884088e-05, "loss": 0.5476, "step": 2525 }, { "epoch": 0.48, "learning_rate": 1.1220785417671116e-05, "loss": 0.5864, "step": 2526 }, { "epoch": 0.48, "learning_rate": 1.1214711767341184e-05, "loss": 0.5447, "step": 2527 }, { "epoch": 0.48, "learning_rate": 1.1208637662168536e-05, "loss": 0.4657, "step": 2528 }, { "epoch": 0.48, "learning_rate": 1.1202563104427596e-05, "loss": 0.6225, "step": 2529 }, { "epoch": 0.48, "learning_rate": 1.1196488096392943e-05, "loss": 0.6471, "step": 2530 }, { "epoch": 0.48, "learning_rate": 1.1190412640339344e-05, "loss": 0.5144, "step": 2531 }, { "epoch": 0.48, "learning_rate": 1.1184336738541714e-05, "loss": 0.5635, "step": 2532 }, { "epoch": 0.48, "learning_rate": 1.117826039327515e-05, "loss": 0.5097, "step": 2533 }, { "epoch": 0.48, "learning_rate": 1.1172183606814903e-05, "loss": 0.5868, "step": 2534 }, { "epoch": 0.48, "learning_rate": 1.1166106381436396e-05, "loss": 0.6885, "step": 2535 }, { "epoch": 0.48, "learning_rate": 1.1160028719415226e-05, "loss": 0.4371, "step": 2536 }, { "epoch": 0.48, "learning_rate": 1.1153950623027127e-05, "loss": 0.567, "step": 2537 }, { "epoch": 0.48, "learning_rate": 1.1147872094548022e-05, "loss": 0.5322, "step": 2538 }, { "epoch": 0.48, "learning_rate": 1.1141793136253987e-05, "loss": 0.4348, "step": 2539 }, { "epoch": 0.48, "learning_rate": 1.1135713750421248e-05, "loss": 0.6973, "step": 2540 }, { "epoch": 0.48, "learning_rate": 1.1129633939326207e-05, "loss": 0.5914, "step": 2541 }, { "epoch": 0.48, "learning_rate": 1.1123553705245418e-05, "loss": 0.5581, "step": 2542 }, { "epoch": 0.48, "learning_rate": 1.1117473050455594e-05, "loss": 0.7031, "step": 2543 }, { "epoch": 0.48, "learning_rate": 1.1111391977233602e-05, "loss": 0.599, "step": 2544 }, { "epoch": 0.48, "learning_rate": 1.1105310487856477e-05, "loss": 0.4786, "step": 2545 }, { "epoch": 0.48, "learning_rate": 1.1099228584601397e-05, "loss": 0.5386, "step": 2546 }, { "epoch": 0.48, "learning_rate": 1.1093146269745694e-05, "loss": 0.5962, "step": 2547 }, { "epoch": 0.48, "learning_rate": 1.1087063545566872e-05, "loss": 0.5773, "step": 2548 }, { "epoch": 0.48, "learning_rate": 1.108098041434257e-05, "loss": 0.4579, "step": 2549 }, { "epoch": 0.48, "learning_rate": 1.107489687835059e-05, "loss": 0.7348, "step": 2550 }, { "epoch": 0.48, "learning_rate": 1.1068812939868876e-05, "loss": 0.5018, "step": 2551 }, { "epoch": 0.48, "learning_rate": 1.1062728601175532e-05, "loss": 0.5074, "step": 2552 }, { "epoch": 0.48, "learning_rate": 1.1056643864548805e-05, "loss": 0.5604, "step": 2553 }, { "epoch": 0.48, "learning_rate": 1.1050558732267095e-05, "loss": 0.5107, "step": 2554 }, { "epoch": 0.48, "learning_rate": 1.1044473206608954e-05, "loss": 0.4421, "step": 2555 }, { "epoch": 0.48, "learning_rate": 1.1038387289853069e-05, "loss": 0.417, "step": 2556 }, { "epoch": 0.48, "learning_rate": 1.1032300984278286e-05, "loss": 0.4875, "step": 2557 }, { "epoch": 0.48, "learning_rate": 1.1026214292163591e-05, "loss": 0.5871, "step": 2558 }, { "epoch": 0.48, "learning_rate": 1.1020127215788112e-05, "loss": 0.6317, "step": 2559 }, { "epoch": 0.48, "learning_rate": 1.1014039757431127e-05, "loss": 0.5468, "step": 2560 }, { "epoch": 0.48, "learning_rate": 1.1007951919372056e-05, "loss": 0.4433, "step": 2561 }, { "epoch": 0.48, "learning_rate": 1.1001863703890453e-05, "loss": 0.5657, "step": 2562 }, { "epoch": 0.48, "learning_rate": 1.0995775113266026e-05, "loss": 0.6984, "step": 2563 }, { "epoch": 0.48, "learning_rate": 1.0989686149778613e-05, "loss": 0.6332, "step": 2564 }, { "epoch": 0.48, "learning_rate": 1.0983596815708202e-05, "loss": 0.5989, "step": 2565 }, { "epoch": 0.48, "learning_rate": 1.0977507113334903e-05, "loss": 0.4455, "step": 2566 }, { "epoch": 0.48, "learning_rate": 1.0971417044938984e-05, "loss": 0.8571, "step": 2567 }, { "epoch": 0.49, "learning_rate": 1.0965326612800835e-05, "loss": 0.4933, "step": 2568 }, { "epoch": 0.49, "learning_rate": 1.095923581920099e-05, "loss": 0.5875, "step": 2569 }, { "epoch": 0.49, "learning_rate": 1.0953144666420114e-05, "loss": 0.5361, "step": 2570 }, { "epoch": 0.49, "learning_rate": 1.0947053156739016e-05, "loss": 0.6399, "step": 2571 }, { "epoch": 0.49, "learning_rate": 1.0940961292438624e-05, "loss": 0.5091, "step": 2572 }, { "epoch": 0.49, "learning_rate": 1.09348690758e-05, "loss": 0.5135, "step": 2573 }, { "epoch": 0.49, "learning_rate": 1.092877650910436e-05, "loss": 0.5605, "step": 2574 }, { "epoch": 0.49, "learning_rate": 1.092268359463302e-05, "loss": 0.4201, "step": 2575 }, { "epoch": 0.49, "learning_rate": 1.0916590334667451e-05, "loss": 0.5038, "step": 2576 }, { "epoch": 0.49, "learning_rate": 1.091049673148924e-05, "loss": 0.5358, "step": 2577 }, { "epoch": 0.49, "learning_rate": 1.0904402787380104e-05, "loss": 0.4974, "step": 2578 }, { "epoch": 0.49, "learning_rate": 1.0898308504621893e-05, "loss": 0.597, "step": 2579 }, { "epoch": 0.49, "learning_rate": 1.0892213885496577e-05, "loss": 0.556, "step": 2580 }, { "epoch": 0.49, "learning_rate": 1.0886118932286263e-05, "loss": 0.7047, "step": 2581 }, { "epoch": 0.49, "learning_rate": 1.0880023647273163e-05, "loss": 0.628, "step": 2582 }, { "epoch": 0.49, "learning_rate": 1.0873928032739639e-05, "loss": 0.5711, "step": 2583 }, { "epoch": 0.49, "learning_rate": 1.0867832090968154e-05, "loss": 0.445, "step": 2584 }, { "epoch": 0.49, "learning_rate": 1.0861735824241309e-05, "loss": 0.5686, "step": 2585 }, { "epoch": 0.49, "learning_rate": 1.0855639234841816e-05, "loss": 0.4484, "step": 2586 }, { "epoch": 0.49, "learning_rate": 1.0849542325052514e-05, "loss": 0.5434, "step": 2587 }, { "epoch": 0.49, "learning_rate": 1.084344509715636e-05, "loss": 0.5754, "step": 2588 }, { "epoch": 0.49, "learning_rate": 1.0837347553436426e-05, "loss": 0.3622, "step": 2589 }, { "epoch": 0.49, "learning_rate": 1.0831249696175918e-05, "loss": 0.4423, "step": 2590 }, { "epoch": 0.49, "learning_rate": 1.0825151527658135e-05, "loss": 0.71, "step": 2591 }, { "epoch": 0.49, "learning_rate": 1.0819053050166515e-05, "loss": 0.5361, "step": 2592 }, { "epoch": 0.49, "learning_rate": 1.0812954265984599e-05, "loss": 0.5501, "step": 2593 }, { "epoch": 0.49, "learning_rate": 1.0806855177396045e-05, "loss": 0.4335, "step": 2594 }, { "epoch": 0.49, "learning_rate": 1.0800755786684627e-05, "loss": 0.7452, "step": 2595 }, { "epoch": 0.49, "learning_rate": 1.0794656096134228e-05, "loss": 0.5148, "step": 2596 }, { "epoch": 0.49, "learning_rate": 1.0788556108028854e-05, "loss": 0.5997, "step": 2597 }, { "epoch": 0.49, "learning_rate": 1.078245582465261e-05, "loss": 0.5782, "step": 2598 }, { "epoch": 0.49, "learning_rate": 1.0776355248289712e-05, "loss": 0.4868, "step": 2599 }, { "epoch": 0.49, "learning_rate": 1.0770254381224501e-05, "loss": 0.6682, "step": 2600 }, { "epoch": 0.49, "learning_rate": 1.0764153225741403e-05, "loss": 0.4333, "step": 2601 }, { "epoch": 0.49, "learning_rate": 1.0758051784124976e-05, "loss": 0.7171, "step": 2602 }, { "epoch": 0.49, "learning_rate": 1.075195005865987e-05, "loss": 0.5698, "step": 2603 }, { "epoch": 0.49, "learning_rate": 1.0745848051630845e-05, "loss": 0.6157, "step": 2604 }, { "epoch": 0.49, "learning_rate": 1.0739745765322768e-05, "loss": 0.5601, "step": 2605 }, { "epoch": 0.49, "learning_rate": 1.0733643202020606e-05, "loss": 0.7661, "step": 2606 }, { "epoch": 0.49, "learning_rate": 1.072754036400944e-05, "loss": 0.5412, "step": 2607 }, { "epoch": 0.49, "learning_rate": 1.0721437253574439e-05, "loss": 0.5201, "step": 2608 }, { "epoch": 0.49, "learning_rate": 1.0715333873000888e-05, "loss": 0.6437, "step": 2609 }, { "epoch": 0.49, "learning_rate": 1.0709230224574165e-05, "loss": 0.4681, "step": 2610 }, { "epoch": 0.49, "learning_rate": 1.070312631057975e-05, "loss": 0.5601, "step": 2611 }, { "epoch": 0.49, "learning_rate": 1.0697022133303223e-05, "loss": 0.7075, "step": 2612 }, { "epoch": 0.49, "learning_rate": 1.0690917695030265e-05, "loss": 0.6852, "step": 2613 }, { "epoch": 0.49, "learning_rate": 1.0684812998046647e-05, "loss": 0.473, "step": 2614 }, { "epoch": 0.49, "learning_rate": 1.0678708044638247e-05, "loss": 0.5388, "step": 2615 }, { "epoch": 0.49, "learning_rate": 1.0672602837091032e-05, "loss": 0.431, "step": 2616 }, { "epoch": 0.49, "learning_rate": 1.0666497377691067e-05, "loss": 0.5561, "step": 2617 }, { "epoch": 0.49, "learning_rate": 1.066039166872451e-05, "loss": 0.5045, "step": 2618 }, { "epoch": 0.49, "learning_rate": 1.0654285712477613e-05, "loss": 0.4898, "step": 2619 }, { "epoch": 0.49, "learning_rate": 1.064817951123672e-05, "loss": 0.4481, "step": 2620 }, { "epoch": 0.5, "learning_rate": 1.064207306728827e-05, "loss": 0.5216, "step": 2621 }, { "epoch": 0.5, "learning_rate": 1.0635966382918787e-05, "loss": 0.573, "step": 2622 }, { "epoch": 0.5, "learning_rate": 1.0629859460414892e-05, "loss": 0.464, "step": 2623 }, { "epoch": 0.5, "learning_rate": 1.0623752302063284e-05, "loss": 0.5751, "step": 2624 }, { "epoch": 0.5, "learning_rate": 1.0617644910150766e-05, "loss": 0.4379, "step": 2625 }, { "epoch": 0.5, "learning_rate": 1.061153728696422e-05, "loss": 0.5347, "step": 2626 }, { "epoch": 0.5, "learning_rate": 1.0605429434790607e-05, "loss": 0.499, "step": 2627 }, { "epoch": 0.5, "learning_rate": 1.0599321355916988e-05, "loss": 0.4588, "step": 2628 }, { "epoch": 0.5, "learning_rate": 1.05932130526305e-05, "loss": 0.4768, "step": 2629 }, { "epoch": 0.5, "learning_rate": 1.0587104527218368e-05, "loss": 0.5436, "step": 2630 }, { "epoch": 0.5, "learning_rate": 1.0580995781967898e-05, "loss": 0.4108, "step": 2631 }, { "epoch": 0.5, "learning_rate": 1.0574886819166479e-05, "loss": 0.4045, "step": 2632 }, { "epoch": 0.5, "learning_rate": 1.056877764110158e-05, "loss": 0.4543, "step": 2633 }, { "epoch": 0.5, "learning_rate": 1.0562668250060752e-05, "loss": 0.4583, "step": 2634 }, { "epoch": 0.5, "learning_rate": 1.055655864833163e-05, "loss": 0.3974, "step": 2635 }, { "epoch": 0.5, "learning_rate": 1.0550448838201918e-05, "loss": 0.4443, "step": 2636 }, { "epoch": 0.5, "learning_rate": 1.0544338821959407e-05, "loss": 0.4689, "step": 2637 }, { "epoch": 0.5, "learning_rate": 1.0538228601891963e-05, "loss": 0.452, "step": 2638 }, { "epoch": 0.5, "learning_rate": 1.0532118180287523e-05, "loss": 0.7442, "step": 2639 }, { "epoch": 0.5, "learning_rate": 1.0526007559434107e-05, "loss": 0.4532, "step": 2640 }, { "epoch": 0.5, "learning_rate": 1.0519896741619803e-05, "loss": 0.5434, "step": 2641 }, { "epoch": 0.5, "learning_rate": 1.0513785729132785e-05, "loss": 0.5419, "step": 2642 }, { "epoch": 0.5, "learning_rate": 1.0507674524261275e-05, "loss": 0.4588, "step": 2643 }, { "epoch": 0.5, "learning_rate": 1.0501563129293598e-05, "loss": 0.7093, "step": 2644 }, { "epoch": 0.5, "learning_rate": 1.0495451546518127e-05, "loss": 0.4982, "step": 2645 }, { "epoch": 0.5, "learning_rate": 1.0489339778223317e-05, "loss": 0.5438, "step": 2646 }, { "epoch": 0.5, "learning_rate": 1.0483227826697686e-05, "loss": 0.4707, "step": 2647 }, { "epoch": 0.5, "learning_rate": 1.0477115694229828e-05, "loss": 0.722, "step": 2648 }, { "epoch": 0.5, "learning_rate": 1.0471003383108394e-05, "loss": 0.6375, "step": 2649 }, { "epoch": 0.5, "learning_rate": 1.0464890895622111e-05, "loss": 0.6374, "step": 2650 }, { "epoch": 0.5, "learning_rate": 1.0458778234059773e-05, "loss": 0.5245, "step": 2651 }, { "epoch": 0.5, "learning_rate": 1.0452665400710228e-05, "loss": 0.5664, "step": 2652 }, { "epoch": 0.5, "learning_rate": 1.0446552397862402e-05, "loss": 0.4537, "step": 2653 }, { "epoch": 0.5, "learning_rate": 1.0440439227805279e-05, "loss": 0.4842, "step": 2654 }, { "epoch": 0.5, "learning_rate": 1.0434325892827896e-05, "loss": 0.4891, "step": 2655 }, { "epoch": 0.5, "learning_rate": 1.042821239521937e-05, "loss": 0.4236, "step": 2656 }, { "epoch": 0.5, "learning_rate": 1.0422098737268862e-05, "loss": 0.44, "step": 2657 }, { "epoch": 0.5, "learning_rate": 1.041598492126561e-05, "loss": 0.4536, "step": 2658 }, { "epoch": 0.5, "learning_rate": 1.0409870949498892e-05, "loss": 0.4361, "step": 2659 }, { "epoch": 0.5, "learning_rate": 1.0403756824258057e-05, "loss": 0.366, "step": 2660 }, { "epoch": 0.5, "learning_rate": 1.0397642547832514e-05, "loss": 0.5477, "step": 2661 }, { "epoch": 0.5, "learning_rate": 1.0391528122511712e-05, "loss": 0.5323, "step": 2662 }, { "epoch": 0.5, "learning_rate": 1.0385413550585178e-05, "loss": 0.5425, "step": 2663 }, { "epoch": 0.5, "learning_rate": 1.0379298834342475e-05, "loss": 0.6236, "step": 2664 }, { "epoch": 0.5, "learning_rate": 1.0373183976073234e-05, "loss": 0.7944, "step": 2665 }, { "epoch": 0.5, "learning_rate": 1.0367068978067125e-05, "loss": 0.5758, "step": 2666 }, { "epoch": 0.5, "learning_rate": 1.0360953842613886e-05, "loss": 0.4301, "step": 2667 }, { "epoch": 0.5, "learning_rate": 1.0354838572003296e-05, "loss": 0.578, "step": 2668 }, { "epoch": 0.5, "learning_rate": 1.0348723168525183e-05, "loss": 0.5863, "step": 2669 }, { "epoch": 0.5, "learning_rate": 1.0342607634469439e-05, "loss": 0.4548, "step": 2670 }, { "epoch": 0.5, "learning_rate": 1.0336491972125984e-05, "loss": 0.5234, "step": 2671 }, { "epoch": 0.5, "learning_rate": 1.0330376183784802e-05, "loss": 0.5687, "step": 2672 }, { "epoch": 0.51, "learning_rate": 1.0324260271735923e-05, "loss": 0.5575, "step": 2673 }, { "epoch": 0.51, "learning_rate": 1.0318144238269412e-05, "loss": 0.4902, "step": 2674 }, { "epoch": 0.51, "learning_rate": 1.0312028085675393e-05, "loss": 0.6113, "step": 2675 }, { "epoch": 0.51, "learning_rate": 1.0305911816244024e-05, "loss": 0.4576, "step": 2676 }, { "epoch": 0.51, "learning_rate": 1.0299795432265516e-05, "loss": 0.467, "step": 2677 }, { "epoch": 0.51, "learning_rate": 1.0293678936030114e-05, "loss": 0.4824, "step": 2678 }, { "epoch": 0.51, "learning_rate": 1.0287562329828112e-05, "loss": 0.6147, "step": 2679 }, { "epoch": 0.51, "learning_rate": 1.0281445615949843e-05, "loss": 0.5824, "step": 2680 }, { "epoch": 0.51, "learning_rate": 1.0275328796685674e-05, "loss": 0.5046, "step": 2681 }, { "epoch": 0.51, "learning_rate": 1.0269211874326024e-05, "loss": 0.6117, "step": 2682 }, { "epoch": 0.51, "learning_rate": 1.0263094851161342e-05, "loss": 0.4939, "step": 2683 }, { "epoch": 0.51, "learning_rate": 1.0256977729482115e-05, "loss": 0.5119, "step": 2684 }, { "epoch": 0.51, "learning_rate": 1.025086051157887e-05, "loss": 0.4434, "step": 2685 }, { "epoch": 0.51, "learning_rate": 1.0244743199742166e-05, "loss": 0.6273, "step": 2686 }, { "epoch": 0.51, "learning_rate": 1.0238625796262604e-05, "loss": 0.5254, "step": 2687 }, { "epoch": 0.51, "learning_rate": 1.0232508303430811e-05, "loss": 0.6646, "step": 2688 }, { "epoch": 0.51, "learning_rate": 1.0226390723537451e-05, "loss": 0.608, "step": 2689 }, { "epoch": 0.51, "learning_rate": 1.0220273058873225e-05, "loss": 0.5349, "step": 2690 }, { "epoch": 0.51, "learning_rate": 1.021415531172886e-05, "loss": 0.5692, "step": 2691 }, { "epoch": 0.51, "learning_rate": 1.0208037484395114e-05, "loss": 0.5361, "step": 2692 }, { "epoch": 0.51, "learning_rate": 1.020191957916278e-05, "loss": 0.5337, "step": 2693 }, { "epoch": 0.51, "learning_rate": 1.0195801598322674e-05, "loss": 0.4988, "step": 2694 }, { "epoch": 0.51, "learning_rate": 1.0189683544165643e-05, "loss": 0.6059, "step": 2695 }, { "epoch": 0.51, "learning_rate": 1.0183565418982563e-05, "loss": 0.5928, "step": 2696 }, { "epoch": 0.51, "learning_rate": 1.0177447225064334e-05, "loss": 0.413, "step": 2697 }, { "epoch": 0.51, "learning_rate": 1.0171328964701885e-05, "loss": 0.4608, "step": 2698 }, { "epoch": 0.51, "learning_rate": 1.0165210640186168e-05, "loss": 0.5546, "step": 2699 }, { "epoch": 0.51, "learning_rate": 1.0159092253808156e-05, "loss": 0.5403, "step": 2700 }, { "epoch": 0.51, "learning_rate": 1.015297380785885e-05, "loss": 0.4064, "step": 2701 }, { "epoch": 0.51, "learning_rate": 1.014685530462927e-05, "loss": 0.574, "step": 2702 }, { "epoch": 0.51, "learning_rate": 1.0140736746410462e-05, "loss": 0.563, "step": 2703 }, { "epoch": 0.51, "learning_rate": 1.0134618135493481e-05, "loss": 0.4162, "step": 2704 }, { "epoch": 0.51, "learning_rate": 1.0128499474169422e-05, "loss": 0.5671, "step": 2705 }, { "epoch": 0.51, "learning_rate": 1.0122380764729378e-05, "loss": 0.487, "step": 2706 }, { "epoch": 0.51, "learning_rate": 1.0116262009464475e-05, "loss": 0.4393, "step": 2707 }, { "epoch": 0.51, "learning_rate": 1.0110143210665843e-05, "loss": 0.6862, "step": 2708 }, { "epoch": 0.51, "learning_rate": 1.0104024370624644e-05, "loss": 0.5551, "step": 2709 }, { "epoch": 0.51, "learning_rate": 1.009790549163204e-05, "loss": 0.5777, "step": 2710 }, { "epoch": 0.51, "learning_rate": 1.0091786575979215e-05, "loss": 0.3727, "step": 2711 }, { "epoch": 0.51, "learning_rate": 1.0085667625957372e-05, "loss": 0.3504, "step": 2712 }, { "epoch": 0.51, "learning_rate": 1.0079548643857718e-05, "loss": 0.5351, "step": 2713 }, { "epoch": 0.51, "learning_rate": 1.0073429631971467e-05, "loss": 0.465, "step": 2714 }, { "epoch": 0.51, "learning_rate": 1.0067310592589867e-05, "loss": 0.4409, "step": 2715 }, { "epoch": 0.51, "learning_rate": 1.006119152800415e-05, "loss": 0.5686, "step": 2716 }, { "epoch": 0.51, "learning_rate": 1.0055072440505576e-05, "loss": 0.5794, "step": 2717 }, { "epoch": 0.51, "learning_rate": 1.0048953332385401e-05, "loss": 0.5605, "step": 2718 }, { "epoch": 0.51, "learning_rate": 1.0042834205934897e-05, "loss": 0.4053, "step": 2719 }, { "epoch": 0.51, "learning_rate": 1.0036715063445342e-05, "loss": 0.6097, "step": 2720 }, { "epoch": 0.51, "learning_rate": 1.0030595907208012e-05, "loss": 0.6077, "step": 2721 }, { "epoch": 0.51, "learning_rate": 1.0024476739514205e-05, "loss": 0.4294, "step": 2722 }, { "epoch": 0.51, "learning_rate": 1.0018357562655199e-05, "loss": 0.5216, "step": 2723 }, { "epoch": 0.51, "learning_rate": 1.00122383789223e-05, "loss": 0.404, "step": 2724 }, { "epoch": 0.51, "learning_rate": 1.00061191906068e-05, "loss": 0.6293, "step": 2725 }, { "epoch": 0.52, "learning_rate": 1e-05, "loss": 0.6528, "step": 2726 }, { "epoch": 0.52, "learning_rate": 9.993880809393203e-06, "loss": 0.4957, "step": 2727 }, { "epoch": 0.52, "learning_rate": 9.9877616210777e-06, "loss": 0.4008, "step": 2728 }, { "epoch": 0.52, "learning_rate": 9.981642437344804e-06, "loss": 0.5579, "step": 2729 }, { "epoch": 0.52, "learning_rate": 9.9755232604858e-06, "loss": 0.5913, "step": 2730 }, { "epoch": 0.52, "learning_rate": 9.969404092791987e-06, "loss": 0.6209, "step": 2731 }, { "epoch": 0.52, "learning_rate": 9.963284936554661e-06, "loss": 0.5417, "step": 2732 }, { "epoch": 0.52, "learning_rate": 9.957165794065107e-06, "loss": 0.6953, "step": 2733 }, { "epoch": 0.52, "learning_rate": 9.951046667614602e-06, "loss": 0.5529, "step": 2734 }, { "epoch": 0.52, "learning_rate": 9.944927559494428e-06, "loss": 0.3847, "step": 2735 }, { "epoch": 0.52, "learning_rate": 9.938808471995856e-06, "loss": 0.4279, "step": 2736 }, { "epoch": 0.52, "learning_rate": 9.932689407410136e-06, "loss": 0.4318, "step": 2737 }, { "epoch": 0.52, "learning_rate": 9.926570368028535e-06, "loss": 0.4936, "step": 2738 }, { "epoch": 0.52, "learning_rate": 9.920451356142289e-06, "loss": 0.5305, "step": 2739 }, { "epoch": 0.52, "learning_rate": 9.91433237404263e-06, "loss": 0.5505, "step": 2740 }, { "epoch": 0.52, "learning_rate": 9.908213424020789e-06, "loss": 0.4633, "step": 2741 }, { "epoch": 0.52, "learning_rate": 9.902094508367962e-06, "loss": 0.5418, "step": 2742 }, { "epoch": 0.52, "learning_rate": 9.89597562937536e-06, "loss": 0.5683, "step": 2743 }, { "epoch": 0.52, "learning_rate": 9.889856789334159e-06, "loss": 0.547, "step": 2744 }, { "epoch": 0.52, "learning_rate": 9.883737990535527e-06, "loss": 0.4445, "step": 2745 }, { "epoch": 0.52, "learning_rate": 9.877619235270625e-06, "loss": 0.5953, "step": 2746 }, { "epoch": 0.52, "learning_rate": 9.871500525830581e-06, "loss": 0.4677, "step": 2747 }, { "epoch": 0.52, "learning_rate": 9.865381864506519e-06, "loss": 0.5829, "step": 2748 }, { "epoch": 0.52, "learning_rate": 9.859263253589541e-06, "loss": 0.4285, "step": 2749 }, { "epoch": 0.52, "learning_rate": 9.853144695370735e-06, "loss": 0.4165, "step": 2750 }, { "epoch": 0.52, "learning_rate": 9.847026192141152e-06, "loss": 0.5295, "step": 2751 }, { "epoch": 0.52, "learning_rate": 9.840907746191847e-06, "loss": 0.4143, "step": 2752 }, { "epoch": 0.52, "learning_rate": 9.834789359813834e-06, "loss": 0.5231, "step": 2753 }, { "epoch": 0.52, "learning_rate": 9.828671035298115e-06, "loss": 0.5459, "step": 2754 }, { "epoch": 0.52, "learning_rate": 9.822552774935668e-06, "loss": 0.4801, "step": 2755 }, { "epoch": 0.52, "learning_rate": 9.81643458101744e-06, "loss": 0.5834, "step": 2756 }, { "epoch": 0.52, "learning_rate": 9.810316455834359e-06, "loss": 0.7334, "step": 2757 }, { "epoch": 0.52, "learning_rate": 9.804198401677328e-06, "loss": 0.6355, "step": 2758 }, { "epoch": 0.52, "learning_rate": 9.798080420837224e-06, "loss": 0.4824, "step": 2759 }, { "epoch": 0.52, "learning_rate": 9.791962515604887e-06, "loss": 0.4964, "step": 2760 }, { "epoch": 0.52, "learning_rate": 9.785844688271142e-06, "loss": 0.5241, "step": 2761 }, { "epoch": 0.52, "learning_rate": 9.77972694112678e-06, "loss": 0.4813, "step": 2762 }, { "epoch": 0.52, "learning_rate": 9.773609276462549e-06, "loss": 0.4106, "step": 2763 }, { "epoch": 0.52, "learning_rate": 9.767491696569194e-06, "loss": 0.5177, "step": 2764 }, { "epoch": 0.52, "learning_rate": 9.7613742037374e-06, "loss": 0.5305, "step": 2765 }, { "epoch": 0.52, "learning_rate": 9.755256800257836e-06, "loss": 0.7822, "step": 2766 }, { "epoch": 0.52, "learning_rate": 9.749139488421133e-06, "loss": 0.444, "step": 2767 }, { "epoch": 0.52, "learning_rate": 9.74302227051789e-06, "loss": 0.5793, "step": 2768 }, { "epoch": 0.52, "learning_rate": 9.736905148838661e-06, "loss": 0.5255, "step": 2769 }, { "epoch": 0.52, "learning_rate": 9.730788125673977e-06, "loss": 0.5149, "step": 2770 }, { "epoch": 0.52, "learning_rate": 9.72467120331433e-06, "loss": 0.5083, "step": 2771 }, { "epoch": 0.52, "learning_rate": 9.718554384050159e-06, "loss": 0.4244, "step": 2772 }, { "epoch": 0.52, "learning_rate": 9.712437670171892e-06, "loss": 0.5039, "step": 2773 }, { "epoch": 0.52, "learning_rate": 9.706321063969888e-06, "loss": 0.4176, "step": 2774 }, { "epoch": 0.52, "learning_rate": 9.700204567734485e-06, "loss": 0.5816, "step": 2775 }, { "epoch": 0.52, "learning_rate": 9.69408818375598e-06, "loss": 0.6281, "step": 2776 }, { "epoch": 0.52, "learning_rate": 9.687971914324607e-06, "loss": 0.6714, "step": 2777 }, { "epoch": 0.52, "learning_rate": 9.68185576173059e-06, "loss": 0.5102, "step": 2778 }, { "epoch": 0.53, "learning_rate": 9.67573972826408e-06, "loss": 0.521, "step": 2779 }, { "epoch": 0.53, "learning_rate": 9.669623816215198e-06, "loss": 0.4446, "step": 2780 }, { "epoch": 0.53, "learning_rate": 9.66350802787402e-06, "loss": 0.5782, "step": 2781 }, { "epoch": 0.53, "learning_rate": 9.657392365530566e-06, "loss": 0.69, "step": 2782 }, { "epoch": 0.53, "learning_rate": 9.651276831474817e-06, "loss": 0.6719, "step": 2783 }, { "epoch": 0.53, "learning_rate": 9.645161427996707e-06, "loss": 0.3895, "step": 2784 }, { "epoch": 0.53, "learning_rate": 9.639046157386117e-06, "loss": 0.4088, "step": 2785 }, { "epoch": 0.53, "learning_rate": 9.632931021932877e-06, "loss": 0.4428, "step": 2786 }, { "epoch": 0.53, "learning_rate": 9.626816023926771e-06, "loss": 0.6064, "step": 2787 }, { "epoch": 0.53, "learning_rate": 9.620701165657527e-06, "loss": 0.5348, "step": 2788 }, { "epoch": 0.53, "learning_rate": 9.614586449414824e-06, "loss": 0.5343, "step": 2789 }, { "epoch": 0.53, "learning_rate": 9.60847187748829e-06, "loss": 0.5262, "step": 2790 }, { "epoch": 0.53, "learning_rate": 9.602357452167492e-06, "loss": 0.4868, "step": 2791 }, { "epoch": 0.53, "learning_rate": 9.596243175741944e-06, "loss": 0.439, "step": 2792 }, { "epoch": 0.53, "learning_rate": 9.59012905050111e-06, "loss": 0.5359, "step": 2793 }, { "epoch": 0.53, "learning_rate": 9.584015078734395e-06, "loss": 0.5442, "step": 2794 }, { "epoch": 0.53, "learning_rate": 9.57790126273114e-06, "loss": 0.5419, "step": 2795 }, { "epoch": 0.53, "learning_rate": 9.571787604780633e-06, "loss": 0.5754, "step": 2796 }, { "epoch": 0.53, "learning_rate": 9.565674107172109e-06, "loss": 0.5224, "step": 2797 }, { "epoch": 0.53, "learning_rate": 9.559560772194723e-06, "loss": 0.5712, "step": 2798 }, { "epoch": 0.53, "learning_rate": 9.5534476021376e-06, "loss": 0.6409, "step": 2799 }, { "epoch": 0.53, "learning_rate": 9.547334599289775e-06, "loss": 0.4633, "step": 2800 }, { "epoch": 0.53, "learning_rate": 9.541221765940229e-06, "loss": 0.4252, "step": 2801 }, { "epoch": 0.53, "learning_rate": 9.535109104377892e-06, "loss": 0.497, "step": 2802 }, { "epoch": 0.53, "learning_rate": 9.52899661689161e-06, "loss": 0.5062, "step": 2803 }, { "epoch": 0.53, "learning_rate": 9.522884305770176e-06, "loss": 0.4586, "step": 2804 }, { "epoch": 0.53, "learning_rate": 9.516772173302316e-06, "loss": 0.4393, "step": 2805 }, { "epoch": 0.53, "learning_rate": 9.510660221776684e-06, "loss": 0.5711, "step": 2806 }, { "epoch": 0.53, "learning_rate": 9.504548453481875e-06, "loss": 0.5228, "step": 2807 }, { "epoch": 0.53, "learning_rate": 9.498436870706406e-06, "loss": 0.6446, "step": 2808 }, { "epoch": 0.53, "learning_rate": 9.492325475738725e-06, "loss": 0.4771, "step": 2809 }, { "epoch": 0.53, "learning_rate": 9.486214270867219e-06, "loss": 0.551, "step": 2810 }, { "epoch": 0.53, "learning_rate": 9.480103258380198e-06, "loss": 0.4328, "step": 2811 }, { "epoch": 0.53, "learning_rate": 9.473992440565896e-06, "loss": 0.427, "step": 2812 }, { "epoch": 0.53, "learning_rate": 9.467881819712478e-06, "loss": 0.5902, "step": 2813 }, { "epoch": 0.53, "learning_rate": 9.46177139810804e-06, "loss": 0.4928, "step": 2814 }, { "epoch": 0.53, "learning_rate": 9.455661178040593e-06, "loss": 0.4944, "step": 2815 }, { "epoch": 0.53, "learning_rate": 9.449551161798085e-06, "loss": 0.6, "step": 2816 }, { "epoch": 0.53, "learning_rate": 9.443441351668375e-06, "loss": 0.589, "step": 2817 }, { "epoch": 0.53, "learning_rate": 9.43733174993925e-06, "loss": 0.5031, "step": 2818 }, { "epoch": 0.53, "learning_rate": 9.431222358898423e-06, "loss": 0.5462, "step": 2819 }, { "epoch": 0.53, "learning_rate": 9.425113180833528e-06, "loss": 0.4103, "step": 2820 }, { "epoch": 0.53, "learning_rate": 9.419004218032105e-06, "loss": 0.5538, "step": 2821 }, { "epoch": 0.53, "learning_rate": 9.412895472781635e-06, "loss": 0.5299, "step": 2822 }, { "epoch": 0.53, "learning_rate": 9.406786947369507e-06, "loss": 0.4624, "step": 2823 }, { "epoch": 0.53, "learning_rate": 9.400678644083014e-06, "loss": 0.4718, "step": 2824 }, { "epoch": 0.53, "learning_rate": 9.394570565209398e-06, "loss": 0.5571, "step": 2825 }, { "epoch": 0.53, "learning_rate": 9.388462713035787e-06, "loss": 0.6849, "step": 2826 }, { "epoch": 0.53, "learning_rate": 9.382355089849235e-06, "loss": 0.5158, "step": 2827 }, { "epoch": 0.53, "learning_rate": 9.376247697936719e-06, "loss": 0.5603, "step": 2828 }, { "epoch": 0.53, "learning_rate": 9.370140539585113e-06, "loss": 0.4033, "step": 2829 }, { "epoch": 0.53, "learning_rate": 9.364033617081216e-06, "loss": 0.543, "step": 2830 }, { "epoch": 0.53, "learning_rate": 9.357926932711733e-06, "loss": 0.6393, "step": 2831 }, { "epoch": 0.54, "learning_rate": 9.351820488763285e-06, "loss": 0.5856, "step": 2832 }, { "epoch": 0.54, "learning_rate": 9.34571428752239e-06, "loss": 0.5697, "step": 2833 }, { "epoch": 0.54, "learning_rate": 9.339608331275493e-06, "loss": 0.7254, "step": 2834 }, { "epoch": 0.54, "learning_rate": 9.333502622308937e-06, "loss": 0.521, "step": 2835 }, { "epoch": 0.54, "learning_rate": 9.32739716290897e-06, "loss": 0.4736, "step": 2836 }, { "epoch": 0.54, "learning_rate": 9.321291955361756e-06, "loss": 0.6969, "step": 2837 }, { "epoch": 0.54, "learning_rate": 9.315187001953354e-06, "loss": 0.6021, "step": 2838 }, { "epoch": 0.54, "learning_rate": 9.309082304969738e-06, "loss": 0.4301, "step": 2839 }, { "epoch": 0.54, "learning_rate": 9.302977866696779e-06, "loss": 0.498, "step": 2840 }, { "epoch": 0.54, "learning_rate": 9.29687368942025e-06, "loss": 0.5398, "step": 2841 }, { "epoch": 0.54, "learning_rate": 9.290769775425838e-06, "loss": 0.3539, "step": 2842 }, { "epoch": 0.54, "learning_rate": 9.284666126999116e-06, "loss": 0.6601, "step": 2843 }, { "epoch": 0.54, "learning_rate": 9.278562746425561e-06, "loss": 0.434, "step": 2844 }, { "epoch": 0.54, "learning_rate": 9.272459635990563e-06, "loss": 0.4883, "step": 2845 }, { "epoch": 0.54, "learning_rate": 9.266356797979397e-06, "loss": 0.4688, "step": 2846 }, { "epoch": 0.54, "learning_rate": 9.260254234677235e-06, "loss": 0.4445, "step": 2847 }, { "epoch": 0.54, "learning_rate": 9.254151948369158e-06, "loss": 0.7588, "step": 2848 }, { "epoch": 0.54, "learning_rate": 9.248049941340133e-06, "loss": 0.4728, "step": 2849 }, { "epoch": 0.54, "learning_rate": 9.241948215875024e-06, "loss": 0.5616, "step": 2850 }, { "epoch": 0.54, "learning_rate": 9.235846774258599e-06, "loss": 0.4253, "step": 2851 }, { "epoch": 0.54, "learning_rate": 9.229745618775505e-06, "loss": 0.4773, "step": 2852 }, { "epoch": 0.54, "learning_rate": 9.223644751710288e-06, "loss": 0.489, "step": 2853 }, { "epoch": 0.54, "learning_rate": 9.217544175347394e-06, "loss": 0.685, "step": 2854 }, { "epoch": 0.54, "learning_rate": 9.211443891971151e-06, "loss": 0.5987, "step": 2855 }, { "epoch": 0.54, "learning_rate": 9.205343903865774e-06, "loss": 0.4836, "step": 2856 }, { "epoch": 0.54, "learning_rate": 9.199244213315377e-06, "loss": 0.5499, "step": 2857 }, { "epoch": 0.54, "learning_rate": 9.19314482260396e-06, "loss": 0.6017, "step": 2858 }, { "epoch": 0.54, "learning_rate": 9.187045734015401e-06, "loss": 0.5178, "step": 2859 }, { "epoch": 0.54, "learning_rate": 9.180946949833487e-06, "loss": 0.5892, "step": 2860 }, { "epoch": 0.54, "learning_rate": 9.174848472341867e-06, "loss": 0.6489, "step": 2861 }, { "epoch": 0.54, "learning_rate": 9.168750303824085e-06, "loss": 0.4936, "step": 2862 }, { "epoch": 0.54, "learning_rate": 9.162652446563576e-06, "loss": 0.5396, "step": 2863 }, { "epoch": 0.54, "learning_rate": 9.156554902843645e-06, "loss": 0.5427, "step": 2864 }, { "epoch": 0.54, "learning_rate": 9.15045767494749e-06, "loss": 0.4673, "step": 2865 }, { "epoch": 0.54, "learning_rate": 9.144360765158188e-06, "loss": 0.6484, "step": 2866 }, { "epoch": 0.54, "learning_rate": 9.138264175758693e-06, "loss": 0.5655, "step": 2867 }, { "epoch": 0.54, "learning_rate": 9.132167909031848e-06, "loss": 0.4558, "step": 2868 }, { "epoch": 0.54, "learning_rate": 9.126071967260365e-06, "loss": 0.4492, "step": 2869 }, { "epoch": 0.54, "learning_rate": 9.119976352726837e-06, "loss": 0.6602, "step": 2870 }, { "epoch": 0.54, "learning_rate": 9.11388106771374e-06, "loss": 0.6167, "step": 2871 }, { "epoch": 0.54, "learning_rate": 9.107786114503426e-06, "loss": 0.6037, "step": 2872 }, { "epoch": 0.54, "learning_rate": 9.10169149537811e-06, "loss": 0.3803, "step": 2873 }, { "epoch": 0.54, "learning_rate": 9.0955972126199e-06, "loss": 0.4871, "step": 2874 }, { "epoch": 0.54, "learning_rate": 9.089503268510764e-06, "loss": 0.4424, "step": 2875 }, { "epoch": 0.54, "learning_rate": 9.08340966533255e-06, "loss": 0.5097, "step": 2876 }, { "epoch": 0.54, "learning_rate": 9.07731640536698e-06, "loss": 0.5263, "step": 2877 }, { "epoch": 0.54, "learning_rate": 9.071223490895644e-06, "loss": 0.4157, "step": 2878 }, { "epoch": 0.54, "learning_rate": 9.065130924199998e-06, "loss": 0.4861, "step": 2879 }, { "epoch": 0.54, "learning_rate": 9.059038707561381e-06, "loss": 0.5462, "step": 2880 }, { "epoch": 0.54, "learning_rate": 9.05294684326099e-06, "loss": 0.4644, "step": 2881 }, { "epoch": 0.54, "learning_rate": 9.046855333579884e-06, "loss": 0.4375, "step": 2882 }, { "epoch": 0.54, "learning_rate": 9.040764180799011e-06, "loss": 0.5648, "step": 2883 }, { "epoch": 0.54, "learning_rate": 9.03467338719917e-06, "loss": 0.6093, "step": 2884 }, { "epoch": 0.55, "learning_rate": 9.028582955061016e-06, "loss": 0.5243, "step": 2885 }, { "epoch": 0.55, "learning_rate": 9.022492886665099e-06, "loss": 0.6067, "step": 2886 }, { "epoch": 0.55, "learning_rate": 9.016403184291805e-06, "loss": 0.4974, "step": 2887 }, { "epoch": 0.55, "learning_rate": 9.010313850221387e-06, "loss": 0.5623, "step": 2888 }, { "epoch": 0.55, "learning_rate": 9.00422488673398e-06, "loss": 0.4278, "step": 2889 }, { "epoch": 0.55, "learning_rate": 8.998136296109552e-06, "loss": 0.4943, "step": 2890 }, { "epoch": 0.55, "learning_rate": 8.992048080627947e-06, "loss": 0.4367, "step": 2891 }, { "epoch": 0.55, "learning_rate": 8.985960242568875e-06, "loss": 0.6131, "step": 2892 }, { "epoch": 0.55, "learning_rate": 8.979872784211893e-06, "loss": 0.4937, "step": 2893 }, { "epoch": 0.55, "learning_rate": 8.973785707836414e-06, "loss": 0.5192, "step": 2894 }, { "epoch": 0.55, "learning_rate": 8.967699015721717e-06, "loss": 0.4223, "step": 2895 }, { "epoch": 0.55, "learning_rate": 8.961612710146934e-06, "loss": 0.4285, "step": 2896 }, { "epoch": 0.55, "learning_rate": 8.955526793391049e-06, "loss": 0.5222, "step": 2897 }, { "epoch": 0.55, "learning_rate": 8.949441267732908e-06, "loss": 0.4752, "step": 2898 }, { "epoch": 0.55, "learning_rate": 8.943356135451198e-06, "loss": 0.5753, "step": 2899 }, { "epoch": 0.55, "learning_rate": 8.937271398824471e-06, "loss": 0.5015, "step": 2900 }, { "epoch": 0.55, "learning_rate": 8.931187060131126e-06, "loss": 0.5093, "step": 2901 }, { "epoch": 0.55, "learning_rate": 8.925103121649412e-06, "loss": 0.5462, "step": 2902 }, { "epoch": 0.55, "learning_rate": 8.919019585657432e-06, "loss": 0.4924, "step": 2903 }, { "epoch": 0.55, "learning_rate": 8.912936454433131e-06, "loss": 0.547, "step": 2904 }, { "epoch": 0.55, "learning_rate": 8.906853730254306e-06, "loss": 0.5586, "step": 2905 }, { "epoch": 0.55, "learning_rate": 8.900771415398608e-06, "loss": 0.5253, "step": 2906 }, { "epoch": 0.55, "learning_rate": 8.894689512143528e-06, "loss": 0.5854, "step": 2907 }, { "epoch": 0.55, "learning_rate": 8.8886080227664e-06, "loss": 0.3988, "step": 2908 }, { "epoch": 0.55, "learning_rate": 8.88252694954441e-06, "loss": 0.6246, "step": 2909 }, { "epoch": 0.55, "learning_rate": 8.876446294754587e-06, "loss": 0.4862, "step": 2910 }, { "epoch": 0.55, "learning_rate": 8.870366060673793e-06, "loss": 0.6121, "step": 2911 }, { "epoch": 0.55, "learning_rate": 8.864286249578755e-06, "loss": 0.5953, "step": 2912 }, { "epoch": 0.55, "learning_rate": 8.858206863746018e-06, "loss": 0.4309, "step": 2913 }, { "epoch": 0.55, "learning_rate": 8.852127905451978e-06, "loss": 0.6244, "step": 2914 }, { "epoch": 0.55, "learning_rate": 8.846049376972876e-06, "loss": 0.5172, "step": 2915 }, { "epoch": 0.55, "learning_rate": 8.83997128058478e-06, "loss": 0.6256, "step": 2916 }, { "epoch": 0.55, "learning_rate": 8.833893618563604e-06, "loss": 0.3789, "step": 2917 }, { "epoch": 0.55, "learning_rate": 8.8278163931851e-06, "loss": 0.5416, "step": 2918 }, { "epoch": 0.55, "learning_rate": 8.821739606724857e-06, "loss": 0.6849, "step": 2919 }, { "epoch": 0.55, "learning_rate": 8.81566326145829e-06, "loss": 0.6048, "step": 2920 }, { "epoch": 0.55, "learning_rate": 8.809587359660661e-06, "loss": 0.5398, "step": 2921 }, { "epoch": 0.55, "learning_rate": 8.803511903607059e-06, "loss": 0.5547, "step": 2922 }, { "epoch": 0.55, "learning_rate": 8.797436895572407e-06, "loss": 0.3728, "step": 2923 }, { "epoch": 0.55, "learning_rate": 8.791362337831469e-06, "loss": 0.5033, "step": 2924 }, { "epoch": 0.55, "learning_rate": 8.785288232658821e-06, "loss": 0.6686, "step": 2925 }, { "epoch": 0.55, "learning_rate": 8.779214582328887e-06, "loss": 0.4512, "step": 2926 }, { "epoch": 0.55, "learning_rate": 8.773141389115914e-06, "loss": 0.6226, "step": 2927 }, { "epoch": 0.55, "learning_rate": 8.767068655293984e-06, "loss": 0.4401, "step": 2928 }, { "epoch": 0.55, "learning_rate": 8.76099638313699e-06, "loss": 0.7129, "step": 2929 }, { "epoch": 0.55, "learning_rate": 8.754924574918675e-06, "loss": 0.5941, "step": 2930 }, { "epoch": 0.55, "learning_rate": 8.748853232912588e-06, "loss": 0.5283, "step": 2931 }, { "epoch": 0.55, "learning_rate": 8.742782359392122e-06, "loss": 0.6601, "step": 2932 }, { "epoch": 0.55, "learning_rate": 8.73671195663048e-06, "loss": 0.5349, "step": 2933 }, { "epoch": 0.55, "learning_rate": 8.73064202690069e-06, "loss": 0.4126, "step": 2934 }, { "epoch": 0.55, "learning_rate": 8.724572572475613e-06, "loss": 0.6247, "step": 2935 }, { "epoch": 0.55, "learning_rate": 8.718503595627923e-06, "loss": 0.5575, "step": 2936 }, { "epoch": 0.55, "learning_rate": 8.712435098630116e-06, "loss": 0.4952, "step": 2937 }, { "epoch": 0.56, "learning_rate": 8.706367083754516e-06, "loss": 0.4761, "step": 2938 }, { "epoch": 0.56, "learning_rate": 8.700299553273255e-06, "loss": 0.5565, "step": 2939 }, { "epoch": 0.56, "learning_rate": 8.69423250945829e-06, "loss": 0.4512, "step": 2940 }, { "epoch": 0.56, "learning_rate": 8.688165954581398e-06, "loss": 0.4271, "step": 2941 }, { "epoch": 0.56, "learning_rate": 8.68209989091417e-06, "loss": 0.4833, "step": 2942 }, { "epoch": 0.56, "learning_rate": 8.676034320728007e-06, "loss": 0.3956, "step": 2943 }, { "epoch": 0.56, "learning_rate": 8.669969246294137e-06, "loss": 0.4187, "step": 2944 }, { "epoch": 0.56, "learning_rate": 8.663904669883599e-06, "loss": 0.4233, "step": 2945 }, { "epoch": 0.56, "learning_rate": 8.657840593767231e-06, "loss": 0.5181, "step": 2946 }, { "epoch": 0.56, "learning_rate": 8.651777020215713e-06, "loss": 0.5666, "step": 2947 }, { "epoch": 0.56, "learning_rate": 8.645713951499508e-06, "loss": 0.6894, "step": 2948 }, { "epoch": 0.56, "learning_rate": 8.6396513898889e-06, "loss": 0.522, "step": 2949 }, { "epoch": 0.56, "learning_rate": 8.633589337653995e-06, "loss": 0.5926, "step": 2950 }, { "epoch": 0.56, "learning_rate": 8.62752779706469e-06, "loss": 0.4989, "step": 2951 }, { "epoch": 0.56, "learning_rate": 8.621466770390698e-06, "loss": 0.5764, "step": 2952 }, { "epoch": 0.56, "learning_rate": 8.615406259901543e-06, "loss": 0.4098, "step": 2953 }, { "epoch": 0.56, "learning_rate": 8.609346267866556e-06, "loss": 0.3709, "step": 2954 }, { "epoch": 0.56, "learning_rate": 8.603286796554864e-06, "loss": 0.4361, "step": 2955 }, { "epoch": 0.56, "learning_rate": 8.597227848235407e-06, "loss": 0.4497, "step": 2956 }, { "epoch": 0.56, "learning_rate": 8.591169425176931e-06, "loss": 0.4724, "step": 2957 }, { "epoch": 0.56, "learning_rate": 8.585111529647976e-06, "loss": 0.5088, "step": 2958 }, { "epoch": 0.56, "learning_rate": 8.579054163916901e-06, "loss": 0.5065, "step": 2959 }, { "epoch": 0.56, "learning_rate": 8.572997330251845e-06, "loss": 0.6236, "step": 2960 }, { "epoch": 0.56, "learning_rate": 8.566941030920763e-06, "loss": 0.6003, "step": 2961 }, { "epoch": 0.56, "learning_rate": 8.560885268191409e-06, "loss": 0.6097, "step": 2962 }, { "epoch": 0.56, "learning_rate": 8.554830044331326e-06, "loss": 0.5347, "step": 2963 }, { "epoch": 0.56, "learning_rate": 8.548775361607872e-06, "loss": 0.4447, "step": 2964 }, { "epoch": 0.56, "learning_rate": 8.542721222288186e-06, "loss": 0.5525, "step": 2965 }, { "epoch": 0.56, "learning_rate": 8.536667628639207e-06, "loss": 0.5475, "step": 2966 }, { "epoch": 0.56, "learning_rate": 8.53061458292768e-06, "loss": 0.4177, "step": 2967 }, { "epoch": 0.56, "learning_rate": 8.524562087420137e-06, "loss": 0.5183, "step": 2968 }, { "epoch": 0.56, "learning_rate": 8.518510144382896e-06, "loss": 0.4696, "step": 2969 }, { "epoch": 0.56, "learning_rate": 8.512458756082084e-06, "loss": 0.5318, "step": 2970 }, { "epoch": 0.56, "learning_rate": 8.506407924783614e-06, "loss": 0.5538, "step": 2971 }, { "epoch": 0.56, "learning_rate": 8.50035765275318e-06, "loss": 0.5127, "step": 2972 }, { "epoch": 0.56, "learning_rate": 8.49430794225629e-06, "loss": 0.5981, "step": 2973 }, { "epoch": 0.56, "learning_rate": 8.488258795558216e-06, "loss": 0.3834, "step": 2974 }, { "epoch": 0.56, "learning_rate": 8.482210214924033e-06, "loss": 0.5269, "step": 2975 }, { "epoch": 0.56, "learning_rate": 8.47616220261861e-06, "loss": 0.5368, "step": 2976 }, { "epoch": 0.56, "learning_rate": 8.470114760906583e-06, "loss": 0.4843, "step": 2977 }, { "epoch": 0.56, "learning_rate": 8.464067892052392e-06, "loss": 0.5317, "step": 2978 }, { "epoch": 0.56, "learning_rate": 8.458021598320255e-06, "loss": 0.4098, "step": 2979 }, { "epoch": 0.56, "learning_rate": 8.451975881974183e-06, "loss": 0.4445, "step": 2980 }, { "epoch": 0.56, "learning_rate": 8.445930745277953e-06, "loss": 0.4867, "step": 2981 }, { "epoch": 0.56, "learning_rate": 8.439886190495142e-06, "loss": 0.4445, "step": 2982 }, { "epoch": 0.56, "learning_rate": 8.433842219889103e-06, "loss": 0.5093, "step": 2983 }, { "epoch": 0.56, "learning_rate": 8.427798835722969e-06, "loss": 0.5275, "step": 2984 }, { "epoch": 0.56, "learning_rate": 8.421756040259658e-06, "loss": 0.5373, "step": 2985 }, { "epoch": 0.56, "learning_rate": 8.415713835761861e-06, "loss": 0.5868, "step": 2986 }, { "epoch": 0.56, "learning_rate": 8.409672224492051e-06, "loss": 0.5477, "step": 2987 }, { "epoch": 0.56, "learning_rate": 8.403631208712481e-06, "loss": 0.5404, "step": 2988 }, { "epoch": 0.56, "learning_rate": 8.397590790685182e-06, "loss": 0.5223, "step": 2989 }, { "epoch": 0.56, "learning_rate": 8.391550972671948e-06, "loss": 0.5017, "step": 2990 }, { "epoch": 0.57, "learning_rate": 8.385511756934367e-06, "loss": 0.4057, "step": 2991 }, { "epoch": 0.57, "learning_rate": 8.379473145733793e-06, "loss": 0.4246, "step": 2992 }, { "epoch": 0.57, "learning_rate": 8.373435141331348e-06, "loss": 0.5538, "step": 2993 }, { "epoch": 0.57, "learning_rate": 8.367397745987938e-06, "loss": 0.466, "step": 2994 }, { "epoch": 0.57, "learning_rate": 8.36136096196423e-06, "loss": 0.4652, "step": 2995 }, { "epoch": 0.57, "learning_rate": 8.355324791520675e-06, "loss": 0.5004, "step": 2996 }, { "epoch": 0.57, "learning_rate": 8.349289236917482e-06, "loss": 0.5398, "step": 2997 }, { "epoch": 0.57, "learning_rate": 8.343254300414629e-06, "loss": 0.5208, "step": 2998 }, { "epoch": 0.57, "learning_rate": 8.337219984271879e-06, "loss": 0.5345, "step": 2999 }, { "epoch": 0.57, "learning_rate": 8.331186290748742e-06, "loss": 0.4417, "step": 3000 }, { "epoch": 0.57, "learning_rate": 8.325153222104506e-06, "loss": 0.4823, "step": 3001 }, { "epoch": 0.57, "learning_rate": 8.319120780598228e-06, "loss": 0.4919, "step": 3002 }, { "epoch": 0.57, "learning_rate": 8.313088968488723e-06, "loss": 0.4386, "step": 3003 }, { "epoch": 0.57, "learning_rate": 8.307057788034568e-06, "loss": 0.4287, "step": 3004 }, { "epoch": 0.57, "learning_rate": 8.301027241494112e-06, "loss": 0.5095, "step": 3005 }, { "epoch": 0.57, "learning_rate": 8.294997331125467e-06, "loss": 0.5032, "step": 3006 }, { "epoch": 0.57, "learning_rate": 8.28896805918649e-06, "loss": 0.5239, "step": 3007 }, { "epoch": 0.57, "learning_rate": 8.282939427934828e-06, "loss": 0.6591, "step": 3008 }, { "epoch": 0.57, "learning_rate": 8.276911439627862e-06, "loss": 0.4267, "step": 3009 }, { "epoch": 0.57, "learning_rate": 8.27088409652274e-06, "loss": 0.501, "step": 3010 }, { "epoch": 0.57, "learning_rate": 8.26485740087638e-06, "loss": 0.4469, "step": 3011 }, { "epoch": 0.57, "learning_rate": 8.25883135494544e-06, "loss": 0.4296, "step": 3012 }, { "epoch": 0.57, "learning_rate": 8.252805960986338e-06, "loss": 0.5235, "step": 3013 }, { "epoch": 0.57, "learning_rate": 8.246781221255265e-06, "loss": 0.5718, "step": 3014 }, { "epoch": 0.57, "learning_rate": 8.240757138008149e-06, "loss": 0.6041, "step": 3015 }, { "epoch": 0.57, "learning_rate": 8.234733713500673e-06, "loss": 0.4478, "step": 3016 }, { "epoch": 0.57, "learning_rate": 8.228710949988283e-06, "loss": 0.4361, "step": 3017 }, { "epoch": 0.57, "learning_rate": 8.222688849726172e-06, "loss": 0.5025, "step": 3018 }, { "epoch": 0.57, "learning_rate": 8.216667414969285e-06, "loss": 0.5559, "step": 3019 }, { "epoch": 0.57, "learning_rate": 8.210646647972319e-06, "loss": 0.4382, "step": 3020 }, { "epoch": 0.57, "learning_rate": 8.204626550989717e-06, "loss": 0.6519, "step": 3021 }, { "epoch": 0.57, "learning_rate": 8.198607126275674e-06, "loss": 0.4549, "step": 3022 }, { "epoch": 0.57, "learning_rate": 8.192588376084137e-06, "loss": 0.4406, "step": 3023 }, { "epoch": 0.57, "learning_rate": 8.186570302668792e-06, "loss": 0.5285, "step": 3024 }, { "epoch": 0.57, "learning_rate": 8.180552908283083e-06, "loss": 0.3716, "step": 3025 }, { "epoch": 0.57, "learning_rate": 8.174536195180189e-06, "loss": 0.4863, "step": 3026 }, { "epoch": 0.57, "learning_rate": 8.168520165613035e-06, "loss": 0.5362, "step": 3027 }, { "epoch": 0.57, "learning_rate": 8.162504821834296e-06, "loss": 0.4767, "step": 3028 }, { "epoch": 0.57, "learning_rate": 8.156490166096391e-06, "loss": 0.5038, "step": 3029 }, { "epoch": 0.57, "learning_rate": 8.15047620065147e-06, "loss": 0.5282, "step": 3030 }, { "epoch": 0.57, "learning_rate": 8.144462927751439e-06, "loss": 0.5963, "step": 3031 }, { "epoch": 0.57, "learning_rate": 8.138450349647936e-06, "loss": 0.6506, "step": 3032 }, { "epoch": 0.57, "learning_rate": 8.132438468592332e-06, "loss": 0.5802, "step": 3033 }, { "epoch": 0.57, "learning_rate": 8.12642728683576e-06, "loss": 0.4579, "step": 3034 }, { "epoch": 0.57, "learning_rate": 8.12041680662907e-06, "loss": 0.4116, "step": 3035 }, { "epoch": 0.57, "learning_rate": 8.114407030222851e-06, "loss": 0.8309, "step": 3036 }, { "epoch": 0.57, "learning_rate": 8.108397959867445e-06, "loss": 0.4523, "step": 3037 }, { "epoch": 0.57, "learning_rate": 8.102389597812909e-06, "loss": 0.4969, "step": 3038 }, { "epoch": 0.57, "learning_rate": 8.096381946309043e-06, "loss": 0.5381, "step": 3039 }, { "epoch": 0.57, "learning_rate": 8.090375007605388e-06, "loss": 0.4538, "step": 3040 }, { "epoch": 0.57, "learning_rate": 8.084368783951215e-06, "loss": 0.5091, "step": 3041 }, { "epoch": 0.57, "learning_rate": 8.078363277595512e-06, "loss": 0.5762, "step": 3042 }, { "epoch": 0.57, "learning_rate": 8.072358490787018e-06, "loss": 0.4071, "step": 3043 }, { "epoch": 0.58, "learning_rate": 8.066354425774196e-06, "loss": 0.4642, "step": 3044 }, { "epoch": 0.58, "learning_rate": 8.060351084805236e-06, "loss": 0.4128, "step": 3045 }, { "epoch": 0.58, "learning_rate": 8.054348470128063e-06, "loss": 0.4752, "step": 3046 }, { "epoch": 0.58, "learning_rate": 8.04834658399032e-06, "loss": 0.4344, "step": 3047 }, { "epoch": 0.58, "learning_rate": 8.042345428639382e-06, "loss": 0.5735, "step": 3048 }, { "epoch": 0.58, "learning_rate": 8.036345006322358e-06, "loss": 0.4243, "step": 3049 }, { "epoch": 0.58, "learning_rate": 8.030345319286076e-06, "loss": 0.5409, "step": 3050 }, { "epoch": 0.58, "learning_rate": 8.024346369777081e-06, "loss": 0.4688, "step": 3051 }, { "epoch": 0.58, "learning_rate": 8.018348160041656e-06, "loss": 0.6312, "step": 3052 }, { "epoch": 0.58, "learning_rate": 8.0123506923258e-06, "loss": 0.5084, "step": 3053 }, { "epoch": 0.58, "learning_rate": 8.006353968875226e-06, "loss": 0.6555, "step": 3054 }, { "epoch": 0.58, "learning_rate": 8.000357991935393e-06, "loss": 0.5515, "step": 3055 }, { "epoch": 0.58, "learning_rate": 7.994362763751448e-06, "loss": 0.495, "step": 3056 }, { "epoch": 0.58, "learning_rate": 7.988368286568287e-06, "loss": 0.5238, "step": 3057 }, { "epoch": 0.58, "learning_rate": 7.982374562630506e-06, "loss": 0.4535, "step": 3058 }, { "epoch": 0.58, "learning_rate": 7.97638159418242e-06, "loss": 0.4129, "step": 3059 }, { "epoch": 0.58, "learning_rate": 7.970389383468077e-06, "loss": 0.6314, "step": 3060 }, { "epoch": 0.58, "learning_rate": 7.96439793273122e-06, "loss": 0.4525, "step": 3061 }, { "epoch": 0.58, "learning_rate": 7.958407244215324e-06, "loss": 0.5849, "step": 3062 }, { "epoch": 0.58, "learning_rate": 7.952417320163572e-06, "loss": 0.672, "step": 3063 }, { "epoch": 0.58, "learning_rate": 7.946428162818857e-06, "loss": 0.6415, "step": 3064 }, { "epoch": 0.58, "learning_rate": 7.940439774423788e-06, "loss": 0.3893, "step": 3065 }, { "epoch": 0.58, "learning_rate": 7.934452157220693e-06, "loss": 0.5258, "step": 3066 }, { "epoch": 0.58, "learning_rate": 7.928465313451603e-06, "loss": 0.7122, "step": 3067 }, { "epoch": 0.58, "learning_rate": 7.922479245358257e-06, "loss": 0.4717, "step": 3068 }, { "epoch": 0.58, "learning_rate": 7.916493955182114e-06, "loss": 0.5139, "step": 3069 }, { "epoch": 0.58, "learning_rate": 7.910509445164333e-06, "loss": 0.5068, "step": 3070 }, { "epoch": 0.58, "learning_rate": 7.904525717545781e-06, "loss": 0.4205, "step": 3071 }, { "epoch": 0.58, "learning_rate": 7.898542774567043e-06, "loss": 0.5386, "step": 3072 }, { "epoch": 0.58, "learning_rate": 7.892560618468395e-06, "loss": 0.3922, "step": 3073 }, { "epoch": 0.58, "learning_rate": 7.886579251489822e-06, "loss": 0.6506, "step": 3074 }, { "epoch": 0.58, "learning_rate": 7.880598675871028e-06, "loss": 0.5176, "step": 3075 }, { "epoch": 0.58, "learning_rate": 7.874618893851403e-06, "loss": 0.5194, "step": 3076 }, { "epoch": 0.58, "learning_rate": 7.868639907670042e-06, "loss": 0.5078, "step": 3077 }, { "epoch": 0.58, "learning_rate": 7.862661719565753e-06, "loss": 0.4738, "step": 3078 }, { "epoch": 0.58, "learning_rate": 7.856684331777039e-06, "loss": 0.5476, "step": 3079 }, { "epoch": 0.58, "learning_rate": 7.850707746542093e-06, "loss": 0.4622, "step": 3080 }, { "epoch": 0.58, "learning_rate": 7.844731966098832e-06, "loss": 0.4384, "step": 3081 }, { "epoch": 0.58, "learning_rate": 7.838756992684847e-06, "loss": 0.4824, "step": 3082 }, { "epoch": 0.58, "learning_rate": 7.832782828537437e-06, "loss": 0.5829, "step": 3083 }, { "epoch": 0.58, "learning_rate": 7.826809475893604e-06, "loss": 0.376, "step": 3084 }, { "epoch": 0.58, "learning_rate": 7.820836936990031e-06, "loss": 0.605, "step": 3085 }, { "epoch": 0.58, "learning_rate": 7.814865214063111e-06, "loss": 0.4291, "step": 3086 }, { "epoch": 0.58, "learning_rate": 7.808894309348925e-06, "loss": 0.4467, "step": 3087 }, { "epoch": 0.58, "learning_rate": 7.802924225083246e-06, "loss": 0.4315, "step": 3088 }, { "epoch": 0.58, "learning_rate": 7.796954963501545e-06, "loss": 0.4285, "step": 3089 }, { "epoch": 0.58, "learning_rate": 7.790986526838985e-06, "loss": 0.5254, "step": 3090 }, { "epoch": 0.58, "learning_rate": 7.785018917330409e-06, "loss": 0.6135, "step": 3091 }, { "epoch": 0.58, "learning_rate": 7.77905213721036e-06, "loss": 0.4402, "step": 3092 }, { "epoch": 0.58, "learning_rate": 7.773086188713078e-06, "loss": 0.5399, "step": 3093 }, { "epoch": 0.58, "learning_rate": 7.767121074072465e-06, "loss": 0.7058, "step": 3094 }, { "epoch": 0.58, "learning_rate": 7.761156795522147e-06, "loss": 0.5413, "step": 3095 }, { "epoch": 0.58, "learning_rate": 7.755193355295409e-06, "loss": 0.4109, "step": 3096 }, { "epoch": 0.59, "learning_rate": 7.749230755625228e-06, "loss": 0.6396, "step": 3097 }, { "epoch": 0.59, "learning_rate": 7.743268998744278e-06, "loss": 0.4935, "step": 3098 }, { "epoch": 0.59, "learning_rate": 7.737308086884902e-06, "loss": 0.3488, "step": 3099 }, { "epoch": 0.59, "learning_rate": 7.731348022279135e-06, "loss": 0.5519, "step": 3100 }, { "epoch": 0.59, "learning_rate": 7.725388807158693e-06, "loss": 0.5785, "step": 3101 }, { "epoch": 0.59, "learning_rate": 7.71943044375498e-06, "loss": 0.4332, "step": 3102 }, { "epoch": 0.59, "learning_rate": 7.713472934299066e-06, "loss": 0.5539, "step": 3103 }, { "epoch": 0.59, "learning_rate": 7.707516281021716e-06, "loss": 0.5435, "step": 3104 }, { "epoch": 0.59, "learning_rate": 7.701560486153367e-06, "loss": 0.617, "step": 3105 }, { "epoch": 0.59, "learning_rate": 7.695605551924137e-06, "loss": 0.5418, "step": 3106 }, { "epoch": 0.59, "learning_rate": 7.689651480563824e-06, "loss": 0.4609, "step": 3107 }, { "epoch": 0.59, "learning_rate": 7.683698274301896e-06, "loss": 0.5396, "step": 3108 }, { "epoch": 0.59, "learning_rate": 7.6777459353675e-06, "loss": 0.5072, "step": 3109 }, { "epoch": 0.59, "learning_rate": 7.671794465989464e-06, "loss": 0.5391, "step": 3110 }, { "epoch": 0.59, "learning_rate": 7.665843868396287e-06, "loss": 0.4339, "step": 3111 }, { "epoch": 0.59, "learning_rate": 7.659894144816132e-06, "loss": 0.5433, "step": 3112 }, { "epoch": 0.59, "learning_rate": 7.653945297476849e-06, "loss": 0.388, "step": 3113 }, { "epoch": 0.59, "learning_rate": 7.647997328605956e-06, "loss": 0.4643, "step": 3114 }, { "epoch": 0.59, "learning_rate": 7.642050240430632e-06, "loss": 0.3923, "step": 3115 }, { "epoch": 0.59, "learning_rate": 7.636104035177745e-06, "loss": 0.556, "step": 3116 }, { "epoch": 0.59, "learning_rate": 7.630158715073813e-06, "loss": 0.4252, "step": 3117 }, { "epoch": 0.59, "learning_rate": 7.624214282345033e-06, "loss": 0.384, "step": 3118 }, { "epoch": 0.59, "learning_rate": 7.6182707392172735e-06, "loss": 0.5383, "step": 3119 }, { "epoch": 0.59, "learning_rate": 7.612328087916053e-06, "loss": 0.5599, "step": 3120 }, { "epoch": 0.59, "learning_rate": 7.606386330666582e-06, "loss": 0.5302, "step": 3121 }, { "epoch": 0.59, "learning_rate": 7.600445469693711e-06, "loss": 0.6102, "step": 3122 }, { "epoch": 0.59, "learning_rate": 7.594505507221967e-06, "loss": 0.4188, "step": 3123 }, { "epoch": 0.59, "learning_rate": 7.588566445475544e-06, "loss": 0.5367, "step": 3124 }, { "epoch": 0.59, "learning_rate": 7.582628286678291e-06, "loss": 0.4214, "step": 3125 }, { "epoch": 0.59, "learning_rate": 7.576691033053717e-06, "loss": 0.4915, "step": 3126 }, { "epoch": 0.59, "learning_rate": 7.570754686825004e-06, "loss": 0.5177, "step": 3127 }, { "epoch": 0.59, "learning_rate": 7.5648192502149856e-06, "loss": 0.5544, "step": 3128 }, { "epoch": 0.59, "learning_rate": 7.558884725446149e-06, "loss": 0.4857, "step": 3129 }, { "epoch": 0.59, "learning_rate": 7.5529511147406565e-06, "loss": 0.4185, "step": 3130 }, { "epoch": 0.59, "learning_rate": 7.547018420320314e-06, "loss": 0.6089, "step": 3131 }, { "epoch": 0.59, "learning_rate": 7.541086644406589e-06, "loss": 0.5309, "step": 3132 }, { "epoch": 0.59, "learning_rate": 7.5351557892206105e-06, "loss": 0.42, "step": 3133 }, { "epoch": 0.59, "learning_rate": 7.529225856983151e-06, "loss": 0.4956, "step": 3134 }, { "epoch": 0.59, "learning_rate": 7.523296849914643e-06, "loss": 0.5773, "step": 3135 }, { "epoch": 0.59, "learning_rate": 7.51736877023518e-06, "loss": 0.5708, "step": 3136 }, { "epoch": 0.59, "learning_rate": 7.511441620164499e-06, "loss": 0.4824, "step": 3137 }, { "epoch": 0.59, "learning_rate": 7.505515401921986e-06, "loss": 0.4543, "step": 3138 }, { "epoch": 0.59, "learning_rate": 7.4995901177266905e-06, "loss": 0.421, "step": 3139 }, { "epoch": 0.59, "learning_rate": 7.493665769797306e-06, "loss": 0.4855, "step": 3140 }, { "epoch": 0.59, "learning_rate": 7.487742360352164e-06, "loss": 0.574, "step": 3141 }, { "epoch": 0.59, "learning_rate": 7.481819891609271e-06, "loss": 0.4424, "step": 3142 }, { "epoch": 0.59, "learning_rate": 7.475898365786256e-06, "loss": 0.5256, "step": 3143 }, { "epoch": 0.59, "learning_rate": 7.469977785100403e-06, "loss": 0.5839, "step": 3144 }, { "epoch": 0.59, "learning_rate": 7.464058151768652e-06, "loss": 0.4622, "step": 3145 }, { "epoch": 0.59, "learning_rate": 7.458139468007573e-06, "loss": 0.525, "step": 3146 }, { "epoch": 0.59, "learning_rate": 7.452221736033387e-06, "loss": 0.4739, "step": 3147 }, { "epoch": 0.59, "learning_rate": 7.446304958061963e-06, "loss": 0.6758, "step": 3148 }, { "epoch": 0.59, "learning_rate": 7.440389136308806e-06, "loss": 0.417, "step": 3149 }, { "epoch": 0.6, "learning_rate": 7.434474272989071e-06, "loss": 0.4672, "step": 3150 }, { "epoch": 0.6, "learning_rate": 7.428560370317542e-06, "loss": 0.6176, "step": 3151 }, { "epoch": 0.6, "learning_rate": 7.422647430508652e-06, "loss": 0.3036, "step": 3152 }, { "epoch": 0.6, "learning_rate": 7.4167354557764735e-06, "loss": 0.5978, "step": 3153 }, { "epoch": 0.6, "learning_rate": 7.410824448334717e-06, "loss": 0.5033, "step": 3154 }, { "epoch": 0.6, "learning_rate": 7.404914410396722e-06, "loss": 0.3857, "step": 3155 }, { "epoch": 0.6, "learning_rate": 7.39900534417548e-06, "loss": 0.5037, "step": 3156 }, { "epoch": 0.6, "learning_rate": 7.393097251883609e-06, "loss": 0.5485, "step": 3157 }, { "epoch": 0.6, "learning_rate": 7.387190135733362e-06, "loss": 0.468, "step": 3158 }, { "epoch": 0.6, "learning_rate": 7.3812839979366366e-06, "loss": 0.6598, "step": 3159 }, { "epoch": 0.6, "learning_rate": 7.3753788407049475e-06, "loss": 0.6112, "step": 3160 }, { "epoch": 0.6, "learning_rate": 7.3694746662494535e-06, "loss": 0.5631, "step": 3161 }, { "epoch": 0.6, "learning_rate": 7.363571476780945e-06, "loss": 0.483, "step": 3162 }, { "epoch": 0.6, "learning_rate": 7.3576692745098435e-06, "loss": 0.4778, "step": 3163 }, { "epoch": 0.6, "learning_rate": 7.351768061646192e-06, "loss": 0.4241, "step": 3164 }, { "epoch": 0.6, "learning_rate": 7.345867840399676e-06, "loss": 0.3879, "step": 3165 }, { "epoch": 0.6, "learning_rate": 7.339968612979605e-06, "loss": 0.4364, "step": 3166 }, { "epoch": 0.6, "learning_rate": 7.334070381594904e-06, "loss": 0.5433, "step": 3167 }, { "epoch": 0.6, "learning_rate": 7.328173148454151e-06, "loss": 0.4448, "step": 3168 }, { "epoch": 0.6, "learning_rate": 7.322276915765526e-06, "loss": 0.4766, "step": 3169 }, { "epoch": 0.6, "learning_rate": 7.3163816857368444e-06, "loss": 0.4472, "step": 3170 }, { "epoch": 0.6, "learning_rate": 7.310487460575548e-06, "loss": 0.5455, "step": 3171 }, { "epoch": 0.6, "learning_rate": 7.304594242488702e-06, "loss": 0.3437, "step": 3172 }, { "epoch": 0.6, "learning_rate": 7.298702033682985e-06, "loss": 0.44, "step": 3173 }, { "epoch": 0.6, "learning_rate": 7.29281083636471e-06, "loss": 0.47, "step": 3174 }, { "epoch": 0.6, "learning_rate": 7.286920652739808e-06, "loss": 0.6174, "step": 3175 }, { "epoch": 0.6, "learning_rate": 7.281031485013819e-06, "loss": 0.6003, "step": 3176 }, { "epoch": 0.6, "learning_rate": 7.275143335391927e-06, "loss": 0.434, "step": 3177 }, { "epoch": 0.6, "learning_rate": 7.2692562060789085e-06, "loss": 0.6152, "step": 3178 }, { "epoch": 0.6, "learning_rate": 7.263370099279173e-06, "loss": 0.5534, "step": 3179 }, { "epoch": 0.6, "learning_rate": 7.257485017196746e-06, "loss": 0.6418, "step": 3180 }, { "epoch": 0.6, "learning_rate": 7.251600962035258e-06, "loss": 0.5835, "step": 3181 }, { "epoch": 0.6, "learning_rate": 7.245717935997978e-06, "loss": 0.4626, "step": 3182 }, { "epoch": 0.6, "learning_rate": 7.239835941287765e-06, "loss": 0.5387, "step": 3183 }, { "epoch": 0.6, "learning_rate": 7.233954980107103e-06, "loss": 0.6547, "step": 3184 }, { "epoch": 0.6, "learning_rate": 7.228075054658096e-06, "loss": 0.4901, "step": 3185 }, { "epoch": 0.6, "learning_rate": 7.222196167142445e-06, "loss": 0.6247, "step": 3186 }, { "epoch": 0.6, "learning_rate": 7.21631831976147e-06, "loss": 0.5879, "step": 3187 }, { "epoch": 0.6, "learning_rate": 7.210441514716105e-06, "loss": 0.4696, "step": 3188 }, { "epoch": 0.6, "learning_rate": 7.20456575420689e-06, "loss": 0.4383, "step": 3189 }, { "epoch": 0.6, "learning_rate": 7.19869104043397e-06, "loss": 0.575, "step": 3190 }, { "epoch": 0.6, "learning_rate": 7.192817375597106e-06, "loss": 0.4951, "step": 3191 }, { "epoch": 0.6, "learning_rate": 7.18694476189566e-06, "loss": 0.3872, "step": 3192 }, { "epoch": 0.6, "learning_rate": 7.181073201528602e-06, "loss": 0.618, "step": 3193 }, { "epoch": 0.6, "learning_rate": 7.1752026966945145e-06, "loss": 0.4462, "step": 3194 }, { "epoch": 0.6, "learning_rate": 7.16933324959157e-06, "loss": 0.5992, "step": 3195 }, { "epoch": 0.6, "learning_rate": 7.1634648624175575e-06, "loss": 0.4699, "step": 3196 }, { "epoch": 0.6, "learning_rate": 7.157597537369866e-06, "loss": 0.426, "step": 3197 }, { "epoch": 0.6, "learning_rate": 7.151731276645486e-06, "loss": 0.4749, "step": 3198 }, { "epoch": 0.6, "learning_rate": 7.145866082441004e-06, "loss": 0.4018, "step": 3199 }, { "epoch": 0.6, "learning_rate": 7.14000195695262e-06, "loss": 0.5467, "step": 3200 }, { "epoch": 0.6, "learning_rate": 7.134138902376124e-06, "loss": 0.7808, "step": 3201 }, { "epoch": 0.6, "learning_rate": 7.1282769209069005e-06, "loss": 0.5285, "step": 3202 }, { "epoch": 0.61, "learning_rate": 7.122416014739949e-06, "loss": 0.4721, "step": 3203 }, { "epoch": 0.61, "learning_rate": 7.116556186069852e-06, "loss": 0.4818, "step": 3204 }, { "epoch": 0.61, "learning_rate": 7.110697437090789e-06, "loss": 0.6775, "step": 3205 }, { "epoch": 0.61, "learning_rate": 7.104839769996548e-06, "loss": 0.8018, "step": 3206 }, { "epoch": 0.61, "learning_rate": 7.098983186980495e-06, "loss": 0.4717, "step": 3207 }, { "epoch": 0.61, "learning_rate": 7.0931276902356e-06, "loss": 0.5495, "step": 3208 }, { "epoch": 0.61, "learning_rate": 7.087273281954426e-06, "loss": 0.4359, "step": 3209 }, { "epoch": 0.61, "learning_rate": 7.0814199643291296e-06, "loss": 0.4738, "step": 3210 }, { "epoch": 0.61, "learning_rate": 7.075567739551448e-06, "loss": 0.5617, "step": 3211 }, { "epoch": 0.61, "learning_rate": 7.069716609812722e-06, "loss": 0.4124, "step": 3212 }, { "epoch": 0.61, "learning_rate": 7.063866577303879e-06, "loss": 0.4728, "step": 3213 }, { "epoch": 0.61, "learning_rate": 7.058017644215433e-06, "loss": 0.5131, "step": 3214 }, { "epoch": 0.61, "learning_rate": 7.05216981273749e-06, "loss": 0.5255, "step": 3215 }, { "epoch": 0.61, "learning_rate": 7.046323085059734e-06, "loss": 0.3221, "step": 3216 }, { "epoch": 0.61, "learning_rate": 7.040477463371449e-06, "loss": 0.5473, "step": 3217 }, { "epoch": 0.61, "learning_rate": 7.034632949861497e-06, "loss": 0.4032, "step": 3218 }, { "epoch": 0.61, "learning_rate": 7.028789546718327e-06, "loss": 0.938, "step": 3219 }, { "epoch": 0.61, "learning_rate": 7.022947256129973e-06, "loss": 0.4234, "step": 3220 }, { "epoch": 0.61, "learning_rate": 7.017106080284048e-06, "loss": 0.5973, "step": 3221 }, { "epoch": 0.61, "learning_rate": 7.011266021367753e-06, "loss": 0.5066, "step": 3222 }, { "epoch": 0.61, "learning_rate": 7.005427081567868e-06, "loss": 0.5323, "step": 3223 }, { "epoch": 0.61, "learning_rate": 6.999589263070758e-06, "loss": 0.6164, "step": 3224 }, { "epoch": 0.61, "learning_rate": 6.99375256806236e-06, "loss": 0.5667, "step": 3225 }, { "epoch": 0.61, "learning_rate": 6.987916998728197e-06, "loss": 0.4044, "step": 3226 }, { "epoch": 0.61, "learning_rate": 6.982082557253371e-06, "loss": 0.4616, "step": 3227 }, { "epoch": 0.61, "learning_rate": 6.976249245822552e-06, "loss": 0.4577, "step": 3228 }, { "epoch": 0.61, "learning_rate": 6.9704170666200045e-06, "loss": 0.6498, "step": 3229 }, { "epoch": 0.61, "learning_rate": 6.964586021829551e-06, "loss": 0.5295, "step": 3230 }, { "epoch": 0.61, "learning_rate": 6.958756113634597e-06, "loss": 0.5282, "step": 3231 }, { "epoch": 0.61, "learning_rate": 6.95292734421813e-06, "loss": 0.3377, "step": 3232 }, { "epoch": 0.61, "learning_rate": 6.947099715762694e-06, "loss": 0.4739, "step": 3233 }, { "epoch": 0.61, "learning_rate": 6.941273230450418e-06, "loss": 0.4566, "step": 3234 }, { "epoch": 0.61, "learning_rate": 6.935447890463003e-06, "loss": 0.4811, "step": 3235 }, { "epoch": 0.61, "learning_rate": 6.9296236979817175e-06, "loss": 0.5803, "step": 3236 }, { "epoch": 0.61, "learning_rate": 6.9238006551873985e-06, "loss": 0.3844, "step": 3237 }, { "epoch": 0.61, "learning_rate": 6.917978764260457e-06, "loss": 0.4073, "step": 3238 }, { "epoch": 0.61, "learning_rate": 6.912158027380871e-06, "loss": 0.4587, "step": 3239 }, { "epoch": 0.61, "learning_rate": 6.9063384467281825e-06, "loss": 0.4974, "step": 3240 }, { "epoch": 0.61, "learning_rate": 6.900520024481512e-06, "loss": 0.4855, "step": 3241 }, { "epoch": 0.61, "learning_rate": 6.894702762819531e-06, "loss": 0.5904, "step": 3242 }, { "epoch": 0.61, "learning_rate": 6.888886663920487e-06, "loss": 0.3835, "step": 3243 }, { "epoch": 0.61, "learning_rate": 6.883071729962188e-06, "loss": 0.6465, "step": 3244 }, { "epoch": 0.61, "learning_rate": 6.877257963122006e-06, "loss": 0.6476, "step": 3245 }, { "epoch": 0.61, "learning_rate": 6.871445365576883e-06, "loss": 0.5638, "step": 3246 }, { "epoch": 0.61, "learning_rate": 6.86563393950331e-06, "loss": 0.5597, "step": 3247 }, { "epoch": 0.61, "learning_rate": 6.859823687077347e-06, "loss": 0.5136, "step": 3248 }, { "epoch": 0.61, "learning_rate": 6.854014610474616e-06, "loss": 0.4409, "step": 3249 }, { "epoch": 0.61, "learning_rate": 6.848206711870298e-06, "loss": 0.4024, "step": 3250 }, { "epoch": 0.61, "learning_rate": 6.842399993439125e-06, "loss": 0.5812, "step": 3251 }, { "epoch": 0.61, "learning_rate": 6.8365944573554e-06, "loss": 0.4905, "step": 3252 }, { "epoch": 0.61, "learning_rate": 6.8307901057929735e-06, "loss": 0.605, "step": 3253 }, { "epoch": 0.61, "learning_rate": 6.824986940925256e-06, "loss": 0.5233, "step": 3254 }, { "epoch": 0.61, "learning_rate": 6.8191849649252174e-06, "loss": 0.6124, "step": 3255 }, { "epoch": 0.62, "learning_rate": 6.813384179965373e-06, "loss": 0.4578, "step": 3256 }, { "epoch": 0.62, "learning_rate": 6.807584588217798e-06, "loss": 0.4537, "step": 3257 }, { "epoch": 0.62, "learning_rate": 6.801786191854125e-06, "loss": 0.5043, "step": 3258 }, { "epoch": 0.62, "learning_rate": 6.795988993045532e-06, "loss": 0.5448, "step": 3259 }, { "epoch": 0.62, "learning_rate": 6.790192993962748e-06, "loss": 0.5025, "step": 3260 }, { "epoch": 0.62, "learning_rate": 6.784398196776059e-06, "loss": 0.6605, "step": 3261 }, { "epoch": 0.62, "learning_rate": 6.778604603655297e-06, "loss": 0.6083, "step": 3262 }, { "epoch": 0.62, "learning_rate": 6.7728122167698395e-06, "loss": 0.4249, "step": 3263 }, { "epoch": 0.62, "learning_rate": 6.767021038288628e-06, "loss": 0.5278, "step": 3264 }, { "epoch": 0.62, "learning_rate": 6.761231070380129e-06, "loss": 0.324, "step": 3265 }, { "epoch": 0.62, "learning_rate": 6.755442315212368e-06, "loss": 0.4769, "step": 3266 }, { "epoch": 0.62, "learning_rate": 6.749654774952925e-06, "loss": 0.5361, "step": 3267 }, { "epoch": 0.62, "learning_rate": 6.743868451768904e-06, "loss": 0.5796, "step": 3268 }, { "epoch": 0.62, "learning_rate": 6.738083347826968e-06, "loss": 0.637, "step": 3269 }, { "epoch": 0.62, "learning_rate": 6.732299465293322e-06, "loss": 0.6694, "step": 3270 }, { "epoch": 0.62, "learning_rate": 6.726516806333714e-06, "loss": 0.4917, "step": 3271 }, { "epoch": 0.62, "learning_rate": 6.720735373113424e-06, "loss": 0.433, "step": 3272 }, { "epoch": 0.62, "learning_rate": 6.714955167797288e-06, "loss": 0.4871, "step": 3273 }, { "epoch": 0.62, "learning_rate": 6.709176192549668e-06, "loss": 0.5476, "step": 3274 }, { "epoch": 0.62, "learning_rate": 6.7033984495344785e-06, "loss": 0.4455, "step": 3275 }, { "epoch": 0.62, "learning_rate": 6.697621940915167e-06, "loss": 0.5125, "step": 3276 }, { "epoch": 0.62, "learning_rate": 6.691846668854709e-06, "loss": 0.4601, "step": 3277 }, { "epoch": 0.62, "learning_rate": 6.686072635515636e-06, "loss": 0.4738, "step": 3278 }, { "epoch": 0.62, "learning_rate": 6.680299843060001e-06, "loss": 0.5729, "step": 3279 }, { "epoch": 0.62, "learning_rate": 6.674528293649396e-06, "loss": 0.6135, "step": 3280 }, { "epoch": 0.62, "learning_rate": 6.6687579894449536e-06, "loss": 0.441, "step": 3281 }, { "epoch": 0.62, "learning_rate": 6.662988932607331e-06, "loss": 0.3674, "step": 3282 }, { "epoch": 0.62, "learning_rate": 6.657221125296722e-06, "loss": 0.3977, "step": 3283 }, { "epoch": 0.62, "learning_rate": 6.651454569672856e-06, "loss": 0.413, "step": 3284 }, { "epoch": 0.62, "learning_rate": 6.645689267894993e-06, "loss": 0.5119, "step": 3285 }, { "epoch": 0.62, "learning_rate": 6.639925222121911e-06, "loss": 0.5819, "step": 3286 }, { "epoch": 0.62, "learning_rate": 6.634162434511939e-06, "loss": 0.6636, "step": 3287 }, { "epoch": 0.62, "learning_rate": 6.62840090722292e-06, "loss": 0.7617, "step": 3288 }, { "epoch": 0.62, "learning_rate": 6.622640642412224e-06, "loss": 0.374, "step": 3289 }, { "epoch": 0.62, "learning_rate": 6.616881642236763e-06, "loss": 0.4353, "step": 3290 }, { "epoch": 0.62, "learning_rate": 6.611123908852959e-06, "loss": 0.5051, "step": 3291 }, { "epoch": 0.62, "learning_rate": 6.6053674444167635e-06, "loss": 0.4328, "step": 3292 }, { "epoch": 0.62, "learning_rate": 6.599612251083666e-06, "loss": 0.6986, "step": 3293 }, { "epoch": 0.62, "learning_rate": 6.59385833100866e-06, "loss": 0.6015, "step": 3294 }, { "epoch": 0.62, "learning_rate": 6.588105686346276e-06, "loss": 0.3935, "step": 3295 }, { "epoch": 0.62, "learning_rate": 6.5823543192505615e-06, "loss": 0.5522, "step": 3296 }, { "epoch": 0.62, "learning_rate": 6.57660423187509e-06, "loss": 0.5039, "step": 3297 }, { "epoch": 0.62, "learning_rate": 6.5708554263729485e-06, "loss": 0.5747, "step": 3298 }, { "epoch": 0.62, "learning_rate": 6.56510790489675e-06, "loss": 0.5201, "step": 3299 }, { "epoch": 0.62, "learning_rate": 6.559361669598627e-06, "loss": 0.4286, "step": 3300 }, { "epoch": 0.62, "learning_rate": 6.553616722630225e-06, "loss": 0.5222, "step": 3301 }, { "epoch": 0.62, "learning_rate": 6.547873066142716e-06, "loss": 0.5216, "step": 3302 }, { "epoch": 0.62, "learning_rate": 6.542130702286778e-06, "loss": 0.5534, "step": 3303 }, { "epoch": 0.62, "learning_rate": 6.53638963321261e-06, "loss": 0.4584, "step": 3304 }, { "epoch": 0.62, "learning_rate": 6.5306498610699295e-06, "loss": 0.3926, "step": 3305 }, { "epoch": 0.62, "learning_rate": 6.524911388007963e-06, "loss": 0.4331, "step": 3306 }, { "epoch": 0.62, "learning_rate": 6.519174216175458e-06, "loss": 0.6936, "step": 3307 }, { "epoch": 0.62, "learning_rate": 6.513438347720665e-06, "loss": 0.4282, "step": 3308 }, { "epoch": 0.63, "learning_rate": 6.507703784791346e-06, "loss": 0.5521, "step": 3309 }, { "epoch": 0.63, "learning_rate": 6.50197052953479e-06, "loss": 0.4501, "step": 3310 }, { "epoch": 0.63, "learning_rate": 6.496238584097782e-06, "loss": 0.4071, "step": 3311 }, { "epoch": 0.63, "learning_rate": 6.490507950626616e-06, "loss": 0.4504, "step": 3312 }, { "epoch": 0.63, "learning_rate": 6.484778631267101e-06, "loss": 0.6151, "step": 3313 }, { "epoch": 0.63, "learning_rate": 6.479050628164557e-06, "loss": 0.5619, "step": 3314 }, { "epoch": 0.63, "learning_rate": 6.473323943463795e-06, "loss": 0.4219, "step": 3315 }, { "epoch": 0.63, "learning_rate": 6.467598579309156e-06, "loss": 0.6085, "step": 3316 }, { "epoch": 0.63, "learning_rate": 6.461874537844465e-06, "loss": 0.3434, "step": 3317 }, { "epoch": 0.63, "learning_rate": 6.4561518212130616e-06, "loss": 0.5116, "step": 3318 }, { "epoch": 0.63, "learning_rate": 6.4504304315577924e-06, "loss": 0.5222, "step": 3319 }, { "epoch": 0.63, "learning_rate": 6.444710371021e-06, "loss": 0.4914, "step": 3320 }, { "epoch": 0.63, "learning_rate": 6.438991641744531e-06, "loss": 0.6709, "step": 3321 }, { "epoch": 0.63, "learning_rate": 6.433274245869738e-06, "loss": 0.5064, "step": 3322 }, { "epoch": 0.63, "learning_rate": 6.4275581855374715e-06, "loss": 0.4392, "step": 3323 }, { "epoch": 0.63, "learning_rate": 6.4218434628880755e-06, "loss": 0.8076, "step": 3324 }, { "epoch": 0.63, "learning_rate": 6.416130080061406e-06, "loss": 0.526, "step": 3325 }, { "epoch": 0.63, "learning_rate": 6.410418039196808e-06, "loss": 0.5932, "step": 3326 }, { "epoch": 0.63, "learning_rate": 6.404707342433123e-06, "loss": 0.599, "step": 3327 }, { "epoch": 0.63, "learning_rate": 6.398997991908702e-06, "loss": 0.4436, "step": 3328 }, { "epoch": 0.63, "learning_rate": 6.393289989761373e-06, "loss": 0.5047, "step": 3329 }, { "epoch": 0.63, "learning_rate": 6.387583338128471e-06, "loss": 0.4076, "step": 3330 }, { "epoch": 0.63, "learning_rate": 6.3818780391468276e-06, "loss": 0.5505, "step": 3331 }, { "epoch": 0.63, "learning_rate": 6.376174094952761e-06, "loss": 0.4719, "step": 3332 }, { "epoch": 0.63, "learning_rate": 6.370471507682079e-06, "loss": 0.5007, "step": 3333 }, { "epoch": 0.63, "learning_rate": 6.364770279470093e-06, "loss": 0.564, "step": 3334 }, { "epoch": 0.63, "learning_rate": 6.359070412451598e-06, "loss": 0.5434, "step": 3335 }, { "epoch": 0.63, "learning_rate": 6.353371908760878e-06, "loss": 0.5505, "step": 3336 }, { "epoch": 0.63, "learning_rate": 6.347674770531716e-06, "loss": 0.503, "step": 3337 }, { "epoch": 0.63, "learning_rate": 6.3419789998973655e-06, "loss": 0.5584, "step": 3338 }, { "epoch": 0.63, "learning_rate": 6.33628459899059e-06, "loss": 0.5286, "step": 3339 }, { "epoch": 0.63, "learning_rate": 6.330591569943623e-06, "loss": 0.4597, "step": 3340 }, { "epoch": 0.63, "learning_rate": 6.324899914888192e-06, "loss": 0.519, "step": 3341 }, { "epoch": 0.63, "learning_rate": 6.319209635955512e-06, "loss": 0.407, "step": 3342 }, { "epoch": 0.63, "learning_rate": 6.313520735276274e-06, "loss": 0.5043, "step": 3343 }, { "epoch": 0.63, "learning_rate": 6.307833214980659e-06, "loss": 0.3968, "step": 3344 }, { "epoch": 0.63, "learning_rate": 6.302147077198334e-06, "loss": 0.4639, "step": 3345 }, { "epoch": 0.63, "learning_rate": 6.296462324058445e-06, "loss": 0.5797, "step": 3346 }, { "epoch": 0.63, "learning_rate": 6.2907789576896125e-06, "loss": 0.4748, "step": 3347 }, { "epoch": 0.63, "learning_rate": 6.285096980219951e-06, "loss": 0.4875, "step": 3348 }, { "epoch": 0.63, "learning_rate": 6.2794163937770494e-06, "loss": 0.5545, "step": 3349 }, { "epoch": 0.63, "learning_rate": 6.273737200487964e-06, "loss": 0.5478, "step": 3350 }, { "epoch": 0.63, "learning_rate": 6.268059402479254e-06, "loss": 0.5001, "step": 3351 }, { "epoch": 0.63, "learning_rate": 6.262383001876934e-06, "loss": 0.6528, "step": 3352 }, { "epoch": 0.63, "learning_rate": 6.256708000806504e-06, "loss": 0.5567, "step": 3353 }, { "epoch": 0.63, "learning_rate": 6.251034401392946e-06, "loss": 0.5363, "step": 3354 }, { "epoch": 0.63, "learning_rate": 6.245362205760703e-06, "loss": 0.4667, "step": 3355 }, { "epoch": 0.63, "learning_rate": 6.239691416033702e-06, "loss": 0.5025, "step": 3356 }, { "epoch": 0.63, "learning_rate": 6.2340220343353455e-06, "loss": 0.5575, "step": 3357 }, { "epoch": 0.63, "learning_rate": 6.228354062788503e-06, "loss": 0.4591, "step": 3358 }, { "epoch": 0.63, "learning_rate": 6.222687503515514e-06, "loss": 0.4459, "step": 3359 }, { "epoch": 0.63, "learning_rate": 6.217022358638199e-06, "loss": 0.4457, "step": 3360 }, { "epoch": 0.63, "learning_rate": 6.211358630277839e-06, "loss": 0.607, "step": 3361 }, { "epoch": 0.64, "learning_rate": 6.20569632055519e-06, "loss": 0.5043, "step": 3362 }, { "epoch": 0.64, "learning_rate": 6.200035431590479e-06, "loss": 0.4912, "step": 3363 }, { "epoch": 0.64, "learning_rate": 6.194375965503392e-06, "loss": 0.5051, "step": 3364 }, { "epoch": 0.64, "learning_rate": 6.188717924413088e-06, "loss": 0.4857, "step": 3365 }, { "epoch": 0.64, "learning_rate": 6.1830613104381945e-06, "loss": 0.5091, "step": 3366 }, { "epoch": 0.64, "learning_rate": 6.177406125696804e-06, "loss": 0.4981, "step": 3367 }, { "epoch": 0.64, "learning_rate": 6.1717523723064645e-06, "loss": 0.5932, "step": 3368 }, { "epoch": 0.64, "learning_rate": 6.166100052384203e-06, "loss": 0.5207, "step": 3369 }, { "epoch": 0.64, "learning_rate": 6.160449168046496e-06, "loss": 0.4803, "step": 3370 }, { "epoch": 0.64, "learning_rate": 6.1547997214092966e-06, "loss": 0.5571, "step": 3371 }, { "epoch": 0.64, "learning_rate": 6.149151714588009e-06, "loss": 0.4193, "step": 3372 }, { "epoch": 0.64, "learning_rate": 6.143505149697494e-06, "loss": 0.4508, "step": 3373 }, { "epoch": 0.64, "learning_rate": 6.137860028852088e-06, "loss": 0.4994, "step": 3374 }, { "epoch": 0.64, "learning_rate": 6.1322163541655765e-06, "loss": 0.4291, "step": 3375 }, { "epoch": 0.64, "learning_rate": 6.126574127751197e-06, "loss": 0.5578, "step": 3376 }, { "epoch": 0.64, "learning_rate": 6.120933351721665e-06, "loss": 0.455, "step": 3377 }, { "epoch": 0.64, "learning_rate": 6.115294028189134e-06, "loss": 0.4094, "step": 3378 }, { "epoch": 0.64, "learning_rate": 6.109656159265218e-06, "loss": 0.4123, "step": 3379 }, { "epoch": 0.64, "learning_rate": 6.104019747060998e-06, "loss": 0.4723, "step": 3380 }, { "epoch": 0.64, "learning_rate": 6.098384793686991e-06, "loss": 0.7365, "step": 3381 }, { "epoch": 0.64, "learning_rate": 6.092751301253179e-06, "loss": 0.3808, "step": 3382 }, { "epoch": 0.64, "learning_rate": 6.087119271868997e-06, "loss": 0.6537, "step": 3383 }, { "epoch": 0.64, "learning_rate": 6.0814887076433325e-06, "loss": 0.5426, "step": 3384 }, { "epoch": 0.64, "learning_rate": 6.075859610684514e-06, "loss": 0.5326, "step": 3385 }, { "epoch": 0.64, "learning_rate": 6.070231983100334e-06, "loss": 0.5755, "step": 3386 }, { "epoch": 0.64, "learning_rate": 6.064605826998031e-06, "loss": 0.4518, "step": 3387 }, { "epoch": 0.64, "learning_rate": 6.058981144484284e-06, "loss": 0.4402, "step": 3388 }, { "epoch": 0.64, "learning_rate": 6.053357937665237e-06, "loss": 0.4267, "step": 3389 }, { "epoch": 0.64, "learning_rate": 6.047736208646464e-06, "loss": 0.5992, "step": 3390 }, { "epoch": 0.64, "learning_rate": 6.042115959532992e-06, "loss": 0.6728, "step": 3391 }, { "epoch": 0.64, "learning_rate": 6.036497192429302e-06, "loss": 0.4722, "step": 3392 }, { "epoch": 0.64, "learning_rate": 6.030879909439311e-06, "loss": 0.5192, "step": 3393 }, { "epoch": 0.64, "learning_rate": 6.025264112666376e-06, "loss": 0.5525, "step": 3394 }, { "epoch": 0.64, "learning_rate": 6.01964980421331e-06, "loss": 0.503, "step": 3395 }, { "epoch": 0.64, "learning_rate": 6.014036986182363e-06, "loss": 0.4919, "step": 3396 }, { "epoch": 0.64, "learning_rate": 6.00842566067522e-06, "loss": 0.6135, "step": 3397 }, { "epoch": 0.64, "learning_rate": 6.002815829793024e-06, "loss": 0.4958, "step": 3398 }, { "epoch": 0.64, "learning_rate": 5.997207495636339e-06, "loss": 0.526, "step": 3399 }, { "epoch": 0.64, "learning_rate": 5.99160066030518e-06, "loss": 0.5612, "step": 3400 }, { "epoch": 0.64, "learning_rate": 5.985995325899003e-06, "loss": 0.3793, "step": 3401 }, { "epoch": 0.64, "learning_rate": 5.980391494516686e-06, "loss": 0.6535, "step": 3402 }, { "epoch": 0.64, "learning_rate": 5.974789168256569e-06, "loss": 0.5313, "step": 3403 }, { "epoch": 0.64, "learning_rate": 5.969188349216408e-06, "loss": 0.5087, "step": 3404 }, { "epoch": 0.64, "learning_rate": 5.963589039493401e-06, "loss": 0.5083, "step": 3405 }, { "epoch": 0.64, "learning_rate": 5.957991241184184e-06, "loss": 0.5091, "step": 3406 }, { "epoch": 0.64, "learning_rate": 5.952394956384823e-06, "loss": 0.582, "step": 3407 }, { "epoch": 0.64, "learning_rate": 5.946800187190816e-06, "loss": 0.4688, "step": 3408 }, { "epoch": 0.64, "learning_rate": 5.9412069356971e-06, "loss": 0.5332, "step": 3409 }, { "epoch": 0.64, "learning_rate": 5.935615203998042e-06, "loss": 0.5023, "step": 3410 }, { "epoch": 0.64, "learning_rate": 5.930024994187428e-06, "loss": 0.5693, "step": 3411 }, { "epoch": 0.64, "learning_rate": 5.924436308358497e-06, "loss": 0.4669, "step": 3412 }, { "epoch": 0.64, "learning_rate": 5.9188491486038955e-06, "loss": 0.5396, "step": 3413 }, { "epoch": 0.65, "learning_rate": 5.9132635170157085e-06, "loss": 0.4117, "step": 3414 }, { "epoch": 0.65, "learning_rate": 5.907679415685452e-06, "loss": 0.5417, "step": 3415 }, { "epoch": 0.65, "learning_rate": 5.902096846704059e-06, "loss": 0.5625, "step": 3416 }, { "epoch": 0.65, "learning_rate": 5.896515812161896e-06, "loss": 0.3686, "step": 3417 }, { "epoch": 0.65, "learning_rate": 5.890936314148756e-06, "loss": 0.4596, "step": 3418 }, { "epoch": 0.65, "learning_rate": 5.885358354753856e-06, "loss": 0.5774, "step": 3419 }, { "epoch": 0.65, "learning_rate": 5.879781936065825e-06, "loss": 0.5382, "step": 3420 }, { "epoch": 0.65, "learning_rate": 5.874207060172734e-06, "loss": 0.5118, "step": 3421 }, { "epoch": 0.65, "learning_rate": 5.868633729162064e-06, "loss": 0.5409, "step": 3422 }, { "epoch": 0.65, "learning_rate": 5.863061945120719e-06, "loss": 0.5532, "step": 3423 }, { "epoch": 0.65, "learning_rate": 5.857491710135032e-06, "loss": 0.5417, "step": 3424 }, { "epoch": 0.65, "learning_rate": 5.8519230262907425e-06, "loss": 0.5348, "step": 3425 }, { "epoch": 0.65, "learning_rate": 5.846355895673017e-06, "loss": 0.442, "step": 3426 }, { "epoch": 0.65, "learning_rate": 5.840790320366444e-06, "loss": 0.4738, "step": 3427 }, { "epoch": 0.65, "learning_rate": 5.835226302455021e-06, "loss": 0.604, "step": 3428 }, { "epoch": 0.65, "learning_rate": 5.829663844022171e-06, "loss": 0.481, "step": 3429 }, { "epoch": 0.65, "learning_rate": 5.824102947150722e-06, "loss": 0.5533, "step": 3430 }, { "epoch": 0.65, "learning_rate": 5.8185436139229305e-06, "loss": 0.537, "step": 3431 }, { "epoch": 0.65, "learning_rate": 5.812985846420456e-06, "loss": 0.6688, "step": 3432 }, { "epoch": 0.65, "learning_rate": 5.807429646724378e-06, "loss": 0.5341, "step": 3433 }, { "epoch": 0.65, "learning_rate": 5.801875016915186e-06, "loss": 0.6971, "step": 3434 }, { "epoch": 0.65, "learning_rate": 5.796321959072793e-06, "loss": 0.4766, "step": 3435 }, { "epoch": 0.65, "learning_rate": 5.7907704752765015e-06, "loss": 0.4798, "step": 3436 }, { "epoch": 0.65, "learning_rate": 5.7852205676050355e-06, "loss": 0.3997, "step": 3437 }, { "epoch": 0.65, "learning_rate": 5.779672238136541e-06, "loss": 0.4918, "step": 3438 }, { "epoch": 0.65, "learning_rate": 5.774125488948561e-06, "loss": 0.6842, "step": 3439 }, { "epoch": 0.65, "learning_rate": 5.768580322118034e-06, "loss": 0.5902, "step": 3440 }, { "epoch": 0.65, "learning_rate": 5.763036739721334e-06, "loss": 0.4537, "step": 3441 }, { "epoch": 0.65, "learning_rate": 5.757494743834226e-06, "loss": 0.5297, "step": 3442 }, { "epoch": 0.65, "learning_rate": 5.751954336531873e-06, "loss": 0.4733, "step": 3443 }, { "epoch": 0.65, "learning_rate": 5.746415519888862e-06, "loss": 0.4153, "step": 3444 }, { "epoch": 0.65, "learning_rate": 5.740878295979172e-06, "loss": 0.4148, "step": 3445 }, { "epoch": 0.65, "learning_rate": 5.735342666876189e-06, "loss": 0.3709, "step": 3446 }, { "epoch": 0.65, "learning_rate": 5.7298086346527e-06, "loss": 0.5815, "step": 3447 }, { "epoch": 0.65, "learning_rate": 5.724276201380898e-06, "loss": 0.4302, "step": 3448 }, { "epoch": 0.65, "learning_rate": 5.718745369132373e-06, "loss": 0.5943, "step": 3449 }, { "epoch": 0.65, "learning_rate": 5.713216139978118e-06, "loss": 0.551, "step": 3450 }, { "epoch": 0.65, "learning_rate": 5.707688515988525e-06, "loss": 0.4193, "step": 3451 }, { "epoch": 0.65, "learning_rate": 5.702162499233381e-06, "loss": 0.5102, "step": 3452 }, { "epoch": 0.65, "learning_rate": 5.696638091781888e-06, "loss": 0.4024, "step": 3453 }, { "epoch": 0.65, "learning_rate": 5.691115295702619e-06, "loss": 0.5691, "step": 3454 }, { "epoch": 0.65, "learning_rate": 5.685594113063556e-06, "loss": 0.58, "step": 3455 }, { "epoch": 0.65, "learning_rate": 5.680074545932094e-06, "loss": 0.4491, "step": 3456 }, { "epoch": 0.65, "learning_rate": 5.674556596374993e-06, "loss": 0.4142, "step": 3457 }, { "epoch": 0.65, "learning_rate": 5.669040266458421e-06, "loss": 0.6153, "step": 3458 }, { "epoch": 0.65, "learning_rate": 5.663525558247948e-06, "loss": 0.4642, "step": 3459 }, { "epoch": 0.65, "learning_rate": 5.658012473808532e-06, "loss": 0.6904, "step": 3460 }, { "epoch": 0.65, "learning_rate": 5.652501015204503e-06, "loss": 0.5197, "step": 3461 }, { "epoch": 0.65, "learning_rate": 5.646991184499613e-06, "loss": 0.4289, "step": 3462 }, { "epoch": 0.65, "learning_rate": 5.641482983756987e-06, "loss": 0.4673, "step": 3463 }, { "epoch": 0.65, "learning_rate": 5.635976415039143e-06, "loss": 0.4503, "step": 3464 }, { "epoch": 0.65, "learning_rate": 5.6304714804079866e-06, "loss": 0.5544, "step": 3465 }, { "epoch": 0.65, "learning_rate": 5.624968181924815e-06, "loss": 0.3905, "step": 3466 }, { "epoch": 0.66, "learning_rate": 5.619466521650309e-06, "loss": 0.5364, "step": 3467 }, { "epoch": 0.66, "learning_rate": 5.613966501644537e-06, "loss": 0.4373, "step": 3468 }, { "epoch": 0.66, "learning_rate": 5.608468123966953e-06, "loss": 0.4237, "step": 3469 }, { "epoch": 0.66, "learning_rate": 5.602971390676405e-06, "loss": 0.393, "step": 3470 }, { "epoch": 0.66, "learning_rate": 5.597476303831109e-06, "loss": 0.6294, "step": 3471 }, { "epoch": 0.66, "learning_rate": 5.591982865488668e-06, "loss": 0.554, "step": 3472 }, { "epoch": 0.66, "learning_rate": 5.5864910777060845e-06, "loss": 0.5014, "step": 3473 }, { "epoch": 0.66, "learning_rate": 5.581000942539729e-06, "loss": 0.5121, "step": 3474 }, { "epoch": 0.66, "learning_rate": 5.5755124620453424e-06, "loss": 0.5158, "step": 3475 }, { "epoch": 0.66, "learning_rate": 5.570025638278073e-06, "loss": 0.4608, "step": 3476 }, { "epoch": 0.66, "learning_rate": 5.564540473292433e-06, "loss": 0.4672, "step": 3477 }, { "epoch": 0.66, "learning_rate": 5.5590569691423015e-06, "loss": 0.452, "step": 3478 }, { "epoch": 0.66, "learning_rate": 5.553575127880962e-06, "loss": 0.5112, "step": 3479 }, { "epoch": 0.66, "learning_rate": 5.5480949515610605e-06, "loss": 0.5649, "step": 3480 }, { "epoch": 0.66, "learning_rate": 5.542616442234618e-06, "loss": 0.4602, "step": 3481 }, { "epoch": 0.66, "learning_rate": 5.5371396019530364e-06, "loss": 0.5398, "step": 3482 }, { "epoch": 0.66, "learning_rate": 5.531664432767092e-06, "loss": 0.5054, "step": 3483 }, { "epoch": 0.66, "learning_rate": 5.526190936726934e-06, "loss": 0.5122, "step": 3484 }, { "epoch": 0.66, "learning_rate": 5.5207191158820835e-06, "loss": 0.3856, "step": 3485 }, { "epoch": 0.66, "learning_rate": 5.515248972281438e-06, "loss": 0.5632, "step": 3486 }, { "epoch": 0.66, "learning_rate": 5.509780507973266e-06, "loss": 0.6288, "step": 3487 }, { "epoch": 0.66, "learning_rate": 5.504313725005204e-06, "loss": 0.5059, "step": 3488 }, { "epoch": 0.66, "learning_rate": 5.498848625424264e-06, "loss": 0.554, "step": 3489 }, { "epoch": 0.66, "learning_rate": 5.4933852112768184e-06, "loss": 0.4661, "step": 3490 }, { "epoch": 0.66, "learning_rate": 5.487923484608629e-06, "loss": 0.424, "step": 3491 }, { "epoch": 0.66, "learning_rate": 5.4824634474648005e-06, "loss": 0.5835, "step": 3492 }, { "epoch": 0.66, "learning_rate": 5.477005101889814e-06, "loss": 0.4314, "step": 3493 }, { "epoch": 0.66, "learning_rate": 5.471548449927531e-06, "loss": 0.3086, "step": 3494 }, { "epoch": 0.66, "learning_rate": 5.466093493621158e-06, "loss": 0.6224, "step": 3495 }, { "epoch": 0.66, "learning_rate": 5.4606402350132826e-06, "loss": 0.5816, "step": 3496 }, { "epoch": 0.66, "learning_rate": 5.455188676145846e-06, "loss": 0.3441, "step": 3497 }, { "epoch": 0.66, "learning_rate": 5.449738819060156e-06, "loss": 0.5054, "step": 3498 }, { "epoch": 0.66, "learning_rate": 5.444290665796888e-06, "loss": 0.4813, "step": 3499 }, { "epoch": 0.66, "learning_rate": 5.438844218396072e-06, "loss": 0.5564, "step": 3500 }, { "epoch": 0.66, "learning_rate": 5.433399478897106e-06, "loss": 0.6561, "step": 3501 }, { "epoch": 0.66, "learning_rate": 5.4279564493387414e-06, "loss": 0.4198, "step": 3502 }, { "epoch": 0.66, "learning_rate": 5.422515131759097e-06, "loss": 0.4705, "step": 3503 }, { "epoch": 0.66, "learning_rate": 5.417075528195641e-06, "loss": 0.5703, "step": 3504 }, { "epoch": 0.66, "learning_rate": 5.411637640685219e-06, "loss": 0.6025, "step": 3505 }, { "epoch": 0.66, "learning_rate": 5.406201471264006e-06, "loss": 0.4661, "step": 3506 }, { "epoch": 0.66, "learning_rate": 5.40076702196755e-06, "loss": 0.5251, "step": 3507 }, { "epoch": 0.66, "learning_rate": 5.395334294830766e-06, "loss": 0.553, "step": 3508 }, { "epoch": 0.66, "learning_rate": 5.389903291887898e-06, "loss": 0.5171, "step": 3509 }, { "epoch": 0.66, "learning_rate": 5.3844740151725586e-06, "loss": 0.648, "step": 3510 }, { "epoch": 0.66, "learning_rate": 5.379046466717721e-06, "loss": 0.4543, "step": 3511 }, { "epoch": 0.66, "learning_rate": 5.373620648555702e-06, "loss": 0.4749, "step": 3512 }, { "epoch": 0.66, "learning_rate": 5.368196562718162e-06, "loss": 0.4333, "step": 3513 }, { "epoch": 0.66, "learning_rate": 5.362774211236134e-06, "loss": 0.5011, "step": 3514 }, { "epoch": 0.66, "learning_rate": 5.3573535961399844e-06, "loss": 0.5206, "step": 3515 }, { "epoch": 0.66, "learning_rate": 5.35193471945944e-06, "loss": 0.3573, "step": 3516 }, { "epoch": 0.66, "learning_rate": 5.346517583223567e-06, "loss": 0.4214, "step": 3517 }, { "epoch": 0.66, "learning_rate": 5.3411021894607865e-06, "loss": 0.5389, "step": 3518 }, { "epoch": 0.66, "learning_rate": 5.3356885401988655e-06, "loss": 0.4298, "step": 3519 }, { "epoch": 0.67, "learning_rate": 5.330276637464918e-06, "loss": 0.4552, "step": 3520 }, { "epoch": 0.67, "learning_rate": 5.3248664832854045e-06, "loss": 0.4032, "step": 3521 }, { "epoch": 0.67, "learning_rate": 5.319458079686129e-06, "loss": 0.6496, "step": 3522 }, { "epoch": 0.67, "learning_rate": 5.3140514286922394e-06, "loss": 0.4265, "step": 3523 }, { "epoch": 0.67, "learning_rate": 5.308646532328233e-06, "loss": 0.5252, "step": 3524 }, { "epoch": 0.67, "learning_rate": 5.3032433926179395e-06, "loss": 0.3827, "step": 3525 }, { "epoch": 0.67, "learning_rate": 5.297842011584549e-06, "loss": 0.4257, "step": 3526 }, { "epoch": 0.67, "learning_rate": 5.292442391250567e-06, "loss": 0.5167, "step": 3527 }, { "epoch": 0.67, "learning_rate": 5.287044533637865e-06, "loss": 0.5508, "step": 3528 }, { "epoch": 0.67, "learning_rate": 5.28164844076764e-06, "loss": 0.5471, "step": 3529 }, { "epoch": 0.67, "learning_rate": 5.2762541146604316e-06, "loss": 0.3377, "step": 3530 }, { "epoch": 0.67, "learning_rate": 5.270861557336119e-06, "loss": 0.5653, "step": 3531 }, { "epoch": 0.67, "learning_rate": 5.265470770813918e-06, "loss": 0.5825, "step": 3532 }, { "epoch": 0.67, "learning_rate": 5.26008175711238e-06, "loss": 0.3938, "step": 3533 }, { "epoch": 0.67, "learning_rate": 5.254694518249395e-06, "loss": 0.5072, "step": 3534 }, { "epoch": 0.67, "learning_rate": 5.249309056242189e-06, "loss": 0.4652, "step": 3535 }, { "epoch": 0.67, "learning_rate": 5.243925373107318e-06, "loss": 0.5568, "step": 3536 }, { "epoch": 0.67, "learning_rate": 5.238543470860677e-06, "loss": 0.5639, "step": 3537 }, { "epoch": 0.67, "learning_rate": 5.233163351517494e-06, "loss": 0.6471, "step": 3538 }, { "epoch": 0.67, "learning_rate": 5.227785017092322e-06, "loss": 0.5682, "step": 3539 }, { "epoch": 0.67, "learning_rate": 5.222408469599061e-06, "loss": 0.5105, "step": 3540 }, { "epoch": 0.67, "learning_rate": 5.217033711050923e-06, "loss": 0.7013, "step": 3541 }, { "epoch": 0.67, "learning_rate": 5.211660743460458e-06, "loss": 0.5538, "step": 3542 }, { "epoch": 0.67, "learning_rate": 5.2062895688395595e-06, "loss": 0.4278, "step": 3543 }, { "epoch": 0.67, "learning_rate": 5.200920189199425e-06, "loss": 0.4614, "step": 3544 }, { "epoch": 0.67, "learning_rate": 5.195552606550593e-06, "loss": 0.5245, "step": 3545 }, { "epoch": 0.67, "learning_rate": 5.190186822902932e-06, "loss": 0.5252, "step": 3546 }, { "epoch": 0.67, "learning_rate": 5.184822840265635e-06, "loss": 0.3847, "step": 3547 }, { "epoch": 0.67, "learning_rate": 5.179460660647206e-06, "loss": 0.5081, "step": 3548 }, { "epoch": 0.67, "learning_rate": 5.174100286055499e-06, "loss": 0.4585, "step": 3549 }, { "epoch": 0.67, "learning_rate": 5.168741718497674e-06, "loss": 0.4081, "step": 3550 }, { "epoch": 0.67, "learning_rate": 5.163384959980221e-06, "loss": 0.4893, "step": 3551 }, { "epoch": 0.67, "learning_rate": 5.15803001250895e-06, "loss": 0.4001, "step": 3552 }, { "epoch": 0.67, "learning_rate": 5.152676878088996e-06, "loss": 0.5426, "step": 3553 }, { "epoch": 0.67, "learning_rate": 5.147325558724811e-06, "loss": 0.6149, "step": 3554 }, { "epoch": 0.67, "learning_rate": 5.14197605642017e-06, "loss": 0.6608, "step": 3555 }, { "epoch": 0.67, "learning_rate": 5.136628373178165e-06, "loss": 0.5387, "step": 3556 }, { "epoch": 0.67, "learning_rate": 5.131282511001221e-06, "loss": 0.4085, "step": 3557 }, { "epoch": 0.67, "learning_rate": 5.125938471891054e-06, "loss": 0.4146, "step": 3558 }, { "epoch": 0.67, "learning_rate": 5.120596257848716e-06, "loss": 0.4865, "step": 3559 }, { "epoch": 0.67, "learning_rate": 5.1152558708745795e-06, "loss": 0.4, "step": 3560 }, { "epoch": 0.67, "learning_rate": 5.109917312968325e-06, "loss": 0.4896, "step": 3561 }, { "epoch": 0.67, "learning_rate": 5.104580586128935e-06, "loss": 0.5853, "step": 3562 }, { "epoch": 0.67, "learning_rate": 5.099245692354735e-06, "loss": 0.4437, "step": 3563 }, { "epoch": 0.67, "learning_rate": 5.093912633643348e-06, "loss": 0.4146, "step": 3564 }, { "epoch": 0.67, "learning_rate": 5.088581411991699e-06, "loss": 0.6475, "step": 3565 }, { "epoch": 0.67, "learning_rate": 5.0832520293960485e-06, "loss": 0.4399, "step": 3566 }, { "epoch": 0.67, "learning_rate": 5.077924487851954e-06, "loss": 0.7193, "step": 3567 }, { "epoch": 0.67, "learning_rate": 5.072598789354286e-06, "loss": 0.4709, "step": 3568 }, { "epoch": 0.67, "learning_rate": 5.067274935897226e-06, "loss": 0.4856, "step": 3569 }, { "epoch": 0.67, "learning_rate": 5.061952929474263e-06, "loss": 0.5192, "step": 3570 }, { "epoch": 0.67, "learning_rate": 5.056632772078198e-06, "loss": 0.5042, "step": 3571 }, { "epoch": 0.67, "learning_rate": 5.051314465701134e-06, "loss": 0.6777, "step": 3572 }, { "epoch": 0.68, "learning_rate": 5.045998012334486e-06, "loss": 0.5329, "step": 3573 }, { "epoch": 0.68, "learning_rate": 5.040683413968974e-06, "loss": 0.5675, "step": 3574 }, { "epoch": 0.68, "learning_rate": 5.035370672594622e-06, "loss": 0.3784, "step": 3575 }, { "epoch": 0.68, "learning_rate": 5.0300597902007565e-06, "loss": 0.4492, "step": 3576 }, { "epoch": 0.68, "learning_rate": 5.024750768776011e-06, "loss": 0.4128, "step": 3577 }, { "epoch": 0.68, "learning_rate": 5.019443610308331e-06, "loss": 0.5922, "step": 3578 }, { "epoch": 0.68, "learning_rate": 5.014138316784944e-06, "loss": 0.4892, "step": 3579 }, { "epoch": 0.68, "learning_rate": 5.008834890192394e-06, "loss": 0.4928, "step": 3580 }, { "epoch": 0.68, "learning_rate": 5.003533332516524e-06, "loss": 0.612, "step": 3581 }, { "epoch": 0.68, "learning_rate": 4.998233645742478e-06, "loss": 0.5213, "step": 3582 }, { "epoch": 0.68, "learning_rate": 4.992935831854687e-06, "loss": 0.5113, "step": 3583 }, { "epoch": 0.68, "learning_rate": 4.987639892836903e-06, "loss": 0.4617, "step": 3584 }, { "epoch": 0.68, "learning_rate": 4.982345830672161e-06, "loss": 0.5016, "step": 3585 }, { "epoch": 0.68, "learning_rate": 4.977053647342785e-06, "loss": 0.4038, "step": 3586 }, { "epoch": 0.68, "learning_rate": 4.971763344830419e-06, "loss": 0.4752, "step": 3587 }, { "epoch": 0.68, "learning_rate": 4.966474925115987e-06, "loss": 0.3675, "step": 3588 }, { "epoch": 0.68, "learning_rate": 4.961188390179709e-06, "loss": 0.3985, "step": 3589 }, { "epoch": 0.68, "learning_rate": 4.9559037420011025e-06, "loss": 0.517, "step": 3590 }, { "epoch": 0.68, "learning_rate": 4.950620982558975e-06, "loss": 0.5105, "step": 3591 }, { "epoch": 0.68, "learning_rate": 4.945340113831437e-06, "loss": 0.5471, "step": 3592 }, { "epoch": 0.68, "learning_rate": 4.940061137795876e-06, "loss": 0.5166, "step": 3593 }, { "epoch": 0.68, "learning_rate": 4.934784056428975e-06, "loss": 0.4103, "step": 3594 }, { "epoch": 0.68, "learning_rate": 4.929508871706724e-06, "loss": 0.4926, "step": 3595 }, { "epoch": 0.68, "learning_rate": 4.924235585604377e-06, "loss": 0.5678, "step": 3596 }, { "epoch": 0.68, "learning_rate": 4.91896420009649e-06, "loss": 0.346, "step": 3597 }, { "epoch": 0.68, "learning_rate": 4.913694717156912e-06, "loss": 0.4559, "step": 3598 }, { "epoch": 0.68, "learning_rate": 4.908427138758779e-06, "loss": 0.5344, "step": 3599 }, { "epoch": 0.68, "learning_rate": 4.9031614668744934e-06, "loss": 0.4623, "step": 3600 }, { "epoch": 0.68, "learning_rate": 4.897897703475773e-06, "loss": 0.4759, "step": 3601 }, { "epoch": 0.68, "learning_rate": 4.892635850533604e-06, "loss": 0.4374, "step": 3602 }, { "epoch": 0.68, "learning_rate": 4.88737591001826e-06, "loss": 0.3644, "step": 3603 }, { "epoch": 0.68, "learning_rate": 4.8821178838993e-06, "loss": 0.4018, "step": 3604 }, { "epoch": 0.68, "learning_rate": 4.876861774145565e-06, "loss": 0.4609, "step": 3605 }, { "epoch": 0.68, "learning_rate": 4.871607582725179e-06, "loss": 0.5247, "step": 3606 }, { "epoch": 0.68, "learning_rate": 4.866355311605547e-06, "loss": 0.6381, "step": 3607 }, { "epoch": 0.68, "learning_rate": 4.861104962753357e-06, "loss": 0.5911, "step": 3608 }, { "epoch": 0.68, "learning_rate": 4.855856538134573e-06, "loss": 0.5014, "step": 3609 }, { "epoch": 0.68, "learning_rate": 4.850610039714444e-06, "loss": 0.4734, "step": 3610 }, { "epoch": 0.68, "learning_rate": 4.845365469457494e-06, "loss": 0.5451, "step": 3611 }, { "epoch": 0.68, "learning_rate": 4.8401228293275215e-06, "loss": 0.354, "step": 3612 }, { "epoch": 0.68, "learning_rate": 4.834882121287618e-06, "loss": 0.4395, "step": 3613 }, { "epoch": 0.68, "learning_rate": 4.82964334730013e-06, "loss": 0.4237, "step": 3614 }, { "epoch": 0.68, "learning_rate": 4.824406509326691e-06, "loss": 0.4269, "step": 3615 }, { "epoch": 0.68, "learning_rate": 4.819171609328214e-06, "loss": 0.4605, "step": 3616 }, { "epoch": 0.68, "learning_rate": 4.813938649264881e-06, "loss": 0.5304, "step": 3617 }, { "epoch": 0.68, "learning_rate": 4.808707631096138e-06, "loss": 0.5329, "step": 3618 }, { "epoch": 0.68, "learning_rate": 4.8034785567807225e-06, "loss": 0.5057, "step": 3619 }, { "epoch": 0.68, "learning_rate": 4.798251428276635e-06, "loss": 0.6011, "step": 3620 }, { "epoch": 0.68, "learning_rate": 4.793026247541144e-06, "loss": 0.495, "step": 3621 }, { "epoch": 0.68, "learning_rate": 4.787803016530792e-06, "loss": 0.4956, "step": 3622 }, { "epoch": 0.68, "learning_rate": 4.782581737201394e-06, "loss": 0.5596, "step": 3623 }, { "epoch": 0.68, "learning_rate": 4.777362411508031e-06, "loss": 0.4795, "step": 3624 }, { "epoch": 0.68, "learning_rate": 4.772145041405053e-06, "loss": 0.4693, "step": 3625 }, { "epoch": 0.69, "learning_rate": 4.766929628846073e-06, "loss": 0.542, "step": 3626 }, { "epoch": 0.69, "learning_rate": 4.7617161757839895e-06, "loss": 0.5354, "step": 3627 }, { "epoch": 0.69, "learning_rate": 4.756504684170942e-06, "loss": 0.5771, "step": 3628 }, { "epoch": 0.69, "learning_rate": 4.751295155958345e-06, "loss": 0.4813, "step": 3629 }, { "epoch": 0.69, "learning_rate": 4.746087593096894e-06, "loss": 0.598, "step": 3630 }, { "epoch": 0.69, "learning_rate": 4.740881997536524e-06, "loss": 0.5691, "step": 3631 }, { "epoch": 0.69, "learning_rate": 4.7356783712264405e-06, "loss": 0.4288, "step": 3632 }, { "epoch": 0.69, "learning_rate": 4.730476716115127e-06, "loss": 0.4635, "step": 3633 }, { "epoch": 0.69, "learning_rate": 4.725277034150314e-06, "loss": 0.407, "step": 3634 }, { "epoch": 0.69, "learning_rate": 4.720079327278987e-06, "loss": 0.3943, "step": 3635 }, { "epoch": 0.69, "learning_rate": 4.7148835974474105e-06, "loss": 0.5844, "step": 3636 }, { "epoch": 0.69, "learning_rate": 4.7096898466010976e-06, "loss": 0.3975, "step": 3637 }, { "epoch": 0.69, "learning_rate": 4.70449807668482e-06, "loss": 0.4844, "step": 3638 }, { "epoch": 0.69, "learning_rate": 4.6993082896426125e-06, "loss": 0.5085, "step": 3639 }, { "epoch": 0.69, "learning_rate": 4.6941204874177625e-06, "loss": 0.3902, "step": 3640 }, { "epoch": 0.69, "learning_rate": 4.688934671952818e-06, "loss": 0.4441, "step": 3641 }, { "epoch": 0.69, "learning_rate": 4.683750845189581e-06, "loss": 0.5188, "step": 3642 }, { "epoch": 0.69, "learning_rate": 4.678569009069109e-06, "loss": 0.4289, "step": 3643 }, { "epoch": 0.69, "learning_rate": 4.673389165531714e-06, "loss": 0.634, "step": 3644 }, { "epoch": 0.69, "learning_rate": 4.668211316516964e-06, "loss": 0.6181, "step": 3645 }, { "epoch": 0.69, "learning_rate": 4.6630354639636775e-06, "loss": 0.4493, "step": 3646 }, { "epoch": 0.69, "learning_rate": 4.657861609809923e-06, "loss": 0.3741, "step": 3647 }, { "epoch": 0.69, "learning_rate": 4.652689755993035e-06, "loss": 0.4472, "step": 3648 }, { "epoch": 0.69, "learning_rate": 4.6475199044495765e-06, "loss": 0.5604, "step": 3649 }, { "epoch": 0.69, "learning_rate": 4.642352057115374e-06, "loss": 0.6608, "step": 3650 }, { "epoch": 0.69, "learning_rate": 4.637186215925512e-06, "loss": 0.4695, "step": 3651 }, { "epoch": 0.69, "learning_rate": 4.632022382814299e-06, "loss": 0.4525, "step": 3652 }, { "epoch": 0.69, "learning_rate": 4.626860559715318e-06, "loss": 0.4612, "step": 3653 }, { "epoch": 0.69, "learning_rate": 4.621700748561384e-06, "loss": 0.5393, "step": 3654 }, { "epoch": 0.69, "learning_rate": 4.616542951284561e-06, "loss": 0.6791, "step": 3655 }, { "epoch": 0.69, "learning_rate": 4.611387169816162e-06, "loss": 0.373, "step": 3656 }, { "epoch": 0.69, "learning_rate": 4.6062334060867416e-06, "loss": 0.4408, "step": 3657 }, { "epoch": 0.69, "learning_rate": 4.601081662026103e-06, "loss": 0.5197, "step": 3658 }, { "epoch": 0.69, "learning_rate": 4.595931939563288e-06, "loss": 0.5973, "step": 3659 }, { "epoch": 0.69, "learning_rate": 4.5907842406265864e-06, "loss": 0.4853, "step": 3660 }, { "epoch": 0.69, "learning_rate": 4.5856385671435285e-06, "loss": 0.532, "step": 3661 }, { "epoch": 0.69, "learning_rate": 4.5804949210408856e-06, "loss": 0.5437, "step": 3662 }, { "epoch": 0.69, "learning_rate": 4.575353304244668e-06, "loss": 0.6754, "step": 3663 }, { "epoch": 0.69, "learning_rate": 4.570213718680127e-06, "loss": 0.4302, "step": 3664 }, { "epoch": 0.69, "learning_rate": 4.5650761662717645e-06, "loss": 0.3984, "step": 3665 }, { "epoch": 0.69, "learning_rate": 4.559940648943301e-06, "loss": 0.4557, "step": 3666 }, { "epoch": 0.69, "learning_rate": 4.554807168617703e-06, "loss": 0.4976, "step": 3667 }, { "epoch": 0.69, "learning_rate": 4.549675727217186e-06, "loss": 0.4543, "step": 3668 }, { "epoch": 0.69, "learning_rate": 4.544546326663193e-06, "loss": 0.5146, "step": 3669 }, { "epoch": 0.69, "learning_rate": 4.539418968876389e-06, "loss": 0.4129, "step": 3670 }, { "epoch": 0.69, "learning_rate": 4.534293655776699e-06, "loss": 0.4823, "step": 3671 }, { "epoch": 0.69, "learning_rate": 4.5291703892832705e-06, "loss": 0.551, "step": 3672 }, { "epoch": 0.69, "learning_rate": 4.524049171314475e-06, "loss": 0.3644, "step": 3673 }, { "epoch": 0.69, "learning_rate": 4.518930003787938e-06, "loss": 0.5485, "step": 3674 }, { "epoch": 0.69, "learning_rate": 4.513812888620502e-06, "loss": 0.4162, "step": 3675 }, { "epoch": 0.69, "learning_rate": 4.508697827728242e-06, "loss": 0.5323, "step": 3676 }, { "epoch": 0.69, "learning_rate": 4.5035848230264715e-06, "loss": 0.3471, "step": 3677 }, { "epoch": 0.69, "learning_rate": 4.498473876429727e-06, "loss": 0.4536, "step": 3678 }, { "epoch": 0.7, "learning_rate": 4.493364989851776e-06, "loss": 0.6671, "step": 3679 }, { "epoch": 0.7, "learning_rate": 4.488258165205618e-06, "loss": 0.5601, "step": 3680 }, { "epoch": 0.7, "learning_rate": 4.483153404403472e-06, "loss": 0.3448, "step": 3681 }, { "epoch": 0.7, "learning_rate": 4.478050709356802e-06, "loss": 0.5614, "step": 3682 }, { "epoch": 0.7, "learning_rate": 4.472950081976275e-06, "loss": 0.4477, "step": 3683 }, { "epoch": 0.7, "learning_rate": 4.467851524171796e-06, "loss": 0.4489, "step": 3684 }, { "epoch": 0.7, "learning_rate": 4.4627550378525e-06, "loss": 0.4995, "step": 3685 }, { "epoch": 0.7, "learning_rate": 4.457660624926742e-06, "loss": 0.3268, "step": 3686 }, { "epoch": 0.7, "learning_rate": 4.452568287302088e-06, "loss": 0.5036, "step": 3687 }, { "epoch": 0.7, "learning_rate": 4.44747802688535e-06, "loss": 0.5796, "step": 3688 }, { "epoch": 0.7, "learning_rate": 4.4423898455825456e-06, "loss": 0.613, "step": 3689 }, { "epoch": 0.7, "learning_rate": 4.43730374529892e-06, "loss": 0.572, "step": 3690 }, { "epoch": 0.7, "learning_rate": 4.432219727938936e-06, "loss": 0.4195, "step": 3691 }, { "epoch": 0.7, "learning_rate": 4.42713779540628e-06, "loss": 0.5286, "step": 3692 }, { "epoch": 0.7, "learning_rate": 4.422057949603855e-06, "loss": 0.4141, "step": 3693 }, { "epoch": 0.7, "learning_rate": 4.416980192433784e-06, "loss": 0.3977, "step": 3694 }, { "epoch": 0.7, "learning_rate": 4.411904525797408e-06, "loss": 0.4681, "step": 3695 }, { "epoch": 0.7, "learning_rate": 4.4068309515952855e-06, "loss": 0.4813, "step": 3696 }, { "epoch": 0.7, "learning_rate": 4.40175947172719e-06, "loss": 0.4185, "step": 3697 }, { "epoch": 0.7, "learning_rate": 4.39669008809211e-06, "loss": 0.5414, "step": 3698 }, { "epoch": 0.7, "learning_rate": 4.39162280258825e-06, "loss": 0.3841, "step": 3699 }, { "epoch": 0.7, "learning_rate": 4.386557617113039e-06, "loss": 0.4233, "step": 3700 }, { "epoch": 0.7, "learning_rate": 4.3814945335631e-06, "loss": 0.5064, "step": 3701 }, { "epoch": 0.7, "learning_rate": 4.3764335538342785e-06, "loss": 0.4953, "step": 3702 }, { "epoch": 0.7, "learning_rate": 4.371374679821639e-06, "loss": 0.5123, "step": 3703 }, { "epoch": 0.7, "learning_rate": 4.366317913419454e-06, "loss": 0.4325, "step": 3704 }, { "epoch": 0.7, "learning_rate": 4.3612632565211935e-06, "loss": 0.4391, "step": 3705 }, { "epoch": 0.7, "learning_rate": 4.356210711019556e-06, "loss": 0.6194, "step": 3706 }, { "epoch": 0.7, "learning_rate": 4.351160278806444e-06, "loss": 0.4244, "step": 3707 }, { "epoch": 0.7, "learning_rate": 4.346111961772956e-06, "loss": 0.5246, "step": 3708 }, { "epoch": 0.7, "learning_rate": 4.341065761809418e-06, "loss": 0.475, "step": 3709 }, { "epoch": 0.7, "learning_rate": 4.336021680805353e-06, "loss": 0.4625, "step": 3710 }, { "epoch": 0.7, "learning_rate": 4.330979720649491e-06, "loss": 0.4219, "step": 3711 }, { "epoch": 0.7, "learning_rate": 4.3259398832297665e-06, "loss": 0.5057, "step": 3712 }, { "epoch": 0.7, "learning_rate": 4.32090217043332e-06, "loss": 0.5554, "step": 3713 }, { "epoch": 0.7, "learning_rate": 4.3158665841465074e-06, "loss": 0.4477, "step": 3714 }, { "epoch": 0.7, "learning_rate": 4.310833126254868e-06, "loss": 0.4001, "step": 3715 }, { "epoch": 0.7, "learning_rate": 4.305801798643156e-06, "loss": 0.4283, "step": 3716 }, { "epoch": 0.7, "learning_rate": 4.300772603195335e-06, "loss": 0.5898, "step": 3717 }, { "epoch": 0.7, "learning_rate": 4.2957455417945535e-06, "loss": 0.4937, "step": 3718 }, { "epoch": 0.7, "learning_rate": 4.29072061632317e-06, "loss": 0.4358, "step": 3719 }, { "epoch": 0.7, "learning_rate": 4.285697828662746e-06, "loss": 0.4243, "step": 3720 }, { "epoch": 0.7, "learning_rate": 4.280677180694043e-06, "loss": 0.5556, "step": 3721 }, { "epoch": 0.7, "learning_rate": 4.275658674297004e-06, "loss": 0.5003, "step": 3722 }, { "epoch": 0.7, "learning_rate": 4.270642311350796e-06, "loss": 0.3587, "step": 3723 }, { "epoch": 0.7, "learning_rate": 4.2656280937337665e-06, "loss": 0.4449, "step": 3724 }, { "epoch": 0.7, "learning_rate": 4.260616023323464e-06, "loss": 0.5068, "step": 3725 }, { "epoch": 0.7, "learning_rate": 4.255606101996633e-06, "loss": 0.4196, "step": 3726 }, { "epoch": 0.7, "learning_rate": 4.250598331629215e-06, "loss": 0.549, "step": 3727 }, { "epoch": 0.7, "learning_rate": 4.245592714096342e-06, "loss": 0.493, "step": 3728 }, { "epoch": 0.7, "learning_rate": 4.240589251272342e-06, "loss": 0.5204, "step": 3729 }, { "epoch": 0.7, "learning_rate": 4.235587945030739e-06, "loss": 0.5819, "step": 3730 }, { "epoch": 0.7, "learning_rate": 4.230588797244246e-06, "loss": 0.4804, "step": 3731 }, { "epoch": 0.71, "learning_rate": 4.225591809784769e-06, "loss": 0.4089, "step": 3732 }, { "epoch": 0.71, "learning_rate": 4.220596984523404e-06, "loss": 0.578, "step": 3733 }, { "epoch": 0.71, "learning_rate": 4.215604323330435e-06, "loss": 0.3765, "step": 3734 }, { "epoch": 0.71, "learning_rate": 4.210613828075349e-06, "loss": 0.6591, "step": 3735 }, { "epoch": 0.71, "learning_rate": 4.2056255006268025e-06, "loss": 0.4925, "step": 3736 }, { "epoch": 0.71, "learning_rate": 4.200639342852648e-06, "loss": 0.5362, "step": 3737 }, { "epoch": 0.71, "learning_rate": 4.19565535661994e-06, "loss": 0.4369, "step": 3738 }, { "epoch": 0.71, "learning_rate": 4.1906735437948945e-06, "loss": 0.476, "step": 3739 }, { "epoch": 0.71, "learning_rate": 4.185693906242927e-06, "loss": 0.4103, "step": 3740 }, { "epoch": 0.71, "learning_rate": 4.180716445828645e-06, "loss": 0.4415, "step": 3741 }, { "epoch": 0.71, "learning_rate": 4.175741164415833e-06, "loss": 0.3573, "step": 3742 }, { "epoch": 0.71, "learning_rate": 4.17076806386745e-06, "loss": 0.4523, "step": 3743 }, { "epoch": 0.71, "learning_rate": 4.165797146045658e-06, "loss": 0.4847, "step": 3744 }, { "epoch": 0.71, "learning_rate": 4.160828412811791e-06, "loss": 0.4664, "step": 3745 }, { "epoch": 0.71, "learning_rate": 4.155861866026364e-06, "loss": 0.4131, "step": 3746 }, { "epoch": 0.71, "learning_rate": 4.150897507549076e-06, "loss": 0.4107, "step": 3747 }, { "epoch": 0.71, "learning_rate": 4.1459353392388065e-06, "loss": 0.4945, "step": 3748 }, { "epoch": 0.71, "learning_rate": 4.140975362953615e-06, "loss": 0.5322, "step": 3749 }, { "epoch": 0.71, "learning_rate": 4.136017580550739e-06, "loss": 0.3721, "step": 3750 }, { "epoch": 0.71, "learning_rate": 4.13106199388659e-06, "loss": 0.4273, "step": 3751 }, { "epoch": 0.71, "learning_rate": 4.1261086048167766e-06, "loss": 0.4735, "step": 3752 }, { "epoch": 0.71, "learning_rate": 4.121157415196057e-06, "loss": 0.4288, "step": 3753 }, { "epoch": 0.71, "learning_rate": 4.1162084268783795e-06, "loss": 0.3539, "step": 3754 }, { "epoch": 0.71, "learning_rate": 4.111261641716876e-06, "loss": 0.3638, "step": 3755 }, { "epoch": 0.71, "learning_rate": 4.106317061563846e-06, "loss": 0.2784, "step": 3756 }, { "epoch": 0.71, "learning_rate": 4.101374688270751e-06, "loss": 0.39, "step": 3757 }, { "epoch": 0.71, "learning_rate": 4.0964345236882476e-06, "loss": 0.5442, "step": 3758 }, { "epoch": 0.71, "learning_rate": 4.091496569666157e-06, "loss": 0.4436, "step": 3759 }, { "epoch": 0.71, "learning_rate": 4.086560828053459e-06, "loss": 0.511, "step": 3760 }, { "epoch": 0.71, "learning_rate": 4.08162730069833e-06, "loss": 0.4802, "step": 3761 }, { "epoch": 0.71, "learning_rate": 4.0766959894481e-06, "loss": 0.5404, "step": 3762 }, { "epoch": 0.71, "learning_rate": 4.0717668961492725e-06, "loss": 0.4397, "step": 3763 }, { "epoch": 0.71, "learning_rate": 4.066840022647525e-06, "loss": 0.4152, "step": 3764 }, { "epoch": 0.71, "learning_rate": 4.061915370787697e-06, "loss": 0.5366, "step": 3765 }, { "epoch": 0.71, "learning_rate": 4.056992942413801e-06, "loss": 0.3908, "step": 3766 }, { "epoch": 0.71, "learning_rate": 4.052072739369015e-06, "loss": 0.4283, "step": 3767 }, { "epoch": 0.71, "learning_rate": 4.0471547634956865e-06, "loss": 0.3916, "step": 3768 }, { "epoch": 0.71, "learning_rate": 4.04223901663532e-06, "loss": 0.5941, "step": 3769 }, { "epoch": 0.71, "learning_rate": 4.037325500628605e-06, "loss": 0.4506, "step": 3770 }, { "epoch": 0.71, "learning_rate": 4.0324142173153715e-06, "loss": 0.5095, "step": 3771 }, { "epoch": 0.71, "learning_rate": 4.027505168534625e-06, "loss": 0.4346, "step": 3772 }, { "epoch": 0.71, "learning_rate": 4.022598356124545e-06, "loss": 0.5739, "step": 3773 }, { "epoch": 0.71, "learning_rate": 4.017693781922453e-06, "loss": 0.3817, "step": 3774 }, { "epoch": 0.71, "learning_rate": 4.01279144776484e-06, "loss": 0.3951, "step": 3775 }, { "epoch": 0.71, "learning_rate": 4.007891355487368e-06, "loss": 0.3372, "step": 3776 }, { "epoch": 0.71, "learning_rate": 4.0029935069248494e-06, "loss": 0.522, "step": 3777 }, { "epoch": 0.71, "learning_rate": 3.998097903911258e-06, "loss": 0.3482, "step": 3778 }, { "epoch": 0.71, "learning_rate": 3.9932045482797285e-06, "loss": 0.4726, "step": 3779 }, { "epoch": 0.71, "learning_rate": 3.9883134418625535e-06, "loss": 0.4388, "step": 3780 }, { "epoch": 0.71, "learning_rate": 3.9834245864911824e-06, "loss": 0.4862, "step": 3781 }, { "epoch": 0.71, "learning_rate": 3.978537983996222e-06, "loss": 0.7028, "step": 3782 }, { "epoch": 0.71, "learning_rate": 3.973653636207437e-06, "loss": 0.6082, "step": 3783 }, { "epoch": 0.71, "learning_rate": 3.968771544953748e-06, "loss": 0.4589, "step": 3784 }, { "epoch": 0.72, "learning_rate": 3.963891712063227e-06, "loss": 0.5366, "step": 3785 }, { "epoch": 0.72, "learning_rate": 3.959014139363101e-06, "loss": 0.6103, "step": 3786 }, { "epoch": 0.72, "learning_rate": 3.954138828679762e-06, "loss": 0.4146, "step": 3787 }, { "epoch": 0.72, "learning_rate": 3.949265781838737e-06, "loss": 0.56, "step": 3788 }, { "epoch": 0.72, "learning_rate": 3.944395000664713e-06, "loss": 0.5134, "step": 3789 }, { "epoch": 0.72, "learning_rate": 3.939526486981534e-06, "loss": 0.4458, "step": 3790 }, { "epoch": 0.72, "learning_rate": 3.934660242612194e-06, "loss": 0.46, "step": 3791 }, { "epoch": 0.72, "learning_rate": 3.929796269378822e-06, "loss": 0.576, "step": 3792 }, { "epoch": 0.72, "learning_rate": 3.924934569102719e-06, "loss": 0.432, "step": 3793 }, { "epoch": 0.72, "learning_rate": 3.9200751436043225e-06, "loss": 0.4745, "step": 3794 }, { "epoch": 0.72, "learning_rate": 3.915217994703212e-06, "loss": 0.4973, "step": 3795 }, { "epoch": 0.72, "learning_rate": 3.910363124218132e-06, "loss": 0.465, "step": 3796 }, { "epoch": 0.72, "learning_rate": 3.905510533966959e-06, "loss": 0.6343, "step": 3797 }, { "epoch": 0.72, "learning_rate": 3.900660225766725e-06, "loss": 0.3217, "step": 3798 }, { "epoch": 0.72, "learning_rate": 3.8958122014336e-06, "loss": 0.4499, "step": 3799 }, { "epoch": 0.72, "learning_rate": 3.8909664627829034e-06, "loss": 0.4408, "step": 3800 }, { "epoch": 0.72, "learning_rate": 3.886123011629098e-06, "loss": 0.4612, "step": 3801 }, { "epoch": 0.72, "learning_rate": 3.881281849785789e-06, "loss": 0.4905, "step": 3802 }, { "epoch": 0.72, "learning_rate": 3.876442979065727e-06, "loss": 0.3577, "step": 3803 }, { "epoch": 0.72, "learning_rate": 3.871606401280801e-06, "loss": 0.587, "step": 3804 }, { "epoch": 0.72, "learning_rate": 3.866772118242044e-06, "loss": 0.5098, "step": 3805 }, { "epoch": 0.72, "learning_rate": 3.861940131759629e-06, "loss": 0.4247, "step": 3806 }, { "epoch": 0.72, "learning_rate": 3.857110443642864e-06, "loss": 0.5158, "step": 3807 }, { "epoch": 0.72, "learning_rate": 3.852283055700213e-06, "loss": 0.4944, "step": 3808 }, { "epoch": 0.72, "learning_rate": 3.847457969739254e-06, "loss": 0.5539, "step": 3809 }, { "epoch": 0.72, "learning_rate": 3.842635187566727e-06, "loss": 0.478, "step": 3810 }, { "epoch": 0.72, "learning_rate": 3.837814710988492e-06, "loss": 0.6128, "step": 3811 }, { "epoch": 0.72, "learning_rate": 3.832996541809555e-06, "loss": 0.5673, "step": 3812 }, { "epoch": 0.72, "learning_rate": 3.828180681834055e-06, "loss": 0.5087, "step": 3813 }, { "epoch": 0.72, "learning_rate": 3.823367132865266e-06, "loss": 0.5193, "step": 3814 }, { "epoch": 0.72, "learning_rate": 3.8185558967055965e-06, "loss": 0.5362, "step": 3815 }, { "epoch": 0.72, "learning_rate": 3.8137469751565912e-06, "loss": 0.5483, "step": 3816 }, { "epoch": 0.72, "learning_rate": 3.8089403700189254e-06, "loss": 0.3834, "step": 3817 }, { "epoch": 0.72, "learning_rate": 3.804136083092409e-06, "loss": 0.5153, "step": 3818 }, { "epoch": 0.72, "learning_rate": 3.7993341161759824e-06, "loss": 0.5049, "step": 3819 }, { "epoch": 0.72, "learning_rate": 3.794534471067718e-06, "loss": 0.5341, "step": 3820 }, { "epoch": 0.72, "learning_rate": 3.7897371495648163e-06, "loss": 0.5074, "step": 3821 }, { "epoch": 0.72, "learning_rate": 3.7849421534636187e-06, "loss": 0.483, "step": 3822 }, { "epoch": 0.72, "learning_rate": 3.7801494845595787e-06, "loss": 0.6113, "step": 3823 }, { "epoch": 0.72, "learning_rate": 3.7753591446472872e-06, "loss": 0.3917, "step": 3824 }, { "epoch": 0.72, "learning_rate": 3.770571135520472e-06, "loss": 0.578, "step": 3825 }, { "epoch": 0.72, "learning_rate": 3.76578545897197e-06, "loss": 0.4042, "step": 3826 }, { "epoch": 0.72, "learning_rate": 3.7610021167937526e-06, "loss": 0.5865, "step": 3827 }, { "epoch": 0.72, "learning_rate": 3.756221110776925e-06, "loss": 0.3881, "step": 3828 }, { "epoch": 0.72, "learning_rate": 3.7514424427117124e-06, "loss": 0.5404, "step": 3829 }, { "epoch": 0.72, "learning_rate": 3.7466661143874504e-06, "loss": 0.4527, "step": 3830 }, { "epoch": 0.72, "learning_rate": 3.7418921275926245e-06, "loss": 0.477, "step": 3831 }, { "epoch": 0.72, "learning_rate": 3.7371204841148233e-06, "loss": 0.5471, "step": 3832 }, { "epoch": 0.72, "learning_rate": 3.7323511857407666e-06, "loss": 0.4987, "step": 3833 }, { "epoch": 0.72, "learning_rate": 3.727584234256295e-06, "loss": 0.6179, "step": 3834 }, { "epoch": 0.72, "learning_rate": 3.7228196314463672e-06, "loss": 0.4221, "step": 3835 }, { "epoch": 0.72, "learning_rate": 3.718057379095067e-06, "loss": 0.5962, "step": 3836 }, { "epoch": 0.72, "learning_rate": 3.713297478985595e-06, "loss": 0.486, "step": 3837 }, { "epoch": 0.73, "learning_rate": 3.708539932900267e-06, "loss": 0.4185, "step": 3838 }, { "epoch": 0.73, "learning_rate": 3.703784742620534e-06, "loss": 0.4756, "step": 3839 }, { "epoch": 0.73, "learning_rate": 3.699031909926942e-06, "loss": 0.4113, "step": 3840 }, { "epoch": 0.73, "learning_rate": 3.6942814365991673e-06, "loss": 0.3678, "step": 3841 }, { "epoch": 0.73, "learning_rate": 3.689533324416006e-06, "loss": 0.4985, "step": 3842 }, { "epoch": 0.73, "learning_rate": 3.6847875751553643e-06, "loss": 0.456, "step": 3843 }, { "epoch": 0.73, "learning_rate": 3.680044190594254e-06, "loss": 0.4022, "step": 3844 }, { "epoch": 0.73, "learning_rate": 3.675303172508824e-06, "loss": 0.4013, "step": 3845 }, { "epoch": 0.73, "learning_rate": 3.6705645226743216e-06, "loss": 0.584, "step": 3846 }, { "epoch": 0.73, "learning_rate": 3.6658282428651026e-06, "loss": 0.4976, "step": 3847 }, { "epoch": 0.73, "learning_rate": 3.6610943348546524e-06, "loss": 0.4388, "step": 3848 }, { "epoch": 0.73, "learning_rate": 3.6563628004155548e-06, "loss": 0.515, "step": 3849 }, { "epoch": 0.73, "learning_rate": 3.651633641319511e-06, "loss": 0.6309, "step": 3850 }, { "epoch": 0.73, "learning_rate": 3.646906859337329e-06, "loss": 0.5026, "step": 3851 }, { "epoch": 0.73, "learning_rate": 3.6421824562389306e-06, "loss": 0.5728, "step": 3852 }, { "epoch": 0.73, "learning_rate": 3.6374604337933428e-06, "loss": 0.4941, "step": 3853 }, { "epoch": 0.73, "learning_rate": 3.6327407937687045e-06, "loss": 0.388, "step": 3854 }, { "epoch": 0.73, "learning_rate": 3.6280235379322593e-06, "loss": 0.7063, "step": 3855 }, { "epoch": 0.73, "learning_rate": 3.6233086680503593e-06, "loss": 0.3833, "step": 3856 }, { "epoch": 0.73, "learning_rate": 3.618596185888471e-06, "loss": 0.5553, "step": 3857 }, { "epoch": 0.73, "learning_rate": 3.613886093211152e-06, "loss": 0.4384, "step": 3858 }, { "epoch": 0.73, "learning_rate": 3.609178391782072e-06, "loss": 0.4815, "step": 3859 }, { "epoch": 0.73, "learning_rate": 3.604473083364016e-06, "loss": 0.3943, "step": 3860 }, { "epoch": 0.73, "learning_rate": 3.5997701697188526e-06, "loss": 0.4274, "step": 3861 }, { "epoch": 0.73, "learning_rate": 3.5950696526075646e-06, "loss": 0.4683, "step": 3862 }, { "epoch": 0.73, "learning_rate": 3.5903715337902444e-06, "loss": 0.4619, "step": 3863 }, { "epoch": 0.73, "learning_rate": 3.5856758150260783e-06, "loss": 0.425, "step": 3864 }, { "epoch": 0.73, "learning_rate": 3.5809824980733445e-06, "loss": 0.5336, "step": 3865 }, { "epoch": 0.73, "learning_rate": 3.576291584689442e-06, "loss": 0.4015, "step": 3866 }, { "epoch": 0.73, "learning_rate": 3.5716030766308553e-06, "loss": 0.4468, "step": 3867 }, { "epoch": 0.73, "learning_rate": 3.566916975653175e-06, "loss": 0.5315, "step": 3868 }, { "epoch": 0.73, "learning_rate": 3.5622332835110862e-06, "loss": 0.4179, "step": 3869 }, { "epoch": 0.73, "learning_rate": 3.5575520019583753e-06, "loss": 0.538, "step": 3870 }, { "epoch": 0.73, "learning_rate": 3.5528731327479227e-06, "loss": 0.4655, "step": 3871 }, { "epoch": 0.73, "learning_rate": 3.548196677631709e-06, "loss": 0.458, "step": 3872 }, { "epoch": 0.73, "learning_rate": 3.5435226383608058e-06, "loss": 0.5596, "step": 3873 }, { "epoch": 0.73, "learning_rate": 3.538851016685394e-06, "loss": 0.4399, "step": 3874 }, { "epoch": 0.73, "learning_rate": 3.5341818143547267e-06, "loss": 0.5176, "step": 3875 }, { "epoch": 0.73, "learning_rate": 3.529515033117166e-06, "loss": 0.418, "step": 3876 }, { "epoch": 0.73, "learning_rate": 3.5248506747201694e-06, "loss": 0.3836, "step": 3877 }, { "epoch": 0.73, "learning_rate": 3.5201887409102843e-06, "loss": 0.5511, "step": 3878 }, { "epoch": 0.73, "learning_rate": 3.515529233433136e-06, "loss": 0.6809, "step": 3879 }, { "epoch": 0.73, "learning_rate": 3.5108721540334643e-06, "loss": 0.4412, "step": 3880 }, { "epoch": 0.73, "learning_rate": 3.5062175044550917e-06, "loss": 0.4829, "step": 3881 }, { "epoch": 0.73, "learning_rate": 3.5015652864409142e-06, "loss": 0.4425, "step": 3882 }, { "epoch": 0.73, "learning_rate": 3.496915501732945e-06, "loss": 0.4752, "step": 3883 }, { "epoch": 0.73, "learning_rate": 3.4922681520722677e-06, "loss": 0.4303, "step": 3884 }, { "epoch": 0.73, "learning_rate": 3.4876232391990582e-06, "loss": 0.4997, "step": 3885 }, { "epoch": 0.73, "learning_rate": 3.4829807648525827e-06, "loss": 0.3038, "step": 3886 }, { "epoch": 0.73, "learning_rate": 3.4783407307711913e-06, "loss": 0.4452, "step": 3887 }, { "epoch": 0.73, "learning_rate": 3.4737031386923216e-06, "loss": 0.4766, "step": 3888 }, { "epoch": 0.73, "learning_rate": 3.469067990352496e-06, "loss": 0.4172, "step": 3889 }, { "epoch": 0.73, "learning_rate": 3.464435287487323e-06, "loss": 0.3605, "step": 3890 }, { "epoch": 0.74, "learning_rate": 3.459805031831496e-06, "loss": 0.524, "step": 3891 }, { "epoch": 0.74, "learning_rate": 3.45517722511879e-06, "loss": 0.4426, "step": 3892 }, { "epoch": 0.74, "learning_rate": 3.450551869082063e-06, "loss": 0.4629, "step": 3893 }, { "epoch": 0.74, "learning_rate": 3.4459289654532535e-06, "loss": 0.5318, "step": 3894 }, { "epoch": 0.74, "learning_rate": 3.4413085159633963e-06, "loss": 0.5196, "step": 3895 }, { "epoch": 0.74, "learning_rate": 3.4366905223425827e-06, "loss": 0.5, "step": 3896 }, { "epoch": 0.74, "learning_rate": 3.4320749863199987e-06, "loss": 0.3308, "step": 3897 }, { "epoch": 0.74, "learning_rate": 3.427461909623916e-06, "loss": 0.4668, "step": 3898 }, { "epoch": 0.74, "learning_rate": 3.422851293981676e-06, "loss": 0.3678, "step": 3899 }, { "epoch": 0.74, "learning_rate": 3.4182431411196924e-06, "loss": 0.422, "step": 3900 }, { "epoch": 0.74, "learning_rate": 3.413637452763473e-06, "loss": 0.3922, "step": 3901 }, { "epoch": 0.74, "learning_rate": 3.4090342306375932e-06, "loss": 0.5034, "step": 3902 }, { "epoch": 0.74, "learning_rate": 3.404433476465706e-06, "loss": 0.4181, "step": 3903 }, { "epoch": 0.74, "learning_rate": 3.3998351919705396e-06, "loss": 0.5289, "step": 3904 }, { "epoch": 0.74, "learning_rate": 3.395239378873899e-06, "loss": 0.3658, "step": 3905 }, { "epoch": 0.74, "learning_rate": 3.3906460388966633e-06, "loss": 0.4191, "step": 3906 }, { "epoch": 0.74, "learning_rate": 3.3860551737587857e-06, "loss": 0.6117, "step": 3907 }, { "epoch": 0.74, "learning_rate": 3.381466785179289e-06, "loss": 0.4797, "step": 3908 }, { "epoch": 0.74, "learning_rate": 3.3768808748762836e-06, "loss": 0.5778, "step": 3909 }, { "epoch": 0.74, "learning_rate": 3.372297444566929e-06, "loss": 0.4297, "step": 3910 }, { "epoch": 0.74, "learning_rate": 3.367716495967467e-06, "loss": 0.4823, "step": 3911 }, { "epoch": 0.74, "learning_rate": 3.3631380307932228e-06, "loss": 0.5823, "step": 3912 }, { "epoch": 0.74, "learning_rate": 3.358562050758568e-06, "loss": 0.5463, "step": 3913 }, { "epoch": 0.74, "learning_rate": 3.3539885575769572e-06, "loss": 0.4504, "step": 3914 }, { "epoch": 0.74, "learning_rate": 3.3494175529609163e-06, "loss": 0.4671, "step": 3915 }, { "epoch": 0.74, "learning_rate": 3.3448490386220355e-06, "loss": 0.6241, "step": 3916 }, { "epoch": 0.74, "learning_rate": 3.3402830162709644e-06, "loss": 0.4628, "step": 3917 }, { "epoch": 0.74, "learning_rate": 3.335719487617434e-06, "loss": 0.5251, "step": 3918 }, { "epoch": 0.74, "learning_rate": 3.3311584543702324e-06, "loss": 0.6705, "step": 3919 }, { "epoch": 0.74, "learning_rate": 3.3265999182372166e-06, "loss": 0.5785, "step": 3920 }, { "epoch": 0.74, "learning_rate": 3.3220438809253065e-06, "loss": 0.5323, "step": 3921 }, { "epoch": 0.74, "learning_rate": 3.317490344140488e-06, "loss": 0.4916, "step": 3922 }, { "epoch": 0.74, "learning_rate": 3.3129393095878094e-06, "loss": 0.5338, "step": 3923 }, { "epoch": 0.74, "learning_rate": 3.3083907789713822e-06, "loss": 0.4664, "step": 3924 }, { "epoch": 0.74, "learning_rate": 3.303844753994382e-06, "loss": 0.4136, "step": 3925 }, { "epoch": 0.74, "learning_rate": 3.2993012363590447e-06, "loss": 0.4656, "step": 3926 }, { "epoch": 0.74, "learning_rate": 3.2947602277666678e-06, "loss": 0.652, "step": 3927 }, { "epoch": 0.74, "learning_rate": 3.290221729917609e-06, "loss": 0.4904, "step": 3928 }, { "epoch": 0.74, "learning_rate": 3.285685744511281e-06, "loss": 0.5377, "step": 3929 }, { "epoch": 0.74, "learning_rate": 3.281152273246174e-06, "loss": 0.4793, "step": 3930 }, { "epoch": 0.74, "learning_rate": 3.2766213178198112e-06, "loss": 0.405, "step": 3931 }, { "epoch": 0.74, "learning_rate": 3.2720928799287865e-06, "loss": 0.5592, "step": 3932 }, { "epoch": 0.74, "learning_rate": 3.2675669612687565e-06, "loss": 0.5377, "step": 3933 }, { "epoch": 0.74, "learning_rate": 3.2630435635344283e-06, "loss": 0.4975, "step": 3934 }, { "epoch": 0.74, "learning_rate": 3.258522688419562e-06, "loss": 0.4816, "step": 3935 }, { "epoch": 0.74, "learning_rate": 3.2540043376169784e-06, "loss": 0.4347, "step": 3936 }, { "epoch": 0.74, "learning_rate": 3.2494885128185517e-06, "loss": 0.4417, "step": 3937 }, { "epoch": 0.74, "learning_rate": 3.244975215715208e-06, "loss": 0.4166, "step": 3938 }, { "epoch": 0.74, "learning_rate": 3.2404644479969294e-06, "loss": 0.575, "step": 3939 }, { "epoch": 0.74, "learning_rate": 3.2359562113527508e-06, "loss": 0.4471, "step": 3940 }, { "epoch": 0.74, "learning_rate": 3.2314505074707593e-06, "loss": 0.4033, "step": 3941 }, { "epoch": 0.74, "learning_rate": 3.226947338038091e-06, "loss": 0.4252, "step": 3942 }, { "epoch": 0.74, "learning_rate": 3.2224467047409335e-06, "loss": 0.3165, "step": 3943 }, { "epoch": 0.75, "learning_rate": 3.217948609264535e-06, "loss": 0.4165, "step": 3944 }, { "epoch": 0.75, "learning_rate": 3.2134530532931764e-06, "loss": 0.6771, "step": 3945 }, { "epoch": 0.75, "learning_rate": 3.208960038510195e-06, "loss": 0.4269, "step": 3946 }, { "epoch": 0.75, "learning_rate": 3.2044695665979865e-06, "loss": 0.5372, "step": 3947 }, { "epoch": 0.75, "learning_rate": 3.199981639237978e-06, "loss": 0.4498, "step": 3948 }, { "epoch": 0.75, "learning_rate": 3.1954962581106495e-06, "loss": 0.497, "step": 3949 }, { "epoch": 0.75, "learning_rate": 3.191013424895536e-06, "loss": 0.4927, "step": 3950 }, { "epoch": 0.75, "learning_rate": 3.186533141271213e-06, "loss": 0.4376, "step": 3951 }, { "epoch": 0.75, "learning_rate": 3.18205540891529e-06, "loss": 0.4356, "step": 3952 }, { "epoch": 0.75, "learning_rate": 3.17758022950444e-06, "loss": 0.4992, "step": 3953 }, { "epoch": 0.75, "learning_rate": 3.1731076047143704e-06, "loss": 0.3876, "step": 3954 }, { "epoch": 0.75, "learning_rate": 3.1686375362198307e-06, "loss": 0.524, "step": 3955 }, { "epoch": 0.75, "learning_rate": 3.164170025694617e-06, "loss": 0.4753, "step": 3956 }, { "epoch": 0.75, "learning_rate": 3.1597050748115655e-06, "loss": 0.3757, "step": 3957 }, { "epoch": 0.75, "learning_rate": 3.1552426852425555e-06, "loss": 0.5555, "step": 3958 }, { "epoch": 0.75, "learning_rate": 3.1507828586585053e-06, "loss": 0.4316, "step": 3959 }, { "epoch": 0.75, "learning_rate": 3.146325596729375e-06, "loss": 0.4768, "step": 3960 }, { "epoch": 0.75, "learning_rate": 3.141870901124164e-06, "loss": 0.5106, "step": 3961 }, { "epoch": 0.75, "learning_rate": 3.137418773510911e-06, "loss": 0.3733, "step": 3962 }, { "epoch": 0.75, "learning_rate": 3.1329692155566884e-06, "loss": 0.4776, "step": 3963 }, { "epoch": 0.75, "learning_rate": 3.1285222289276175e-06, "loss": 0.6169, "step": 3964 }, { "epoch": 0.75, "learning_rate": 3.1240778152888518e-06, "loss": 0.607, "step": 3965 }, { "epoch": 0.75, "learning_rate": 3.119635976304568e-06, "loss": 0.4655, "step": 3966 }, { "epoch": 0.75, "learning_rate": 3.115196713638e-06, "loss": 0.4431, "step": 3967 }, { "epoch": 0.75, "learning_rate": 3.1107600289514096e-06, "loss": 0.5519, "step": 3968 }, { "epoch": 0.75, "learning_rate": 3.1063259239060795e-06, "loss": 0.4935, "step": 3969 }, { "epoch": 0.75, "learning_rate": 3.1018944001623486e-06, "loss": 0.4343, "step": 3970 }, { "epoch": 0.75, "learning_rate": 3.097465459379574e-06, "loss": 0.4046, "step": 3971 }, { "epoch": 0.75, "learning_rate": 3.093039103216152e-06, "loss": 0.4091, "step": 3972 }, { "epoch": 0.75, "learning_rate": 3.088615333329509e-06, "loss": 0.5641, "step": 3973 }, { "epoch": 0.75, "learning_rate": 3.084194151376102e-06, "loss": 0.4844, "step": 3974 }, { "epoch": 0.75, "learning_rate": 3.079775559011422e-06, "loss": 0.4174, "step": 3975 }, { "epoch": 0.75, "learning_rate": 3.0753595578899885e-06, "loss": 0.5111, "step": 3976 }, { "epoch": 0.75, "learning_rate": 3.0709461496653504e-06, "loss": 0.4386, "step": 3977 }, { "epoch": 0.75, "learning_rate": 3.0665353359900864e-06, "loss": 0.4519, "step": 3978 }, { "epoch": 0.75, "learning_rate": 3.0621271185158043e-06, "loss": 0.477, "step": 3979 }, { "epoch": 0.75, "learning_rate": 3.0577214988931368e-06, "loss": 0.3561, "step": 3980 }, { "epoch": 0.75, "learning_rate": 3.0533184787717452e-06, "loss": 0.4431, "step": 3981 }, { "epoch": 0.75, "learning_rate": 3.0489180598003264e-06, "loss": 0.5416, "step": 3982 }, { "epoch": 0.75, "learning_rate": 3.0445202436265864e-06, "loss": 0.4741, "step": 3983 }, { "epoch": 0.75, "learning_rate": 3.0401250318972643e-06, "loss": 0.4461, "step": 3984 }, { "epoch": 0.75, "learning_rate": 3.035732426258132e-06, "loss": 0.4903, "step": 3985 }, { "epoch": 0.75, "learning_rate": 3.0313424283539796e-06, "loss": 0.3884, "step": 3986 }, { "epoch": 0.75, "learning_rate": 3.0269550398286096e-06, "loss": 0.5311, "step": 3987 }, { "epoch": 0.75, "learning_rate": 3.0225702623248655e-06, "loss": 0.3829, "step": 3988 }, { "epoch": 0.75, "learning_rate": 3.0181880974846067e-06, "loss": 0.4991, "step": 3989 }, { "epoch": 0.75, "learning_rate": 3.0138085469487033e-06, "loss": 0.4277, "step": 3990 }, { "epoch": 0.75, "learning_rate": 3.0094316123570653e-06, "loss": 0.3977, "step": 3991 }, { "epoch": 0.75, "learning_rate": 3.0050572953486113e-06, "loss": 0.5415, "step": 3992 }, { "epoch": 0.75, "learning_rate": 3.00068559756128e-06, "loss": 0.4422, "step": 3993 }, { "epoch": 0.75, "learning_rate": 2.996316520632034e-06, "loss": 0.4658, "step": 3994 }, { "epoch": 0.75, "learning_rate": 2.9919500661968494e-06, "loss": 0.5025, "step": 3995 }, { "epoch": 0.75, "learning_rate": 2.9875862358907302e-06, "loss": 0.4415, "step": 3996 }, { "epoch": 0.76, "learning_rate": 2.983225031347683e-06, "loss": 0.3975, "step": 3997 }, { "epoch": 0.76, "learning_rate": 2.978866454200737e-06, "loss": 0.4308, "step": 3998 }, { "epoch": 0.76, "learning_rate": 2.9745105060819524e-06, "loss": 0.3831, "step": 3999 }, { "epoch": 0.76, "learning_rate": 2.97015718862238e-06, "loss": 0.499, "step": 4000 }, { "epoch": 0.76, "learning_rate": 2.965806503452098e-06, "loss": 0.5143, "step": 4001 }, { "epoch": 0.76, "learning_rate": 2.9614584522002055e-06, "loss": 0.4695, "step": 4002 }, { "epoch": 0.76, "learning_rate": 2.9571130364948077e-06, "loss": 0.467, "step": 4003 }, { "epoch": 0.76, "learning_rate": 2.9527702579630145e-06, "loss": 0.4985, "step": 4004 }, { "epoch": 0.76, "learning_rate": 2.948430118230967e-06, "loss": 0.52, "step": 4005 }, { "epoch": 0.76, "learning_rate": 2.9440926189238063e-06, "loss": 0.5338, "step": 4006 }, { "epoch": 0.76, "learning_rate": 2.939757761665686e-06, "loss": 0.3799, "step": 4007 }, { "epoch": 0.76, "learning_rate": 2.9354255480797713e-06, "loss": 0.4223, "step": 4008 }, { "epoch": 0.76, "learning_rate": 2.93109597978824e-06, "loss": 0.4877, "step": 4009 }, { "epoch": 0.76, "learning_rate": 2.9267690584122745e-06, "loss": 0.4866, "step": 4010 }, { "epoch": 0.76, "learning_rate": 2.9224447855720707e-06, "loss": 0.528, "step": 4011 }, { "epoch": 0.76, "learning_rate": 2.91812316288683e-06, "loss": 0.4669, "step": 4012 }, { "epoch": 0.76, "learning_rate": 2.9138041919747617e-06, "loss": 0.4451, "step": 4013 }, { "epoch": 0.76, "learning_rate": 2.909487874453084e-06, "loss": 0.5917, "step": 4014 }, { "epoch": 0.76, "learning_rate": 2.90517421193802e-06, "loss": 0.3269, "step": 4015 }, { "epoch": 0.76, "learning_rate": 2.900863206044795e-06, "loss": 0.4495, "step": 4016 }, { "epoch": 0.76, "learning_rate": 2.8965548583876534e-06, "loss": 0.5395, "step": 4017 }, { "epoch": 0.76, "learning_rate": 2.892249170579826e-06, "loss": 0.5437, "step": 4018 }, { "epoch": 0.76, "learning_rate": 2.8879461442335545e-06, "loss": 0.5069, "step": 4019 }, { "epoch": 0.76, "learning_rate": 2.8836457809600914e-06, "loss": 0.4299, "step": 4020 }, { "epoch": 0.76, "learning_rate": 2.8793480823696874e-06, "loss": 0.5303, "step": 4021 }, { "epoch": 0.76, "learning_rate": 2.8750530500715836e-06, "loss": 0.5008, "step": 4022 }, { "epoch": 0.76, "learning_rate": 2.8707606856740435e-06, "loss": 0.4259, "step": 4023 }, { "epoch": 0.76, "learning_rate": 2.866470990784321e-06, "loss": 0.5439, "step": 4024 }, { "epoch": 0.76, "learning_rate": 2.8621839670086616e-06, "loss": 0.3737, "step": 4025 }, { "epoch": 0.76, "learning_rate": 2.85789961595233e-06, "loss": 0.6321, "step": 4026 }, { "epoch": 0.76, "learning_rate": 2.853617939219574e-06, "loss": 0.4247, "step": 4027 }, { "epoch": 0.76, "learning_rate": 2.8493389384136494e-06, "loss": 0.3738, "step": 4028 }, { "epoch": 0.76, "learning_rate": 2.8450626151368044e-06, "loss": 0.4416, "step": 4029 }, { "epoch": 0.76, "learning_rate": 2.8407889709902836e-06, "loss": 0.4914, "step": 4030 }, { "epoch": 0.76, "learning_rate": 2.8365180075743413e-06, "loss": 0.4859, "step": 4031 }, { "epoch": 0.76, "learning_rate": 2.83224972648821e-06, "loss": 0.4401, "step": 4032 }, { "epoch": 0.76, "learning_rate": 2.8279841293301245e-06, "loss": 0.437, "step": 4033 }, { "epoch": 0.76, "learning_rate": 2.823721217697326e-06, "loss": 0.3967, "step": 4034 }, { "epoch": 0.76, "learning_rate": 2.819460993186032e-06, "loss": 0.3884, "step": 4035 }, { "epoch": 0.76, "learning_rate": 2.8152034573914613e-06, "loss": 0.4695, "step": 4036 }, { "epoch": 0.76, "learning_rate": 2.810948611907832e-06, "loss": 0.5464, "step": 4037 }, { "epoch": 0.76, "learning_rate": 2.8066964583283517e-06, "loss": 0.567, "step": 4038 }, { "epoch": 0.76, "learning_rate": 2.8024469982452072e-06, "loss": 0.4235, "step": 4039 }, { "epoch": 0.76, "learning_rate": 2.7982002332495974e-06, "loss": 0.4406, "step": 4040 }, { "epoch": 0.76, "learning_rate": 2.793956164931698e-06, "loss": 0.4721, "step": 4041 }, { "epoch": 0.76, "learning_rate": 2.7897147948806803e-06, "loss": 0.3203, "step": 4042 }, { "epoch": 0.76, "learning_rate": 2.785476124684704e-06, "loss": 0.5424, "step": 4043 }, { "epoch": 0.76, "learning_rate": 2.7812401559309165e-06, "loss": 0.4231, "step": 4044 }, { "epoch": 0.76, "learning_rate": 2.777006890205456e-06, "loss": 0.5175, "step": 4045 }, { "epoch": 0.76, "learning_rate": 2.772776329093447e-06, "loss": 0.6944, "step": 4046 }, { "epoch": 0.76, "learning_rate": 2.7685484741790023e-06, "loss": 0.4056, "step": 4047 }, { "epoch": 0.76, "learning_rate": 2.764323327045221e-06, "loss": 0.5775, "step": 4048 }, { "epoch": 0.76, "learning_rate": 2.7601008892741877e-06, "loss": 0.4805, "step": 4049 }, { "epoch": 0.77, "learning_rate": 2.755881162446974e-06, "loss": 0.5077, "step": 4050 }, { "epoch": 0.77, "learning_rate": 2.751664148143631e-06, "loss": 0.4957, "step": 4051 }, { "epoch": 0.77, "learning_rate": 2.7474498479432087e-06, "loss": 0.5301, "step": 4052 }, { "epoch": 0.77, "learning_rate": 2.743238263423721e-06, "loss": 0.5686, "step": 4053 }, { "epoch": 0.77, "learning_rate": 2.739029396162174e-06, "loss": 0.5944, "step": 4054 }, { "epoch": 0.77, "learning_rate": 2.734823247734567e-06, "loss": 0.3937, "step": 4055 }, { "epoch": 0.77, "learning_rate": 2.7306198197158616e-06, "loss": 0.3549, "step": 4056 }, { "epoch": 0.77, "learning_rate": 2.7264191136800112e-06, "loss": 0.5628, "step": 4057 }, { "epoch": 0.77, "learning_rate": 2.7222211311999536e-06, "loss": 0.5884, "step": 4058 }, { "epoch": 0.77, "learning_rate": 2.7180258738475994e-06, "loss": 0.5179, "step": 4059 }, { "epoch": 0.77, "learning_rate": 2.7138333431938423e-06, "loss": 0.3981, "step": 4060 }, { "epoch": 0.77, "learning_rate": 2.709643540808554e-06, "loss": 0.5186, "step": 4061 }, { "epoch": 0.77, "learning_rate": 2.7054564682605857e-06, "loss": 0.5683, "step": 4062 }, { "epoch": 0.77, "learning_rate": 2.7012721271177643e-06, "loss": 0.4678, "step": 4063 }, { "epoch": 0.77, "learning_rate": 2.6970905189468965e-06, "loss": 0.5296, "step": 4064 }, { "epoch": 0.77, "learning_rate": 2.6929116453137637e-06, "loss": 0.6048, "step": 4065 }, { "epoch": 0.77, "learning_rate": 2.6887355077831247e-06, "loss": 0.5011, "step": 4066 }, { "epoch": 0.77, "learning_rate": 2.6845621079187122e-06, "loss": 0.6209, "step": 4067 }, { "epoch": 0.77, "learning_rate": 2.6803914472832328e-06, "loss": 0.3989, "step": 4068 }, { "epoch": 0.77, "learning_rate": 2.6762235274383775e-06, "loss": 0.43, "step": 4069 }, { "epoch": 0.77, "learning_rate": 2.672058349944795e-06, "loss": 0.3818, "step": 4070 }, { "epoch": 0.77, "learning_rate": 2.667895916362114e-06, "loss": 0.6568, "step": 4071 }, { "epoch": 0.77, "learning_rate": 2.6637362282489444e-06, "loss": 0.4175, "step": 4072 }, { "epoch": 0.77, "learning_rate": 2.659579287162859e-06, "loss": 0.5332, "step": 4073 }, { "epoch": 0.77, "learning_rate": 2.6554250946603954e-06, "loss": 0.3919, "step": 4074 }, { "epoch": 0.77, "learning_rate": 2.6512736522970782e-06, "loss": 0.4652, "step": 4075 }, { "epoch": 0.77, "learning_rate": 2.647124961627394e-06, "loss": 0.4125, "step": 4076 }, { "epoch": 0.77, "learning_rate": 2.6429790242047927e-06, "loss": 0.52, "step": 4077 }, { "epoch": 0.77, "learning_rate": 2.638835841581705e-06, "loss": 0.4526, "step": 4078 }, { "epoch": 0.77, "learning_rate": 2.634695415309524e-06, "loss": 0.544, "step": 4079 }, { "epoch": 0.77, "learning_rate": 2.6305577469386113e-06, "loss": 0.4686, "step": 4080 }, { "epoch": 0.77, "learning_rate": 2.6264228380182955e-06, "loss": 0.458, "step": 4081 }, { "epoch": 0.77, "learning_rate": 2.6222906900968724e-06, "loss": 0.3937, "step": 4082 }, { "epoch": 0.77, "learning_rate": 2.6181613047216047e-06, "loss": 0.5559, "step": 4083 }, { "epoch": 0.77, "learning_rate": 2.61403468343872e-06, "loss": 0.462, "step": 4084 }, { "epoch": 0.77, "learning_rate": 2.6099108277934105e-06, "loss": 0.4818, "step": 4085 }, { "epoch": 0.77, "learning_rate": 2.6057897393298328e-06, "loss": 0.566, "step": 4086 }, { "epoch": 0.77, "learning_rate": 2.6016714195911085e-06, "loss": 0.4388, "step": 4087 }, { "epoch": 0.77, "learning_rate": 2.5975558701193183e-06, "loss": 0.3881, "step": 4088 }, { "epoch": 0.77, "learning_rate": 2.5934430924555153e-06, "loss": 0.5095, "step": 4089 }, { "epoch": 0.77, "learning_rate": 2.5893330881397084e-06, "loss": 0.5064, "step": 4090 }, { "epoch": 0.77, "learning_rate": 2.5852258587108582e-06, "loss": 0.3859, "step": 4091 }, { "epoch": 0.77, "learning_rate": 2.5811214057069068e-06, "loss": 0.555, "step": 4092 }, { "epoch": 0.77, "learning_rate": 2.5770197306647416e-06, "loss": 0.3634, "step": 4093 }, { "epoch": 0.77, "learning_rate": 2.572920835120214e-06, "loss": 0.6211, "step": 4094 }, { "epoch": 0.77, "learning_rate": 2.5688247206081364e-06, "loss": 0.6196, "step": 4095 }, { "epoch": 0.77, "learning_rate": 2.564731388662276e-06, "loss": 0.5079, "step": 4096 }, { "epoch": 0.77, "learning_rate": 2.560640840815363e-06, "loss": 0.4313, "step": 4097 }, { "epoch": 0.77, "learning_rate": 2.5565530785990798e-06, "loss": 0.456, "step": 4098 }, { "epoch": 0.77, "learning_rate": 2.55246810354407e-06, "loss": 0.463, "step": 4099 }, { "epoch": 0.77, "learning_rate": 2.548385917179933e-06, "loss": 0.3461, "step": 4100 }, { "epoch": 0.77, "learning_rate": 2.5443065210352202e-06, "loss": 0.4827, "step": 4101 }, { "epoch": 0.77, "learning_rate": 2.5402299166374435e-06, "loss": 0.323, "step": 4102 }, { "epoch": 0.78, "learning_rate": 2.5361561055130625e-06, "loss": 0.3566, "step": 4103 }, { "epoch": 0.78, "learning_rate": 2.532085089187505e-06, "loss": 0.5247, "step": 4104 }, { "epoch": 0.78, "learning_rate": 2.5280168691851325e-06, "loss": 0.3731, "step": 4105 }, { "epoch": 0.78, "learning_rate": 2.52395144702927e-06, "loss": 0.569, "step": 4106 }, { "epoch": 0.78, "learning_rate": 2.5198888242422014e-06, "loss": 0.503, "step": 4107 }, { "epoch": 0.78, "learning_rate": 2.515829002345155e-06, "loss": 0.3921, "step": 4108 }, { "epoch": 0.78, "learning_rate": 2.511771982858301e-06, "loss": 0.2602, "step": 4109 }, { "epoch": 0.78, "learning_rate": 2.507717767300778e-06, "loss": 0.4531, "step": 4110 }, { "epoch": 0.78, "learning_rate": 2.5036663571906693e-06, "loss": 0.5123, "step": 4111 }, { "epoch": 0.78, "learning_rate": 2.4996177540449952e-06, "loss": 0.477, "step": 4112 }, { "epoch": 0.78, "learning_rate": 2.495571959379742e-06, "loss": 0.424, "step": 4113 }, { "epoch": 0.78, "learning_rate": 2.4915289747098357e-06, "loss": 0.5817, "step": 4114 }, { "epoch": 0.78, "learning_rate": 2.4874888015491516e-06, "loss": 0.3698, "step": 4115 }, { "epoch": 0.78, "learning_rate": 2.4834514414105115e-06, "loss": 0.4479, "step": 4116 }, { "epoch": 0.78, "learning_rate": 2.4794168958056854e-06, "loss": 0.3534, "step": 4117 }, { "epoch": 0.78, "learning_rate": 2.475385166245388e-06, "loss": 0.4428, "step": 4118 }, { "epoch": 0.78, "learning_rate": 2.47135625423928e-06, "loss": 0.5796, "step": 4119 }, { "epoch": 0.78, "learning_rate": 2.4673301612959653e-06, "loss": 0.5936, "step": 4120 }, { "epoch": 0.78, "learning_rate": 2.4633068889230027e-06, "loss": 0.4907, "step": 4121 }, { "epoch": 0.78, "learning_rate": 2.459286438626877e-06, "loss": 0.4203, "step": 4122 }, { "epoch": 0.78, "learning_rate": 2.4552688119130254e-06, "loss": 0.5458, "step": 4123 }, { "epoch": 0.78, "learning_rate": 2.4512540102858338e-06, "loss": 0.4572, "step": 4124 }, { "epoch": 0.78, "learning_rate": 2.447242035248626e-06, "loss": 0.5093, "step": 4125 }, { "epoch": 0.78, "learning_rate": 2.443232888303655e-06, "loss": 0.4557, "step": 4126 }, { "epoch": 0.78, "learning_rate": 2.439226570952137e-06, "loss": 0.4032, "step": 4127 }, { "epoch": 0.78, "learning_rate": 2.4352230846942116e-06, "loss": 0.4734, "step": 4128 }, { "epoch": 0.78, "learning_rate": 2.4312224310289654e-06, "loss": 0.446, "step": 4129 }, { "epoch": 0.78, "learning_rate": 2.427224611454424e-06, "loss": 0.4982, "step": 4130 }, { "epoch": 0.78, "learning_rate": 2.4232296274675503e-06, "loss": 0.42, "step": 4131 }, { "epoch": 0.78, "learning_rate": 2.4192374805642447e-06, "loss": 0.3976, "step": 4132 }, { "epoch": 0.78, "learning_rate": 2.4152481722393482e-06, "loss": 0.3924, "step": 4133 }, { "epoch": 0.78, "learning_rate": 2.411261703986636e-06, "loss": 0.4225, "step": 4134 }, { "epoch": 0.78, "learning_rate": 2.4072780772988224e-06, "loss": 0.3734, "step": 4135 }, { "epoch": 0.78, "learning_rate": 2.403297293667555e-06, "loss": 0.3684, "step": 4136 }, { "epoch": 0.78, "learning_rate": 2.3993193545834182e-06, "loss": 0.4606, "step": 4137 }, { "epoch": 0.78, "learning_rate": 2.39534426153593e-06, "loss": 0.494, "step": 4138 }, { "epoch": 0.78, "learning_rate": 2.391372016013551e-06, "loss": 0.6671, "step": 4139 }, { "epoch": 0.78, "learning_rate": 2.3874026195036615e-06, "loss": 0.4033, "step": 4140 }, { "epoch": 0.78, "learning_rate": 2.38343607349258e-06, "loss": 0.4289, "step": 4141 }, { "epoch": 0.78, "learning_rate": 2.3794723794655705e-06, "loss": 0.5231, "step": 4142 }, { "epoch": 0.78, "learning_rate": 2.3755115389068086e-06, "loss": 0.4162, "step": 4143 }, { "epoch": 0.78, "learning_rate": 2.3715535532994103e-06, "loss": 0.3402, "step": 4144 }, { "epoch": 0.78, "learning_rate": 2.3675984241254314e-06, "loss": 0.4007, "step": 4145 }, { "epoch": 0.78, "learning_rate": 2.363646152865847e-06, "loss": 0.588, "step": 4146 }, { "epoch": 0.78, "learning_rate": 2.35969674100056e-06, "loss": 0.341, "step": 4147 }, { "epoch": 0.78, "learning_rate": 2.3557501900084143e-06, "loss": 0.3958, "step": 4148 }, { "epoch": 0.78, "learning_rate": 2.351806501367173e-06, "loss": 0.3805, "step": 4149 }, { "epoch": 0.78, "learning_rate": 2.3478656765535322e-06, "loss": 0.4854, "step": 4150 }, { "epoch": 0.78, "learning_rate": 2.343927717043113e-06, "loss": 0.4493, "step": 4151 }, { "epoch": 0.78, "learning_rate": 2.3399926243104642e-06, "loss": 0.5169, "step": 4152 }, { "epoch": 0.78, "learning_rate": 2.336060399829061e-06, "loss": 0.3678, "step": 4153 }, { "epoch": 0.78, "learning_rate": 2.3321310450713066e-06, "loss": 0.5847, "step": 4154 }, { "epoch": 0.78, "learning_rate": 2.3282045615085246e-06, "loss": 0.4593, "step": 4155 }, { "epoch": 0.79, "learning_rate": 2.3242809506109755e-06, "loss": 0.4679, "step": 4156 }, { "epoch": 0.79, "learning_rate": 2.3203602138478264e-06, "loss": 0.4131, "step": 4157 }, { "epoch": 0.79, "learning_rate": 2.3164423526871795e-06, "loss": 0.4316, "step": 4158 }, { "epoch": 0.79, "learning_rate": 2.3125273685960614e-06, "loss": 0.4641, "step": 4159 }, { "epoch": 0.79, "learning_rate": 2.30861526304042e-06, "loss": 0.4391, "step": 4160 }, { "epoch": 0.79, "learning_rate": 2.304706037485114e-06, "loss": 0.4048, "step": 4161 }, { "epoch": 0.79, "learning_rate": 2.3007996933939424e-06, "loss": 0.4374, "step": 4162 }, { "epoch": 0.79, "learning_rate": 2.296896232229616e-06, "loss": 0.421, "step": 4163 }, { "epoch": 0.79, "learning_rate": 2.292995655453758e-06, "loss": 0.4362, "step": 4164 }, { "epoch": 0.79, "learning_rate": 2.2890979645269274e-06, "loss": 0.4025, "step": 4165 }, { "epoch": 0.79, "learning_rate": 2.285203160908592e-06, "loss": 0.4894, "step": 4166 }, { "epoch": 0.79, "learning_rate": 2.281311246057143e-06, "loss": 0.4725, "step": 4167 }, { "epoch": 0.79, "learning_rate": 2.277422221429887e-06, "loss": 0.4159, "step": 4168 }, { "epoch": 0.79, "learning_rate": 2.2735360884830506e-06, "loss": 0.5743, "step": 4169 }, { "epoch": 0.79, "learning_rate": 2.269652848671775e-06, "loss": 0.4866, "step": 4170 }, { "epoch": 0.79, "learning_rate": 2.265772503450122e-06, "loss": 0.3624, "step": 4171 }, { "epoch": 0.79, "learning_rate": 2.261895054271066e-06, "loss": 0.4548, "step": 4172 }, { "epoch": 0.79, "learning_rate": 2.2580205025864965e-06, "loss": 0.4654, "step": 4173 }, { "epoch": 0.79, "learning_rate": 2.2541488498472276e-06, "loss": 0.4578, "step": 4174 }, { "epoch": 0.79, "learning_rate": 2.250280097502973e-06, "loss": 0.4985, "step": 4175 }, { "epoch": 0.79, "learning_rate": 2.2464142470023644e-06, "loss": 0.4496, "step": 4176 }, { "epoch": 0.79, "learning_rate": 2.242551299792962e-06, "loss": 0.4837, "step": 4177 }, { "epoch": 0.79, "learning_rate": 2.238691257321217e-06, "loss": 0.5034, "step": 4178 }, { "epoch": 0.79, "learning_rate": 2.234834121032503e-06, "loss": 0.4488, "step": 4179 }, { "epoch": 0.79, "learning_rate": 2.230979892371111e-06, "loss": 0.4991, "step": 4180 }, { "epoch": 0.79, "learning_rate": 2.2271285727802373e-06, "loss": 0.4707, "step": 4181 }, { "epoch": 0.79, "learning_rate": 2.2232801637019808e-06, "loss": 0.2763, "step": 4182 }, { "epoch": 0.79, "learning_rate": 2.219434666577368e-06, "loss": 0.4725, "step": 4183 }, { "epoch": 0.79, "learning_rate": 2.2155920828463217e-06, "loss": 0.5044, "step": 4184 }, { "epoch": 0.79, "learning_rate": 2.2117524139476797e-06, "loss": 0.4263, "step": 4185 }, { "epoch": 0.79, "learning_rate": 2.2079156613191866e-06, "loss": 0.4112, "step": 4186 }, { "epoch": 0.79, "learning_rate": 2.204081826397494e-06, "loss": 0.4848, "step": 4187 }, { "epoch": 0.79, "learning_rate": 2.2002509106181625e-06, "loss": 0.3832, "step": 4188 }, { "epoch": 0.79, "learning_rate": 2.196422915415659e-06, "loss": 0.3419, "step": 4189 }, { "epoch": 0.79, "learning_rate": 2.1925978422233562e-06, "loss": 0.4167, "step": 4190 }, { "epoch": 0.79, "learning_rate": 2.1887756924735393e-06, "loss": 0.5241, "step": 4191 }, { "epoch": 0.79, "learning_rate": 2.1849564675973857e-06, "loss": 0.5519, "step": 4192 }, { "epoch": 0.79, "learning_rate": 2.1811401690249846e-06, "loss": 0.4463, "step": 4193 }, { "epoch": 0.79, "learning_rate": 2.1773267981853362e-06, "loss": 0.3482, "step": 4194 }, { "epoch": 0.79, "learning_rate": 2.173516356506339e-06, "loss": 0.4619, "step": 4195 }, { "epoch": 0.79, "learning_rate": 2.169708845414782e-06, "loss": 0.5241, "step": 4196 }, { "epoch": 0.79, "learning_rate": 2.1659042663363795e-06, "loss": 0.4797, "step": 4197 }, { "epoch": 0.79, "learning_rate": 2.1621026206957374e-06, "loss": 0.3778, "step": 4198 }, { "epoch": 0.79, "learning_rate": 2.1583039099163527e-06, "loss": 0.5337, "step": 4199 }, { "epoch": 0.79, "learning_rate": 2.154508135420642e-06, "loss": 0.4348, "step": 4200 }, { "epoch": 0.79, "learning_rate": 2.150715298629913e-06, "loss": 0.4047, "step": 4201 }, { "epoch": 0.79, "learning_rate": 2.1469254009643724e-06, "loss": 0.3876, "step": 4202 }, { "epoch": 0.79, "learning_rate": 2.1431384438431303e-06, "loss": 0.4484, "step": 4203 }, { "epoch": 0.79, "learning_rate": 2.1393544286841918e-06, "loss": 0.587, "step": 4204 }, { "epoch": 0.79, "learning_rate": 2.1355733569044633e-06, "loss": 0.3627, "step": 4205 }, { "epoch": 0.79, "learning_rate": 2.1317952299197486e-06, "loss": 0.4068, "step": 4206 }, { "epoch": 0.79, "learning_rate": 2.1280200491447465e-06, "loss": 0.4411, "step": 4207 }, { "epoch": 0.8, "learning_rate": 2.124247815993057e-06, "loss": 0.4256, "step": 4208 }, { "epoch": 0.8, "learning_rate": 2.1204785318771717e-06, "loss": 0.4039, "step": 4209 }, { "epoch": 0.8, "learning_rate": 2.1167121982084815e-06, "loss": 0.4994, "step": 4210 }, { "epoch": 0.8, "learning_rate": 2.1129488163972667e-06, "loss": 0.6369, "step": 4211 }, { "epoch": 0.8, "learning_rate": 2.1091883878527174e-06, "loss": 0.4348, "step": 4212 }, { "epoch": 0.8, "learning_rate": 2.1054309139828945e-06, "loss": 0.4072, "step": 4213 }, { "epoch": 0.8, "learning_rate": 2.1016763961947728e-06, "loss": 0.4269, "step": 4214 }, { "epoch": 0.8, "learning_rate": 2.0979248358942117e-06, "loss": 0.4938, "step": 4215 }, { "epoch": 0.8, "learning_rate": 2.094176234485963e-06, "loss": 0.6142, "step": 4216 }, { "epoch": 0.8, "learning_rate": 2.0904305933736714e-06, "loss": 0.3099, "step": 4217 }, { "epoch": 0.8, "learning_rate": 2.0866879139598738e-06, "loss": 0.4814, "step": 4218 }, { "epoch": 0.8, "learning_rate": 2.082948197645999e-06, "loss": 0.427, "step": 4219 }, { "epoch": 0.8, "learning_rate": 2.079211445832362e-06, "loss": 0.3811, "step": 4220 }, { "epoch": 0.8, "learning_rate": 2.075477659918174e-06, "loss": 0.4214, "step": 4221 }, { "epoch": 0.8, "learning_rate": 2.0717468413015285e-06, "loss": 0.3537, "step": 4222 }, { "epoch": 0.8, "learning_rate": 2.0680189913794156e-06, "loss": 0.5867, "step": 4223 }, { "epoch": 0.8, "learning_rate": 2.0642941115477087e-06, "loss": 0.4768, "step": 4224 }, { "epoch": 0.8, "learning_rate": 2.0605722032011664e-06, "loss": 0.3119, "step": 4225 }, { "epoch": 0.8, "learning_rate": 2.0568532677334485e-06, "loss": 0.2748, "step": 4226 }, { "epoch": 0.8, "learning_rate": 2.053137306537082e-06, "loss": 0.4182, "step": 4227 }, { "epoch": 0.8, "learning_rate": 2.0494243210034905e-06, "loss": 0.4885, "step": 4228 }, { "epoch": 0.8, "learning_rate": 2.0457143125229907e-06, "loss": 0.3746, "step": 4229 }, { "epoch": 0.8, "learning_rate": 2.0420072824847693e-06, "loss": 0.4857, "step": 4230 }, { "epoch": 0.8, "learning_rate": 2.0383032322769047e-06, "loss": 0.5273, "step": 4231 }, { "epoch": 0.8, "learning_rate": 2.0346021632863643e-06, "loss": 0.4093, "step": 4232 }, { "epoch": 0.8, "learning_rate": 2.030904076898996e-06, "loss": 0.497, "step": 4233 }, { "epoch": 0.8, "learning_rate": 2.0272089744995216e-06, "loss": 0.4741, "step": 4234 }, { "epoch": 0.8, "learning_rate": 2.0235168574715604e-06, "loss": 0.4934, "step": 4235 }, { "epoch": 0.8, "learning_rate": 2.019827727197605e-06, "loss": 0.5458, "step": 4236 }, { "epoch": 0.8, "learning_rate": 2.0161415850590327e-06, "loss": 0.5033, "step": 4237 }, { "epoch": 0.8, "learning_rate": 2.0124584324360998e-06, "loss": 0.6008, "step": 4238 }, { "epoch": 0.8, "learning_rate": 2.008778270707944e-06, "loss": 0.4388, "step": 4239 }, { "epoch": 0.8, "learning_rate": 2.0051011012525845e-06, "loss": 0.5125, "step": 4240 }, { "epoch": 0.8, "learning_rate": 2.0014269254469176e-06, "loss": 0.5884, "step": 4241 }, { "epoch": 0.8, "learning_rate": 1.997755744666722e-06, "loss": 0.4486, "step": 4242 }, { "epoch": 0.8, "learning_rate": 1.9940875602866506e-06, "loss": 0.5187, "step": 4243 }, { "epoch": 0.8, "learning_rate": 1.9904223736802375e-06, "loss": 0.3926, "step": 4244 }, { "epoch": 0.8, "learning_rate": 1.9867601862198906e-06, "loss": 0.4412, "step": 4245 }, { "epoch": 0.8, "learning_rate": 1.9831009992769036e-06, "loss": 0.4335, "step": 4246 }, { "epoch": 0.8, "learning_rate": 1.9794448142214396e-06, "loss": 0.5506, "step": 4247 }, { "epoch": 0.8, "learning_rate": 1.9757916324225313e-06, "loss": 0.4198, "step": 4248 }, { "epoch": 0.8, "learning_rate": 1.972141455248102e-06, "loss": 0.4364, "step": 4249 }, { "epoch": 0.8, "learning_rate": 1.9684942840649423e-06, "loss": 0.3555, "step": 4250 }, { "epoch": 0.8, "learning_rate": 1.9648501202387103e-06, "loss": 0.4667, "step": 4251 }, { "epoch": 0.8, "learning_rate": 1.9612089651339504e-06, "loss": 0.5693, "step": 4252 }, { "epoch": 0.8, "learning_rate": 1.9575708201140743e-06, "loss": 0.4127, "step": 4253 }, { "epoch": 0.8, "learning_rate": 1.953935686541366e-06, "loss": 0.5141, "step": 4254 }, { "epoch": 0.8, "learning_rate": 1.9503035657769832e-06, "loss": 0.4444, "step": 4255 }, { "epoch": 0.8, "learning_rate": 1.946674459180955e-06, "loss": 0.4253, "step": 4256 }, { "epoch": 0.8, "learning_rate": 1.9430483681121836e-06, "loss": 0.4052, "step": 4257 }, { "epoch": 0.8, "learning_rate": 1.9394252939284385e-06, "loss": 0.3543, "step": 4258 }, { "epoch": 0.8, "learning_rate": 1.9358052379863622e-06, "loss": 0.5268, "step": 4259 }, { "epoch": 0.8, "learning_rate": 1.932188201641465e-06, "loss": 0.3659, "step": 4260 }, { "epoch": 0.81, "learning_rate": 1.9285741862481343e-06, "loss": 0.3849, "step": 4261 }, { "epoch": 0.81, "learning_rate": 1.9249631931596136e-06, "loss": 0.548, "step": 4262 }, { "epoch": 0.81, "learning_rate": 1.9213552237280196e-06, "loss": 0.5072, "step": 4263 }, { "epoch": 0.81, "learning_rate": 1.917750279304347e-06, "loss": 0.6105, "step": 4264 }, { "epoch": 0.81, "learning_rate": 1.9141483612384415e-06, "loss": 0.4785, "step": 4265 }, { "epoch": 0.81, "learning_rate": 1.9105494708790227e-06, "loss": 0.4302, "step": 4266 }, { "epoch": 0.81, "learning_rate": 1.9069536095736817e-06, "loss": 0.3906, "step": 4267 }, { "epoch": 0.81, "learning_rate": 1.9033607786688724e-06, "loss": 0.5944, "step": 4268 }, { "epoch": 0.81, "learning_rate": 1.899770979509903e-06, "loss": 0.548, "step": 4269 }, { "epoch": 0.81, "learning_rate": 1.8961842134409636e-06, "loss": 0.348, "step": 4270 }, { "epoch": 0.81, "learning_rate": 1.8926004818050991e-06, "loss": 0.4113, "step": 4271 }, { "epoch": 0.81, "learning_rate": 1.8890197859442182e-06, "loss": 0.459, "step": 4272 }, { "epoch": 0.81, "learning_rate": 1.8854421271990964e-06, "loss": 0.4815, "step": 4273 }, { "epoch": 0.81, "learning_rate": 1.8818675069093684e-06, "loss": 0.4311, "step": 4274 }, { "epoch": 0.81, "learning_rate": 1.8782959264135347e-06, "loss": 0.5172, "step": 4275 }, { "epoch": 0.81, "learning_rate": 1.8747273870489536e-06, "loss": 0.5479, "step": 4276 }, { "epoch": 0.81, "learning_rate": 1.8711618901518446e-06, "loss": 0.5785, "step": 4277 }, { "epoch": 0.81, "learning_rate": 1.867599437057298e-06, "loss": 0.5492, "step": 4278 }, { "epoch": 0.81, "learning_rate": 1.86404002909925e-06, "loss": 0.3489, "step": 4279 }, { "epoch": 0.81, "learning_rate": 1.8604836676105009e-06, "loss": 0.5162, "step": 4280 }, { "epoch": 0.81, "learning_rate": 1.856930353922719e-06, "loss": 0.6864, "step": 4281 }, { "epoch": 0.81, "learning_rate": 1.8533800893664245e-06, "loss": 0.5591, "step": 4282 }, { "epoch": 0.81, "learning_rate": 1.849832875270987e-06, "loss": 0.3896, "step": 4283 }, { "epoch": 0.81, "learning_rate": 1.8462887129646534e-06, "loss": 0.3685, "step": 4284 }, { "epoch": 0.81, "learning_rate": 1.842747603774515e-06, "loss": 0.4057, "step": 4285 }, { "epoch": 0.81, "learning_rate": 1.839209549026516e-06, "loss": 0.4456, "step": 4286 }, { "epoch": 0.81, "learning_rate": 1.8356745500454699e-06, "loss": 0.2935, "step": 4287 }, { "epoch": 0.81, "learning_rate": 1.8321426081550375e-06, "loss": 0.585, "step": 4288 }, { "epoch": 0.81, "learning_rate": 1.8286137246777369e-06, "loss": 0.6112, "step": 4289 }, { "epoch": 0.81, "learning_rate": 1.8250879009349398e-06, "loss": 0.4596, "step": 4290 }, { "epoch": 0.81, "learning_rate": 1.8215651382468745e-06, "loss": 0.4268, "step": 4291 }, { "epoch": 0.81, "learning_rate": 1.8180454379326207e-06, "loss": 0.5969, "step": 4292 }, { "epoch": 0.81, "learning_rate": 1.8145288013101124e-06, "loss": 0.6214, "step": 4293 }, { "epoch": 0.81, "learning_rate": 1.811015229696137e-06, "loss": 0.4836, "step": 4294 }, { "epoch": 0.81, "learning_rate": 1.8075047244063338e-06, "loss": 0.4509, "step": 4295 }, { "epoch": 0.81, "learning_rate": 1.8039972867551935e-06, "loss": 0.4505, "step": 4296 }, { "epoch": 0.81, "learning_rate": 1.8004929180560582e-06, "loss": 0.4865, "step": 4297 }, { "epoch": 0.81, "learning_rate": 1.79699161962112e-06, "loss": 0.4492, "step": 4298 }, { "epoch": 0.81, "learning_rate": 1.7934933927614284e-06, "loss": 0.5445, "step": 4299 }, { "epoch": 0.81, "learning_rate": 1.789998238786871e-06, "loss": 0.4584, "step": 4300 }, { "epoch": 0.81, "learning_rate": 1.7865061590061884e-06, "loss": 0.4298, "step": 4301 }, { "epoch": 0.81, "learning_rate": 1.7830171547269792e-06, "loss": 0.3315, "step": 4302 }, { "epoch": 0.81, "learning_rate": 1.779531227255683e-06, "loss": 0.4269, "step": 4303 }, { "epoch": 0.81, "learning_rate": 1.7760483778975801e-06, "loss": 0.493, "step": 4304 }, { "epoch": 0.81, "learning_rate": 1.7725686079568138e-06, "loss": 0.5213, "step": 4305 }, { "epoch": 0.81, "learning_rate": 1.7690919187363664e-06, "loss": 0.4437, "step": 4306 }, { "epoch": 0.81, "learning_rate": 1.7656183115380577e-06, "loss": 0.4102, "step": 4307 }, { "epoch": 0.81, "learning_rate": 1.7621477876625725e-06, "loss": 0.3597, "step": 4308 }, { "epoch": 0.81, "learning_rate": 1.7586803484094272e-06, "loss": 0.4728, "step": 4309 }, { "epoch": 0.81, "learning_rate": 1.7552159950769864e-06, "loss": 0.3907, "step": 4310 }, { "epoch": 0.81, "learning_rate": 1.7517547289624605e-06, "loss": 0.3175, "step": 4311 }, { "epoch": 0.81, "learning_rate": 1.7482965513619011e-06, "loss": 0.2884, "step": 4312 }, { "epoch": 0.81, "learning_rate": 1.7448414635702126e-06, "loss": 0.3995, "step": 4313 }, { "epoch": 0.82, "learning_rate": 1.7413894668811272e-06, "loss": 0.3587, "step": 4314 }, { "epoch": 0.82, "learning_rate": 1.7379405625872281e-06, "loss": 0.5239, "step": 4315 }, { "epoch": 0.82, "learning_rate": 1.7344947519799483e-06, "loss": 0.5055, "step": 4316 }, { "epoch": 0.82, "learning_rate": 1.7310520363495454e-06, "loss": 0.4996, "step": 4317 }, { "epoch": 0.82, "learning_rate": 1.7276124169851272e-06, "loss": 0.3903, "step": 4318 }, { "epoch": 0.82, "learning_rate": 1.7241758951746479e-06, "loss": 0.5545, "step": 4319 }, { "epoch": 0.82, "learning_rate": 1.720742472204896e-06, "loss": 0.4664, "step": 4320 }, { "epoch": 0.82, "learning_rate": 1.7173121493614908e-06, "loss": 0.5095, "step": 4321 }, { "epoch": 0.82, "learning_rate": 1.7138849279289072e-06, "loss": 0.3635, "step": 4322 }, { "epoch": 0.82, "learning_rate": 1.7104608091904484e-06, "loss": 0.3831, "step": 4323 }, { "epoch": 0.82, "learning_rate": 1.707039794428259e-06, "loss": 0.4692, "step": 4324 }, { "epoch": 0.82, "learning_rate": 1.703621884923321e-06, "loss": 0.4907, "step": 4325 }, { "epoch": 0.82, "learning_rate": 1.7002070819554527e-06, "loss": 0.4236, "step": 4326 }, { "epoch": 0.82, "learning_rate": 1.6967953868033104e-06, "loss": 0.6326, "step": 4327 }, { "epoch": 0.82, "learning_rate": 1.6933868007443854e-06, "loss": 0.415, "step": 4328 }, { "epoch": 0.82, "learning_rate": 1.6899813250550068e-06, "loss": 0.4109, "step": 4329 }, { "epoch": 0.82, "learning_rate": 1.6865789610103367e-06, "loss": 0.4963, "step": 4330 }, { "epoch": 0.82, "learning_rate": 1.6831797098843738e-06, "loss": 0.3788, "step": 4331 }, { "epoch": 0.82, "learning_rate": 1.6797835729499501e-06, "loss": 0.4441, "step": 4332 }, { "epoch": 0.82, "learning_rate": 1.6763905514787304e-06, "loss": 0.6014, "step": 4333 }, { "epoch": 0.82, "learning_rate": 1.673000646741222e-06, "loss": 0.4545, "step": 4334 }, { "epoch": 0.82, "learning_rate": 1.6696138600067491e-06, "loss": 0.4254, "step": 4335 }, { "epoch": 0.82, "learning_rate": 1.6662301925434786e-06, "loss": 0.4602, "step": 4336 }, { "epoch": 0.82, "learning_rate": 1.6628496456184107e-06, "loss": 0.3568, "step": 4337 }, { "epoch": 0.82, "learning_rate": 1.6594722204973756e-06, "loss": 0.6188, "step": 4338 }, { "epoch": 0.82, "learning_rate": 1.6560979184450254e-06, "loss": 0.5029, "step": 4339 }, { "epoch": 0.82, "learning_rate": 1.6527267407248583e-06, "loss": 0.2604, "step": 4340 }, { "epoch": 0.82, "learning_rate": 1.6493586885991908e-06, "loss": 0.5635, "step": 4341 }, { "epoch": 0.82, "learning_rate": 1.6459937633291756e-06, "loss": 0.481, "step": 4342 }, { "epoch": 0.82, "learning_rate": 1.6426319661747902e-06, "loss": 0.4804, "step": 4343 }, { "epoch": 0.82, "learning_rate": 1.6392732983948434e-06, "loss": 0.5107, "step": 4344 }, { "epoch": 0.82, "learning_rate": 1.6359177612469711e-06, "loss": 0.4547, "step": 4345 }, { "epoch": 0.82, "learning_rate": 1.6325653559876375e-06, "loss": 0.4127, "step": 4346 }, { "epoch": 0.82, "learning_rate": 1.6292160838721316e-06, "loss": 0.3686, "step": 4347 }, { "epoch": 0.82, "learning_rate": 1.625869946154578e-06, "loss": 0.4092, "step": 4348 }, { "epoch": 0.82, "learning_rate": 1.6225269440879144e-06, "loss": 0.5758, "step": 4349 }, { "epoch": 0.82, "learning_rate": 1.6191870789239116e-06, "loss": 0.3886, "step": 4350 }, { "epoch": 0.82, "learning_rate": 1.615850351913173e-06, "loss": 0.377, "step": 4351 }, { "epoch": 0.82, "learning_rate": 1.6125167643051098e-06, "loss": 0.3583, "step": 4352 }, { "epoch": 0.82, "learning_rate": 1.6091863173479694e-06, "loss": 0.4211, "step": 4353 }, { "epoch": 0.82, "learning_rate": 1.605859012288824e-06, "loss": 0.4417, "step": 4354 }, { "epoch": 0.82, "learning_rate": 1.602534850373567e-06, "loss": 0.4318, "step": 4355 }, { "epoch": 0.82, "learning_rate": 1.5992138328469077e-06, "loss": 0.5362, "step": 4356 }, { "epoch": 0.82, "learning_rate": 1.5958959609523905e-06, "loss": 0.5131, "step": 4357 }, { "epoch": 0.82, "learning_rate": 1.5925812359323745e-06, "loss": 0.4893, "step": 4358 }, { "epoch": 0.82, "learning_rate": 1.589269659028041e-06, "loss": 0.3718, "step": 4359 }, { "epoch": 0.82, "learning_rate": 1.5859612314793927e-06, "loss": 0.5238, "step": 4360 }, { "epoch": 0.82, "learning_rate": 1.582655954525255e-06, "loss": 0.3914, "step": 4361 }, { "epoch": 0.82, "learning_rate": 1.5793538294032717e-06, "loss": 0.4338, "step": 4362 }, { "epoch": 0.82, "learning_rate": 1.5760548573499068e-06, "loss": 0.4023, "step": 4363 }, { "epoch": 0.82, "learning_rate": 1.5727590396004445e-06, "loss": 0.4562, "step": 4364 }, { "epoch": 0.82, "learning_rate": 1.5694663773889851e-06, "loss": 0.4707, "step": 4365 }, { "epoch": 0.82, "learning_rate": 1.5661768719484526e-06, "loss": 0.5468, "step": 4366 }, { "epoch": 0.83, "learning_rate": 1.562890524510583e-06, "loss": 0.5161, "step": 4367 }, { "epoch": 0.83, "learning_rate": 1.5596073363059316e-06, "loss": 0.4573, "step": 4368 }, { "epoch": 0.83, "learning_rate": 1.5563273085638785e-06, "loss": 0.426, "step": 4369 }, { "epoch": 0.83, "learning_rate": 1.5530504425126047e-06, "loss": 0.4628, "step": 4370 }, { "epoch": 0.83, "learning_rate": 1.549776739379122e-06, "loss": 0.3811, "step": 4371 }, { "epoch": 0.83, "learning_rate": 1.546506200389253e-06, "loss": 0.4728, "step": 4372 }, { "epoch": 0.83, "learning_rate": 1.543238826767627e-06, "loss": 0.529, "step": 4373 }, { "epoch": 0.83, "learning_rate": 1.539974619737704e-06, "loss": 0.5223, "step": 4374 }, { "epoch": 0.83, "learning_rate": 1.536713580521746e-06, "loss": 0.4623, "step": 4375 }, { "epoch": 0.83, "learning_rate": 1.5334557103408332e-06, "loss": 0.3491, "step": 4376 }, { "epoch": 0.83, "learning_rate": 1.530201010414859e-06, "loss": 0.5557, "step": 4377 }, { "epoch": 0.83, "learning_rate": 1.5269494819625286e-06, "loss": 0.4762, "step": 4378 }, { "epoch": 0.83, "learning_rate": 1.5237011262013624e-06, "loss": 0.4682, "step": 4379 }, { "epoch": 0.83, "learning_rate": 1.5204559443476886e-06, "loss": 0.4267, "step": 4380 }, { "epoch": 0.83, "learning_rate": 1.5172139376166507e-06, "loss": 0.3609, "step": 4381 }, { "epoch": 0.83, "learning_rate": 1.513975107222201e-06, "loss": 0.3598, "step": 4382 }, { "epoch": 0.83, "learning_rate": 1.5107394543771037e-06, "loss": 0.4746, "step": 4383 }, { "epoch": 0.83, "learning_rate": 1.507506980292933e-06, "loss": 0.3881, "step": 4384 }, { "epoch": 0.83, "learning_rate": 1.50427768618007e-06, "loss": 0.3472, "step": 4385 }, { "epoch": 0.83, "learning_rate": 1.5010515732477138e-06, "loss": 0.3608, "step": 4386 }, { "epoch": 0.83, "learning_rate": 1.4978286427038602e-06, "loss": 0.5057, "step": 4387 }, { "epoch": 0.83, "learning_rate": 1.4946088957553196e-06, "loss": 0.4629, "step": 4388 }, { "epoch": 0.83, "learning_rate": 1.491392333607713e-06, "loss": 0.4646, "step": 4389 }, { "epoch": 0.83, "learning_rate": 1.4881789574654681e-06, "loss": 0.3662, "step": 4390 }, { "epoch": 0.83, "learning_rate": 1.4849687685318092e-06, "loss": 0.3658, "step": 4391 }, { "epoch": 0.83, "learning_rate": 1.4817617680087826e-06, "loss": 0.5085, "step": 4392 }, { "epoch": 0.83, "learning_rate": 1.478557957097233e-06, "loss": 0.5192, "step": 4393 }, { "epoch": 0.83, "learning_rate": 1.4753573369968054e-06, "loss": 0.4579, "step": 4394 }, { "epoch": 0.83, "learning_rate": 1.4721599089059624e-06, "loss": 0.42, "step": 4395 }, { "epoch": 0.83, "learning_rate": 1.4689656740219615e-06, "loss": 0.3216, "step": 4396 }, { "epoch": 0.83, "learning_rate": 1.4657746335408695e-06, "loss": 0.5495, "step": 4397 }, { "epoch": 0.83, "learning_rate": 1.4625867886575552e-06, "loss": 0.4509, "step": 4398 }, { "epoch": 0.83, "learning_rate": 1.459402140565691e-06, "loss": 0.4841, "step": 4399 }, { "epoch": 0.83, "learning_rate": 1.4562206904577514e-06, "loss": 0.4082, "step": 4400 }, { "epoch": 0.83, "learning_rate": 1.4530424395250154e-06, "loss": 0.521, "step": 4401 }, { "epoch": 0.83, "learning_rate": 1.44986738895756e-06, "loss": 0.4269, "step": 4402 }, { "epoch": 0.83, "learning_rate": 1.446695539944275e-06, "loss": 0.4324, "step": 4403 }, { "epoch": 0.83, "learning_rate": 1.4435268936728352e-06, "loss": 0.5534, "step": 4404 }, { "epoch": 0.83, "learning_rate": 1.4403614513297248e-06, "loss": 0.4441, "step": 4405 }, { "epoch": 0.83, "learning_rate": 1.4371992141002323e-06, "loss": 0.4878, "step": 4406 }, { "epoch": 0.83, "learning_rate": 1.4340401831684413e-06, "loss": 0.3497, "step": 4407 }, { "epoch": 0.83, "learning_rate": 1.4308843597172284e-06, "loss": 0.4469, "step": 4408 }, { "epoch": 0.83, "learning_rate": 1.4277317449282834e-06, "loss": 0.4753, "step": 4409 }, { "epoch": 0.83, "learning_rate": 1.4245823399820835e-06, "loss": 0.441, "step": 4410 }, { "epoch": 0.83, "learning_rate": 1.4214361460579086e-06, "loss": 0.4567, "step": 4411 }, { "epoch": 0.83, "learning_rate": 1.4182931643338337e-06, "loss": 0.3356, "step": 4412 }, { "epoch": 0.83, "learning_rate": 1.4151533959867348e-06, "loss": 0.4192, "step": 4413 }, { "epoch": 0.83, "learning_rate": 1.4120168421922798e-06, "loss": 0.4289, "step": 4414 }, { "epoch": 0.83, "learning_rate": 1.4088835041249383e-06, "loss": 0.492, "step": 4415 }, { "epoch": 0.83, "learning_rate": 1.4057533829579706e-06, "loss": 0.512, "step": 4416 }, { "epoch": 0.83, "learning_rate": 1.4026264798634359e-06, "loss": 0.4437, "step": 4417 }, { "epoch": 0.83, "learning_rate": 1.3995027960121866e-06, "loss": 0.3996, "step": 4418 }, { "epoch": 0.83, "learning_rate": 1.3963823325738713e-06, "loss": 0.4932, "step": 4419 }, { "epoch": 0.84, "learning_rate": 1.3932650907169298e-06, "loss": 0.3365, "step": 4420 }, { "epoch": 0.84, "learning_rate": 1.390151071608603e-06, "loss": 0.5138, "step": 4421 }, { "epoch": 0.84, "learning_rate": 1.3870402764149149e-06, "loss": 0.6978, "step": 4422 }, { "epoch": 0.84, "learning_rate": 1.3839327063006847e-06, "loss": 0.4263, "step": 4423 }, { "epoch": 0.84, "learning_rate": 1.3808283624295337e-06, "loss": 0.3676, "step": 4424 }, { "epoch": 0.84, "learning_rate": 1.3777272459638646e-06, "loss": 0.4164, "step": 4425 }, { "epoch": 0.84, "learning_rate": 1.3746293580648718e-06, "loss": 0.3535, "step": 4426 }, { "epoch": 0.84, "learning_rate": 1.371534699892547e-06, "loss": 0.3616, "step": 4427 }, { "epoch": 0.84, "learning_rate": 1.3684432726056717e-06, "loss": 0.5169, "step": 4428 }, { "epoch": 0.84, "learning_rate": 1.3653550773618085e-06, "loss": 0.4073, "step": 4429 }, { "epoch": 0.84, "learning_rate": 1.3622701153173222e-06, "loss": 0.4144, "step": 4430 }, { "epoch": 0.84, "learning_rate": 1.3591883876273594e-06, "loss": 0.4666, "step": 4431 }, { "epoch": 0.84, "learning_rate": 1.3561098954458585e-06, "loss": 0.4784, "step": 4432 }, { "epoch": 0.84, "learning_rate": 1.3530346399255444e-06, "loss": 0.4376, "step": 4433 }, { "epoch": 0.84, "learning_rate": 1.3499626222179286e-06, "loss": 0.4778, "step": 4434 }, { "epoch": 0.84, "learning_rate": 1.3468938434733203e-06, "loss": 0.4653, "step": 4435 }, { "epoch": 0.84, "learning_rate": 1.3438283048408019e-06, "loss": 0.3715, "step": 4436 }, { "epoch": 0.84, "learning_rate": 1.3407660074682472e-06, "loss": 0.494, "step": 4437 }, { "epoch": 0.84, "learning_rate": 1.3377069525023267e-06, "loss": 0.5235, "step": 4438 }, { "epoch": 0.84, "learning_rate": 1.33465114108848e-06, "loss": 0.3026, "step": 4439 }, { "epoch": 0.84, "learning_rate": 1.3315985743709403e-06, "loss": 0.5725, "step": 4440 }, { "epoch": 0.84, "learning_rate": 1.3285492534927313e-06, "loss": 0.4422, "step": 4441 }, { "epoch": 0.84, "learning_rate": 1.3255031795956552e-06, "loss": 0.4291, "step": 4442 }, { "epoch": 0.84, "learning_rate": 1.3224603538202929e-06, "loss": 0.3817, "step": 4443 }, { "epoch": 0.84, "learning_rate": 1.3194207773060219e-06, "loss": 0.3826, "step": 4444 }, { "epoch": 0.84, "learning_rate": 1.3163844511909928e-06, "loss": 0.4083, "step": 4445 }, { "epoch": 0.84, "learning_rate": 1.3133513766121442e-06, "loss": 0.4775, "step": 4446 }, { "epoch": 0.84, "learning_rate": 1.3103215547051962e-06, "loss": 0.3876, "step": 4447 }, { "epoch": 0.84, "learning_rate": 1.307294986604648e-06, "loss": 0.5535, "step": 4448 }, { "epoch": 0.84, "learning_rate": 1.3042716734437843e-06, "loss": 0.4701, "step": 4449 }, { "epoch": 0.84, "learning_rate": 1.3012516163546685e-06, "loss": 0.4474, "step": 4450 }, { "epoch": 0.84, "learning_rate": 1.298234816468148e-06, "loss": 0.3249, "step": 4451 }, { "epoch": 0.84, "learning_rate": 1.295221274913846e-06, "loss": 0.6299, "step": 4452 }, { "epoch": 0.84, "learning_rate": 1.2922109928201699e-06, "loss": 0.3859, "step": 4453 }, { "epoch": 0.84, "learning_rate": 1.2892039713143024e-06, "loss": 0.5043, "step": 4454 }, { "epoch": 0.84, "learning_rate": 1.2862002115222071e-06, "loss": 0.6504, "step": 4455 }, { "epoch": 0.84, "learning_rate": 1.2831997145686326e-06, "loss": 0.3192, "step": 4456 }, { "epoch": 0.84, "learning_rate": 1.2802024815770942e-06, "loss": 0.4336, "step": 4457 }, { "epoch": 0.84, "learning_rate": 1.277208513669891e-06, "loss": 0.3456, "step": 4458 }, { "epoch": 0.84, "learning_rate": 1.2742178119681037e-06, "loss": 0.4172, "step": 4459 }, { "epoch": 0.84, "learning_rate": 1.2712303775915803e-06, "loss": 0.4784, "step": 4460 }, { "epoch": 0.84, "learning_rate": 1.2682462116589512e-06, "loss": 0.2923, "step": 4461 }, { "epoch": 0.84, "learning_rate": 1.265265315287625e-06, "loss": 0.442, "step": 4462 }, { "epoch": 0.84, "learning_rate": 1.2622876895937842e-06, "loss": 0.5033, "step": 4463 }, { "epoch": 0.84, "learning_rate": 1.2593133356923803e-06, "loss": 0.4039, "step": 4464 }, { "epoch": 0.84, "learning_rate": 1.2563422546971504e-06, "loss": 0.5379, "step": 4465 }, { "epoch": 0.84, "learning_rate": 1.253374447720599e-06, "loss": 0.404, "step": 4466 }, { "epoch": 0.84, "learning_rate": 1.250409915874007e-06, "loss": 0.4198, "step": 4467 }, { "epoch": 0.84, "learning_rate": 1.2474486602674274e-06, "loss": 0.5725, "step": 4468 }, { "epoch": 0.84, "learning_rate": 1.2444906820096881e-06, "loss": 0.3667, "step": 4469 }, { "epoch": 0.84, "learning_rate": 1.2415359822083893e-06, "loss": 0.5099, "step": 4470 }, { "epoch": 0.84, "learning_rate": 1.2385845619699032e-06, "loss": 0.553, "step": 4471 }, { "epoch": 0.84, "learning_rate": 1.2356364223993723e-06, "loss": 0.377, "step": 4472 }, { "epoch": 0.85, "learning_rate": 1.2326915646007187e-06, "loss": 0.5357, "step": 4473 }, { "epoch": 0.85, "learning_rate": 1.2297499896766241e-06, "loss": 0.5247, "step": 4474 }, { "epoch": 0.85, "learning_rate": 1.226811698728546e-06, "loss": 0.4649, "step": 4475 }, { "epoch": 0.85, "learning_rate": 1.2238766928567158e-06, "loss": 0.4344, "step": 4476 }, { "epoch": 0.85, "learning_rate": 1.220944973160133e-06, "loss": 0.4309, "step": 4477 }, { "epoch": 0.85, "learning_rate": 1.2180165407365586e-06, "loss": 0.5159, "step": 4478 }, { "epoch": 0.85, "learning_rate": 1.2150913966825361e-06, "loss": 0.4978, "step": 4479 }, { "epoch": 0.85, "learning_rate": 1.212169542093371e-06, "loss": 0.4029, "step": 4480 }, { "epoch": 0.85, "learning_rate": 1.209250978063129e-06, "loss": 0.3945, "step": 4481 }, { "epoch": 0.85, "learning_rate": 1.2063357056846604e-06, "loss": 0.3731, "step": 4482 }, { "epoch": 0.85, "learning_rate": 1.2034237260495708e-06, "loss": 0.3561, "step": 4483 }, { "epoch": 0.85, "learning_rate": 1.2005150402482369e-06, "loss": 0.6454, "step": 4484 }, { "epoch": 0.85, "learning_rate": 1.1976096493698008e-06, "loss": 0.5069, "step": 4485 }, { "epoch": 0.85, "learning_rate": 1.1947075545021724e-06, "loss": 0.4338, "step": 4486 }, { "epoch": 0.85, "learning_rate": 1.1918087567320257e-06, "loss": 0.4902, "step": 4487 }, { "epoch": 0.85, "learning_rate": 1.1889132571448025e-06, "loss": 0.4461, "step": 4488 }, { "epoch": 0.85, "learning_rate": 1.1860210568247055e-06, "loss": 0.5654, "step": 4489 }, { "epoch": 0.85, "learning_rate": 1.1831321568547072e-06, "loss": 0.475, "step": 4490 }, { "epoch": 0.85, "learning_rate": 1.180246558316539e-06, "loss": 0.5116, "step": 4491 }, { "epoch": 0.85, "learning_rate": 1.1773642622907012e-06, "loss": 0.4765, "step": 4492 }, { "epoch": 0.85, "learning_rate": 1.1744852698564523e-06, "loss": 0.4673, "step": 4493 }, { "epoch": 0.85, "learning_rate": 1.1716095820918217e-06, "loss": 0.5653, "step": 4494 }, { "epoch": 0.85, "learning_rate": 1.1687372000735898e-06, "loss": 0.4819, "step": 4495 }, { "epoch": 0.85, "learning_rate": 1.1658681248773096e-06, "loss": 0.4153, "step": 4496 }, { "epoch": 0.85, "learning_rate": 1.1630023575772908e-06, "loss": 0.6085, "step": 4497 }, { "epoch": 0.85, "learning_rate": 1.1601398992466061e-06, "loss": 0.3435, "step": 4498 }, { "epoch": 0.85, "learning_rate": 1.1572807509570883e-06, "loss": 0.4097, "step": 4499 }, { "epoch": 0.85, "learning_rate": 1.1544249137793317e-06, "loss": 0.5463, "step": 4500 }, { "epoch": 0.85, "learning_rate": 1.151572388782688e-06, "loss": 0.4147, "step": 4501 }, { "epoch": 0.85, "learning_rate": 1.1487231770352736e-06, "loss": 0.4242, "step": 4502 }, { "epoch": 0.85, "learning_rate": 1.1458772796039597e-06, "loss": 0.442, "step": 4503 }, { "epoch": 0.85, "learning_rate": 1.1430346975543793e-06, "loss": 0.5083, "step": 4504 }, { "epoch": 0.85, "learning_rate": 1.1401954319509213e-06, "loss": 0.4865, "step": 4505 }, { "epoch": 0.85, "learning_rate": 1.137359483856737e-06, "loss": 0.6597, "step": 4506 }, { "epoch": 0.85, "learning_rate": 1.1345268543337283e-06, "loss": 0.4086, "step": 4507 }, { "epoch": 0.85, "learning_rate": 1.131697544442567e-06, "loss": 0.4697, "step": 4508 }, { "epoch": 0.85, "learning_rate": 1.1288715552426666e-06, "loss": 0.7194, "step": 4509 }, { "epoch": 0.85, "learning_rate": 1.126048887792206e-06, "loss": 0.4776, "step": 4510 }, { "epoch": 0.85, "learning_rate": 1.1232295431481222e-06, "loss": 0.3922, "step": 4511 }, { "epoch": 0.85, "learning_rate": 1.120413522366104e-06, "loss": 0.547, "step": 4512 }, { "epoch": 0.85, "learning_rate": 1.11760082650059e-06, "loss": 0.5074, "step": 4513 }, { "epoch": 0.85, "learning_rate": 1.114791456604788e-06, "loss": 0.3752, "step": 4514 }, { "epoch": 0.85, "learning_rate": 1.1119854137306507e-06, "loss": 0.4769, "step": 4515 }, { "epoch": 0.85, "learning_rate": 1.1091826989288811e-06, "loss": 0.4047, "step": 4516 }, { "epoch": 0.85, "learning_rate": 1.1063833132489477e-06, "loss": 0.4845, "step": 4517 }, { "epoch": 0.85, "learning_rate": 1.103587257739064e-06, "loss": 0.5022, "step": 4518 }, { "epoch": 0.85, "learning_rate": 1.1007945334462e-06, "loss": 0.4824, "step": 4519 }, { "epoch": 0.85, "learning_rate": 1.0980051414160764e-06, "loss": 0.3594, "step": 4520 }, { "epoch": 0.85, "learning_rate": 1.0952190826931674e-06, "loss": 0.3615, "step": 4521 }, { "epoch": 0.85, "learning_rate": 1.0924363583206986e-06, "loss": 0.4267, "step": 4522 }, { "epoch": 0.85, "learning_rate": 1.0896569693406468e-06, "loss": 0.5236, "step": 4523 }, { "epoch": 0.85, "learning_rate": 1.08688091679374e-06, "loss": 0.4887, "step": 4524 }, { "epoch": 0.85, "learning_rate": 1.0841082017194583e-06, "loss": 0.3948, "step": 4525 }, { "epoch": 0.86, "learning_rate": 1.081338825156031e-06, "loss": 0.4658, "step": 4526 }, { "epoch": 0.86, "learning_rate": 1.0785727881404329e-06, "loss": 0.3377, "step": 4527 }, { "epoch": 0.86, "learning_rate": 1.075810091708399e-06, "loss": 0.4933, "step": 4528 }, { "epoch": 0.86, "learning_rate": 1.0730507368944066e-06, "loss": 0.419, "step": 4529 }, { "epoch": 0.86, "learning_rate": 1.0702947247316765e-06, "loss": 0.4352, "step": 4530 }, { "epoch": 0.86, "learning_rate": 1.0675420562521876e-06, "loss": 0.6728, "step": 4531 }, { "epoch": 0.86, "learning_rate": 1.0647927324866624e-06, "loss": 0.4231, "step": 4532 }, { "epoch": 0.86, "learning_rate": 1.0620467544645719e-06, "loss": 0.4546, "step": 4533 }, { "epoch": 0.86, "learning_rate": 1.0593041232141333e-06, "loss": 0.4, "step": 4534 }, { "epoch": 0.86, "learning_rate": 1.0565648397623108e-06, "loss": 0.429, "step": 4535 }, { "epoch": 0.86, "learning_rate": 1.053828905134815e-06, "loss": 0.4915, "step": 4536 }, { "epoch": 0.86, "learning_rate": 1.051096320356103e-06, "loss": 0.3471, "step": 4537 }, { "epoch": 0.86, "learning_rate": 1.0483670864493777e-06, "loss": 0.2603, "step": 4538 }, { "epoch": 0.86, "learning_rate": 1.0456412044365872e-06, "loss": 0.5257, "step": 4539 }, { "epoch": 0.86, "learning_rate": 1.0429186753384234e-06, "loss": 0.3231, "step": 4540 }, { "epoch": 0.86, "learning_rate": 1.0401995001743248e-06, "loss": 0.2725, "step": 4541 }, { "epoch": 0.86, "learning_rate": 1.037483679962471e-06, "loss": 0.3536, "step": 4542 }, { "epoch": 0.86, "learning_rate": 1.034771215719792e-06, "loss": 0.3998, "step": 4543 }, { "epoch": 0.86, "learning_rate": 1.0320621084619508e-06, "loss": 0.5626, "step": 4544 }, { "epoch": 0.86, "learning_rate": 1.0293563592033595e-06, "loss": 0.5161, "step": 4545 }, { "epoch": 0.86, "learning_rate": 1.0266539689571775e-06, "loss": 0.4284, "step": 4546 }, { "epoch": 0.86, "learning_rate": 1.0239549387352954e-06, "loss": 0.4485, "step": 4547 }, { "epoch": 0.86, "learning_rate": 1.0212592695483524e-06, "loss": 0.4912, "step": 4548 }, { "epoch": 0.86, "learning_rate": 1.0185669624057327e-06, "loss": 0.4101, "step": 4549 }, { "epoch": 0.86, "learning_rate": 1.0158780183155547e-06, "loss": 0.398, "step": 4550 }, { "epoch": 0.86, "learning_rate": 1.013192438284676e-06, "loss": 0.4233, "step": 4551 }, { "epoch": 0.86, "learning_rate": 1.0105102233187036e-06, "loss": 0.4092, "step": 4552 }, { "epoch": 0.86, "learning_rate": 1.0078313744219791e-06, "loss": 0.5178, "step": 4553 }, { "epoch": 0.86, "learning_rate": 1.0051558925975813e-06, "loss": 0.3877, "step": 4554 }, { "epoch": 0.86, "learning_rate": 1.002483778847333e-06, "loss": 0.2311, "step": 4555 }, { "epoch": 0.86, "learning_rate": 9.998150341717927e-07, "loss": 0.3367, "step": 4556 }, { "epoch": 0.86, "learning_rate": 9.97149659570259e-07, "loss": 0.288, "step": 4557 }, { "epoch": 0.86, "learning_rate": 9.94487656040768e-07, "loss": 0.4986, "step": 4558 }, { "epoch": 0.86, "learning_rate": 9.91829024580091e-07, "loss": 0.5176, "step": 4559 }, { "epoch": 0.86, "learning_rate": 9.891737661837463e-07, "loss": 0.3319, "step": 4560 }, { "epoch": 0.86, "learning_rate": 9.865218818459742e-07, "loss": 0.3067, "step": 4561 }, { "epoch": 0.86, "learning_rate": 9.838733725597615e-07, "loss": 0.4669, "step": 4562 }, { "epoch": 0.86, "learning_rate": 9.812282393168315e-07, "loss": 0.5565, "step": 4563 }, { "epoch": 0.86, "learning_rate": 9.785864831076418e-07, "loss": 0.5029, "step": 4564 }, { "epoch": 0.86, "learning_rate": 9.759481049213792e-07, "loss": 0.5006, "step": 4565 }, { "epoch": 0.86, "learning_rate": 9.733131057459767e-07, "loss": 0.4564, "step": 4566 }, { "epoch": 0.86, "learning_rate": 9.706814865680957e-07, "loss": 0.3777, "step": 4567 }, { "epoch": 0.86, "learning_rate": 9.680532483731287e-07, "loss": 0.4976, "step": 4568 }, { "epoch": 0.86, "learning_rate": 9.6542839214521e-07, "loss": 0.4417, "step": 4569 }, { "epoch": 0.86, "learning_rate": 9.62806918867204e-07, "loss": 0.4843, "step": 4570 }, { "epoch": 0.86, "learning_rate": 9.601888295207063e-07, "loss": 0.3446, "step": 4571 }, { "epoch": 0.86, "learning_rate": 9.575741250860482e-07, "loss": 0.3729, "step": 4572 }, { "epoch": 0.86, "learning_rate": 9.549628065422922e-07, "loss": 0.5169, "step": 4573 }, { "epoch": 0.86, "learning_rate": 9.52354874867234e-07, "loss": 0.4229, "step": 4574 }, { "epoch": 0.86, "learning_rate": 9.497503310374012e-07, "loss": 0.3014, "step": 4575 }, { "epoch": 0.86, "learning_rate": 9.471491760280494e-07, "loss": 0.5147, "step": 4576 }, { "epoch": 0.86, "learning_rate": 9.445514108131693e-07, "loss": 0.5292, "step": 4577 }, { "epoch": 0.86, "learning_rate": 9.419570363654851e-07, "loss": 0.3667, "step": 4578 }, { "epoch": 0.87, "learning_rate": 9.393660536564408e-07, "loss": 0.5919, "step": 4579 }, { "epoch": 0.87, "learning_rate": 9.367784636562183e-07, "loss": 0.5281, "step": 4580 }, { "epoch": 0.87, "learning_rate": 9.341942673337335e-07, "loss": 0.3432, "step": 4581 }, { "epoch": 0.87, "learning_rate": 9.316134656566189e-07, "loss": 0.4038, "step": 4582 }, { "epoch": 0.87, "learning_rate": 9.290360595912429e-07, "loss": 0.3473, "step": 4583 }, { "epoch": 0.87, "learning_rate": 9.264620501027066e-07, "loss": 0.5126, "step": 4584 }, { "epoch": 0.87, "learning_rate": 9.238914381548347e-07, "loss": 0.469, "step": 4585 }, { "epoch": 0.87, "learning_rate": 9.213242247101739e-07, "loss": 0.5398, "step": 4586 }, { "epoch": 0.87, "learning_rate": 9.187604107300107e-07, "loss": 0.4031, "step": 4587 }, { "epoch": 0.87, "learning_rate": 9.161999971743507e-07, "loss": 0.375, "step": 4588 }, { "epoch": 0.87, "learning_rate": 9.136429850019268e-07, "loss": 0.4173, "step": 4589 }, { "epoch": 0.87, "learning_rate": 9.110893751701988e-07, "loss": 0.3209, "step": 4590 }, { "epoch": 0.87, "learning_rate": 9.08539168635355e-07, "loss": 0.4489, "step": 4591 }, { "epoch": 0.87, "learning_rate": 9.059923663523074e-07, "loss": 0.3049, "step": 4592 }, { "epoch": 0.87, "learning_rate": 9.034489692746906e-07, "loss": 0.4224, "step": 4593 }, { "epoch": 0.87, "learning_rate": 9.009089783548686e-07, "loss": 0.3851, "step": 4594 }, { "epoch": 0.87, "learning_rate": 8.983723945439304e-07, "loss": 0.5131, "step": 4595 }, { "epoch": 0.87, "learning_rate": 8.958392187916842e-07, "loss": 0.5575, "step": 4596 }, { "epoch": 0.87, "learning_rate": 8.933094520466634e-07, "loss": 0.4109, "step": 4597 }, { "epoch": 0.87, "learning_rate": 8.907830952561303e-07, "loss": 0.5893, "step": 4598 }, { "epoch": 0.87, "learning_rate": 8.88260149366067e-07, "loss": 0.3955, "step": 4599 }, { "epoch": 0.87, "learning_rate": 8.857406153211722e-07, "loss": 0.492, "step": 4600 }, { "epoch": 0.87, "learning_rate": 8.832244940648782e-07, "loss": 0.5716, "step": 4601 }, { "epoch": 0.87, "learning_rate": 8.807117865393344e-07, "loss": 0.3964, "step": 4602 }, { "epoch": 0.87, "learning_rate": 8.78202493685405e-07, "loss": 0.5571, "step": 4603 }, { "epoch": 0.87, "learning_rate": 8.756966164426884e-07, "loss": 0.5159, "step": 4604 }, { "epoch": 0.87, "learning_rate": 8.731941557494949e-07, "loss": 0.4399, "step": 4605 }, { "epoch": 0.87, "learning_rate": 8.706951125428597e-07, "loss": 0.437, "step": 4606 }, { "epoch": 0.87, "learning_rate": 8.681994877585365e-07, "loss": 0.338, "step": 4607 }, { "epoch": 0.87, "learning_rate": 8.657072823309986e-07, "loss": 0.4614, "step": 4608 }, { "epoch": 0.87, "learning_rate": 8.63218497193441e-07, "loss": 0.504, "step": 4609 }, { "epoch": 0.87, "learning_rate": 8.607331332777758e-07, "loss": 0.3848, "step": 4610 }, { "epoch": 0.87, "learning_rate": 8.582511915146363e-07, "loss": 0.5553, "step": 4611 }, { "epoch": 0.87, "learning_rate": 8.557726728333715e-07, "loss": 0.433, "step": 4612 }, { "epoch": 0.87, "learning_rate": 8.532975781620511e-07, "loss": 0.3816, "step": 4613 }, { "epoch": 0.87, "learning_rate": 8.508259084274628e-07, "loss": 0.4625, "step": 4614 }, { "epoch": 0.87, "learning_rate": 8.483576645551072e-07, "loss": 0.3616, "step": 4615 }, { "epoch": 0.87, "learning_rate": 8.458928474692118e-07, "loss": 0.5007, "step": 4616 }, { "epoch": 0.87, "learning_rate": 8.434314580927105e-07, "loss": 0.3055, "step": 4617 }, { "epoch": 0.87, "learning_rate": 8.409734973472572e-07, "loss": 0.312, "step": 4618 }, { "epoch": 0.87, "learning_rate": 8.385189661532278e-07, "loss": 0.3812, "step": 4619 }, { "epoch": 0.87, "learning_rate": 8.360678654297049e-07, "loss": 0.5736, "step": 4620 }, { "epoch": 0.87, "learning_rate": 8.336201960944935e-07, "loss": 0.4656, "step": 4621 }, { "epoch": 0.87, "learning_rate": 8.311759590641099e-07, "loss": 0.3843, "step": 4622 }, { "epoch": 0.87, "learning_rate": 8.287351552537859e-07, "loss": 0.568, "step": 4623 }, { "epoch": 0.87, "learning_rate": 8.26297785577469e-07, "loss": 0.3242, "step": 4624 }, { "epoch": 0.87, "learning_rate": 8.238638509478202e-07, "loss": 0.445, "step": 4625 }, { "epoch": 0.87, "learning_rate": 8.21433352276213e-07, "loss": 0.3593, "step": 4626 }, { "epoch": 0.87, "learning_rate": 8.19006290472737e-07, "loss": 0.4418, "step": 4627 }, { "epoch": 0.87, "learning_rate": 8.165826664461918e-07, "loss": 0.5307, "step": 4628 }, { "epoch": 0.87, "learning_rate": 8.141624811040894e-07, "loss": 0.3135, "step": 4629 }, { "epoch": 0.87, "learning_rate": 8.117457353526626e-07, "loss": 0.4371, "step": 4630 }, { "epoch": 0.87, "learning_rate": 8.093324300968442e-07, "loss": 0.4245, "step": 4631 }, { "epoch": 0.88, "learning_rate": 8.069225662402835e-07, "loss": 0.4166, "step": 4632 }, { "epoch": 0.88, "learning_rate": 8.045161446853466e-07, "loss": 0.414, "step": 4633 }, { "epoch": 0.88, "learning_rate": 8.021131663331027e-07, "loss": 0.4751, "step": 4634 }, { "epoch": 0.88, "learning_rate": 7.997136320833332e-07, "loss": 0.3984, "step": 4635 }, { "epoch": 0.88, "learning_rate": 7.973175428345359e-07, "loss": 0.4691, "step": 4636 }, { "epoch": 0.88, "learning_rate": 7.949248994839131e-07, "loss": 0.4196, "step": 4637 }, { "epoch": 0.88, "learning_rate": 7.92535702927375e-07, "loss": 0.4222, "step": 4638 }, { "epoch": 0.88, "learning_rate": 7.901499540595492e-07, "loss": 0.4218, "step": 4639 }, { "epoch": 0.88, "learning_rate": 7.877676537737633e-07, "loss": 0.4869, "step": 4640 }, { "epoch": 0.88, "learning_rate": 7.853888029620604e-07, "loss": 0.3958, "step": 4641 }, { "epoch": 0.88, "learning_rate": 7.83013402515187e-07, "loss": 0.363, "step": 4642 }, { "epoch": 0.88, "learning_rate": 7.806414533226014e-07, "loss": 0.461, "step": 4643 }, { "epoch": 0.88, "learning_rate": 7.782729562724678e-07, "loss": 0.409, "step": 4644 }, { "epoch": 0.88, "learning_rate": 7.759079122516566e-07, "loss": 0.3841, "step": 4645 }, { "epoch": 0.88, "learning_rate": 7.735463221457496e-07, "loss": 0.3589, "step": 4646 }, { "epoch": 0.88, "learning_rate": 7.711881868390292e-07, "loss": 0.5451, "step": 4647 }, { "epoch": 0.88, "learning_rate": 7.688335072144882e-07, "loss": 0.5478, "step": 4648 }, { "epoch": 0.88, "learning_rate": 7.664822841538267e-07, "loss": 0.3569, "step": 4649 }, { "epoch": 0.88, "learning_rate": 7.641345185374438e-07, "loss": 0.3881, "step": 4650 }, { "epoch": 0.88, "learning_rate": 7.617902112444542e-07, "loss": 0.6265, "step": 4651 }, { "epoch": 0.88, "learning_rate": 7.594493631526667e-07, "loss": 0.4872, "step": 4652 }, { "epoch": 0.88, "learning_rate": 7.571119751386035e-07, "loss": 0.4549, "step": 4653 }, { "epoch": 0.88, "learning_rate": 7.54778048077488e-07, "loss": 0.5077, "step": 4654 }, { "epoch": 0.88, "learning_rate": 7.524475828432453e-07, "loss": 0.604, "step": 4655 }, { "epoch": 0.88, "learning_rate": 7.501205803085076e-07, "loss": 0.2191, "step": 4656 }, { "epoch": 0.88, "learning_rate": 7.477970413446089e-07, "loss": 0.4496, "step": 4657 }, { "epoch": 0.88, "learning_rate": 7.454769668215867e-07, "loss": 0.3789, "step": 4658 }, { "epoch": 0.88, "learning_rate": 7.431603576081814e-07, "loss": 0.3441, "step": 4659 }, { "epoch": 0.88, "learning_rate": 7.40847214571836e-07, "loss": 0.301, "step": 4660 }, { "epoch": 0.88, "learning_rate": 7.385375385786952e-07, "loss": 0.3613, "step": 4661 }, { "epoch": 0.88, "learning_rate": 7.362313304936052e-07, "loss": 0.4861, "step": 4662 }, { "epoch": 0.88, "learning_rate": 7.33928591180113e-07, "loss": 0.5214, "step": 4663 }, { "epoch": 0.88, "learning_rate": 7.316293215004689e-07, "loss": 0.636, "step": 4664 }, { "epoch": 0.88, "learning_rate": 7.29333522315625e-07, "loss": 0.4496, "step": 4665 }, { "epoch": 0.88, "learning_rate": 7.270411944852285e-07, "loss": 0.5043, "step": 4666 }, { "epoch": 0.88, "learning_rate": 7.247523388676292e-07, "loss": 0.3628, "step": 4667 }, { "epoch": 0.88, "learning_rate": 7.224669563198827e-07, "loss": 0.4365, "step": 4668 }, { "epoch": 0.88, "learning_rate": 7.201850476977346e-07, "loss": 0.3645, "step": 4669 }, { "epoch": 0.88, "learning_rate": 7.179066138556334e-07, "loss": 0.4588, "step": 4670 }, { "epoch": 0.88, "learning_rate": 7.156316556467302e-07, "loss": 0.3884, "step": 4671 }, { "epoch": 0.88, "learning_rate": 7.133601739228735e-07, "loss": 0.5064, "step": 4672 }, { "epoch": 0.88, "learning_rate": 7.110921695346007e-07, "loss": 0.5612, "step": 4673 }, { "epoch": 0.88, "learning_rate": 7.088276433311614e-07, "loss": 0.3232, "step": 4674 }, { "epoch": 0.88, "learning_rate": 7.06566596160494e-07, "loss": 0.4567, "step": 4675 }, { "epoch": 0.88, "learning_rate": 7.043090288692367e-07, "loss": 0.3832, "step": 4676 }, { "epoch": 0.88, "learning_rate": 7.020549423027223e-07, "loss": 0.4134, "step": 4677 }, { "epoch": 0.88, "learning_rate": 6.998043373049845e-07, "loss": 0.4939, "step": 4678 }, { "epoch": 0.88, "learning_rate": 6.975572147187503e-07, "loss": 0.3565, "step": 4679 }, { "epoch": 0.88, "learning_rate": 6.953135753854423e-07, "loss": 0.3928, "step": 4680 }, { "epoch": 0.88, "learning_rate": 6.930734201451817e-07, "loss": 0.4446, "step": 4681 }, { "epoch": 0.88, "learning_rate": 6.908367498367819e-07, "loss": 0.5913, "step": 4682 }, { "epoch": 0.88, "learning_rate": 6.886035652977541e-07, "loss": 0.4237, "step": 4683 }, { "epoch": 0.88, "learning_rate": 6.863738673643006e-07, "loss": 0.4456, "step": 4684 }, { "epoch": 0.89, "learning_rate": 6.841476568713234e-07, "loss": 0.4087, "step": 4685 }, { "epoch": 0.89, "learning_rate": 6.819249346524171e-07, "loss": 0.4661, "step": 4686 }, { "epoch": 0.89, "learning_rate": 6.797057015398634e-07, "loss": 0.443, "step": 4687 }, { "epoch": 0.89, "learning_rate": 6.774899583646478e-07, "loss": 0.3886, "step": 4688 }, { "epoch": 0.89, "learning_rate": 6.752777059564431e-07, "loss": 0.3969, "step": 4689 }, { "epoch": 0.89, "learning_rate": 6.730689451436134e-07, "loss": 0.6363, "step": 4690 }, { "epoch": 0.89, "learning_rate": 6.708636767532218e-07, "loss": 0.3361, "step": 4691 }, { "epoch": 0.89, "learning_rate": 6.686619016110186e-07, "loss": 0.4692, "step": 4692 }, { "epoch": 0.89, "learning_rate": 6.664636205414465e-07, "loss": 0.5039, "step": 4693 }, { "epoch": 0.89, "learning_rate": 6.642688343676418e-07, "loss": 0.4603, "step": 4694 }, { "epoch": 0.89, "learning_rate": 6.620775439114302e-07, "loss": 0.4471, "step": 4695 }, { "epoch": 0.89, "learning_rate": 6.59889749993331e-07, "loss": 0.6286, "step": 4696 }, { "epoch": 0.89, "learning_rate": 6.577054534325511e-07, "loss": 0.4438, "step": 4697 }, { "epoch": 0.89, "learning_rate": 6.555246550469907e-07, "loss": 0.4633, "step": 4698 }, { "epoch": 0.89, "learning_rate": 6.533473556532366e-07, "loss": 0.3515, "step": 4699 }, { "epoch": 0.89, "learning_rate": 6.511735560665699e-07, "loss": 0.5262, "step": 4700 }, { "epoch": 0.89, "learning_rate": 6.490032571009586e-07, "loss": 0.4825, "step": 4701 }, { "epoch": 0.89, "learning_rate": 6.468364595690579e-07, "loss": 0.4828, "step": 4702 }, { "epoch": 0.89, "learning_rate": 6.44673164282219e-07, "loss": 0.4265, "step": 4703 }, { "epoch": 0.89, "learning_rate": 6.425133720504717e-07, "loss": 0.4584, "step": 4704 }, { "epoch": 0.89, "learning_rate": 6.403570836825413e-07, "loss": 0.3996, "step": 4705 }, { "epoch": 0.89, "learning_rate": 6.382042999858407e-07, "loss": 0.4865, "step": 4706 }, { "epoch": 0.89, "learning_rate": 6.360550217664685e-07, "loss": 0.6954, "step": 4707 }, { "epoch": 0.89, "learning_rate": 6.339092498292076e-07, "loss": 0.4397, "step": 4708 }, { "epoch": 0.89, "learning_rate": 6.317669849775365e-07, "loss": 0.3926, "step": 4709 }, { "epoch": 0.89, "learning_rate": 6.296282280136135e-07, "loss": 0.3287, "step": 4710 }, { "epoch": 0.89, "learning_rate": 6.274929797382834e-07, "loss": 0.3848, "step": 4711 }, { "epoch": 0.89, "learning_rate": 6.253612409510812e-07, "loss": 0.4506, "step": 4712 }, { "epoch": 0.89, "learning_rate": 6.232330124502262e-07, "loss": 0.5207, "step": 4713 }, { "epoch": 0.89, "learning_rate": 6.21108295032622e-07, "loss": 0.5029, "step": 4714 }, { "epoch": 0.89, "learning_rate": 6.189870894938587e-07, "loss": 0.4603, "step": 4715 }, { "epoch": 0.89, "learning_rate": 6.168693966282091e-07, "loss": 0.4384, "step": 4716 }, { "epoch": 0.89, "learning_rate": 6.147552172286375e-07, "loss": 0.5291, "step": 4717 }, { "epoch": 0.89, "learning_rate": 6.12644552086783e-07, "loss": 0.5228, "step": 4718 }, { "epoch": 0.89, "learning_rate": 6.105374019929733e-07, "loss": 0.5607, "step": 4719 }, { "epoch": 0.89, "learning_rate": 6.084337677362262e-07, "loss": 0.4597, "step": 4720 }, { "epoch": 0.89, "learning_rate": 6.063336501042305e-07, "loss": 0.4981, "step": 4721 }, { "epoch": 0.89, "learning_rate": 6.04237049883365e-07, "loss": 0.6502, "step": 4722 }, { "epoch": 0.89, "learning_rate": 6.02143967858696e-07, "loss": 0.4657, "step": 4723 }, { "epoch": 0.89, "learning_rate": 6.000544048139645e-07, "loss": 0.4357, "step": 4724 }, { "epoch": 0.89, "learning_rate": 5.979683615315957e-07, "loss": 0.4124, "step": 4725 }, { "epoch": 0.89, "learning_rate": 5.958858387926991e-07, "loss": 0.5485, "step": 4726 }, { "epoch": 0.89, "learning_rate": 5.938068373770667e-07, "loss": 0.657, "step": 4727 }, { "epoch": 0.89, "learning_rate": 5.917313580631678e-07, "loss": 0.5689, "step": 4728 }, { "epoch": 0.89, "learning_rate": 5.896594016281553e-07, "loss": 0.3134, "step": 4729 }, { "epoch": 0.89, "learning_rate": 5.875909688478643e-07, "loss": 0.2703, "step": 4730 }, { "epoch": 0.89, "learning_rate": 5.855260604968083e-07, "loss": 0.4522, "step": 4731 }, { "epoch": 0.89, "learning_rate": 5.834646773481811e-07, "loss": 0.345, "step": 4732 }, { "epoch": 0.89, "learning_rate": 5.814068201738587e-07, "loss": 0.5418, "step": 4733 }, { "epoch": 0.89, "learning_rate": 5.793524897443947e-07, "loss": 0.412, "step": 4734 }, { "epoch": 0.89, "learning_rate": 5.773016868290226e-07, "loss": 0.427, "step": 4735 }, { "epoch": 0.89, "learning_rate": 5.752544121956549e-07, "loss": 0.3451, "step": 4736 }, { "epoch": 0.89, "learning_rate": 5.732106666108827e-07, "loss": 0.518, "step": 4737 }, { "epoch": 0.9, "learning_rate": 5.711704508399806e-07, "loss": 0.5816, "step": 4738 }, { "epoch": 0.9, "learning_rate": 5.69133765646891e-07, "loss": 0.7014, "step": 4739 }, { "epoch": 0.9, "learning_rate": 5.671006117942435e-07, "loss": 0.3864, "step": 4740 }, { "epoch": 0.9, "learning_rate": 5.650709900433427e-07, "loss": 0.5267, "step": 4741 }, { "epoch": 0.9, "learning_rate": 5.630449011541716e-07, "loss": 0.5206, "step": 4742 }, { "epoch": 0.9, "learning_rate": 5.610223458853836e-07, "loss": 0.4754, "step": 4743 }, { "epoch": 0.9, "learning_rate": 5.590033249943194e-07, "loss": 0.416, "step": 4744 }, { "epoch": 0.9, "learning_rate": 5.56987839236991e-07, "loss": 0.5505, "step": 4745 }, { "epoch": 0.9, "learning_rate": 5.549758893680834e-07, "loss": 0.4715, "step": 4746 }, { "epoch": 0.9, "learning_rate": 5.529674761409643e-07, "loss": 0.4859, "step": 4747 }, { "epoch": 0.9, "learning_rate": 5.509626003076718e-07, "loss": 0.5902, "step": 4748 }, { "epoch": 0.9, "learning_rate": 5.489612626189245e-07, "loss": 0.527, "step": 4749 }, { "epoch": 0.9, "learning_rate": 5.469634638241106e-07, "loss": 0.4438, "step": 4750 }, { "epoch": 0.9, "learning_rate": 5.449692046712951e-07, "loss": 0.5534, "step": 4751 }, { "epoch": 0.9, "learning_rate": 5.429784859072229e-07, "loss": 0.4029, "step": 4752 }, { "epoch": 0.9, "learning_rate": 5.409913082773032e-07, "loss": 0.3151, "step": 4753 }, { "epoch": 0.9, "learning_rate": 5.390076725256265e-07, "loss": 0.5193, "step": 4754 }, { "epoch": 0.9, "learning_rate": 5.370275793949586e-07, "loss": 0.3671, "step": 4755 }, { "epoch": 0.9, "learning_rate": 5.35051029626732e-07, "loss": 0.3798, "step": 4756 }, { "epoch": 0.9, "learning_rate": 5.330780239610534e-07, "loss": 0.3386, "step": 4757 }, { "epoch": 0.9, "learning_rate": 5.311085631367108e-07, "loss": 0.3674, "step": 4758 }, { "epoch": 0.9, "learning_rate": 5.291426478911565e-07, "loss": 0.2639, "step": 4759 }, { "epoch": 0.9, "learning_rate": 5.27180278960514e-07, "loss": 0.4063, "step": 4760 }, { "epoch": 0.9, "learning_rate": 5.252214570795877e-07, "loss": 0.5063, "step": 4761 }, { "epoch": 0.9, "learning_rate": 5.232661829818464e-07, "loss": 0.4754, "step": 4762 }, { "epoch": 0.9, "learning_rate": 5.213144573994322e-07, "loss": 0.4331, "step": 4763 }, { "epoch": 0.9, "learning_rate": 5.193662810631595e-07, "loss": 0.6323, "step": 4764 }, { "epoch": 0.9, "learning_rate": 5.174216547025124e-07, "loss": 0.6156, "step": 4765 }, { "epoch": 0.9, "learning_rate": 5.154805790456486e-07, "loss": 0.4101, "step": 4766 }, { "epoch": 0.9, "learning_rate": 5.135430548193909e-07, "loss": 0.5782, "step": 4767 }, { "epoch": 0.9, "learning_rate": 5.116090827492381e-07, "loss": 0.5882, "step": 4768 }, { "epoch": 0.9, "learning_rate": 5.096786635593565e-07, "loss": 0.406, "step": 4769 }, { "epoch": 0.9, "learning_rate": 5.077517979725788e-07, "loss": 0.5302, "step": 4770 }, { "epoch": 0.9, "learning_rate": 5.058284867104135e-07, "loss": 0.4328, "step": 4771 }, { "epoch": 0.9, "learning_rate": 5.039087304930323e-07, "loss": 0.4481, "step": 4772 }, { "epoch": 0.9, "learning_rate": 5.019925300392814e-07, "loss": 0.4652, "step": 4773 }, { "epoch": 0.9, "learning_rate": 5.000798860666689e-07, "loss": 0.533, "step": 4774 }, { "epoch": 0.9, "learning_rate": 4.981707992913753e-07, "loss": 0.3416, "step": 4775 }, { "epoch": 0.9, "learning_rate": 4.962652704282533e-07, "loss": 0.4372, "step": 4776 }, { "epoch": 0.9, "learning_rate": 4.943633001908111e-07, "loss": 0.5058, "step": 4777 }, { "epoch": 0.9, "learning_rate": 4.924648892912365e-07, "loss": 0.4588, "step": 4778 }, { "epoch": 0.9, "learning_rate": 4.9057003844038e-07, "loss": 0.4679, "step": 4779 }, { "epoch": 0.9, "learning_rate": 4.886787483477573e-07, "loss": 0.5046, "step": 4780 }, { "epoch": 0.9, "learning_rate": 4.86791019721553e-07, "loss": 0.4106, "step": 4781 }, { "epoch": 0.9, "learning_rate": 4.849068532686185e-07, "loss": 0.361, "step": 4782 }, { "epoch": 0.9, "learning_rate": 4.830262496944693e-07, "loss": 0.3534, "step": 4783 }, { "epoch": 0.9, "learning_rate": 4.811492097032888e-07, "loss": 0.5078, "step": 4784 }, { "epoch": 0.9, "learning_rate": 4.792757339979248e-07, "loss": 0.4319, "step": 4785 }, { "epoch": 0.9, "learning_rate": 4.774058232798906e-07, "loss": 0.5215, "step": 4786 }, { "epoch": 0.9, "learning_rate": 4.7553947824936496e-07, "loss": 0.4092, "step": 4787 }, { "epoch": 0.9, "learning_rate": 4.7367669960519115e-07, "loss": 0.3416, "step": 4788 }, { "epoch": 0.9, "learning_rate": 4.718174880448767e-07, "loss": 0.4719, "step": 4789 }, { "epoch": 0.9, "learning_rate": 4.69961844264597e-07, "loss": 0.468, "step": 4790 }, { "epoch": 0.91, "learning_rate": 4.6810976895918623e-07, "loss": 0.6524, "step": 4791 }, { "epoch": 0.91, "learning_rate": 4.66261262822143e-07, "loss": 0.4645, "step": 4792 }, { "epoch": 0.91, "learning_rate": 4.6441632654563473e-07, "loss": 0.5071, "step": 4793 }, { "epoch": 0.91, "learning_rate": 4.625749608204877e-07, "loss": 0.2939, "step": 4794 }, { "epoch": 0.91, "learning_rate": 4.6073716633618925e-07, "loss": 0.5187, "step": 4795 }, { "epoch": 0.91, "learning_rate": 4.589029437808956e-07, "loss": 0.5587, "step": 4796 }, { "epoch": 0.91, "learning_rate": 4.5707229384142184e-07, "loss": 0.3857, "step": 4797 }, { "epoch": 0.91, "learning_rate": 4.5524521720324176e-07, "loss": 0.3955, "step": 4798 }, { "epoch": 0.91, "learning_rate": 4.5342171455049933e-07, "loss": 0.507, "step": 4799 }, { "epoch": 0.91, "learning_rate": 4.5160178656599495e-07, "loss": 0.4756, "step": 4800 }, { "epoch": 0.91, "learning_rate": 4.4978543393119023e-07, "loss": 0.4073, "step": 4801 }, { "epoch": 0.91, "learning_rate": 4.479726573262111e-07, "loss": 0.4203, "step": 4802 }, { "epoch": 0.91, "learning_rate": 4.4616345742984123e-07, "loss": 0.4066, "step": 4803 }, { "epoch": 0.91, "learning_rate": 4.4435783491952655e-07, "loss": 0.4113, "step": 4804 }, { "epoch": 0.91, "learning_rate": 4.42555790471374e-07, "loss": 0.4009, "step": 4805 }, { "epoch": 0.91, "learning_rate": 4.407573247601493e-07, "loss": 0.5109, "step": 4806 }, { "epoch": 0.91, "learning_rate": 4.3896243845927943e-07, "loss": 0.4121, "step": 4807 }, { "epoch": 0.91, "learning_rate": 4.371711322408501e-07, "loss": 0.5554, "step": 4808 }, { "epoch": 0.91, "learning_rate": 4.353834067756069e-07, "loss": 0.5243, "step": 4809 }, { "epoch": 0.91, "learning_rate": 4.335992627329555e-07, "loss": 0.4669, "step": 4810 }, { "epoch": 0.91, "learning_rate": 4.3181870078096047e-07, "loss": 0.489, "step": 4811 }, { "epoch": 0.91, "learning_rate": 4.300417215863406e-07, "loss": 0.4242, "step": 4812 }, { "epoch": 0.91, "learning_rate": 4.282683258144793e-07, "loss": 0.4773, "step": 4813 }, { "epoch": 0.91, "learning_rate": 4.2649851412941644e-07, "loss": 0.3248, "step": 4814 }, { "epoch": 0.91, "learning_rate": 4.2473228719384863e-07, "loss": 0.3414, "step": 4815 }, { "epoch": 0.91, "learning_rate": 4.229696456691301e-07, "loss": 0.5359, "step": 4816 }, { "epoch": 0.91, "learning_rate": 4.21210590215273e-07, "loss": 0.4933, "step": 4817 }, { "epoch": 0.91, "learning_rate": 4.1945512149094703e-07, "loss": 0.4705, "step": 4818 }, { "epoch": 0.91, "learning_rate": 4.1770324015347865e-07, "loss": 0.5122, "step": 4819 }, { "epoch": 0.91, "learning_rate": 4.1595494685885087e-07, "loss": 0.3634, "step": 4820 }, { "epoch": 0.91, "learning_rate": 4.1421024226170335e-07, "loss": 0.4787, "step": 4821 }, { "epoch": 0.91, "learning_rate": 4.1246912701533246e-07, "loss": 0.4548, "step": 4822 }, { "epoch": 0.91, "learning_rate": 4.1073160177169e-07, "loss": 0.4518, "step": 4823 }, { "epoch": 0.91, "learning_rate": 4.089976671813811e-07, "loss": 0.4998, "step": 4824 }, { "epoch": 0.91, "learning_rate": 4.072673238936742e-07, "loss": 0.4892, "step": 4825 }, { "epoch": 0.91, "learning_rate": 4.0554057255648315e-07, "loss": 0.4068, "step": 4826 }, { "epoch": 0.91, "learning_rate": 4.0381741381638085e-07, "loss": 0.4294, "step": 4827 }, { "epoch": 0.91, "learning_rate": 4.020978483185989e-07, "loss": 0.3783, "step": 4828 }, { "epoch": 0.91, "learning_rate": 4.003818767070189e-07, "loss": 0.3962, "step": 4829 }, { "epoch": 0.91, "learning_rate": 3.9866949962417465e-07, "loss": 0.3698, "step": 4830 }, { "epoch": 0.91, "learning_rate": 3.969607177112611e-07, "loss": 0.4191, "step": 4831 }, { "epoch": 0.91, "learning_rate": 3.95255531608123e-07, "loss": 0.4231, "step": 4832 }, { "epoch": 0.91, "learning_rate": 3.9355394195325414e-07, "loss": 0.5547, "step": 4833 }, { "epoch": 0.91, "learning_rate": 3.918559493838114e-07, "loss": 0.4448, "step": 4834 }, { "epoch": 0.91, "learning_rate": 3.901615545355963e-07, "loss": 0.4406, "step": 4835 }, { "epoch": 0.91, "learning_rate": 3.88470758043068e-07, "loss": 0.4828, "step": 4836 }, { "epoch": 0.91, "learning_rate": 3.8678356053933666e-07, "loss": 0.4338, "step": 4837 }, { "epoch": 0.91, "learning_rate": 3.8509996265616466e-07, "loss": 0.4797, "step": 4838 }, { "epoch": 0.91, "learning_rate": 3.834199650239667e-07, "loss": 0.3929, "step": 4839 }, { "epoch": 0.91, "learning_rate": 3.817435682718096e-07, "loss": 0.3972, "step": 4840 }, { "epoch": 0.91, "learning_rate": 3.8007077302741e-07, "loss": 0.3205, "step": 4841 }, { "epoch": 0.91, "learning_rate": 3.7840157991714256e-07, "loss": 0.4332, "step": 4842 }, { "epoch": 0.91, "learning_rate": 3.7673598956602276e-07, "loss": 0.5134, "step": 4843 }, { "epoch": 0.92, "learning_rate": 3.75074002597724e-07, "loss": 0.3759, "step": 4844 }, { "epoch": 0.92, "learning_rate": 3.734156196345706e-07, "loss": 0.2775, "step": 4845 }, { "epoch": 0.92, "learning_rate": 3.7176084129753596e-07, "loss": 0.5539, "step": 4846 }, { "epoch": 0.92, "learning_rate": 3.7010966820623996e-07, "loss": 0.4004, "step": 4847 }, { "epoch": 0.92, "learning_rate": 3.684621009789602e-07, "loss": 0.4751, "step": 4848 }, { "epoch": 0.92, "learning_rate": 3.668181402326165e-07, "loss": 0.3445, "step": 4849 }, { "epoch": 0.92, "learning_rate": 3.6517778658278546e-07, "loss": 0.5193, "step": 4850 }, { "epoch": 0.92, "learning_rate": 3.635410406436857e-07, "loss": 0.4555, "step": 4851 }, { "epoch": 0.92, "learning_rate": 3.619079030281902e-07, "loss": 0.4478, "step": 4852 }, { "epoch": 0.92, "learning_rate": 3.6027837434781886e-07, "loss": 0.3711, "step": 4853 }, { "epoch": 0.92, "learning_rate": 3.586524552127413e-07, "loss": 0.4055, "step": 4854 }, { "epoch": 0.92, "learning_rate": 3.570301462317727e-07, "loss": 0.468, "step": 4855 }, { "epoch": 0.92, "learning_rate": 3.554114480123805e-07, "loss": 0.5527, "step": 4856 }, { "epoch": 0.92, "learning_rate": 3.5379636116067764e-07, "loss": 0.4733, "step": 4857 }, { "epoch": 0.92, "learning_rate": 3.521848862814248e-07, "loss": 0.3929, "step": 4858 }, { "epoch": 0.92, "learning_rate": 3.505770239780304e-07, "loss": 0.5241, "step": 4859 }, { "epoch": 0.92, "learning_rate": 3.489727748525518e-07, "loss": 0.3505, "step": 4860 }, { "epoch": 0.92, "learning_rate": 3.473721395056906e-07, "loss": 0.3301, "step": 4861 }, { "epoch": 0.92, "learning_rate": 3.4577511853679637e-07, "loss": 0.4449, "step": 4862 }, { "epoch": 0.92, "learning_rate": 3.4418171254386844e-07, "loss": 0.5318, "step": 4863 }, { "epoch": 0.92, "learning_rate": 3.425919221235463e-07, "loss": 0.4173, "step": 4864 }, { "epoch": 0.92, "learning_rate": 3.4100574787111927e-07, "loss": 0.4953, "step": 4865 }, { "epoch": 0.92, "learning_rate": 3.394231903805245e-07, "loss": 0.4146, "step": 4866 }, { "epoch": 0.92, "learning_rate": 3.378442502443424e-07, "loss": 0.41, "step": 4867 }, { "epoch": 0.92, "learning_rate": 3.3626892805379565e-07, "loss": 0.3671, "step": 4868 }, { "epoch": 0.92, "learning_rate": 3.3469722439876007e-07, "loss": 0.4766, "step": 4869 }, { "epoch": 0.92, "learning_rate": 3.3312913986775055e-07, "loss": 0.5288, "step": 4870 }, { "epoch": 0.92, "learning_rate": 3.3156467504792846e-07, "loss": 0.5301, "step": 4871 }, { "epoch": 0.92, "learning_rate": 3.3000383052509853e-07, "loss": 0.3755, "step": 4872 }, { "epoch": 0.92, "learning_rate": 3.284466068837133e-07, "loss": 0.3879, "step": 4873 }, { "epoch": 0.92, "learning_rate": 3.2689300470686633e-07, "loss": 0.4814, "step": 4874 }, { "epoch": 0.92, "learning_rate": 3.2534302457629674e-07, "loss": 0.5066, "step": 4875 }, { "epoch": 0.92, "learning_rate": 3.2379666707238464e-07, "loss": 0.3163, "step": 4876 }, { "epoch": 0.92, "learning_rate": 3.222539327741592e-07, "loss": 0.5546, "step": 4877 }, { "epoch": 0.92, "learning_rate": 3.207148222592871e-07, "loss": 0.4886, "step": 4878 }, { "epoch": 0.92, "learning_rate": 3.191793361040796e-07, "loss": 0.5329, "step": 4879 }, { "epoch": 0.92, "learning_rate": 3.176474748834946e-07, "loss": 0.4125, "step": 4880 }, { "epoch": 0.92, "learning_rate": 3.1611923917112987e-07, "loss": 0.5257, "step": 4881 }, { "epoch": 0.92, "learning_rate": 3.1459462953922104e-07, "loss": 0.3612, "step": 4882 }, { "epoch": 0.92, "learning_rate": 3.130736465586559e-07, "loss": 0.489, "step": 4883 }, { "epoch": 0.92, "learning_rate": 3.1155629079895775e-07, "loss": 0.5645, "step": 4884 }, { "epoch": 0.92, "learning_rate": 3.100425628282899e-07, "loss": 0.4006, "step": 4885 }, { "epoch": 0.92, "learning_rate": 3.085324632134623e-07, "loss": 0.5257, "step": 4886 }, { "epoch": 0.92, "learning_rate": 3.070259925199248e-07, "loss": 0.7462, "step": 4887 }, { "epoch": 0.92, "learning_rate": 3.0552315131176625e-07, "loss": 0.3979, "step": 4888 }, { "epoch": 0.92, "learning_rate": 3.040239401517187e-07, "loss": 0.3904, "step": 4889 }, { "epoch": 0.92, "learning_rate": 3.0252835960115524e-07, "loss": 0.4184, "step": 4890 }, { "epoch": 0.92, "learning_rate": 3.010364102200869e-07, "loss": 0.4603, "step": 4891 }, { "epoch": 0.92, "learning_rate": 2.9954809256716677e-07, "loss": 0.3515, "step": 4892 }, { "epoch": 0.92, "learning_rate": 2.9806340719968795e-07, "loss": 0.3631, "step": 4893 }, { "epoch": 0.92, "learning_rate": 2.9658235467358356e-07, "loss": 0.3496, "step": 4894 }, { "epoch": 0.92, "learning_rate": 2.951049355434277e-07, "loss": 0.342, "step": 4895 }, { "epoch": 0.92, "learning_rate": 2.9363115036243116e-07, "loss": 0.4748, "step": 4896 }, { "epoch": 0.93, "learning_rate": 2.921609996824437e-07, "loss": 0.4662, "step": 4897 }, { "epoch": 0.93, "learning_rate": 2.906944840539605e-07, "loss": 0.4973, "step": 4898 }, { "epoch": 0.93, "learning_rate": 2.892316040261067e-07, "loss": 0.3884, "step": 4899 }, { "epoch": 0.93, "learning_rate": 2.8777236014665086e-07, "loss": 0.4985, "step": 4900 }, { "epoch": 0.93, "learning_rate": 2.863167529620015e-07, "loss": 0.4041, "step": 4901 }, { "epoch": 0.93, "learning_rate": 2.8486478301720246e-07, "loss": 0.3986, "step": 4902 }, { "epoch": 0.93, "learning_rate": 2.834164508559367e-07, "loss": 0.4765, "step": 4903 }, { "epoch": 0.93, "learning_rate": 2.819717570205238e-07, "loss": 0.3913, "step": 4904 }, { "epoch": 0.93, "learning_rate": 2.8053070205192433e-07, "loss": 0.4888, "step": 4905 }, { "epoch": 0.93, "learning_rate": 2.790932864897322e-07, "loss": 0.5525, "step": 4906 }, { "epoch": 0.93, "learning_rate": 2.7765951087218134e-07, "loss": 0.4809, "step": 4907 }, { "epoch": 0.93, "learning_rate": 2.762293757361412e-07, "loss": 0.3934, "step": 4908 }, { "epoch": 0.93, "learning_rate": 2.748028816171189e-07, "loss": 0.3164, "step": 4909 }, { "epoch": 0.93, "learning_rate": 2.733800290492572e-07, "loss": 0.5103, "step": 4910 }, { "epoch": 0.93, "learning_rate": 2.7196081856533775e-07, "loss": 0.3697, "step": 4911 }, { "epoch": 0.93, "learning_rate": 2.7054525069677653e-07, "loss": 0.4568, "step": 4912 }, { "epoch": 0.93, "learning_rate": 2.6913332597362506e-07, "loss": 0.3642, "step": 4913 }, { "epoch": 0.93, "learning_rate": 2.677250449245705e-07, "loss": 0.4797, "step": 4914 }, { "epoch": 0.93, "learning_rate": 2.663204080769388e-07, "loss": 0.4867, "step": 4915 }, { "epoch": 0.93, "learning_rate": 2.6491941595668926e-07, "loss": 0.4492, "step": 4916 }, { "epoch": 0.93, "learning_rate": 2.6352206908841325e-07, "loss": 0.3513, "step": 4917 }, { "epoch": 0.93, "learning_rate": 2.6212836799534237e-07, "loss": 0.4956, "step": 4918 }, { "epoch": 0.93, "learning_rate": 2.607383131993424e-07, "loss": 0.4402, "step": 4919 }, { "epoch": 0.93, "learning_rate": 2.5935190522090924e-07, "loss": 0.4343, "step": 4920 }, { "epoch": 0.93, "learning_rate": 2.579691445791788e-07, "loss": 0.4526, "step": 4921 }, { "epoch": 0.93, "learning_rate": 2.565900317919179e-07, "loss": 0.3998, "step": 4922 }, { "epoch": 0.93, "learning_rate": 2.5521456737552906e-07, "loss": 0.5346, "step": 4923 }, { "epoch": 0.93, "learning_rate": 2.5384275184504705e-07, "loss": 0.4271, "step": 4924 }, { "epoch": 0.93, "learning_rate": 2.5247458571414083e-07, "loss": 0.4155, "step": 4925 }, { "epoch": 0.93, "learning_rate": 2.511100694951152e-07, "loss": 0.4444, "step": 4926 }, { "epoch": 0.93, "learning_rate": 2.497492036989058e-07, "loss": 0.3934, "step": 4927 }, { "epoch": 0.93, "learning_rate": 2.4839198883508055e-07, "loss": 0.4071, "step": 4928 }, { "epoch": 0.93, "learning_rate": 2.4703842541184407e-07, "loss": 0.4191, "step": 4929 }, { "epoch": 0.93, "learning_rate": 2.4568851393602877e-07, "loss": 0.502, "step": 4930 }, { "epoch": 0.93, "learning_rate": 2.443422549131047e-07, "loss": 0.5576, "step": 4931 }, { "epoch": 0.93, "learning_rate": 2.4299964884716867e-07, "loss": 0.4374, "step": 4932 }, { "epoch": 0.93, "learning_rate": 2.4166069624095736e-07, "loss": 0.5818, "step": 4933 }, { "epoch": 0.93, "learning_rate": 2.403253975958297e-07, "loss": 0.4857, "step": 4934 }, { "epoch": 0.93, "learning_rate": 2.3899375341178476e-07, "loss": 0.402, "step": 4935 }, { "epoch": 0.93, "learning_rate": 2.3766576418745024e-07, "loss": 0.4552, "step": 4936 }, { "epoch": 0.93, "learning_rate": 2.3634143042008396e-07, "loss": 0.525, "step": 4937 }, { "epoch": 0.93, "learning_rate": 2.3502075260557588e-07, "loss": 0.3957, "step": 4938 }, { "epoch": 0.93, "learning_rate": 2.3370373123844715e-07, "loss": 0.4856, "step": 4939 }, { "epoch": 0.93, "learning_rate": 2.323903668118499e-07, "loss": 0.3601, "step": 4940 }, { "epoch": 0.93, "learning_rate": 2.3108065981756743e-07, "loss": 0.3488, "step": 4941 }, { "epoch": 0.93, "learning_rate": 2.2977461074601304e-07, "loss": 0.383, "step": 4942 }, { "epoch": 0.93, "learning_rate": 2.2847222008622993e-07, "loss": 0.416, "step": 4943 }, { "epoch": 0.93, "learning_rate": 2.2717348832589025e-07, "loss": 0.6218, "step": 4944 }, { "epoch": 0.93, "learning_rate": 2.2587841595129945e-07, "loss": 0.4051, "step": 4945 }, { "epoch": 0.93, "learning_rate": 2.245870034473896e-07, "loss": 0.414, "step": 4946 }, { "epoch": 0.93, "learning_rate": 2.2329925129772613e-07, "loss": 0.5589, "step": 4947 }, { "epoch": 0.93, "learning_rate": 2.2201515998449884e-07, "loss": 0.4458, "step": 4948 }, { "epoch": 0.94, "learning_rate": 2.2073472998852874e-07, "loss": 0.3412, "step": 4949 }, { "epoch": 0.94, "learning_rate": 2.1945796178927003e-07, "loss": 0.4456, "step": 4950 }, { "epoch": 0.94, "learning_rate": 2.181848558647981e-07, "loss": 0.4796, "step": 4951 }, { "epoch": 0.94, "learning_rate": 2.169154126918216e-07, "loss": 0.4019, "step": 4952 }, { "epoch": 0.94, "learning_rate": 2.1564963274568028e-07, "loss": 0.4488, "step": 4953 }, { "epoch": 0.94, "learning_rate": 2.143875165003373e-07, "loss": 0.3775, "step": 4954 }, { "epoch": 0.94, "learning_rate": 2.1312906442838344e-07, "loss": 0.4453, "step": 4955 }, { "epoch": 0.94, "learning_rate": 2.1187427700104292e-07, "loss": 0.5731, "step": 4956 }, { "epoch": 0.94, "learning_rate": 2.1062315468816318e-07, "loss": 0.5096, "step": 4957 }, { "epoch": 0.94, "learning_rate": 2.0937569795822177e-07, "loss": 0.4922, "step": 4958 }, { "epoch": 0.94, "learning_rate": 2.0813190727832055e-07, "loss": 0.4735, "step": 4959 }, { "epoch": 0.94, "learning_rate": 2.0689178311419257e-07, "loss": 0.5767, "step": 4960 }, { "epoch": 0.94, "learning_rate": 2.0565532593019632e-07, "loss": 0.3884, "step": 4961 }, { "epoch": 0.94, "learning_rate": 2.0442253618931484e-07, "loss": 0.4009, "step": 4962 }, { "epoch": 0.94, "learning_rate": 2.031934143531611e-07, "loss": 0.4019, "step": 4963 }, { "epoch": 0.94, "learning_rate": 2.0196796088197468e-07, "loss": 0.3898, "step": 4964 }, { "epoch": 0.94, "learning_rate": 2.0074617623461968e-07, "loss": 0.5633, "step": 4965 }, { "epoch": 0.94, "learning_rate": 1.9952806086858567e-07, "loss": 0.3963, "step": 4966 }, { "epoch": 0.94, "learning_rate": 1.9831361523999227e-07, "loss": 0.5418, "step": 4967 }, { "epoch": 0.94, "learning_rate": 1.9710283980358237e-07, "loss": 0.4345, "step": 4968 }, { "epoch": 0.94, "learning_rate": 1.9589573501272218e-07, "loss": 0.4501, "step": 4969 }, { "epoch": 0.94, "learning_rate": 1.9469230131940907e-07, "loss": 0.2699, "step": 4970 }, { "epoch": 0.94, "learning_rate": 1.934925391742626e-07, "loss": 0.3776, "step": 4971 }, { "epoch": 0.94, "learning_rate": 1.9229644902652333e-07, "loss": 0.5306, "step": 4972 }, { "epoch": 0.94, "learning_rate": 1.911040313240664e-07, "loss": 0.3894, "step": 4973 }, { "epoch": 0.94, "learning_rate": 1.8991528651338465e-07, "loss": 0.5127, "step": 4974 }, { "epoch": 0.94, "learning_rate": 1.8873021503959644e-07, "loss": 0.5183, "step": 4975 }, { "epoch": 0.94, "learning_rate": 1.8754881734644792e-07, "loss": 0.4378, "step": 4976 }, { "epoch": 0.94, "learning_rate": 1.8637109387630637e-07, "loss": 0.3916, "step": 4977 }, { "epoch": 0.94, "learning_rate": 1.8519704507016455e-07, "loss": 0.4314, "step": 4978 }, { "epoch": 0.94, "learning_rate": 1.8402667136763864e-07, "loss": 0.3611, "step": 4979 }, { "epoch": 0.94, "learning_rate": 1.8285997320697024e-07, "loss": 0.5461, "step": 4980 }, { "epoch": 0.94, "learning_rate": 1.8169695102502217e-07, "loss": 0.4065, "step": 4981 }, { "epoch": 0.94, "learning_rate": 1.8053760525728493e-07, "loss": 0.5932, "step": 4982 }, { "epoch": 0.94, "learning_rate": 1.7938193633786682e-07, "loss": 0.3875, "step": 4983 }, { "epoch": 0.94, "learning_rate": 1.7822994469950282e-07, "loss": 0.4371, "step": 4984 }, { "epoch": 0.94, "learning_rate": 1.7708163077355234e-07, "loss": 0.4773, "step": 4985 }, { "epoch": 0.94, "learning_rate": 1.759369949899936e-07, "loss": 0.4485, "step": 4986 }, { "epoch": 0.94, "learning_rate": 1.7479603777742937e-07, "loss": 0.3606, "step": 4987 }, { "epoch": 0.94, "learning_rate": 1.7365875956308787e-07, "loss": 0.4681, "step": 4988 }, { "epoch": 0.94, "learning_rate": 1.7252516077281623e-07, "loss": 0.3942, "step": 4989 }, { "epoch": 0.94, "learning_rate": 1.713952418310827e-07, "loss": 0.408, "step": 4990 }, { "epoch": 0.94, "learning_rate": 1.7026900316098217e-07, "loss": 0.2346, "step": 4991 }, { "epoch": 0.94, "learning_rate": 1.691464451842284e-07, "loss": 0.4081, "step": 4992 }, { "epoch": 0.94, "learning_rate": 1.680275683211574e-07, "loss": 0.4445, "step": 4993 }, { "epoch": 0.94, "learning_rate": 1.6691237299072738e-07, "loss": 0.5196, "step": 4994 }, { "epoch": 0.94, "learning_rate": 1.658008596105176e-07, "loss": 0.4522, "step": 4995 }, { "epoch": 0.94, "learning_rate": 1.6469302859672743e-07, "loss": 0.3698, "step": 4996 }, { "epoch": 0.94, "learning_rate": 1.6358888036418053e-07, "loss": 0.3549, "step": 4997 }, { "epoch": 0.94, "learning_rate": 1.6248841532631731e-07, "loss": 0.4717, "step": 4998 }, { "epoch": 0.94, "learning_rate": 1.6139163389520375e-07, "loss": 0.4656, "step": 4999 }, { "epoch": 0.94, "learning_rate": 1.602985364815235e-07, "loss": 0.4128, "step": 5000 }, { "epoch": 0.94, "learning_rate": 1.5920912349457918e-07, "loss": 0.455, "step": 5001 }, { "epoch": 0.95, "learning_rate": 1.5812339534229893e-07, "loss": 0.537, "step": 5002 }, { "epoch": 0.95, "learning_rate": 1.5704135243122865e-07, "loss": 0.4549, "step": 5003 }, { "epoch": 0.95, "learning_rate": 1.559629951665298e-07, "loss": 0.6598, "step": 5004 }, { "epoch": 0.95, "learning_rate": 1.5488832395199162e-07, "loss": 0.4226, "step": 5005 }, { "epoch": 0.95, "learning_rate": 1.5381733919001884e-07, "loss": 0.4363, "step": 5006 }, { "epoch": 0.95, "learning_rate": 1.5275004128163407e-07, "loss": 0.4881, "step": 5007 }, { "epoch": 0.95, "learning_rate": 1.5168643062648425e-07, "loss": 0.3358, "step": 5008 }, { "epoch": 0.95, "learning_rate": 1.5062650762283193e-07, "loss": 0.4808, "step": 5009 }, { "epoch": 0.95, "learning_rate": 1.4957027266755963e-07, "loss": 0.4883, "step": 5010 }, { "epoch": 0.95, "learning_rate": 1.4851772615616878e-07, "loss": 0.3586, "step": 5011 }, { "epoch": 0.95, "learning_rate": 1.4746886848278187e-07, "loss": 0.3737, "step": 5012 }, { "epoch": 0.95, "learning_rate": 1.4642370004013696e-07, "loss": 0.5082, "step": 5013 }, { "epoch": 0.95, "learning_rate": 1.4538222121959322e-07, "loss": 0.3786, "step": 5014 }, { "epoch": 0.95, "learning_rate": 1.4434443241112538e-07, "loss": 0.5012, "step": 5015 }, { "epoch": 0.95, "learning_rate": 1.433103340033304e-07, "loss": 0.453, "step": 5016 }, { "epoch": 0.95, "learning_rate": 1.422799263834196e-07, "loss": 0.372, "step": 5017 }, { "epoch": 0.95, "learning_rate": 1.4125320993722435e-07, "loss": 0.4712, "step": 5018 }, { "epoch": 0.95, "learning_rate": 1.4023018504919384e-07, "loss": 0.3681, "step": 5019 }, { "epoch": 0.95, "learning_rate": 1.3921085210239494e-07, "loss": 0.4414, "step": 5020 }, { "epoch": 0.95, "learning_rate": 1.3819521147851122e-07, "loss": 0.4931, "step": 5021 }, { "epoch": 0.95, "learning_rate": 1.3718326355784295e-07, "loss": 0.5331, "step": 5022 }, { "epoch": 0.95, "learning_rate": 1.3617500871931143e-07, "loss": 0.4664, "step": 5023 }, { "epoch": 0.95, "learning_rate": 1.3517044734045136e-07, "loss": 0.4558, "step": 5024 }, { "epoch": 0.95, "learning_rate": 1.3416957979741407e-07, "loss": 0.4248, "step": 5025 }, { "epoch": 0.95, "learning_rate": 1.3317240646497197e-07, "loss": 0.548, "step": 5026 }, { "epoch": 0.95, "learning_rate": 1.3217892771651087e-07, "loss": 0.5644, "step": 5027 }, { "epoch": 0.95, "learning_rate": 1.3118914392403426e-07, "loss": 0.4784, "step": 5028 }, { "epoch": 0.95, "learning_rate": 1.3020305545816015e-07, "loss": 0.4526, "step": 5029 }, { "epoch": 0.95, "learning_rate": 1.292206626881254e-07, "loss": 0.4054, "step": 5030 }, { "epoch": 0.95, "learning_rate": 1.282419659817824e-07, "loss": 0.4217, "step": 5031 }, { "epoch": 0.95, "learning_rate": 1.2726696570559915e-07, "loss": 0.5513, "step": 5032 }, { "epoch": 0.95, "learning_rate": 1.2629566222465805e-07, "loss": 0.4128, "step": 5033 }, { "epoch": 0.95, "learning_rate": 1.253280559026615e-07, "loss": 0.3836, "step": 5034 }, { "epoch": 0.95, "learning_rate": 1.2436414710192302e-07, "loss": 0.4227, "step": 5035 }, { "epoch": 0.95, "learning_rate": 1.2340393618337167e-07, "loss": 0.5342, "step": 5036 }, { "epoch": 0.95, "learning_rate": 1.224474235065587e-07, "loss": 0.3971, "step": 5037 }, { "epoch": 0.95, "learning_rate": 1.2149460942964097e-07, "loss": 0.4964, "step": 5038 }, { "epoch": 0.95, "learning_rate": 1.2054549430939532e-07, "loss": 0.4371, "step": 5039 }, { "epoch": 0.95, "learning_rate": 1.1960007850121523e-07, "loss": 0.4003, "step": 5040 }, { "epoch": 0.95, "learning_rate": 1.1865836235910755e-07, "loss": 0.4778, "step": 5041 }, { "epoch": 0.95, "learning_rate": 1.1772034623568907e-07, "loss": 0.451, "step": 5042 }, { "epoch": 0.95, "learning_rate": 1.1678603048219773e-07, "loss": 0.4424, "step": 5043 }, { "epoch": 0.95, "learning_rate": 1.1585541544848478e-07, "loss": 0.5064, "step": 5044 }, { "epoch": 0.95, "learning_rate": 1.1492850148301148e-07, "loss": 0.4037, "step": 5045 }, { "epoch": 0.95, "learning_rate": 1.1400528893285801e-07, "loss": 0.4784, "step": 5046 }, { "epoch": 0.95, "learning_rate": 1.1308577814371669e-07, "loss": 0.3321, "step": 5047 }, { "epoch": 0.95, "learning_rate": 1.1216996945989212e-07, "loss": 0.4276, "step": 5048 }, { "epoch": 0.95, "learning_rate": 1.1125786322430554e-07, "loss": 0.3975, "step": 5049 }, { "epoch": 0.95, "learning_rate": 1.1034945977849043e-07, "loss": 0.4559, "step": 5050 }, { "epoch": 0.95, "learning_rate": 1.0944475946259358e-07, "loss": 0.4288, "step": 5051 }, { "epoch": 0.95, "learning_rate": 1.0854376261537513e-07, "loss": 0.4418, "step": 5052 }, { "epoch": 0.95, "learning_rate": 1.0764646957420966e-07, "loss": 0.4648, "step": 5053 }, { "epoch": 0.95, "learning_rate": 1.0675288067508394e-07, "loss": 0.4224, "step": 5054 }, { "epoch": 0.96, "learning_rate": 1.0586299625259699e-07, "loss": 0.4774, "step": 5055 }, { "epoch": 0.96, "learning_rate": 1.049768166399634e-07, "loss": 0.4133, "step": 5056 }, { "epoch": 0.96, "learning_rate": 1.040943421690055e-07, "loss": 0.3888, "step": 5057 }, { "epoch": 0.96, "learning_rate": 1.032155731701645e-07, "loss": 0.4277, "step": 5058 }, { "epoch": 0.96, "learning_rate": 1.0234050997248945e-07, "loss": 0.3141, "step": 5059 }, { "epoch": 0.96, "learning_rate": 1.014691529036449e-07, "loss": 0.6237, "step": 5060 }, { "epoch": 0.96, "learning_rate": 1.0060150228990429e-07, "loss": 0.5081, "step": 5061 }, { "epoch": 0.96, "learning_rate": 9.973755845615551e-08, "loss": 0.5245, "step": 5062 }, { "epoch": 0.96, "learning_rate": 9.887732172589759e-08, "loss": 0.4649, "step": 5063 }, { "epoch": 0.96, "learning_rate": 9.802079242124396e-08, "loss": 0.3948, "step": 5064 }, { "epoch": 0.96, "learning_rate": 9.716797086291475e-08, "loss": 0.3736, "step": 5065 }, { "epoch": 0.96, "learning_rate": 9.631885737024782e-08, "loss": 0.4515, "step": 5066 }, { "epoch": 0.96, "learning_rate": 9.547345226118666e-08, "loss": 0.5459, "step": 5067 }, { "epoch": 0.96, "learning_rate": 9.463175585228913e-08, "loss": 0.3576, "step": 5068 }, { "epoch": 0.96, "learning_rate": 9.379376845872756e-08, "loss": 0.4435, "step": 5069 }, { "epoch": 0.96, "learning_rate": 9.295949039427876e-08, "loss": 0.4364, "step": 5070 }, { "epoch": 0.96, "learning_rate": 9.212892197133394e-08, "loss": 0.4831, "step": 5071 }, { "epoch": 0.96, "learning_rate": 9.130206350089765e-08, "loss": 0.4581, "step": 5072 }, { "epoch": 0.96, "learning_rate": 9.047891529258112e-08, "loss": 0.5572, "step": 5073 }, { "epoch": 0.96, "learning_rate": 8.96594776546078e-08, "loss": 0.4738, "step": 5074 }, { "epoch": 0.96, "learning_rate": 8.884375089381336e-08, "loss": 0.3899, "step": 5075 }, { "epoch": 0.96, "learning_rate": 8.803173531564235e-08, "loss": 0.512, "step": 5076 }, { "epoch": 0.96, "learning_rate": 8.722343122414823e-08, "loss": 0.5958, "step": 5077 }, { "epoch": 0.96, "learning_rate": 8.641883892199775e-08, "loss": 0.3792, "step": 5078 }, { "epoch": 0.96, "learning_rate": 8.561795871046774e-08, "loss": 0.4832, "step": 5079 }, { "epoch": 0.96, "learning_rate": 8.482079088944051e-08, "loss": 0.4642, "step": 5080 }, { "epoch": 0.96, "learning_rate": 8.402733575741506e-08, "loss": 0.4044, "step": 5081 }, { "epoch": 0.96, "learning_rate": 8.323759361149374e-08, "loss": 0.426, "step": 5082 }, { "epoch": 0.96, "learning_rate": 8.245156474739335e-08, "loss": 0.5061, "step": 5083 }, { "epoch": 0.96, "learning_rate": 8.166924945943733e-08, "loss": 0.4776, "step": 5084 }, { "epoch": 0.96, "learning_rate": 8.089064804056135e-08, "loss": 0.4444, "step": 5085 }, { "epoch": 0.96, "learning_rate": 8.011576078230665e-08, "loss": 0.501, "step": 5086 }, { "epoch": 0.96, "learning_rate": 7.93445879748267e-08, "loss": 0.4391, "step": 5087 }, { "epoch": 0.96, "learning_rate": 7.857712990688492e-08, "loss": 0.4102, "step": 5088 }, { "epoch": 0.96, "learning_rate": 7.781338686584928e-08, "loss": 0.3935, "step": 5089 }, { "epoch": 0.96, "learning_rate": 7.705335913770207e-08, "loss": 0.4654, "step": 5090 }, { "epoch": 0.96, "learning_rate": 7.629704700702901e-08, "loss": 0.4139, "step": 5091 }, { "epoch": 0.96, "learning_rate": 7.554445075703021e-08, "loss": 0.3171, "step": 5092 }, { "epoch": 0.96, "learning_rate": 7.479557066951138e-08, "loss": 0.4253, "step": 5093 }, { "epoch": 0.96, "learning_rate": 7.405040702488486e-08, "loss": 0.4391, "step": 5094 }, { "epoch": 0.96, "learning_rate": 7.330896010217414e-08, "loss": 0.5141, "step": 5095 }, { "epoch": 0.96, "learning_rate": 7.257123017901158e-08, "loss": 0.4487, "step": 5096 }, { "epoch": 0.96, "learning_rate": 7.183721753163508e-08, "loss": 0.4113, "step": 5097 }, { "epoch": 0.96, "learning_rate": 7.110692243489259e-08, "loss": 0.4651, "step": 5098 }, { "epoch": 0.96, "learning_rate": 7.038034516223869e-08, "loss": 0.4359, "step": 5099 }, { "epoch": 0.96, "learning_rate": 6.965748598573797e-08, "loss": 0.4132, "step": 5100 }, { "epoch": 0.96, "learning_rate": 6.893834517606057e-08, "loss": 0.4952, "step": 5101 }, { "epoch": 0.96, "learning_rate": 6.822292300248445e-08, "loss": 0.4818, "step": 5102 }, { "epoch": 0.96, "learning_rate": 6.751121973289754e-08, "loss": 0.62, "step": 5103 }, { "epoch": 0.96, "learning_rate": 6.68032356337922e-08, "loss": 0.489, "step": 5104 }, { "epoch": 0.96, "learning_rate": 6.609897097026974e-08, "loss": 0.6902, "step": 5105 }, { "epoch": 0.96, "learning_rate": 6.539842600603918e-08, "loss": 0.3033, "step": 5106 }, { "epoch": 0.96, "learning_rate": 6.470160100341516e-08, "loss": 0.3292, "step": 5107 }, { "epoch": 0.97, "learning_rate": 6.400849622332118e-08, "loss": 0.3272, "step": 5108 }, { "epoch": 0.97, "learning_rate": 6.331911192528516e-08, "loss": 0.5088, "step": 5109 }, { "epoch": 0.97, "learning_rate": 6.263344836744623e-08, "loss": 0.3662, "step": 5110 }, { "epoch": 0.97, "learning_rate": 6.195150580654563e-08, "loss": 0.4956, "step": 5111 }, { "epoch": 0.97, "learning_rate": 6.127328449793357e-08, "loss": 0.3773, "step": 5112 }, { "epoch": 0.97, "learning_rate": 6.059878469556801e-08, "loss": 0.4356, "step": 5113 }, { "epoch": 0.97, "learning_rate": 5.992800665201137e-08, "loss": 0.3229, "step": 5114 }, { "epoch": 0.97, "learning_rate": 5.9260950618430515e-08, "loss": 0.3992, "step": 5115 }, { "epoch": 0.97, "learning_rate": 5.859761684460563e-08, "loss": 0.3662, "step": 5116 }, { "epoch": 0.97, "learning_rate": 5.793800557891471e-08, "loss": 0.4117, "step": 5117 }, { "epoch": 0.97, "learning_rate": 5.728211706834908e-08, "loss": 0.5436, "step": 5118 }, { "epoch": 0.97, "learning_rate": 5.662995155850115e-08, "loss": 0.367, "step": 5119 }, { "epoch": 0.97, "learning_rate": 5.5981509293571154e-08, "loss": 0.2543, "step": 5120 }, { "epoch": 0.97, "learning_rate": 5.533679051636487e-08, "loss": 0.4326, "step": 5121 }, { "epoch": 0.97, "learning_rate": 5.469579546829473e-08, "loss": 0.3761, "step": 5122 }, { "epoch": 0.97, "learning_rate": 5.405852438937764e-08, "loss": 0.4994, "step": 5123 }, { "epoch": 0.97, "learning_rate": 5.3424977518237163e-08, "loss": 0.3981, "step": 5124 }, { "epoch": 0.97, "learning_rate": 5.279515509210131e-08, "loss": 0.4525, "step": 5125 }, { "epoch": 0.97, "learning_rate": 5.2169057346802556e-08, "loss": 0.6014, "step": 5126 }, { "epoch": 0.97, "learning_rate": 5.154668451678224e-08, "loss": 0.4561, "step": 5127 }, { "epoch": 0.97, "learning_rate": 5.092803683508396e-08, "loss": 0.4997, "step": 5128 }, { "epoch": 0.97, "learning_rate": 5.031311453335685e-08, "loss": 0.3828, "step": 5129 }, { "epoch": 0.97, "learning_rate": 4.970191784185563e-08, "loss": 0.5553, "step": 5130 }, { "epoch": 0.97, "learning_rate": 4.9094446989440546e-08, "loss": 0.4458, "step": 5131 }, { "epoch": 0.97, "learning_rate": 4.8490702203575215e-08, "loss": 0.3551, "step": 5132 }, { "epoch": 0.97, "learning_rate": 4.789068371032879e-08, "loss": 0.5681, "step": 5133 }, { "epoch": 0.97, "learning_rate": 4.7294391734376e-08, "loss": 0.3037, "step": 5134 }, { "epoch": 0.97, "learning_rate": 4.6701826498993794e-08, "loss": 0.4467, "step": 5135 }, { "epoch": 0.97, "learning_rate": 4.611298822606691e-08, "loss": 0.5709, "step": 5136 }, { "epoch": 0.97, "learning_rate": 4.552787713608231e-08, "loss": 0.4378, "step": 5137 }, { "epoch": 0.97, "learning_rate": 4.49464934481314e-08, "loss": 0.4745, "step": 5138 }, { "epoch": 0.97, "learning_rate": 4.436883737991005e-08, "loss": 0.447, "step": 5139 }, { "epoch": 0.97, "learning_rate": 4.3794909147720776e-08, "loss": 0.4863, "step": 5140 }, { "epoch": 0.97, "learning_rate": 4.322470896646502e-08, "loss": 0.3981, "step": 5141 }, { "epoch": 0.97, "learning_rate": 4.2658237049655325e-08, "loss": 0.5081, "step": 5142 }, { "epoch": 0.97, "learning_rate": 4.209549360940091e-08, "loss": 0.5154, "step": 5143 }, { "epoch": 0.97, "learning_rate": 4.153647885641876e-08, "loss": 0.3915, "step": 5144 }, { "epoch": 0.97, "learning_rate": 4.098119300003034e-08, "loss": 0.4725, "step": 5145 }, { "epoch": 0.97, "learning_rate": 4.042963624815932e-08, "loss": 0.4194, "step": 5146 }, { "epoch": 0.97, "learning_rate": 3.988180880733161e-08, "loss": 0.4245, "step": 5147 }, { "epoch": 0.97, "learning_rate": 3.9337710882682014e-08, "loss": 0.4158, "step": 5148 }, { "epoch": 0.97, "learning_rate": 3.879734267794199e-08, "loss": 0.4197, "step": 5149 }, { "epoch": 0.97, "learning_rate": 3.8260704395450824e-08, "loss": 0.4438, "step": 5150 }, { "epoch": 0.97, "learning_rate": 3.772779623614997e-08, "loss": 0.5332, "step": 5151 }, { "epoch": 0.97, "learning_rate": 3.719861839958428e-08, "loss": 0.5063, "step": 5152 }, { "epoch": 0.97, "learning_rate": 3.6673171083901894e-08, "loss": 0.3063, "step": 5153 }, { "epoch": 0.97, "learning_rate": 3.6151454485853225e-08, "loss": 0.448, "step": 5154 }, { "epoch": 0.97, "learning_rate": 3.5633468800793103e-08, "loss": 0.4914, "step": 5155 }, { "epoch": 0.97, "learning_rate": 3.511921422267972e-08, "loss": 0.4349, "step": 5156 }, { "epoch": 0.97, "learning_rate": 3.460869094407127e-08, "loss": 0.3723, "step": 5157 }, { "epoch": 0.97, "learning_rate": 3.410189915613038e-08, "loss": 0.4262, "step": 5158 }, { "epoch": 0.97, "learning_rate": 3.359883904862415e-08, "loss": 0.3784, "step": 5159 }, { "epoch": 0.97, "learning_rate": 3.3099510809920795e-08, "loss": 0.415, "step": 5160 }, { "epoch": 0.98, "learning_rate": 3.260391462698964e-08, "loss": 0.5329, "step": 5161 }, { "epoch": 0.98, "learning_rate": 3.211205068540668e-08, "loss": 0.3383, "step": 5162 }, { "epoch": 0.98, "learning_rate": 3.162391916934682e-08, "loss": 0.4294, "step": 5163 }, { "epoch": 0.98, "learning_rate": 3.113952026158607e-08, "loss": 0.3861, "step": 5164 }, { "epoch": 0.98, "learning_rate": 3.065885414350933e-08, "loss": 0.4187, "step": 5165 }, { "epoch": 0.98, "learning_rate": 3.018192099509709e-08, "loss": 0.3642, "step": 5166 }, { "epoch": 0.98, "learning_rate": 2.9708720994934272e-08, "loss": 0.5562, "step": 5167 }, { "epoch": 0.98, "learning_rate": 2.923925432021024e-08, "loss": 0.5825, "step": 5168 }, { "epoch": 0.98, "learning_rate": 2.8773521146712168e-08, "loss": 0.436, "step": 5169 }, { "epoch": 0.98, "learning_rate": 2.8311521648832775e-08, "loss": 0.403, "step": 5170 }, { "epoch": 0.98, "learning_rate": 2.7853255999564787e-08, "loss": 0.4805, "step": 5171 }, { "epoch": 0.98, "learning_rate": 2.739872437050539e-08, "loss": 0.3858, "step": 5172 }, { "epoch": 0.98, "learning_rate": 2.6947926931848444e-08, "loss": 0.5647, "step": 5173 }, { "epoch": 0.98, "learning_rate": 2.6500863852395585e-08, "loss": 0.5556, "step": 5174 }, { "epoch": 0.98, "learning_rate": 2.6057535299546245e-08, "loss": 0.3799, "step": 5175 }, { "epoch": 0.98, "learning_rate": 2.5617941439302073e-08, "loss": 0.4397, "step": 5176 }, { "epoch": 0.98, "learning_rate": 2.5182082436266963e-08, "loss": 0.5317, "step": 5177 }, { "epoch": 0.98, "learning_rate": 2.4749958453647027e-08, "loss": 0.4814, "step": 5178 }, { "epoch": 0.98, "learning_rate": 2.4321569653248388e-08, "loss": 0.4527, "step": 5179 }, { "epoch": 0.98, "learning_rate": 2.3896916195479404e-08, "loss": 0.3527, "step": 5180 }, { "epoch": 0.98, "learning_rate": 2.347599823934843e-08, "loss": 0.5549, "step": 5181 }, { "epoch": 0.98, "learning_rate": 2.3058815942467172e-08, "loss": 0.3972, "step": 5182 }, { "epoch": 0.98, "learning_rate": 2.264536946104623e-08, "loss": 0.3983, "step": 5183 }, { "epoch": 0.98, "learning_rate": 2.223565894990065e-08, "loss": 0.4789, "step": 5184 }, { "epoch": 0.98, "learning_rate": 2.1829684562442165e-08, "loss": 0.4785, "step": 5185 }, { "epoch": 0.98, "learning_rate": 2.1427446450686952e-08, "loss": 0.3398, "step": 5186 }, { "epoch": 0.98, "learning_rate": 2.1028944765251193e-08, "loss": 0.3493, "step": 5187 }, { "epoch": 0.98, "learning_rate": 2.0634179655351084e-08, "loss": 0.4578, "step": 5188 }, { "epoch": 0.98, "learning_rate": 2.024315126880505e-08, "loss": 0.5201, "step": 5189 }, { "epoch": 0.98, "learning_rate": 1.9855859752031526e-08, "loss": 0.4861, "step": 5190 }, { "epoch": 0.98, "learning_rate": 1.947230525005006e-08, "loss": 0.3994, "step": 5191 }, { "epoch": 0.98, "learning_rate": 1.9092487906480217e-08, "loss": 0.6497, "step": 5192 }, { "epoch": 0.98, "learning_rate": 1.8716407863543784e-08, "loss": 0.3898, "step": 5193 }, { "epoch": 0.98, "learning_rate": 1.834406526206034e-08, "loss": 0.4595, "step": 5194 }, { "epoch": 0.98, "learning_rate": 1.7975460241453913e-08, "loss": 0.4548, "step": 5195 }, { "epoch": 0.98, "learning_rate": 1.76105929397441e-08, "loss": 0.5892, "step": 5196 }, { "epoch": 0.98, "learning_rate": 1.724946349355605e-08, "loss": 0.4218, "step": 5197 }, { "epoch": 0.98, "learning_rate": 1.6892072038111605e-08, "loss": 0.3186, "step": 5198 }, { "epoch": 0.98, "learning_rate": 1.6538418707233716e-08, "loss": 0.4447, "step": 5199 }, { "epoch": 0.98, "learning_rate": 1.6188503633347562e-08, "loss": 0.3255, "step": 5200 }, { "epoch": 0.98, "learning_rate": 1.5842326947477226e-08, "loss": 0.5474, "step": 5201 }, { "epoch": 0.98, "learning_rate": 1.5499888779244577e-08, "loss": 0.4452, "step": 5202 }, { "epoch": 0.98, "learning_rate": 1.516118925687593e-08, "loss": 0.339, "step": 5203 }, { "epoch": 0.98, "learning_rate": 1.4826228507195394e-08, "loss": 0.4639, "step": 5204 }, { "epoch": 0.98, "learning_rate": 1.4495006655625976e-08, "loss": 0.3752, "step": 5205 }, { "epoch": 0.98, "learning_rate": 1.4167523826194018e-08, "loss": 0.5313, "step": 5206 }, { "epoch": 0.98, "learning_rate": 1.3843780141521435e-08, "loss": 0.4551, "step": 5207 }, { "epoch": 0.98, "learning_rate": 1.3523775722834586e-08, "loss": 0.5271, "step": 5208 }, { "epoch": 0.98, "learning_rate": 1.3207510689956515e-08, "loss": 0.4961, "step": 5209 }, { "epoch": 0.98, "learning_rate": 1.2894985161311379e-08, "loss": 0.521, "step": 5210 }, { "epoch": 0.98, "learning_rate": 1.2586199253922238e-08, "loss": 0.4206, "step": 5211 }, { "epoch": 0.98, "learning_rate": 1.2281153083413267e-08, "loss": 0.5598, "step": 5212 }, { "epoch": 0.98, "learning_rate": 1.1979846764006431e-08, "loss": 0.402, "step": 5213 }, { "epoch": 0.99, "learning_rate": 1.1682280408525926e-08, "loss": 0.4417, "step": 5214 }, { "epoch": 0.99, "learning_rate": 1.1388454128392624e-08, "loss": 0.6069, "step": 5215 }, { "epoch": 0.99, "learning_rate": 1.1098368033628514e-08, "loss": 0.4692, "step": 5216 }, { "epoch": 0.99, "learning_rate": 1.081202223285449e-08, "loss": 0.4391, "step": 5217 }, { "epoch": 0.99, "learning_rate": 1.052941683329256e-08, "loss": 0.5085, "step": 5218 }, { "epoch": 0.99, "learning_rate": 1.025055194076252e-08, "loss": 0.5021, "step": 5219 }, { "epoch": 0.99, "learning_rate": 9.975427659683069e-09, "loss": 0.5962, "step": 5220 }, { "epoch": 0.99, "learning_rate": 9.704044093072906e-09, "loss": 0.5479, "step": 5221 }, { "epoch": 0.99, "learning_rate": 9.43640134255186e-09, "loss": 0.4335, "step": 5222 }, { "epoch": 0.99, "learning_rate": 9.17249950833754e-09, "loss": 0.4472, "step": 5223 }, { "epoch": 0.99, "learning_rate": 8.912338689244238e-09, "loss": 0.4401, "step": 5224 }, { "epoch": 0.99, "learning_rate": 8.655918982689582e-09, "loss": 0.4624, "step": 5225 }, { "epoch": 0.99, "learning_rate": 8.40324048468788e-09, "loss": 0.3957, "step": 5226 }, { "epoch": 0.99, "learning_rate": 8.154303289854559e-09, "loss": 0.3794, "step": 5227 }, { "epoch": 0.99, "learning_rate": 7.909107491401723e-09, "loss": 0.3555, "step": 5228 }, { "epoch": 0.99, "learning_rate": 7.667653181142598e-09, "loss": 0.4709, "step": 5229 }, { "epoch": 0.99, "learning_rate": 7.429940449488193e-09, "loss": 0.4103, "step": 5230 }, { "epoch": 0.99, "learning_rate": 7.19596938544731e-09, "loss": 0.4513, "step": 5231 }, { "epoch": 0.99, "learning_rate": 6.965740076632088e-09, "loss": 0.5765, "step": 5232 }, { "epoch": 0.99, "learning_rate": 6.739252609249125e-09, "loss": 0.37, "step": 5233 }, { "epoch": 0.99, "learning_rate": 6.516507068105027e-09, "loss": 0.4782, "step": 5234 }, { "epoch": 0.99, "learning_rate": 6.297503536606408e-09, "loss": 0.4788, "step": 5235 }, { "epoch": 0.99, "learning_rate": 6.0822420967587835e-09, "loss": 0.3519, "step": 5236 }, { "epoch": 0.99, "learning_rate": 5.870722829164344e-09, "loss": 0.5393, "step": 5237 }, { "epoch": 0.99, "learning_rate": 5.662945813025289e-09, "loss": 0.5254, "step": 5238 }, { "epoch": 0.99, "learning_rate": 5.458911126144939e-09, "loss": 0.4272, "step": 5239 }, { "epoch": 0.99, "learning_rate": 5.258618844919961e-09, "loss": 0.4159, "step": 5240 }, { "epoch": 0.99, "learning_rate": 5.0620690443514696e-09, "loss": 0.4348, "step": 5241 }, { "epoch": 0.99, "learning_rate": 4.869261798035041e-09, "loss": 0.4283, "step": 5242 }, { "epoch": 0.99, "learning_rate": 4.680197178167367e-09, "loss": 0.5041, "step": 5243 }, { "epoch": 0.99, "learning_rate": 4.4948752555418196e-09, "loss": 0.4753, "step": 5244 }, { "epoch": 0.99, "learning_rate": 4.313296099550668e-09, "loss": 0.4027, "step": 5245 }, { "epoch": 0.99, "learning_rate": 4.135459778187301e-09, "loss": 0.4504, "step": 5246 }, { "epoch": 0.99, "learning_rate": 3.9613663580406745e-09, "loss": 0.4456, "step": 5247 }, { "epoch": 0.99, "learning_rate": 3.791015904298645e-09, "loss": 0.4626, "step": 5248 }, { "epoch": 0.99, "learning_rate": 3.624408480747965e-09, "loss": 0.3714, "step": 5249 }, { "epoch": 0.99, "learning_rate": 3.461544149775398e-09, "loss": 0.3102, "step": 5250 }, { "epoch": 0.99, "learning_rate": 3.302422972363273e-09, "loss": 0.4041, "step": 5251 }, { "epoch": 0.99, "learning_rate": 3.14704500809504e-09, "loss": 0.4435, "step": 5252 }, { "epoch": 0.99, "learning_rate": 2.995410315149716e-09, "loss": 0.4388, "step": 5253 }, { "epoch": 0.99, "learning_rate": 2.847518950306327e-09, "loss": 0.5492, "step": 5254 }, { "epoch": 0.99, "learning_rate": 2.7033709689439082e-09, "loss": 0.4913, "step": 5255 }, { "epoch": 0.99, "learning_rate": 2.562966425034841e-09, "loss": 0.4523, "step": 5256 }, { "epoch": 0.99, "learning_rate": 2.426305371155957e-09, "loss": 0.5, "step": 5257 }, { "epoch": 0.99, "learning_rate": 2.293387858477436e-09, "loss": 0.414, "step": 5258 }, { "epoch": 0.99, "learning_rate": 2.164213936770576e-09, "loss": 0.7016, "step": 5259 }, { "epoch": 0.99, "learning_rate": 2.0387836544033535e-09, "loss": 0.4377, "step": 5260 }, { "epoch": 0.99, "learning_rate": 1.9170970583426428e-09, "loss": 0.5295, "step": 5261 }, { "epoch": 0.99, "learning_rate": 1.7991541941531076e-09, "loss": 0.5015, "step": 5262 }, { "epoch": 0.99, "learning_rate": 1.684955105997199e-09, "loss": 0.4784, "step": 5263 }, { "epoch": 0.99, "learning_rate": 1.574499836638488e-09, "loss": 0.4029, "step": 5264 }, { "epoch": 0.99, "learning_rate": 1.4677884274338915e-09, "loss": 0.4796, "step": 5265 }, { "epoch": 0.99, "learning_rate": 1.3648209183425575e-09, "loss": 0.4857, "step": 5266 }, { "epoch": 1.0, "learning_rate": 1.265597347920311e-09, "loss": 0.38, "step": 5267 }, { "epoch": 1.0, "learning_rate": 1.170117753319655e-09, "loss": 0.4451, "step": 5268 }, { "epoch": 1.0, "learning_rate": 1.0783821702931018e-09, "loss": 0.4205, "step": 5269 }, { "epoch": 1.0, "learning_rate": 9.903906331909518e-10, "loss": 0.2654, "step": 5270 }, { "epoch": 1.0, "learning_rate": 9.061431749601835e-10, "loss": 0.4334, "step": 5271 }, { "epoch": 1.0, "learning_rate": 8.256398271466737e-10, "loss": 0.4729, "step": 5272 }, { "epoch": 1.0, "learning_rate": 7.488806198963083e-10, "loss": 0.4737, "step": 5273 }, { "epoch": 1.0, "learning_rate": 6.758655819505411e-10, "loss": 0.4109, "step": 5274 }, { "epoch": 1.0, "learning_rate": 6.065947406475036e-10, "loss": 0.4104, "step": 5275 }, { "epoch": 1.0, "learning_rate": 5.410681219286673e-10, "loss": 0.396, "step": 5276 }, { "epoch": 1.0, "learning_rate": 4.792857503266301e-10, "loss": 0.46, "step": 5277 }, { "epoch": 1.0, "learning_rate": 4.2124764897844005e-10, "loss": 0.4403, "step": 5278 }, { "epoch": 1.0, "learning_rate": 3.6695383961449227e-10, "loss": 0.3591, "step": 5279 }, { "epoch": 1.0, "learning_rate": 3.164043425651908e-10, "loss": 0.3992, "step": 5280 }, { "epoch": 1.0, "learning_rate": 2.69599176758728e-10, "loss": 0.3438, "step": 5281 }, { "epoch": 1.0, "learning_rate": 2.2653835972108463e-10, "loss": 0.3423, "step": 5282 }, { "epoch": 1.0, "learning_rate": 1.8722190757602955e-10, "loss": 0.4566, "step": 5283 }, { "epoch": 1.0, "learning_rate": 1.516498350451201e-10, "loss": 0.4102, "step": 5284 }, { "epoch": 1.0, "learning_rate": 1.1982215544881214e-10, "loss": 0.3734, "step": 5285 }, { "epoch": 1.0, "learning_rate": 9.173888070423964e-11, "loss": 0.4433, "step": 5286 }, { "epoch": 1.0, "learning_rate": 6.740002132743506e-11, "loss": 0.4371, "step": 5287 }, { "epoch": 1.0, "learning_rate": 4.680558643221922e-11, "loss": 0.4685, "step": 5288 }, { "epoch": 1.0, "learning_rate": 2.995558372909102e-11, "loss": 0.3886, "step": 5289 }, { "epoch": 1.0, "learning_rate": 1.6850019528558136e-11, "loss": 0.5164, "step": 5290 }, { "epoch": 1.0, "learning_rate": 7.488898736696116e-12, "loss": 0.4592, "step": 5291 }, { "epoch": 1.0, "learning_rate": 1.8722248595892668e-12, "loss": 0.4171, "step": 5292 }, { "epoch": 1.0, "learning_rate": 0.0, "loss": 0.52, "step": 5293 }, { "epoch": 1.0, "step": 5293, "total_flos": 241756536569856.0, "train_loss": 0.5514708950724287, "train_runtime": 24636.6893, "train_samples_per_second": 13.749, "train_steps_per_second": 0.215 } ], "logging_steps": 1.0, "max_steps": 5293, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 50000, "total_flos": 241756536569856.0, "train_batch_size": 16, "trial_name": null, "trial_params": null }