{ "best_metric": null, "best_model_checkpoint": null, "epoch": 1.0, "eval_steps": 500, "global_step": 5516, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0, "learning_rate": 1.8072289156626508e-07, "loss": 1.7061, "step": 1 }, { "epoch": 0.0, "learning_rate": 3.6144578313253016e-07, "loss": 1.7686, "step": 2 }, { "epoch": 0.0, "learning_rate": 5.421686746987951e-07, "loss": 1.7734, "step": 3 }, { "epoch": 0.0, "learning_rate": 7.228915662650603e-07, "loss": 1.6436, "step": 4 }, { "epoch": 0.0, "learning_rate": 9.036144578313253e-07, "loss": 1.7549, "step": 5 }, { "epoch": 0.0, "learning_rate": 1.0843373493975903e-06, "loss": 1.7617, "step": 6 }, { "epoch": 0.0, "learning_rate": 1.2650602409638555e-06, "loss": 1.7646, "step": 7 }, { "epoch": 0.0, "learning_rate": 1.4457831325301207e-06, "loss": 1.749, "step": 8 }, { "epoch": 0.0, "learning_rate": 1.6265060240963856e-06, "loss": 1.8721, "step": 9 }, { "epoch": 0.0, "learning_rate": 1.8072289156626506e-06, "loss": 1.6729, "step": 10 }, { "epoch": 0.0, "learning_rate": 1.987951807228916e-06, "loss": 1.8223, "step": 11 }, { "epoch": 0.0, "learning_rate": 2.1686746987951806e-06, "loss": 1.7676, "step": 12 }, { "epoch": 0.0, "learning_rate": 2.3493975903614457e-06, "loss": 1.6191, "step": 13 }, { "epoch": 0.0, "learning_rate": 2.530120481927711e-06, "loss": 1.7412, "step": 14 }, { "epoch": 0.0, "learning_rate": 2.710843373493976e-06, "loss": 1.4268, "step": 15 }, { "epoch": 0.0, "learning_rate": 2.8915662650602413e-06, "loss": 1.3779, "step": 16 }, { "epoch": 0.0, "learning_rate": 3.072289156626506e-06, "loss": 1.4453, "step": 17 }, { "epoch": 0.0, "learning_rate": 3.2530120481927713e-06, "loss": 1.4521, "step": 18 }, { "epoch": 0.0, "learning_rate": 3.433734939759036e-06, "loss": 1.3408, "step": 19 }, { "epoch": 0.0, "learning_rate": 3.614457831325301e-06, "loss": 1.2671, "step": 20 }, { "epoch": 0.0, "learning_rate": 3.795180722891566e-06, "loss": 1.1611, "step": 21 }, { "epoch": 0.0, "learning_rate": 3.975903614457832e-06, "loss": 1.1055, "step": 22 }, { "epoch": 0.0, "learning_rate": 4.156626506024097e-06, "loss": 1.0977, "step": 23 }, { "epoch": 0.0, "learning_rate": 4.337349397590361e-06, "loss": 1.1367, "step": 24 }, { "epoch": 0.0, "learning_rate": 4.518072289156627e-06, "loss": 1.0688, "step": 25 }, { "epoch": 0.0, "learning_rate": 4.6987951807228915e-06, "loss": 0.9209, "step": 26 }, { "epoch": 0.0, "learning_rate": 4.879518072289157e-06, "loss": 0.9883, "step": 27 }, { "epoch": 0.01, "learning_rate": 5.060240963855422e-06, "loss": 1.0303, "step": 28 }, { "epoch": 0.01, "learning_rate": 5.240963855421687e-06, "loss": 0.9077, "step": 29 }, { "epoch": 0.01, "learning_rate": 5.421686746987952e-06, "loss": 0.8877, "step": 30 }, { "epoch": 0.01, "learning_rate": 5.6024096385542166e-06, "loss": 0.8687, "step": 31 }, { "epoch": 0.01, "learning_rate": 5.783132530120483e-06, "loss": 0.8457, "step": 32 }, { "epoch": 0.01, "learning_rate": 5.963855421686747e-06, "loss": 0.7983, "step": 33 }, { "epoch": 0.01, "learning_rate": 6.144578313253012e-06, "loss": 0.7891, "step": 34 }, { "epoch": 0.01, "learning_rate": 6.325301204819277e-06, "loss": 0.8379, "step": 35 }, { "epoch": 0.01, "learning_rate": 6.5060240963855425e-06, "loss": 0.7261, "step": 36 }, { "epoch": 0.01, "learning_rate": 6.686746987951808e-06, "loss": 0.8394, "step": 37 }, { "epoch": 0.01, "learning_rate": 6.867469879518072e-06, "loss": 0.7759, "step": 38 }, { "epoch": 0.01, "learning_rate": 7.048192771084338e-06, "loss": 0.7397, "step": 39 }, { "epoch": 0.01, "learning_rate": 7.228915662650602e-06, "loss": 0.7642, "step": 40 }, { "epoch": 0.01, "learning_rate": 7.409638554216868e-06, "loss": 0.7476, "step": 41 }, { "epoch": 0.01, "learning_rate": 7.590361445783132e-06, "loss": 0.6963, "step": 42 }, { "epoch": 0.01, "learning_rate": 7.771084337349398e-06, "loss": 0.7349, "step": 43 }, { "epoch": 0.01, "learning_rate": 7.951807228915663e-06, "loss": 0.7808, "step": 44 }, { "epoch": 0.01, "learning_rate": 8.132530120481927e-06, "loss": 0.7422, "step": 45 }, { "epoch": 0.01, "learning_rate": 8.313253012048194e-06, "loss": 0.7222, "step": 46 }, { "epoch": 0.01, "learning_rate": 8.493975903614459e-06, "loss": 0.6675, "step": 47 }, { "epoch": 0.01, "learning_rate": 8.674698795180722e-06, "loss": 0.7019, "step": 48 }, { "epoch": 0.01, "learning_rate": 8.855421686746989e-06, "loss": 0.6636, "step": 49 }, { "epoch": 0.01, "learning_rate": 9.036144578313254e-06, "loss": 0.637, "step": 50 }, { "epoch": 0.01, "learning_rate": 9.216867469879518e-06, "loss": 0.6982, "step": 51 }, { "epoch": 0.01, "learning_rate": 9.397590361445783e-06, "loss": 0.6479, "step": 52 }, { "epoch": 0.01, "learning_rate": 9.578313253012048e-06, "loss": 0.6287, "step": 53 }, { "epoch": 0.01, "learning_rate": 9.759036144578313e-06, "loss": 0.7285, "step": 54 }, { "epoch": 0.01, "learning_rate": 9.939759036144579e-06, "loss": 0.5959, "step": 55 }, { "epoch": 0.01, "learning_rate": 1.0120481927710844e-05, "loss": 0.6807, "step": 56 }, { "epoch": 0.01, "learning_rate": 1.0301204819277109e-05, "loss": 0.6182, "step": 57 }, { "epoch": 0.01, "learning_rate": 1.0481927710843374e-05, "loss": 0.6794, "step": 58 }, { "epoch": 0.01, "learning_rate": 1.0662650602409638e-05, "loss": 0.5823, "step": 59 }, { "epoch": 0.01, "learning_rate": 1.0843373493975904e-05, "loss": 0.593, "step": 60 }, { "epoch": 0.01, "learning_rate": 1.102409638554217e-05, "loss": 0.5481, "step": 61 }, { "epoch": 0.01, "learning_rate": 1.1204819277108433e-05, "loss": 0.6658, "step": 62 }, { "epoch": 0.01, "learning_rate": 1.1385542168674698e-05, "loss": 0.6709, "step": 63 }, { "epoch": 0.01, "learning_rate": 1.1566265060240965e-05, "loss": 0.5955, "step": 64 }, { "epoch": 0.01, "learning_rate": 1.1746987951807229e-05, "loss": 0.603, "step": 65 }, { "epoch": 0.01, "learning_rate": 1.1927710843373494e-05, "loss": 0.6357, "step": 66 }, { "epoch": 0.01, "learning_rate": 1.210843373493976e-05, "loss": 0.6147, "step": 67 }, { "epoch": 0.01, "learning_rate": 1.2289156626506024e-05, "loss": 0.6055, "step": 68 }, { "epoch": 0.01, "learning_rate": 1.246987951807229e-05, "loss": 0.5688, "step": 69 }, { "epoch": 0.01, "learning_rate": 1.2650602409638555e-05, "loss": 0.5989, "step": 70 }, { "epoch": 0.01, "learning_rate": 1.283132530120482e-05, "loss": 0.6797, "step": 71 }, { "epoch": 0.01, "learning_rate": 1.3012048192771085e-05, "loss": 0.5649, "step": 72 }, { "epoch": 0.01, "learning_rate": 1.3192771084337349e-05, "loss": 0.5371, "step": 73 }, { "epoch": 0.01, "learning_rate": 1.3373493975903615e-05, "loss": 0.5176, "step": 74 }, { "epoch": 0.01, "learning_rate": 1.355421686746988e-05, "loss": 0.5637, "step": 75 }, { "epoch": 0.01, "learning_rate": 1.3734939759036144e-05, "loss": 0.5269, "step": 76 }, { "epoch": 0.01, "learning_rate": 1.391566265060241e-05, "loss": 0.55, "step": 77 }, { "epoch": 0.01, "learning_rate": 1.4096385542168676e-05, "loss": 0.5876, "step": 78 }, { "epoch": 0.01, "learning_rate": 1.427710843373494e-05, "loss": 0.5266, "step": 79 }, { "epoch": 0.01, "learning_rate": 1.4457831325301205e-05, "loss": 0.4822, "step": 80 }, { "epoch": 0.01, "learning_rate": 1.463855421686747e-05, "loss": 0.5359, "step": 81 }, { "epoch": 0.01, "learning_rate": 1.4819277108433735e-05, "loss": 0.5381, "step": 82 }, { "epoch": 0.02, "learning_rate": 1.5e-05, "loss": 0.5176, "step": 83 }, { "epoch": 0.02, "learning_rate": 1.5180722891566264e-05, "loss": 0.5979, "step": 84 }, { "epoch": 0.02, "learning_rate": 1.5361445783132532e-05, "loss": 0.5586, "step": 85 }, { "epoch": 0.02, "learning_rate": 1.5542168674698796e-05, "loss": 0.5513, "step": 86 }, { "epoch": 0.02, "learning_rate": 1.572289156626506e-05, "loss": 0.553, "step": 87 }, { "epoch": 0.02, "learning_rate": 1.5903614457831326e-05, "loss": 0.5562, "step": 88 }, { "epoch": 0.02, "learning_rate": 1.608433734939759e-05, "loss": 0.5278, "step": 89 }, { "epoch": 0.02, "learning_rate": 1.6265060240963853e-05, "loss": 0.5757, "step": 90 }, { "epoch": 0.02, "learning_rate": 1.6445783132530124e-05, "loss": 0.5933, "step": 91 }, { "epoch": 0.02, "learning_rate": 1.6626506024096387e-05, "loss": 0.5012, "step": 92 }, { "epoch": 0.02, "learning_rate": 1.680722891566265e-05, "loss": 0.5896, "step": 93 }, { "epoch": 0.02, "learning_rate": 1.6987951807228917e-05, "loss": 0.5581, "step": 94 }, { "epoch": 0.02, "learning_rate": 1.716867469879518e-05, "loss": 0.5273, "step": 95 }, { "epoch": 0.02, "learning_rate": 1.7349397590361444e-05, "loss": 0.4734, "step": 96 }, { "epoch": 0.02, "learning_rate": 1.753012048192771e-05, "loss": 0.5203, "step": 97 }, { "epoch": 0.02, "learning_rate": 1.7710843373493978e-05, "loss": 0.5527, "step": 98 }, { "epoch": 0.02, "learning_rate": 1.789156626506024e-05, "loss": 0.5723, "step": 99 }, { "epoch": 0.02, "learning_rate": 1.807228915662651e-05, "loss": 0.5398, "step": 100 }, { "epoch": 0.02, "learning_rate": 1.8253012048192772e-05, "loss": 0.5525, "step": 101 }, { "epoch": 0.02, "learning_rate": 1.8433734939759036e-05, "loss": 0.4905, "step": 102 }, { "epoch": 0.02, "learning_rate": 1.8614457831325302e-05, "loss": 0.5251, "step": 103 }, { "epoch": 0.02, "learning_rate": 1.8795180722891566e-05, "loss": 0.6082, "step": 104 }, { "epoch": 0.02, "learning_rate": 1.8975903614457833e-05, "loss": 0.4399, "step": 105 }, { "epoch": 0.02, "learning_rate": 1.9156626506024096e-05, "loss": 0.5859, "step": 106 }, { "epoch": 0.02, "learning_rate": 1.9337349397590363e-05, "loss": 0.5288, "step": 107 }, { "epoch": 0.02, "learning_rate": 1.9518072289156627e-05, "loss": 0.5535, "step": 108 }, { "epoch": 0.02, "learning_rate": 1.969879518072289e-05, "loss": 0.5208, "step": 109 }, { "epoch": 0.02, "learning_rate": 1.9879518072289157e-05, "loss": 0.5051, "step": 110 }, { "epoch": 0.02, "learning_rate": 2.006024096385542e-05, "loss": 0.5164, "step": 111 }, { "epoch": 0.02, "learning_rate": 2.0240963855421687e-05, "loss": 0.5095, "step": 112 }, { "epoch": 0.02, "learning_rate": 2.0421686746987954e-05, "loss": 0.5862, "step": 113 }, { "epoch": 0.02, "learning_rate": 2.0602409638554218e-05, "loss": 0.519, "step": 114 }, { "epoch": 0.02, "learning_rate": 2.078313253012048e-05, "loss": 0.4758, "step": 115 }, { "epoch": 0.02, "learning_rate": 2.0963855421686748e-05, "loss": 0.4805, "step": 116 }, { "epoch": 0.02, "learning_rate": 2.114457831325301e-05, "loss": 0.5725, "step": 117 }, { "epoch": 0.02, "learning_rate": 2.1325301204819275e-05, "loss": 0.4248, "step": 118 }, { "epoch": 0.02, "learning_rate": 2.1506024096385542e-05, "loss": 0.4963, "step": 119 }, { "epoch": 0.02, "learning_rate": 2.168674698795181e-05, "loss": 0.4688, "step": 120 }, { "epoch": 0.02, "learning_rate": 2.1867469879518072e-05, "loss": 0.5164, "step": 121 }, { "epoch": 0.02, "learning_rate": 2.204819277108434e-05, "loss": 0.4622, "step": 122 }, { "epoch": 0.02, "learning_rate": 2.2228915662650603e-05, "loss": 0.4536, "step": 123 }, { "epoch": 0.02, "learning_rate": 2.2409638554216866e-05, "loss": 0.5105, "step": 124 }, { "epoch": 0.02, "learning_rate": 2.2590361445783133e-05, "loss": 0.4951, "step": 125 }, { "epoch": 0.02, "learning_rate": 2.2771084337349397e-05, "loss": 0.5205, "step": 126 }, { "epoch": 0.02, "learning_rate": 2.2951807228915664e-05, "loss": 0.5493, "step": 127 }, { "epoch": 0.02, "learning_rate": 2.313253012048193e-05, "loss": 0.5457, "step": 128 }, { "epoch": 0.02, "learning_rate": 2.3313253012048194e-05, "loss": 0.5232, "step": 129 }, { "epoch": 0.02, "learning_rate": 2.3493975903614457e-05, "loss": 0.4551, "step": 130 }, { "epoch": 0.02, "learning_rate": 2.3674698795180724e-05, "loss": 0.5659, "step": 131 }, { "epoch": 0.02, "learning_rate": 2.3855421686746988e-05, "loss": 0.4949, "step": 132 }, { "epoch": 0.02, "learning_rate": 2.403614457831325e-05, "loss": 0.4216, "step": 133 }, { "epoch": 0.02, "learning_rate": 2.421686746987952e-05, "loss": 0.5132, "step": 134 }, { "epoch": 0.02, "learning_rate": 2.4397590361445785e-05, "loss": 0.4954, "step": 135 }, { "epoch": 0.02, "learning_rate": 2.457831325301205e-05, "loss": 0.4956, "step": 136 }, { "epoch": 0.02, "learning_rate": 2.4759036144578315e-05, "loss": 0.5957, "step": 137 }, { "epoch": 0.03, "learning_rate": 2.493975903614458e-05, "loss": 0.4617, "step": 138 }, { "epoch": 0.03, "learning_rate": 2.5120481927710842e-05, "loss": 0.4573, "step": 139 }, { "epoch": 0.03, "learning_rate": 2.530120481927711e-05, "loss": 0.4639, "step": 140 }, { "epoch": 0.03, "learning_rate": 2.5481927710843376e-05, "loss": 0.5581, "step": 141 }, { "epoch": 0.03, "learning_rate": 2.566265060240964e-05, "loss": 0.5417, "step": 142 }, { "epoch": 0.03, "learning_rate": 2.5843373493975907e-05, "loss": 0.4087, "step": 143 }, { "epoch": 0.03, "learning_rate": 2.602409638554217e-05, "loss": 0.4893, "step": 144 }, { "epoch": 0.03, "learning_rate": 2.6204819277108434e-05, "loss": 0.6072, "step": 145 }, { "epoch": 0.03, "learning_rate": 2.6385542168674697e-05, "loss": 0.563, "step": 146 }, { "epoch": 0.03, "learning_rate": 2.6566265060240964e-05, "loss": 0.4885, "step": 147 }, { "epoch": 0.03, "learning_rate": 2.674698795180723e-05, "loss": 0.4773, "step": 148 }, { "epoch": 0.03, "learning_rate": 2.6927710843373494e-05, "loss": 0.4846, "step": 149 }, { "epoch": 0.03, "learning_rate": 2.710843373493976e-05, "loss": 0.5886, "step": 150 }, { "epoch": 0.03, "learning_rate": 2.7289156626506025e-05, "loss": 0.5215, "step": 151 }, { "epoch": 0.03, "learning_rate": 2.7469879518072288e-05, "loss": 0.5081, "step": 152 }, { "epoch": 0.03, "learning_rate": 2.7650602409638555e-05, "loss": 0.448, "step": 153 }, { "epoch": 0.03, "learning_rate": 2.783132530120482e-05, "loss": 0.5205, "step": 154 }, { "epoch": 0.03, "learning_rate": 2.8012048192771082e-05, "loss": 0.46, "step": 155 }, { "epoch": 0.03, "learning_rate": 2.8192771084337352e-05, "loss": 0.4219, "step": 156 }, { "epoch": 0.03, "learning_rate": 2.8373493975903616e-05, "loss": 0.3542, "step": 157 }, { "epoch": 0.03, "learning_rate": 2.855421686746988e-05, "loss": 0.5156, "step": 158 }, { "epoch": 0.03, "learning_rate": 2.8734939759036146e-05, "loss": 0.5371, "step": 159 }, { "epoch": 0.03, "learning_rate": 2.891566265060241e-05, "loss": 0.5215, "step": 160 }, { "epoch": 0.03, "learning_rate": 2.9096385542168673e-05, "loss": 0.4541, "step": 161 }, { "epoch": 0.03, "learning_rate": 2.927710843373494e-05, "loss": 0.4524, "step": 162 }, { "epoch": 0.03, "learning_rate": 2.9457831325301207e-05, "loss": 0.4717, "step": 163 }, { "epoch": 0.03, "learning_rate": 2.963855421686747e-05, "loss": 0.5159, "step": 164 }, { "epoch": 0.03, "learning_rate": 2.9819277108433737e-05, "loss": 0.4897, "step": 165 }, { "epoch": 0.03, "learning_rate": 3e-05, "loss": 0.5225, "step": 166 }, { "epoch": 0.03, "learning_rate": 2.9999997413851657e-05, "loss": 0.4788, "step": 167 }, { "epoch": 0.03, "learning_rate": 2.9999989655407532e-05, "loss": 0.5085, "step": 168 }, { "epoch": 0.03, "learning_rate": 2.9999976724670285e-05, "loss": 0.4639, "step": 169 }, { "epoch": 0.03, "learning_rate": 2.9999958621644384e-05, "loss": 0.5459, "step": 170 }, { "epoch": 0.03, "learning_rate": 2.999993534633607e-05, "loss": 0.4797, "step": 171 }, { "epoch": 0.03, "learning_rate": 2.999990689875337e-05, "loss": 0.449, "step": 172 }, { "epoch": 0.03, "learning_rate": 2.999987327890609e-05, "loss": 0.4958, "step": 173 }, { "epoch": 0.03, "learning_rate": 2.9999834486805827e-05, "loss": 0.4553, "step": 174 }, { "epoch": 0.03, "learning_rate": 2.9999790522465954e-05, "loss": 0.5, "step": 175 }, { "epoch": 0.03, "learning_rate": 2.999974138590163e-05, "loss": 0.4727, "step": 176 }, { "epoch": 0.03, "learning_rate": 2.9999687077129798e-05, "loss": 0.4578, "step": 177 }, { "epoch": 0.03, "learning_rate": 2.9999627596169192e-05, "loss": 0.4531, "step": 178 }, { "epoch": 0.03, "learning_rate": 2.9999562943040313e-05, "loss": 0.4497, "step": 179 }, { "epoch": 0.03, "learning_rate": 2.999949311776546e-05, "loss": 0.4895, "step": 180 }, { "epoch": 0.03, "learning_rate": 2.999941812036871e-05, "loss": 0.4587, "step": 181 }, { "epoch": 0.03, "learning_rate": 2.9999337950875917e-05, "loss": 0.4351, "step": 182 }, { "epoch": 0.03, "learning_rate": 2.9999252609314738e-05, "loss": 0.5151, "step": 183 }, { "epoch": 0.03, "learning_rate": 2.999916209571459e-05, "loss": 0.3977, "step": 184 }, { "epoch": 0.03, "learning_rate": 2.999906641010668e-05, "loss": 0.509, "step": 185 }, { "epoch": 0.03, "learning_rate": 2.9998965552524016e-05, "loss": 0.4507, "step": 186 }, { "epoch": 0.03, "learning_rate": 2.9998859523001367e-05, "loss": 0.4348, "step": 187 }, { "epoch": 0.03, "learning_rate": 2.9998748321575295e-05, "loss": 0.5098, "step": 188 }, { "epoch": 0.03, "learning_rate": 2.9998631948284148e-05, "loss": 0.5376, "step": 189 }, { "epoch": 0.03, "learning_rate": 2.9998510403168047e-05, "loss": 0.4542, "step": 190 }, { "epoch": 0.03, "learning_rate": 2.999838368626891e-05, "loss": 0.4146, "step": 191 }, { "epoch": 0.03, "learning_rate": 2.9998251797630428e-05, "loss": 0.4741, "step": 192 }, { "epoch": 0.03, "learning_rate": 2.9998114737298076e-05, "loss": 0.5283, "step": 193 }, { "epoch": 0.04, "learning_rate": 2.999797250531912e-05, "loss": 0.4966, "step": 194 }, { "epoch": 0.04, "learning_rate": 2.9997825101742604e-05, "loss": 0.4712, "step": 195 }, { "epoch": 0.04, "learning_rate": 2.9997672526619356e-05, "loss": 0.4895, "step": 196 }, { "epoch": 0.04, "learning_rate": 2.9997514780001982e-05, "loss": 0.4302, "step": 197 }, { "epoch": 0.04, "learning_rate": 2.9997351861944882e-05, "loss": 0.4773, "step": 198 }, { "epoch": 0.04, "learning_rate": 2.999718377250423e-05, "loss": 0.5063, "step": 199 }, { "epoch": 0.04, "learning_rate": 2.9997010511737993e-05, "loss": 0.4963, "step": 200 }, { "epoch": 0.04, "learning_rate": 2.99968320797059e-05, "loss": 0.4841, "step": 201 }, { "epoch": 0.04, "learning_rate": 2.9996648476469495e-05, "loss": 0.5115, "step": 202 }, { "epoch": 0.04, "learning_rate": 2.9996459702092075e-05, "loss": 0.4573, "step": 203 }, { "epoch": 0.04, "learning_rate": 2.9996265756638743e-05, "loss": 0.4382, "step": 204 }, { "epoch": 0.04, "learning_rate": 2.9996066640176364e-05, "loss": 0.48, "step": 205 }, { "epoch": 0.04, "learning_rate": 2.999586235277361e-05, "loss": 0.3889, "step": 206 }, { "epoch": 0.04, "learning_rate": 2.9995652894500914e-05, "loss": 0.3959, "step": 207 }, { "epoch": 0.04, "learning_rate": 2.9995438265430508e-05, "loss": 0.3949, "step": 208 }, { "epoch": 0.04, "learning_rate": 2.9995218465636396e-05, "loss": 0.4744, "step": 209 }, { "epoch": 0.04, "learning_rate": 2.9994993495194368e-05, "loss": 0.5298, "step": 210 }, { "epoch": 0.04, "learning_rate": 2.9994763354182e-05, "loss": 0.4556, "step": 211 }, { "epoch": 0.04, "learning_rate": 2.999452804267865e-05, "loss": 0.5078, "step": 212 }, { "epoch": 0.04, "learning_rate": 2.999428756076546e-05, "loss": 0.3728, "step": 213 }, { "epoch": 0.04, "learning_rate": 2.999404190852535e-05, "loss": 0.4465, "step": 214 }, { "epoch": 0.04, "learning_rate": 2.999379108604303e-05, "loss": 0.3809, "step": 215 }, { "epoch": 0.04, "learning_rate": 2.9993535093404975e-05, "loss": 0.3787, "step": 216 }, { "epoch": 0.04, "learning_rate": 2.9993273930699473e-05, "loss": 0.3843, "step": 217 }, { "epoch": 0.04, "learning_rate": 2.9993007598016572e-05, "loss": 0.4968, "step": 218 }, { "epoch": 0.04, "learning_rate": 2.9992736095448108e-05, "loss": 0.4556, "step": 219 }, { "epoch": 0.04, "learning_rate": 2.9992459423087698e-05, "loss": 0.4768, "step": 220 }, { "epoch": 0.04, "learning_rate": 2.999217758103075e-05, "loss": 0.4607, "step": 221 }, { "epoch": 0.04, "learning_rate": 2.9991890569374442e-05, "loss": 0.4292, "step": 222 }, { "epoch": 0.04, "learning_rate": 2.9991598388217745e-05, "loss": 0.4192, "step": 223 }, { "epoch": 0.04, "learning_rate": 2.999130103766141e-05, "loss": 0.4783, "step": 224 }, { "epoch": 0.04, "learning_rate": 2.9990998517807967e-05, "loss": 0.4266, "step": 225 }, { "epoch": 0.04, "learning_rate": 2.999069082876173e-05, "loss": 0.4509, "step": 226 }, { "epoch": 0.04, "learning_rate": 2.9990377970628804e-05, "loss": 0.3618, "step": 227 }, { "epoch": 0.04, "learning_rate": 2.9990059943517058e-05, "loss": 0.4106, "step": 228 }, { "epoch": 0.04, "learning_rate": 2.998973674753616e-05, "loss": 0.3846, "step": 229 }, { "epoch": 0.04, "learning_rate": 2.9989408382797553e-05, "loss": 0.4109, "step": 230 }, { "epoch": 0.04, "learning_rate": 2.998907484941446e-05, "loss": 0.5044, "step": 231 }, { "epoch": 0.04, "learning_rate": 2.9988736147501902e-05, "loss": 0.4368, "step": 232 }, { "epoch": 0.04, "learning_rate": 2.998839227717666e-05, "loss": 0.4451, "step": 233 }, { "epoch": 0.04, "learning_rate": 2.9988043238557305e-05, "loss": 0.3862, "step": 234 }, { "epoch": 0.04, "learning_rate": 2.9987689031764194e-05, "loss": 0.4138, "step": 235 }, { "epoch": 0.04, "learning_rate": 2.998732965691947e-05, "loss": 0.4565, "step": 236 }, { "epoch": 0.04, "learning_rate": 2.9986965114147048e-05, "loss": 0.4756, "step": 237 }, { "epoch": 0.04, "learning_rate": 2.9986595403572634e-05, "loss": 0.4778, "step": 238 }, { "epoch": 0.04, "learning_rate": 2.998622052532371e-05, "loss": 0.4712, "step": 239 }, { "epoch": 0.04, "learning_rate": 2.9985840479529535e-05, "loss": 0.4824, "step": 240 }, { "epoch": 0.04, "learning_rate": 2.9985455266321168e-05, "loss": 0.4802, "step": 241 }, { "epoch": 0.04, "learning_rate": 2.9985064885831427e-05, "loss": 0.4644, "step": 242 }, { "epoch": 0.04, "learning_rate": 2.998466933819493e-05, "loss": 0.4263, "step": 243 }, { "epoch": 0.04, "learning_rate": 2.998426862354806e-05, "loss": 0.3604, "step": 244 }, { "epoch": 0.04, "learning_rate": 2.9983862742029006e-05, "loss": 0.3936, "step": 245 }, { "epoch": 0.04, "learning_rate": 2.9983451693777715e-05, "loss": 0.4714, "step": 246 }, { "epoch": 0.04, "learning_rate": 2.9983035478935925e-05, "loss": 0.4163, "step": 247 }, { "epoch": 0.04, "learning_rate": 2.9982614097647156e-05, "loss": 0.4402, "step": 248 }, { "epoch": 0.05, "learning_rate": 2.9982187550056708e-05, "loss": 0.4395, "step": 249 }, { "epoch": 0.05, "learning_rate": 2.9981755836311663e-05, "loss": 0.4705, "step": 250 }, { "epoch": 0.05, "learning_rate": 2.9981318956560887e-05, "loss": 0.5591, "step": 251 }, { "epoch": 0.05, "learning_rate": 2.9980876910955024e-05, "loss": 0.4656, "step": 252 }, { "epoch": 0.05, "learning_rate": 2.9980429699646495e-05, "loss": 0.4812, "step": 253 }, { "epoch": 0.05, "learning_rate": 2.9979977322789512e-05, "loss": 0.5596, "step": 254 }, { "epoch": 0.05, "learning_rate": 2.9979519780540067e-05, "loss": 0.4934, "step": 255 }, { "epoch": 0.05, "learning_rate": 2.9979057073055922e-05, "loss": 0.5183, "step": 256 }, { "epoch": 0.05, "learning_rate": 2.997858920049663e-05, "loss": 0.4116, "step": 257 }, { "epoch": 0.05, "learning_rate": 2.9978116163023526e-05, "loss": 0.4587, "step": 258 }, { "epoch": 0.05, "learning_rate": 2.997763796079972e-05, "loss": 0.4773, "step": 259 }, { "epoch": 0.05, "learning_rate": 2.9977154593990107e-05, "loss": 0.4438, "step": 260 }, { "epoch": 0.05, "learning_rate": 2.9976666062761358e-05, "loss": 0.3767, "step": 261 }, { "epoch": 0.05, "learning_rate": 2.9976172367281932e-05, "loss": 0.3672, "step": 262 }, { "epoch": 0.05, "learning_rate": 2.9975673507722063e-05, "loss": 0.4032, "step": 263 }, { "epoch": 0.05, "learning_rate": 2.997516948425377e-05, "loss": 0.4402, "step": 264 }, { "epoch": 0.05, "learning_rate": 2.9974660297050846e-05, "loss": 0.4712, "step": 265 }, { "epoch": 0.05, "learning_rate": 2.997414594628888e-05, "loss": 0.4673, "step": 266 }, { "epoch": 0.05, "learning_rate": 2.997362643214521e-05, "loss": 0.4409, "step": 267 }, { "epoch": 0.05, "learning_rate": 2.997310175479899e-05, "loss": 0.4895, "step": 268 }, { "epoch": 0.05, "learning_rate": 2.9972571914431138e-05, "loss": 0.5349, "step": 269 }, { "epoch": 0.05, "learning_rate": 2.9972036911224352e-05, "loss": 0.4524, "step": 270 }, { "epoch": 0.05, "learning_rate": 2.9971496745363107e-05, "loss": 0.4048, "step": 271 }, { "epoch": 0.05, "learning_rate": 2.9970951417033667e-05, "loss": 0.5017, "step": 272 }, { "epoch": 0.05, "learning_rate": 2.9970400926424075e-05, "loss": 0.4543, "step": 273 }, { "epoch": 0.05, "learning_rate": 2.9969845273724147e-05, "loss": 0.3789, "step": 274 }, { "epoch": 0.05, "learning_rate": 2.996928445912548e-05, "loss": 0.4993, "step": 275 }, { "epoch": 0.05, "learning_rate": 2.9968718482821457e-05, "loss": 0.4854, "step": 276 }, { "epoch": 0.05, "learning_rate": 2.9968147345007247e-05, "loss": 0.5366, "step": 277 }, { "epoch": 0.05, "learning_rate": 2.9967571045879775e-05, "loss": 0.3815, "step": 278 }, { "epoch": 0.05, "learning_rate": 2.9966989585637766e-05, "loss": 0.4033, "step": 279 }, { "epoch": 0.05, "learning_rate": 2.996640296448172e-05, "loss": 0.5112, "step": 280 }, { "epoch": 0.05, "learning_rate": 2.996581118261391e-05, "loss": 0.4324, "step": 281 }, { "epoch": 0.05, "learning_rate": 2.996521424023841e-05, "loss": 0.5283, "step": 282 }, { "epoch": 0.05, "learning_rate": 2.9964612137561038e-05, "loss": 0.3833, "step": 283 }, { "epoch": 0.05, "learning_rate": 2.9964004874789425e-05, "loss": 0.5164, "step": 284 }, { "epoch": 0.05, "learning_rate": 2.9963392452132956e-05, "loss": 0.4702, "step": 285 }, { "epoch": 0.05, "learning_rate": 2.996277486980282e-05, "loss": 0.4207, "step": 286 }, { "epoch": 0.05, "learning_rate": 2.9962152128011955e-05, "loss": 0.4209, "step": 287 }, { "epoch": 0.05, "learning_rate": 2.996152422697511e-05, "loss": 0.4365, "step": 288 }, { "epoch": 0.05, "learning_rate": 2.9960891166908786e-05, "loss": 0.3882, "step": 289 }, { "epoch": 0.05, "learning_rate": 2.9960252948031286e-05, "loss": 0.4373, "step": 290 }, { "epoch": 0.05, "learning_rate": 2.995960957056267e-05, "loss": 0.3931, "step": 291 }, { "epoch": 0.05, "learning_rate": 2.9958961034724792e-05, "loss": 0.4668, "step": 292 }, { "epoch": 0.05, "learning_rate": 2.995830734074128e-05, "loss": 0.4666, "step": 293 }, { "epoch": 0.05, "learning_rate": 2.995764848883754e-05, "loss": 0.4739, "step": 294 }, { "epoch": 0.05, "learning_rate": 2.9956984479240756e-05, "loss": 0.4268, "step": 295 }, { "epoch": 0.05, "learning_rate": 2.99563153121799e-05, "loss": 0.4187, "step": 296 }, { "epoch": 0.05, "learning_rate": 2.9955640987885695e-05, "loss": 0.4474, "step": 297 }, { "epoch": 0.05, "learning_rate": 2.9954961506590684e-05, "loss": 0.4365, "step": 298 }, { "epoch": 0.05, "learning_rate": 2.995427686852915e-05, "loss": 0.4297, "step": 299 }, { "epoch": 0.05, "learning_rate": 2.9953587073937176e-05, "loss": 0.4897, "step": 300 }, { "epoch": 0.05, "learning_rate": 2.9952892123052614e-05, "loss": 0.5405, "step": 301 }, { "epoch": 0.05, "learning_rate": 2.9952192016115102e-05, "loss": 0.4182, "step": 302 }, { "epoch": 0.05, "learning_rate": 2.9951486753366043e-05, "loss": 0.4174, "step": 303 }, { "epoch": 0.06, "learning_rate": 2.995077633504863e-05, "loss": 0.4546, "step": 304 }, { "epoch": 0.06, "learning_rate": 2.995006076140783e-05, "loss": 0.4519, "step": 305 }, { "epoch": 0.06, "learning_rate": 2.9949340032690387e-05, "loss": 0.5073, "step": 306 }, { "epoch": 0.06, "learning_rate": 2.994861414914482e-05, "loss": 0.5002, "step": 307 }, { "epoch": 0.06, "learning_rate": 2.994788311102143e-05, "loss": 0.4343, "step": 308 }, { "epoch": 0.06, "learning_rate": 2.9947146918572295e-05, "loss": 0.438, "step": 309 }, { "epoch": 0.06, "learning_rate": 2.9946405572051262e-05, "loss": 0.4744, "step": 310 }, { "epoch": 0.06, "learning_rate": 2.9945659071713968e-05, "loss": 0.4204, "step": 311 }, { "epoch": 0.06, "learning_rate": 2.994490741781782e-05, "loss": 0.4929, "step": 312 }, { "epoch": 0.06, "learning_rate": 2.9944150610622e-05, "loss": 0.4119, "step": 313 }, { "epoch": 0.06, "learning_rate": 2.9943388650387478e-05, "loss": 0.3779, "step": 314 }, { "epoch": 0.06, "learning_rate": 2.9942621537376985e-05, "loss": 0.5283, "step": 315 }, { "epoch": 0.06, "learning_rate": 2.9941849271855037e-05, "loss": 0.4307, "step": 316 }, { "epoch": 0.06, "learning_rate": 2.9941071854087933e-05, "loss": 0.4526, "step": 317 }, { "epoch": 0.06, "learning_rate": 2.9940289284343734e-05, "loss": 0.5437, "step": 318 }, { "epoch": 0.06, "learning_rate": 2.9939501562892292e-05, "loss": 0.4231, "step": 319 }, { "epoch": 0.06, "learning_rate": 2.9938708690005226e-05, "loss": 0.4783, "step": 320 }, { "epoch": 0.06, "learning_rate": 2.9937910665955935e-05, "loss": 0.4014, "step": 321 }, { "epoch": 0.06, "learning_rate": 2.9937107491019593e-05, "loss": 0.4568, "step": 322 }, { "epoch": 0.06, "learning_rate": 2.993629916547315e-05, "loss": 0.5049, "step": 323 }, { "epoch": 0.06, "learning_rate": 2.9935485689595334e-05, "loss": 0.5149, "step": 324 }, { "epoch": 0.06, "learning_rate": 2.9934667063666645e-05, "loss": 0.4795, "step": 325 }, { "epoch": 0.06, "learning_rate": 2.9933843287969364e-05, "loss": 0.429, "step": 326 }, { "epoch": 0.06, "learning_rate": 2.9933014362787546e-05, "loss": 0.4834, "step": 327 }, { "epoch": 0.06, "learning_rate": 2.9932180288407017e-05, "loss": 0.397, "step": 328 }, { "epoch": 0.06, "learning_rate": 2.9931341065115388e-05, "loss": 0.4353, "step": 329 }, { "epoch": 0.06, "learning_rate": 2.9930496693202037e-05, "loss": 0.3971, "step": 330 }, { "epoch": 0.06, "learning_rate": 2.992964717295811e-05, "loss": 0.4521, "step": 331 }, { "epoch": 0.06, "learning_rate": 2.992879250467656e-05, "loss": 0.4077, "step": 332 }, { "epoch": 0.06, "learning_rate": 2.9927932688652073e-05, "loss": 0.3684, "step": 333 }, { "epoch": 0.06, "learning_rate": 2.9927067725181145e-05, "loss": 0.4275, "step": 334 }, { "epoch": 0.06, "learning_rate": 2.9926197614562024e-05, "loss": 0.4919, "step": 335 }, { "epoch": 0.06, "learning_rate": 2.9925322357094738e-05, "loss": 0.4705, "step": 336 }, { "epoch": 0.06, "learning_rate": 2.9924441953081106e-05, "loss": 0.5261, "step": 337 }, { "epoch": 0.06, "learning_rate": 2.9923556402824704e-05, "loss": 0.4868, "step": 338 }, { "epoch": 0.06, "learning_rate": 2.992266570663088e-05, "loss": 0.3552, "step": 339 }, { "epoch": 0.06, "learning_rate": 2.992176986480677e-05, "loss": 0.4082, "step": 340 }, { "epoch": 0.06, "learning_rate": 2.9920868877661275e-05, "loss": 0.4656, "step": 341 }, { "epoch": 0.06, "learning_rate": 2.9919962745505075e-05, "loss": 0.3955, "step": 342 }, { "epoch": 0.06, "learning_rate": 2.9919051468650623e-05, "loss": 0.4844, "step": 343 }, { "epoch": 0.06, "learning_rate": 2.9918135047412144e-05, "loss": 0.4062, "step": 344 }, { "epoch": 0.06, "learning_rate": 2.991721348210564e-05, "loss": 0.4717, "step": 345 }, { "epoch": 0.06, "learning_rate": 2.991628677304888e-05, "loss": 0.3552, "step": 346 }, { "epoch": 0.06, "learning_rate": 2.9915354920561417e-05, "loss": 0.4309, "step": 347 }, { "epoch": 0.06, "learning_rate": 2.9914417924964572e-05, "loss": 0.4299, "step": 348 }, { "epoch": 0.06, "learning_rate": 2.9913475786581438e-05, "loss": 0.3967, "step": 349 }, { "epoch": 0.06, "learning_rate": 2.991252850573688e-05, "loss": 0.4176, "step": 350 }, { "epoch": 0.06, "learning_rate": 2.9911576082757543e-05, "loss": 0.4341, "step": 351 }, { "epoch": 0.06, "learning_rate": 2.991061851797184e-05, "loss": 0.3679, "step": 352 }, { "epoch": 0.06, "learning_rate": 2.9909655811709955e-05, "loss": 0.4697, "step": 353 }, { "epoch": 0.06, "learning_rate": 2.9908687964303857e-05, "loss": 0.4377, "step": 354 }, { "epoch": 0.06, "learning_rate": 2.9907714976087273e-05, "loss": 0.3684, "step": 355 }, { "epoch": 0.06, "learning_rate": 2.9906736847395706e-05, "loss": 0.4287, "step": 356 }, { "epoch": 0.06, "learning_rate": 2.990575357856644e-05, "loss": 0.3798, "step": 357 }, { "epoch": 0.06, "learning_rate": 2.9904765169938522e-05, "loss": 0.4023, "step": 358 }, { "epoch": 0.07, "learning_rate": 2.9903771621852776e-05, "loss": 0.4734, "step": 359 }, { "epoch": 0.07, "learning_rate": 2.9902772934651795e-05, "loss": 0.4265, "step": 360 }, { "epoch": 0.07, "learning_rate": 2.990176910867995e-05, "loss": 0.4387, "step": 361 }, { "epoch": 0.07, "learning_rate": 2.9900760144283376e-05, "loss": 0.4226, "step": 362 }, { "epoch": 0.07, "learning_rate": 2.9899746041809987e-05, "loss": 0.4133, "step": 363 }, { "epoch": 0.07, "learning_rate": 2.9898726801609464e-05, "loss": 0.4583, "step": 364 }, { "epoch": 0.07, "learning_rate": 2.9897702424033264e-05, "loss": 0.3823, "step": 365 }, { "epoch": 0.07, "learning_rate": 2.9896672909434604e-05, "loss": 0.4321, "step": 366 }, { "epoch": 0.07, "learning_rate": 2.9895638258168493e-05, "loss": 0.4089, "step": 367 }, { "epoch": 0.07, "learning_rate": 2.989459847059169e-05, "loss": 0.4648, "step": 368 }, { "epoch": 0.07, "learning_rate": 2.989355354706274e-05, "loss": 0.4282, "step": 369 }, { "epoch": 0.07, "learning_rate": 2.9892503487941952e-05, "loss": 0.4172, "step": 370 }, { "epoch": 0.07, "learning_rate": 2.9891448293591402e-05, "loss": 0.3865, "step": 371 }, { "epoch": 0.07, "learning_rate": 2.9890387964374948e-05, "loss": 0.3408, "step": 372 }, { "epoch": 0.07, "learning_rate": 2.9889322500658213e-05, "loss": 0.4009, "step": 373 }, { "epoch": 0.07, "learning_rate": 2.9888251902808582e-05, "loss": 0.4294, "step": 374 }, { "epoch": 0.07, "learning_rate": 2.988717617119523e-05, "loss": 0.5657, "step": 375 }, { "epoch": 0.07, "learning_rate": 2.988609530618908e-05, "loss": 0.4338, "step": 376 }, { "epoch": 0.07, "learning_rate": 2.9885009308162842e-05, "loss": 0.4653, "step": 377 }, { "epoch": 0.07, "learning_rate": 2.9883918177490987e-05, "loss": 0.3987, "step": 378 }, { "epoch": 0.07, "learning_rate": 2.9882821914549756e-05, "loss": 0.4077, "step": 379 }, { "epoch": 0.07, "learning_rate": 2.988172051971717e-05, "loss": 0.3885, "step": 380 }, { "epoch": 0.07, "learning_rate": 2.9880613993373005e-05, "loss": 0.3829, "step": 381 }, { "epoch": 0.07, "learning_rate": 2.9879502335898813e-05, "loss": 0.4612, "step": 382 }, { "epoch": 0.07, "learning_rate": 2.9878385547677918e-05, "loss": 0.3862, "step": 383 }, { "epoch": 0.07, "learning_rate": 2.987726362909541e-05, "loss": 0.46, "step": 384 }, { "epoch": 0.07, "learning_rate": 2.9876136580538152e-05, "loss": 0.4299, "step": 385 }, { "epoch": 0.07, "learning_rate": 2.987500440239477e-05, "loss": 0.3789, "step": 386 }, { "epoch": 0.07, "learning_rate": 2.987386709505566e-05, "loss": 0.5227, "step": 387 }, { "epoch": 0.07, "learning_rate": 2.9872724658912983e-05, "loss": 0.3588, "step": 388 }, { "epoch": 0.07, "learning_rate": 2.9871577094360688e-05, "loss": 0.4844, "step": 389 }, { "epoch": 0.07, "learning_rate": 2.9870424401794463e-05, "loss": 0.4656, "step": 390 }, { "epoch": 0.07, "learning_rate": 2.986926658161179e-05, "loss": 0.4792, "step": 391 }, { "epoch": 0.07, "learning_rate": 2.9868103634211902e-05, "loss": 0.407, "step": 392 }, { "epoch": 0.07, "learning_rate": 2.986693555999581e-05, "loss": 0.3535, "step": 393 }, { "epoch": 0.07, "learning_rate": 2.9865762359366285e-05, "loss": 0.4153, "step": 394 }, { "epoch": 0.07, "learning_rate": 2.9864584032727874e-05, "loss": 0.3926, "step": 395 }, { "epoch": 0.07, "learning_rate": 2.9863400580486884e-05, "loss": 0.3306, "step": 396 }, { "epoch": 0.07, "learning_rate": 2.9862212003051395e-05, "loss": 0.3918, "step": 397 }, { "epoch": 0.07, "learning_rate": 2.9861018300831253e-05, "loss": 0.4031, "step": 398 }, { "epoch": 0.07, "learning_rate": 2.985981947423807e-05, "loss": 0.4634, "step": 399 }, { "epoch": 0.07, "learning_rate": 2.9858615523685218e-05, "loss": 0.4614, "step": 400 }, { "epoch": 0.07, "learning_rate": 2.985740644958785e-05, "loss": 0.3723, "step": 401 }, { "epoch": 0.07, "learning_rate": 2.985619225236288e-05, "loss": 0.4744, "step": 402 }, { "epoch": 0.07, "learning_rate": 2.985497293242898e-05, "loss": 0.3374, "step": 403 }, { "epoch": 0.07, "learning_rate": 2.9853748490206606e-05, "loss": 0.408, "step": 404 }, { "epoch": 0.07, "learning_rate": 2.985251892611796e-05, "loss": 0.4243, "step": 405 }, { "epoch": 0.07, "learning_rate": 2.9851284240587023e-05, "loss": 0.4521, "step": 406 }, { "epoch": 0.07, "learning_rate": 2.985004443403954e-05, "loss": 0.3936, "step": 407 }, { "epoch": 0.07, "learning_rate": 2.9848799506903027e-05, "loss": 0.3916, "step": 408 }, { "epoch": 0.07, "learning_rate": 2.9847549459606744e-05, "loss": 0.4392, "step": 409 }, { "epoch": 0.07, "learning_rate": 2.9846294292581747e-05, "loss": 0.4282, "step": 410 }, { "epoch": 0.07, "learning_rate": 2.9845034006260837e-05, "loss": 0.4626, "step": 411 }, { "epoch": 0.07, "learning_rate": 2.984376860107858e-05, "loss": 0.3967, "step": 412 }, { "epoch": 0.07, "learning_rate": 2.9842498077471323e-05, "loss": 0.4885, "step": 413 }, { "epoch": 0.08, "learning_rate": 2.9841222435877163e-05, "loss": 0.4031, "step": 414 }, { "epoch": 0.08, "learning_rate": 2.983994167673596e-05, "loss": 0.3878, "step": 415 }, { "epoch": 0.08, "learning_rate": 2.9838655800489357e-05, "loss": 0.3779, "step": 416 }, { "epoch": 0.08, "learning_rate": 2.9837364807580743e-05, "loss": 0.4249, "step": 417 }, { "epoch": 0.08, "learning_rate": 2.9836068698455277e-05, "loss": 0.3531, "step": 418 }, { "epoch": 0.08, "learning_rate": 2.9834767473559886e-05, "loss": 0.4192, "step": 419 }, { "epoch": 0.08, "learning_rate": 2.9833461133343255e-05, "loss": 0.4174, "step": 420 }, { "epoch": 0.08, "learning_rate": 2.983214967825584e-05, "loss": 0.4656, "step": 421 }, { "epoch": 0.08, "learning_rate": 2.9830833108749858e-05, "loss": 0.4504, "step": 422 }, { "epoch": 0.08, "learning_rate": 2.9829511425279278e-05, "loss": 0.3398, "step": 423 }, { "epoch": 0.08, "learning_rate": 2.982818462829985e-05, "loss": 0.4102, "step": 424 }, { "epoch": 0.08, "learning_rate": 2.982685271826908e-05, "loss": 0.4504, "step": 425 }, { "epoch": 0.08, "learning_rate": 2.982551569564624e-05, "loss": 0.4404, "step": 426 }, { "epoch": 0.08, "learning_rate": 2.9824173560892354e-05, "loss": 0.3883, "step": 427 }, { "epoch": 0.08, "learning_rate": 2.982282631447022e-05, "loss": 0.4355, "step": 428 }, { "epoch": 0.08, "learning_rate": 2.9821473956844398e-05, "loss": 0.4438, "step": 429 }, { "epoch": 0.08, "learning_rate": 2.9820116488481203e-05, "loss": 0.3503, "step": 430 }, { "epoch": 0.08, "learning_rate": 2.9818753909848722e-05, "loss": 0.4382, "step": 431 }, { "epoch": 0.08, "learning_rate": 2.9817386221416796e-05, "loss": 0.353, "step": 432 }, { "epoch": 0.08, "learning_rate": 2.981601342365703e-05, "loss": 0.3799, "step": 433 }, { "epoch": 0.08, "learning_rate": 2.9814635517042792e-05, "loss": 0.4622, "step": 434 }, { "epoch": 0.08, "learning_rate": 2.9813252502049218e-05, "loss": 0.5107, "step": 435 }, { "epoch": 0.08, "learning_rate": 2.9811864379153187e-05, "loss": 0.5117, "step": 436 }, { "epoch": 0.08, "learning_rate": 2.981047114883336e-05, "loss": 0.4006, "step": 437 }, { "epoch": 0.08, "learning_rate": 2.9809072811570147e-05, "loss": 0.3372, "step": 438 }, { "epoch": 0.08, "learning_rate": 2.9807669367845726e-05, "loss": 0.385, "step": 439 }, { "epoch": 0.08, "learning_rate": 2.980626081814403e-05, "loss": 0.3972, "step": 440 }, { "epoch": 0.08, "learning_rate": 2.9804847162950753e-05, "loss": 0.5063, "step": 441 }, { "epoch": 0.08, "learning_rate": 2.9803428402753355e-05, "loss": 0.4639, "step": 442 }, { "epoch": 0.08, "learning_rate": 2.9802004538041045e-05, "loss": 0.4758, "step": 443 }, { "epoch": 0.08, "learning_rate": 2.9800575569304812e-05, "loss": 0.369, "step": 444 }, { "epoch": 0.08, "learning_rate": 2.9799141497037385e-05, "loss": 0.416, "step": 445 }, { "epoch": 0.08, "learning_rate": 2.9797702321733254e-05, "loss": 0.4473, "step": 446 }, { "epoch": 0.08, "learning_rate": 2.9796258043888687e-05, "loss": 0.4243, "step": 447 }, { "epoch": 0.08, "learning_rate": 2.9794808664001694e-05, "loss": 0.4368, "step": 448 }, { "epoch": 0.08, "learning_rate": 2.9793354182572052e-05, "loss": 0.415, "step": 449 }, { "epoch": 0.08, "learning_rate": 2.979189460010129e-05, "loss": 0.4807, "step": 450 }, { "epoch": 0.08, "learning_rate": 2.979042991709271e-05, "loss": 0.4348, "step": 451 }, { "epoch": 0.08, "learning_rate": 2.9788960134051356e-05, "loss": 0.4492, "step": 452 }, { "epoch": 0.08, "learning_rate": 2.978748525148404e-05, "loss": 0.3767, "step": 453 }, { "epoch": 0.08, "learning_rate": 2.978600526989933e-05, "loss": 0.489, "step": 454 }, { "epoch": 0.08, "learning_rate": 2.9784520189807558e-05, "loss": 0.5044, "step": 455 }, { "epoch": 0.08, "learning_rate": 2.97830300117208e-05, "loss": 0.4241, "step": 456 }, { "epoch": 0.08, "learning_rate": 2.9781534736152908e-05, "loss": 0.4165, "step": 457 }, { "epoch": 0.08, "learning_rate": 2.9780034363619476e-05, "loss": 0.4258, "step": 458 }, { "epoch": 0.08, "learning_rate": 2.977852889463787e-05, "loss": 0.4082, "step": 459 }, { "epoch": 0.08, "learning_rate": 2.9777018329727194e-05, "loss": 0.3507, "step": 460 }, { "epoch": 0.08, "learning_rate": 2.977550266940833e-05, "loss": 0.4268, "step": 461 }, { "epoch": 0.08, "learning_rate": 2.97739819142039e-05, "loss": 0.3821, "step": 462 }, { "epoch": 0.08, "learning_rate": 2.97724560646383e-05, "loss": 0.4426, "step": 463 }, { "epoch": 0.08, "learning_rate": 2.9770925121237667e-05, "loss": 0.4194, "step": 464 }, { "epoch": 0.08, "learning_rate": 2.97693890845299e-05, "loss": 0.413, "step": 465 }, { "epoch": 0.08, "learning_rate": 2.976784795504466e-05, "loss": 0.355, "step": 466 }, { "epoch": 0.08, "learning_rate": 2.9766301733313352e-05, "loss": 0.3721, "step": 467 }, { "epoch": 0.08, "learning_rate": 2.9764750419869146e-05, "loss": 0.481, "step": 468 }, { "epoch": 0.09, "learning_rate": 2.9763194015246973e-05, "loss": 0.4182, "step": 469 }, { "epoch": 0.09, "learning_rate": 2.97616325199835e-05, "loss": 0.3562, "step": 470 }, { "epoch": 0.09, "learning_rate": 2.976006593461717e-05, "loss": 0.4197, "step": 471 }, { "epoch": 0.09, "learning_rate": 2.975849425968817e-05, "loss": 0.3984, "step": 472 }, { "epoch": 0.09, "learning_rate": 2.9756917495738446e-05, "loss": 0.4163, "step": 473 }, { "epoch": 0.09, "learning_rate": 2.9755335643311693e-05, "loss": 0.4583, "step": 474 }, { "epoch": 0.09, "learning_rate": 2.9753748702953365e-05, "loss": 0.447, "step": 475 }, { "epoch": 0.09, "learning_rate": 2.975215667521068e-05, "loss": 0.3943, "step": 476 }, { "epoch": 0.09, "learning_rate": 2.9750559560632592e-05, "loss": 0.396, "step": 477 }, { "epoch": 0.09, "learning_rate": 2.974895735976982e-05, "loss": 0.4221, "step": 478 }, { "epoch": 0.09, "learning_rate": 2.974735007317483e-05, "loss": 0.3909, "step": 479 }, { "epoch": 0.09, "learning_rate": 2.9745737701401854e-05, "loss": 0.4453, "step": 480 }, { "epoch": 0.09, "learning_rate": 2.974412024500687e-05, "loss": 0.3777, "step": 481 }, { "epoch": 0.09, "learning_rate": 2.9742497704547596e-05, "loss": 0.4485, "step": 482 }, { "epoch": 0.09, "learning_rate": 2.974087008058353e-05, "loss": 0.4351, "step": 483 }, { "epoch": 0.09, "learning_rate": 2.973923737367591e-05, "loss": 0.5037, "step": 484 }, { "epoch": 0.09, "learning_rate": 2.9737599584387714e-05, "loss": 0.3772, "step": 485 }, { "epoch": 0.09, "learning_rate": 2.9735956713283688e-05, "loss": 0.3794, "step": 486 }, { "epoch": 0.09, "learning_rate": 2.9734308760930333e-05, "loss": 0.4153, "step": 487 }, { "epoch": 0.09, "learning_rate": 2.9732655727895884e-05, "loss": 0.3105, "step": 488 }, { "epoch": 0.09, "learning_rate": 2.9730997614750352e-05, "loss": 0.4182, "step": 489 }, { "epoch": 0.09, "learning_rate": 2.9729334422065482e-05, "loss": 0.3937, "step": 490 }, { "epoch": 0.09, "learning_rate": 2.9727666150414773e-05, "loss": 0.4512, "step": 491 }, { "epoch": 0.09, "learning_rate": 2.972599280037348e-05, "loss": 0.3733, "step": 492 }, { "epoch": 0.09, "learning_rate": 2.9724314372518613e-05, "loss": 0.416, "step": 493 }, { "epoch": 0.09, "learning_rate": 2.9722630867428916e-05, "loss": 0.3567, "step": 494 }, { "epoch": 0.09, "learning_rate": 2.97209422856849e-05, "loss": 0.4358, "step": 495 }, { "epoch": 0.09, "learning_rate": 2.9719248627868823e-05, "loss": 0.4968, "step": 496 }, { "epoch": 0.09, "learning_rate": 2.9717549894564694e-05, "loss": 0.3527, "step": 497 }, { "epoch": 0.09, "learning_rate": 2.971584608635827e-05, "loss": 0.384, "step": 498 }, { "epoch": 0.09, "learning_rate": 2.9714137203837044e-05, "loss": 0.4402, "step": 499 }, { "epoch": 0.09, "learning_rate": 2.971242324759029e-05, "loss": 0.386, "step": 500 }, { "epoch": 0.09, "learning_rate": 2.9710704218209e-05, "loss": 0.4346, "step": 501 }, { "epoch": 0.09, "learning_rate": 2.970898011628594e-05, "loss": 0.3862, "step": 502 }, { "epoch": 0.09, "learning_rate": 2.9707250942415612e-05, "loss": 0.3892, "step": 503 }, { "epoch": 0.09, "learning_rate": 2.9705516697194265e-05, "loss": 0.3857, "step": 504 }, { "epoch": 0.09, "learning_rate": 2.9703777381219906e-05, "loss": 0.3308, "step": 505 }, { "epoch": 0.09, "learning_rate": 2.9702032995092286e-05, "loss": 0.4082, "step": 506 }, { "epoch": 0.09, "learning_rate": 2.9700283539412896e-05, "loss": 0.4351, "step": 507 }, { "epoch": 0.09, "learning_rate": 2.969852901478499e-05, "loss": 0.3887, "step": 508 }, { "epoch": 0.09, "learning_rate": 2.9696769421813563e-05, "loss": 0.4272, "step": 509 }, { "epoch": 0.09, "learning_rate": 2.9695004761105355e-05, "loss": 0.4082, "step": 510 }, { "epoch": 0.09, "learning_rate": 2.9693235033268857e-05, "loss": 0.3855, "step": 511 }, { "epoch": 0.09, "learning_rate": 2.9691460238914302e-05, "loss": 0.4124, "step": 512 }, { "epoch": 0.09, "learning_rate": 2.968968037865368e-05, "loss": 0.5244, "step": 513 }, { "epoch": 0.09, "learning_rate": 2.9687895453100716e-05, "loss": 0.3599, "step": 514 }, { "epoch": 0.09, "learning_rate": 2.96861054628709e-05, "loss": 0.3579, "step": 515 }, { "epoch": 0.09, "learning_rate": 2.968431040858144e-05, "loss": 0.4541, "step": 516 }, { "epoch": 0.09, "learning_rate": 2.9682510290851317e-05, "loss": 0.3116, "step": 517 }, { "epoch": 0.09, "learning_rate": 2.9680705110301243e-05, "loss": 0.4385, "step": 518 }, { "epoch": 0.09, "learning_rate": 2.9678894867553684e-05, "loss": 0.3833, "step": 519 }, { "epoch": 0.09, "learning_rate": 2.967707956323284e-05, "loss": 0.3735, "step": 520 }, { "epoch": 0.09, "learning_rate": 2.9675259197964673e-05, "loss": 0.4309, "step": 521 }, { "epoch": 0.09, "learning_rate": 2.9673433772376873e-05, "loss": 0.3619, "step": 522 }, { "epoch": 0.09, "learning_rate": 2.9671603287098884e-05, "loss": 0.3907, "step": 523 }, { "epoch": 0.09, "learning_rate": 2.96697677427619e-05, "loss": 0.4492, "step": 524 }, { "epoch": 0.1, "learning_rate": 2.9667927139998844e-05, "loss": 0.317, "step": 525 }, { "epoch": 0.1, "learning_rate": 2.9666081479444397e-05, "loss": 0.4154, "step": 526 }, { "epoch": 0.1, "learning_rate": 2.9664230761734983e-05, "loss": 0.4326, "step": 527 }, { "epoch": 0.1, "learning_rate": 2.966237498750876e-05, "loss": 0.3864, "step": 528 }, { "epoch": 0.1, "learning_rate": 2.9660514157405637e-05, "loss": 0.3633, "step": 529 }, { "epoch": 0.1, "learning_rate": 2.9658648272067266e-05, "loss": 0.4059, "step": 530 }, { "epoch": 0.1, "learning_rate": 2.9656777332137042e-05, "loss": 0.3848, "step": 531 }, { "epoch": 0.1, "learning_rate": 2.96549013382601e-05, "loss": 0.4331, "step": 532 }, { "epoch": 0.1, "learning_rate": 2.965302029108332e-05, "loss": 0.4048, "step": 533 }, { "epoch": 0.1, "learning_rate": 2.9651134191255328e-05, "loss": 0.3707, "step": 534 }, { "epoch": 0.1, "learning_rate": 2.9649243039426485e-05, "loss": 0.3356, "step": 535 }, { "epoch": 0.1, "learning_rate": 2.9647346836248896e-05, "loss": 0.3535, "step": 536 }, { "epoch": 0.1, "learning_rate": 2.9645445582376414e-05, "loss": 0.4165, "step": 537 }, { "epoch": 0.1, "learning_rate": 2.9643539278464624e-05, "loss": 0.3685, "step": 538 }, { "epoch": 0.1, "learning_rate": 2.964162792517086e-05, "loss": 0.3717, "step": 539 }, { "epoch": 0.1, "learning_rate": 2.9639711523154196e-05, "loss": 0.3451, "step": 540 }, { "epoch": 0.1, "learning_rate": 2.9637790073075442e-05, "loss": 0.4023, "step": 541 }, { "epoch": 0.1, "learning_rate": 2.9635863575597154e-05, "loss": 0.3257, "step": 542 }, { "epoch": 0.1, "learning_rate": 2.9633932031383626e-05, "loss": 0.3977, "step": 543 }, { "epoch": 0.1, "learning_rate": 2.9631995441100894e-05, "loss": 0.405, "step": 544 }, { "epoch": 0.1, "learning_rate": 2.963005380541673e-05, "loss": 0.3783, "step": 545 }, { "epoch": 0.1, "learning_rate": 2.9628107125000648e-05, "loss": 0.3182, "step": 546 }, { "epoch": 0.1, "learning_rate": 2.9626155400523904e-05, "loss": 0.3245, "step": 547 }, { "epoch": 0.1, "learning_rate": 2.9624198632659497e-05, "loss": 0.4236, "step": 548 }, { "epoch": 0.1, "learning_rate": 2.9622236822082146e-05, "loss": 0.3552, "step": 549 }, { "epoch": 0.1, "learning_rate": 2.962026996946833e-05, "loss": 0.4491, "step": 550 }, { "epoch": 0.1, "learning_rate": 2.961829807549626e-05, "loss": 0.3165, "step": 551 }, { "epoch": 0.1, "learning_rate": 2.961632114084588e-05, "loss": 0.4197, "step": 552 }, { "epoch": 0.1, "learning_rate": 2.9614339166198874e-05, "loss": 0.4512, "step": 553 }, { "epoch": 0.1, "learning_rate": 2.9612352152238678e-05, "loss": 0.407, "step": 554 }, { "epoch": 0.1, "learning_rate": 2.9610360099650442e-05, "loss": 0.3644, "step": 555 }, { "epoch": 0.1, "learning_rate": 2.9608363009121066e-05, "loss": 0.3302, "step": 556 }, { "epoch": 0.1, "learning_rate": 2.9606360881339197e-05, "loss": 0.3221, "step": 557 }, { "epoch": 0.1, "learning_rate": 2.960435371699519e-05, "loss": 0.3867, "step": 558 }, { "epoch": 0.1, "learning_rate": 2.9602341516781172e-05, "loss": 0.395, "step": 559 }, { "epoch": 0.1, "learning_rate": 2.9600324281390983e-05, "loss": 0.3896, "step": 560 }, { "epoch": 0.1, "learning_rate": 2.9598302011520203e-05, "loss": 0.4103, "step": 561 }, { "epoch": 0.1, "learning_rate": 2.9596274707866158e-05, "loss": 0.3209, "step": 562 }, { "epoch": 0.1, "learning_rate": 2.9594242371127893e-05, "loss": 0.4153, "step": 563 }, { "epoch": 0.1, "learning_rate": 2.9592205002006202e-05, "loss": 0.4167, "step": 564 }, { "epoch": 0.1, "learning_rate": 2.9590162601203613e-05, "loss": 0.3779, "step": 565 }, { "epoch": 0.1, "learning_rate": 2.9588115169424384e-05, "loss": 0.5332, "step": 566 }, { "epoch": 0.1, "learning_rate": 2.9586062707374504e-05, "loss": 0.3506, "step": 567 }, { "epoch": 0.1, "learning_rate": 2.9584005215761715e-05, "loss": 0.365, "step": 568 }, { "epoch": 0.1, "learning_rate": 2.9581942695295472e-05, "loss": 0.3774, "step": 569 }, { "epoch": 0.1, "learning_rate": 2.957987514668698e-05, "loss": 0.4309, "step": 570 }, { "epoch": 0.1, "learning_rate": 2.9577802570649158e-05, "loss": 0.3625, "step": 571 }, { "epoch": 0.1, "learning_rate": 2.9575724967896683e-05, "loss": 0.3943, "step": 572 }, { "epoch": 0.1, "learning_rate": 2.9573642339145947e-05, "loss": 0.4656, "step": 573 }, { "epoch": 0.1, "learning_rate": 2.9571554685115085e-05, "loss": 0.4272, "step": 574 }, { "epoch": 0.1, "learning_rate": 2.956946200652396e-05, "loss": 0.428, "step": 575 }, { "epoch": 0.1, "learning_rate": 2.9567364304094168e-05, "loss": 0.3799, "step": 576 }, { "epoch": 0.1, "learning_rate": 2.9565261578549042e-05, "loss": 0.437, "step": 577 }, { "epoch": 0.1, "learning_rate": 2.956315383061364e-05, "loss": 0.4045, "step": 578 }, { "epoch": 0.1, "learning_rate": 2.9561041061014757e-05, "loss": 0.4426, "step": 579 }, { "epoch": 0.11, "learning_rate": 2.9558923270480916e-05, "loss": 0.3474, "step": 580 }, { "epoch": 0.11, "learning_rate": 2.9556800459742373e-05, "loss": 0.4684, "step": 581 }, { "epoch": 0.11, "learning_rate": 2.9554672629531116e-05, "loss": 0.3414, "step": 582 }, { "epoch": 0.11, "learning_rate": 2.9552539780580863e-05, "loss": 0.3674, "step": 583 }, { "epoch": 0.11, "learning_rate": 2.9550401913627064e-05, "loss": 0.467, "step": 584 }, { "epoch": 0.11, "learning_rate": 2.9548259029406894e-05, "loss": 0.4219, "step": 585 }, { "epoch": 0.11, "learning_rate": 2.9546111128659265e-05, "loss": 0.4646, "step": 586 }, { "epoch": 0.11, "learning_rate": 2.9543958212124818e-05, "loss": 0.3665, "step": 587 }, { "epoch": 0.11, "learning_rate": 2.9541800280545917e-05, "loss": 0.37, "step": 588 }, { "epoch": 0.11, "learning_rate": 2.953963733466666e-05, "loss": 0.4153, "step": 589 }, { "epoch": 0.11, "learning_rate": 2.9537469375232873e-05, "loss": 0.3352, "step": 590 }, { "epoch": 0.11, "learning_rate": 2.953529640299211e-05, "loss": 0.4343, "step": 591 }, { "epoch": 0.11, "learning_rate": 2.9533118418693664e-05, "loss": 0.4053, "step": 592 }, { "epoch": 0.11, "learning_rate": 2.9530935423088533e-05, "loss": 0.4021, "step": 593 }, { "epoch": 0.11, "learning_rate": 2.9528747416929467e-05, "loss": 0.3792, "step": 594 }, { "epoch": 0.11, "learning_rate": 2.952655440097093e-05, "loss": 0.4019, "step": 595 }, { "epoch": 0.11, "learning_rate": 2.952435637596912e-05, "loss": 0.3586, "step": 596 }, { "epoch": 0.11, "learning_rate": 2.952215334268196e-05, "loss": 0.4209, "step": 597 }, { "epoch": 0.11, "learning_rate": 2.9519945301869096e-05, "loss": 0.3574, "step": 598 }, { "epoch": 0.11, "learning_rate": 2.9517732254291903e-05, "loss": 0.4028, "step": 599 }, { "epoch": 0.11, "learning_rate": 2.9515514200713487e-05, "loss": 0.3517, "step": 600 }, { "epoch": 0.11, "learning_rate": 2.9513291141898676e-05, "loss": 0.3828, "step": 601 }, { "epoch": 0.11, "learning_rate": 2.9511063078614024e-05, "loss": 0.3138, "step": 602 }, { "epoch": 0.11, "learning_rate": 2.950883001162781e-05, "loss": 0.3989, "step": 603 }, { "epoch": 0.11, "learning_rate": 2.9506591941710043e-05, "loss": 0.3613, "step": 604 }, { "epoch": 0.11, "learning_rate": 2.9504348869632456e-05, "loss": 0.469, "step": 605 }, { "epoch": 0.11, "learning_rate": 2.9502100796168493e-05, "loss": 0.4009, "step": 606 }, { "epoch": 0.11, "learning_rate": 2.949984772209335e-05, "loss": 0.3274, "step": 607 }, { "epoch": 0.11, "learning_rate": 2.949758964818392e-05, "loss": 0.4453, "step": 608 }, { "epoch": 0.11, "learning_rate": 2.9495326575218834e-05, "loss": 0.4465, "step": 609 }, { "epoch": 0.11, "learning_rate": 2.9493058503978446e-05, "loss": 0.3678, "step": 610 }, { "epoch": 0.11, "learning_rate": 2.9490785435244835e-05, "loss": 0.3276, "step": 611 }, { "epoch": 0.11, "learning_rate": 2.948850736980179e-05, "loss": 0.4404, "step": 612 }, { "epoch": 0.11, "learning_rate": 2.948622430843484e-05, "loss": 0.4312, "step": 613 }, { "epoch": 0.11, "learning_rate": 2.9483936251931233e-05, "loss": 0.335, "step": 614 }, { "epoch": 0.11, "learning_rate": 2.948164320107993e-05, "loss": 0.4236, "step": 615 }, { "epoch": 0.11, "learning_rate": 2.9479345156671622e-05, "loss": 0.3938, "step": 616 }, { "epoch": 0.11, "learning_rate": 2.947704211949872e-05, "loss": 0.4216, "step": 617 }, { "epoch": 0.11, "learning_rate": 2.9474734090355356e-05, "loss": 0.3748, "step": 618 }, { "epoch": 0.11, "learning_rate": 2.947242107003739e-05, "loss": 0.4551, "step": 619 }, { "epoch": 0.11, "learning_rate": 2.9470103059342385e-05, "loss": 0.4006, "step": 620 }, { "epoch": 0.11, "learning_rate": 2.946778005906965e-05, "loss": 0.3617, "step": 621 }, { "epoch": 0.11, "learning_rate": 2.94654520700202e-05, "loss": 0.3478, "step": 622 }, { "epoch": 0.11, "learning_rate": 2.9463119092996758e-05, "loss": 0.3629, "step": 623 }, { "epoch": 0.11, "learning_rate": 2.9460781128803796e-05, "loss": 0.4517, "step": 624 }, { "epoch": 0.11, "learning_rate": 2.9458438178247482e-05, "loss": 0.3738, "step": 625 }, { "epoch": 0.11, "learning_rate": 2.9456090242135718e-05, "loss": 0.4709, "step": 626 }, { "epoch": 0.11, "learning_rate": 2.9453737321278113e-05, "loss": 0.3403, "step": 627 }, { "epoch": 0.11, "learning_rate": 2.9451379416486005e-05, "loss": 0.4324, "step": 628 }, { "epoch": 0.11, "learning_rate": 2.9449016528572438e-05, "loss": 0.3706, "step": 629 }, { "epoch": 0.11, "learning_rate": 2.9446648658352192e-05, "loss": 0.3163, "step": 630 }, { "epoch": 0.11, "learning_rate": 2.944427580664175e-05, "loss": 0.4106, "step": 631 }, { "epoch": 0.11, "learning_rate": 2.9441897974259324e-05, "loss": 0.3271, "step": 632 }, { "epoch": 0.11, "learning_rate": 2.943951516202483e-05, "loss": 0.3989, "step": 633 }, { "epoch": 0.11, "learning_rate": 2.9437127370759914e-05, "loss": 0.2885, "step": 634 }, { "epoch": 0.12, "learning_rate": 2.9434734601287928e-05, "loss": 0.4148, "step": 635 }, { "epoch": 0.12, "learning_rate": 2.9432336854433956e-05, "loss": 0.3882, "step": 636 }, { "epoch": 0.12, "learning_rate": 2.9429934131024783e-05, "loss": 0.3527, "step": 637 }, { "epoch": 0.12, "learning_rate": 2.942752643188891e-05, "loss": 0.342, "step": 638 }, { "epoch": 0.12, "learning_rate": 2.9425113757856573e-05, "loss": 0.3608, "step": 639 }, { "epoch": 0.12, "learning_rate": 2.9422696109759693e-05, "loss": 0.3755, "step": 640 }, { "epoch": 0.12, "learning_rate": 2.9420273488431937e-05, "loss": 0.3756, "step": 641 }, { "epoch": 0.12, "learning_rate": 2.9417845894708662e-05, "loss": 0.3972, "step": 642 }, { "epoch": 0.12, "learning_rate": 2.941541332942696e-05, "loss": 0.4121, "step": 643 }, { "epoch": 0.12, "learning_rate": 2.941297579342562e-05, "loss": 0.3943, "step": 644 }, { "epoch": 0.12, "learning_rate": 2.941053328754516e-05, "loss": 0.4226, "step": 645 }, { "epoch": 0.12, "learning_rate": 2.9408085812627797e-05, "loss": 0.3765, "step": 646 }, { "epoch": 0.12, "learning_rate": 2.940563336951747e-05, "loss": 0.3538, "step": 647 }, { "epoch": 0.12, "learning_rate": 2.9403175959059836e-05, "loss": 0.3511, "step": 648 }, { "epoch": 0.12, "learning_rate": 2.940071358210225e-05, "loss": 0.3617, "step": 649 }, { "epoch": 0.12, "learning_rate": 2.9398246239493797e-05, "loss": 0.3599, "step": 650 }, { "epoch": 0.12, "learning_rate": 2.939577393208526e-05, "loss": 0.387, "step": 651 }, { "epoch": 0.12, "learning_rate": 2.9393296660729136e-05, "loss": 0.3767, "step": 652 }, { "epoch": 0.12, "learning_rate": 2.939081442627965e-05, "loss": 0.3103, "step": 653 }, { "epoch": 0.12, "learning_rate": 2.938832722959271e-05, "loss": 0.3885, "step": 654 }, { "epoch": 0.12, "learning_rate": 2.938583507152596e-05, "loss": 0.3596, "step": 655 }, { "epoch": 0.12, "learning_rate": 2.9383337952938742e-05, "loss": 0.3644, "step": 656 }, { "epoch": 0.12, "learning_rate": 2.9380835874692118e-05, "loss": 0.4214, "step": 657 }, { "epoch": 0.12, "learning_rate": 2.937832883764885e-05, "loss": 0.3787, "step": 658 }, { "epoch": 0.12, "learning_rate": 2.937581684267341e-05, "loss": 0.3334, "step": 659 }, { "epoch": 0.12, "learning_rate": 2.9373299890631982e-05, "loss": 0.3466, "step": 660 }, { "epoch": 0.12, "learning_rate": 2.9370777982392472e-05, "loss": 0.2915, "step": 661 }, { "epoch": 0.12, "learning_rate": 2.936825111882448e-05, "loss": 0.3622, "step": 662 }, { "epoch": 0.12, "learning_rate": 2.9365719300799307e-05, "loss": 0.333, "step": 663 }, { "epoch": 0.12, "learning_rate": 2.936318252918999e-05, "loss": 0.4208, "step": 664 }, { "epoch": 0.12, "learning_rate": 2.936064080487125e-05, "loss": 0.3112, "step": 665 }, { "epoch": 0.12, "learning_rate": 2.9358094128719524e-05, "loss": 0.3213, "step": 666 }, { "epoch": 0.12, "learning_rate": 2.9355542501612957e-05, "loss": 0.3907, "step": 667 }, { "epoch": 0.12, "learning_rate": 2.93529859244314e-05, "loss": 0.368, "step": 668 }, { "epoch": 0.12, "learning_rate": 2.9350424398056417e-05, "loss": 0.3363, "step": 669 }, { "epoch": 0.12, "learning_rate": 2.9347857923371263e-05, "loss": 0.3638, "step": 670 }, { "epoch": 0.12, "learning_rate": 2.9345286501260915e-05, "loss": 0.415, "step": 671 }, { "epoch": 0.12, "learning_rate": 2.9342710132612048e-05, "loss": 0.4216, "step": 672 }, { "epoch": 0.12, "learning_rate": 2.934012881831305e-05, "loss": 0.3838, "step": 673 }, { "epoch": 0.12, "learning_rate": 2.9337542559254e-05, "loss": 0.3541, "step": 674 }, { "epoch": 0.12, "learning_rate": 2.9334951356326705e-05, "loss": 0.396, "step": 675 }, { "epoch": 0.12, "learning_rate": 2.933235521042465e-05, "loss": 0.3169, "step": 676 }, { "epoch": 0.12, "learning_rate": 2.932975412244304e-05, "loss": 0.3378, "step": 677 }, { "epoch": 0.12, "learning_rate": 2.932714809327879e-05, "loss": 0.3336, "step": 678 }, { "epoch": 0.12, "learning_rate": 2.9324537123830496e-05, "loss": 0.3467, "step": 679 }, { "epoch": 0.12, "learning_rate": 2.9321921214998485e-05, "loss": 0.4705, "step": 680 }, { "epoch": 0.12, "learning_rate": 2.9319300367684767e-05, "loss": 0.3538, "step": 681 }, { "epoch": 0.12, "learning_rate": 2.931667458279307e-05, "loss": 0.3182, "step": 682 }, { "epoch": 0.12, "learning_rate": 2.9314043861228805e-05, "loss": 0.4193, "step": 683 }, { "epoch": 0.12, "learning_rate": 2.9311408203899106e-05, "loss": 0.3965, "step": 684 }, { "epoch": 0.12, "learning_rate": 2.9308767611712794e-05, "loss": 0.3181, "step": 685 }, { "epoch": 0.12, "learning_rate": 2.93061220855804e-05, "loss": 0.3992, "step": 686 }, { "epoch": 0.12, "learning_rate": 2.9303471626414152e-05, "loss": 0.3792, "step": 687 }, { "epoch": 0.12, "learning_rate": 2.9300816235127984e-05, "loss": 0.3539, "step": 688 }, { "epoch": 0.12, "learning_rate": 2.9298155912637533e-05, "loss": 0.3484, "step": 689 }, { "epoch": 0.13, "learning_rate": 2.9295490659860113e-05, "loss": 0.356, "step": 690 }, { "epoch": 0.13, "learning_rate": 2.929282047771477e-05, "loss": 0.4263, "step": 691 }, { "epoch": 0.13, "learning_rate": 2.9290145367122232e-05, "loss": 0.4314, "step": 692 }, { "epoch": 0.13, "learning_rate": 2.9287465329004935e-05, "loss": 0.3269, "step": 693 }, { "epoch": 0.13, "learning_rate": 2.9284780364286997e-05, "loss": 0.3196, "step": 694 }, { "epoch": 0.13, "learning_rate": 2.9282090473894258e-05, "loss": 0.3987, "step": 695 }, { "epoch": 0.13, "learning_rate": 2.927939565875424e-05, "loss": 0.3933, "step": 696 }, { "epoch": 0.13, "learning_rate": 2.927669591979617e-05, "loss": 0.3175, "step": 697 }, { "epoch": 0.13, "learning_rate": 2.9273991257950972e-05, "loss": 0.3916, "step": 698 }, { "epoch": 0.13, "learning_rate": 2.9271281674151266e-05, "loss": 0.3999, "step": 699 }, { "epoch": 0.13, "learning_rate": 2.9268567169331376e-05, "loss": 0.426, "step": 700 }, { "epoch": 0.13, "learning_rate": 2.9265847744427305e-05, "loss": 0.4082, "step": 701 }, { "epoch": 0.13, "learning_rate": 2.926312340037677e-05, "loss": 0.3721, "step": 702 }, { "epoch": 0.13, "learning_rate": 2.9260394138119183e-05, "loss": 0.4319, "step": 703 }, { "epoch": 0.13, "learning_rate": 2.9257659958595644e-05, "loss": 0.3387, "step": 704 }, { "epoch": 0.13, "learning_rate": 2.9254920862748954e-05, "loss": 0.3896, "step": 705 }, { "epoch": 0.13, "learning_rate": 2.9252176851523603e-05, "loss": 0.3801, "step": 706 }, { "epoch": 0.13, "learning_rate": 2.9249427925865787e-05, "loss": 0.4258, "step": 707 }, { "epoch": 0.13, "learning_rate": 2.9246674086723385e-05, "loss": 0.4407, "step": 708 }, { "epoch": 0.13, "learning_rate": 2.924391533504597e-05, "loss": 0.3921, "step": 709 }, { "epoch": 0.13, "learning_rate": 2.924115167178483e-05, "loss": 0.3503, "step": 710 }, { "epoch": 0.13, "learning_rate": 2.9238383097892914e-05, "loss": 0.3918, "step": 711 }, { "epoch": 0.13, "learning_rate": 2.9235609614324892e-05, "loss": 0.4081, "step": 712 }, { "epoch": 0.13, "learning_rate": 2.9232831222037114e-05, "loss": 0.3108, "step": 713 }, { "epoch": 0.13, "learning_rate": 2.9230047921987616e-05, "loss": 0.3413, "step": 714 }, { "epoch": 0.13, "learning_rate": 2.922725971513615e-05, "loss": 0.303, "step": 715 }, { "epoch": 0.13, "learning_rate": 2.9224466602444128e-05, "loss": 0.3214, "step": 716 }, { "epoch": 0.13, "learning_rate": 2.9221668584874686e-05, "loss": 0.5195, "step": 717 }, { "epoch": 0.13, "learning_rate": 2.921886566339263e-05, "loss": 0.3887, "step": 718 }, { "epoch": 0.13, "learning_rate": 2.921605783896446e-05, "loss": 0.2977, "step": 719 }, { "epoch": 0.13, "learning_rate": 2.9213245112558366e-05, "loss": 0.4827, "step": 720 }, { "epoch": 0.13, "learning_rate": 2.921042748514424e-05, "loss": 0.3142, "step": 721 }, { "epoch": 0.13, "learning_rate": 2.9207604957693654e-05, "loss": 0.3428, "step": 722 }, { "epoch": 0.13, "learning_rate": 2.920477753117987e-05, "loss": 0.3579, "step": 723 }, { "epoch": 0.13, "learning_rate": 2.920194520657784e-05, "loss": 0.4058, "step": 724 }, { "epoch": 0.13, "learning_rate": 2.9199107984864207e-05, "loss": 0.4109, "step": 725 }, { "epoch": 0.13, "learning_rate": 2.91962658670173e-05, "loss": 0.3369, "step": 726 }, { "epoch": 0.13, "learning_rate": 2.9193418854017134e-05, "loss": 0.3821, "step": 727 }, { "epoch": 0.13, "learning_rate": 2.919056694684542e-05, "loss": 0.3523, "step": 728 }, { "epoch": 0.13, "learning_rate": 2.9187710146485552e-05, "loss": 0.4, "step": 729 }, { "epoch": 0.13, "learning_rate": 2.918484845392261e-05, "loss": 0.3433, "step": 730 }, { "epoch": 0.13, "learning_rate": 2.918198187014336e-05, "loss": 0.3511, "step": 731 }, { "epoch": 0.13, "learning_rate": 2.9179110396136263e-05, "loss": 0.3799, "step": 732 }, { "epoch": 0.13, "learning_rate": 2.9176234032891458e-05, "loss": 0.395, "step": 733 }, { "epoch": 0.13, "learning_rate": 2.917335278140077e-05, "loss": 0.3302, "step": 734 }, { "epoch": 0.13, "learning_rate": 2.917046664265771e-05, "loss": 0.3123, "step": 735 }, { "epoch": 0.13, "learning_rate": 2.9167575617657472e-05, "loss": 0.365, "step": 736 }, { "epoch": 0.13, "learning_rate": 2.9164679707396945e-05, "loss": 0.4448, "step": 737 }, { "epoch": 0.13, "learning_rate": 2.91617789128747e-05, "loss": 0.3654, "step": 738 }, { "epoch": 0.13, "learning_rate": 2.9158873235090985e-05, "loss": 0.3323, "step": 739 }, { "epoch": 0.13, "learning_rate": 2.915596267504773e-05, "loss": 0.3418, "step": 740 }, { "epoch": 0.13, "learning_rate": 2.9153047233748556e-05, "loss": 0.3958, "step": 741 }, { "epoch": 0.13, "learning_rate": 2.915012691219877e-05, "loss": 0.3655, "step": 742 }, { "epoch": 0.13, "learning_rate": 2.914720171140535e-05, "loss": 0.3552, "step": 743 }, { "epoch": 0.13, "learning_rate": 2.9144271632376965e-05, "loss": 0.405, "step": 744 }, { "epoch": 0.14, "learning_rate": 2.914133667612397e-05, "loss": 0.4004, "step": 745 }, { "epoch": 0.14, "learning_rate": 2.9138396843658383e-05, "loss": 0.3208, "step": 746 }, { "epoch": 0.14, "learning_rate": 2.9135452135993933e-05, "loss": 0.4224, "step": 747 }, { "epoch": 0.14, "learning_rate": 2.9132502554146e-05, "loss": 0.3501, "step": 748 }, { "epoch": 0.14, "learning_rate": 2.9129548099131665e-05, "loss": 0.3479, "step": 749 }, { "epoch": 0.14, "learning_rate": 2.9126588771969674e-05, "loss": 0.3391, "step": 750 }, { "epoch": 0.14, "learning_rate": 2.912362457368048e-05, "loss": 0.4279, "step": 751 }, { "epoch": 0.14, "learning_rate": 2.9120655505286176e-05, "loss": 0.311, "step": 752 }, { "epoch": 0.14, "learning_rate": 2.9117681567810568e-05, "loss": 0.402, "step": 753 }, { "epoch": 0.14, "learning_rate": 2.911470276227912e-05, "loss": 0.3687, "step": 754 }, { "epoch": 0.14, "learning_rate": 2.9111719089718997e-05, "loss": 0.4017, "step": 755 }, { "epoch": 0.14, "learning_rate": 2.9108730551159013e-05, "loss": 0.3951, "step": 756 }, { "epoch": 0.14, "learning_rate": 2.9105737147629687e-05, "loss": 0.3323, "step": 757 }, { "epoch": 0.14, "learning_rate": 2.9102738880163193e-05, "loss": 0.3545, "step": 758 }, { "epoch": 0.14, "learning_rate": 2.9099735749793403e-05, "loss": 0.3767, "step": 759 }, { "epoch": 0.14, "learning_rate": 2.9096727757555852e-05, "loss": 0.3655, "step": 760 }, { "epoch": 0.14, "learning_rate": 2.909371490448775e-05, "loss": 0.3334, "step": 761 }, { "epoch": 0.14, "learning_rate": 2.9090697191627995e-05, "loss": 0.3662, "step": 762 }, { "epoch": 0.14, "learning_rate": 2.908767462001715e-05, "loss": 0.381, "step": 763 }, { "epoch": 0.14, "learning_rate": 2.9084647190697462e-05, "loss": 0.3168, "step": 764 }, { "epoch": 0.14, "learning_rate": 2.9081614904712843e-05, "loss": 0.3574, "step": 765 }, { "epoch": 0.14, "learning_rate": 2.9078577763108892e-05, "loss": 0.3273, "step": 766 }, { "epoch": 0.14, "learning_rate": 2.907553576693287e-05, "loss": 0.3679, "step": 767 }, { "epoch": 0.14, "learning_rate": 2.9072488917233714e-05, "loss": 0.3311, "step": 768 }, { "epoch": 0.14, "learning_rate": 2.9069437215062053e-05, "loss": 0.4446, "step": 769 }, { "epoch": 0.14, "learning_rate": 2.9066380661470155e-05, "loss": 0.2891, "step": 770 }, { "epoch": 0.14, "learning_rate": 2.906331925751199e-05, "loss": 0.3242, "step": 771 }, { "epoch": 0.14, "learning_rate": 2.9060253004243193e-05, "loss": 0.3118, "step": 772 }, { "epoch": 0.14, "learning_rate": 2.905718190272107e-05, "loss": 0.3671, "step": 773 }, { "epoch": 0.14, "learning_rate": 2.905410595400459e-05, "loss": 0.3402, "step": 774 }, { "epoch": 0.14, "learning_rate": 2.90510251591544e-05, "loss": 0.3508, "step": 775 }, { "epoch": 0.14, "learning_rate": 2.9047939519232827e-05, "loss": 0.3334, "step": 776 }, { "epoch": 0.14, "learning_rate": 2.9044849035303856e-05, "loss": 0.3317, "step": 777 }, { "epoch": 0.14, "learning_rate": 2.9041753708433148e-05, "loss": 0.3685, "step": 778 }, { "epoch": 0.14, "learning_rate": 2.9038653539688033e-05, "loss": 0.3779, "step": 779 }, { "epoch": 0.14, "learning_rate": 2.9035548530137513e-05, "loss": 0.3309, "step": 780 }, { "epoch": 0.14, "learning_rate": 2.903243868085225e-05, "loss": 0.3455, "step": 781 }, { "epoch": 0.14, "learning_rate": 2.9029323992904587e-05, "loss": 0.2985, "step": 782 }, { "epoch": 0.14, "learning_rate": 2.9026204467368532e-05, "loss": 0.2827, "step": 783 }, { "epoch": 0.14, "learning_rate": 2.9023080105319753e-05, "loss": 0.4124, "step": 784 }, { "epoch": 0.14, "learning_rate": 2.9019950907835594e-05, "loss": 0.4055, "step": 785 }, { "epoch": 0.14, "learning_rate": 2.9016816875995064e-05, "loss": 0.3158, "step": 786 }, { "epoch": 0.14, "learning_rate": 2.9013678010878843e-05, "loss": 0.2926, "step": 787 }, { "epoch": 0.14, "learning_rate": 2.9010534313569265e-05, "loss": 0.3452, "step": 788 }, { "epoch": 0.14, "learning_rate": 2.900738578515035e-05, "loss": 0.3445, "step": 789 }, { "epoch": 0.14, "learning_rate": 2.900423242670777e-05, "loss": 0.4043, "step": 790 }, { "epoch": 0.14, "learning_rate": 2.9001074239328856e-05, "loss": 0.4073, "step": 791 }, { "epoch": 0.14, "learning_rate": 2.8997911224102623e-05, "loss": 0.3784, "step": 792 }, { "epoch": 0.14, "learning_rate": 2.899474338211974e-05, "loss": 0.2933, "step": 793 }, { "epoch": 0.14, "learning_rate": 2.899157071447254e-05, "loss": 0.3257, "step": 794 }, { "epoch": 0.14, "learning_rate": 2.8988393222255024e-05, "loss": 0.3177, "step": 795 }, { "epoch": 0.14, "learning_rate": 2.8985210906562845e-05, "loss": 0.3738, "step": 796 }, { "epoch": 0.14, "learning_rate": 2.898202376849334e-05, "loss": 0.3811, "step": 797 }, { "epoch": 0.14, "learning_rate": 2.897883180914549e-05, "loss": 0.2476, "step": 798 }, { "epoch": 0.14, "learning_rate": 2.8975635029619956e-05, "loss": 0.3796, "step": 799 }, { "epoch": 0.15, "learning_rate": 2.8972433431019035e-05, "loss": 0.3407, "step": 800 }, { "epoch": 0.15, "learning_rate": 2.896922701444671e-05, "loss": 0.3706, "step": 801 }, { "epoch": 0.15, "learning_rate": 2.896601578100862e-05, "loss": 0.3419, "step": 802 }, { "epoch": 0.15, "learning_rate": 2.8962799731812055e-05, "loss": 0.4209, "step": 803 }, { "epoch": 0.15, "learning_rate": 2.895957886796598e-05, "loss": 0.3807, "step": 804 }, { "epoch": 0.15, "learning_rate": 2.8956353190581005e-05, "loss": 0.3793, "step": 805 }, { "epoch": 0.15, "learning_rate": 2.895312270076941e-05, "loss": 0.3193, "step": 806 }, { "epoch": 0.15, "learning_rate": 2.8949887399645137e-05, "loss": 0.3462, "step": 807 }, { "epoch": 0.15, "learning_rate": 2.894664728832377e-05, "loss": 0.3474, "step": 808 }, { "epoch": 0.15, "learning_rate": 2.8943402367922578e-05, "loss": 0.4385, "step": 809 }, { "epoch": 0.15, "learning_rate": 2.8940152639560463e-05, "loss": 0.3137, "step": 810 }, { "epoch": 0.15, "learning_rate": 2.8936898104357998e-05, "loss": 0.4023, "step": 811 }, { "epoch": 0.15, "learning_rate": 2.893363876343741e-05, "loss": 0.4016, "step": 812 }, { "epoch": 0.15, "learning_rate": 2.893037461792259e-05, "loss": 0.3153, "step": 813 }, { "epoch": 0.15, "learning_rate": 2.892710566893908e-05, "loss": 0.3442, "step": 814 }, { "epoch": 0.15, "learning_rate": 2.8923831917614062e-05, "loss": 0.3413, "step": 815 }, { "epoch": 0.15, "learning_rate": 2.8920553365076415e-05, "loss": 0.3783, "step": 816 }, { "epoch": 0.15, "learning_rate": 2.891727001245663e-05, "loss": 0.2982, "step": 817 }, { "epoch": 0.15, "learning_rate": 2.8913981860886882e-05, "loss": 0.2952, "step": 818 }, { "epoch": 0.15, "learning_rate": 2.8910688911500985e-05, "loss": 0.363, "step": 819 }, { "epoch": 0.15, "learning_rate": 2.8907391165434417e-05, "loss": 0.3525, "step": 820 }, { "epoch": 0.15, "learning_rate": 2.89040886238243e-05, "loss": 0.2944, "step": 821 }, { "epoch": 0.15, "learning_rate": 2.8900781287809423e-05, "loss": 0.325, "step": 822 }, { "epoch": 0.15, "learning_rate": 2.889746915853022e-05, "loss": 0.3604, "step": 823 }, { "epoch": 0.15, "learning_rate": 2.889415223712877e-05, "loss": 0.3865, "step": 824 }, { "epoch": 0.15, "learning_rate": 2.8890830524748827e-05, "loss": 0.2974, "step": 825 }, { "epoch": 0.15, "learning_rate": 2.888750402253577e-05, "loss": 0.3635, "step": 826 }, { "epoch": 0.15, "learning_rate": 2.8884172731636652e-05, "loss": 0.359, "step": 827 }, { "epoch": 0.15, "learning_rate": 2.8880836653200165e-05, "loss": 0.3436, "step": 828 }, { "epoch": 0.15, "learning_rate": 2.887749578837665e-05, "loss": 0.4028, "step": 829 }, { "epoch": 0.15, "learning_rate": 2.887415013831811e-05, "loss": 0.2952, "step": 830 }, { "epoch": 0.15, "learning_rate": 2.887079970417819e-05, "loss": 0.3082, "step": 831 }, { "epoch": 0.15, "learning_rate": 2.8867444487112183e-05, "loss": 0.3671, "step": 832 }, { "epoch": 0.15, "learning_rate": 2.8864084488277038e-05, "loss": 0.3459, "step": 833 }, { "epoch": 0.15, "learning_rate": 2.8860719708831345e-05, "loss": 0.3606, "step": 834 }, { "epoch": 0.15, "learning_rate": 2.885735014993535e-05, "loss": 0.2896, "step": 835 }, { "epoch": 0.15, "learning_rate": 2.8853975812750942e-05, "loss": 0.3101, "step": 836 }, { "epoch": 0.15, "learning_rate": 2.8850596698441657e-05, "loss": 0.344, "step": 837 }, { "epoch": 0.15, "learning_rate": 2.8847212808172685e-05, "loss": 0.2996, "step": 838 }, { "epoch": 0.15, "learning_rate": 2.8843824143110857e-05, "loss": 0.3395, "step": 839 }, { "epoch": 0.15, "learning_rate": 2.884043070442465e-05, "loss": 0.3264, "step": 840 }, { "epoch": 0.15, "learning_rate": 2.8837032493284188e-05, "loss": 0.3698, "step": 841 }, { "epoch": 0.15, "learning_rate": 2.8833629510861245e-05, "loss": 0.351, "step": 842 }, { "epoch": 0.15, "learning_rate": 2.8830221758329236e-05, "loss": 0.338, "step": 843 }, { "epoch": 0.15, "learning_rate": 2.8826809236863217e-05, "loss": 0.4351, "step": 844 }, { "epoch": 0.15, "learning_rate": 2.8823391947639893e-05, "loss": 0.412, "step": 845 }, { "epoch": 0.15, "learning_rate": 2.881996989183762e-05, "loss": 0.3489, "step": 846 }, { "epoch": 0.15, "learning_rate": 2.881654307063639e-05, "loss": 0.4011, "step": 847 }, { "epoch": 0.15, "learning_rate": 2.881311148521783e-05, "loss": 0.3923, "step": 848 }, { "epoch": 0.15, "learning_rate": 2.8809675136765224e-05, "loss": 0.3832, "step": 849 }, { "epoch": 0.15, "learning_rate": 2.8806234026463493e-05, "loss": 0.2767, "step": 850 }, { "epoch": 0.15, "learning_rate": 2.8802788155499205e-05, "loss": 0.3645, "step": 851 }, { "epoch": 0.15, "learning_rate": 2.8799337525060554e-05, "loss": 0.3333, "step": 852 }, { "epoch": 0.15, "learning_rate": 2.879588213633739e-05, "loss": 0.3839, "step": 853 }, { "epoch": 0.15, "learning_rate": 2.87924219905212e-05, "loss": 0.4009, "step": 854 }, { "epoch": 0.16, "learning_rate": 2.878895708880511e-05, "loss": 0.4133, "step": 855 }, { "epoch": 0.16, "learning_rate": 2.878548743238389e-05, "loss": 0.3096, "step": 856 }, { "epoch": 0.16, "learning_rate": 2.8782013022453944e-05, "loss": 0.3171, "step": 857 }, { "epoch": 0.16, "learning_rate": 2.8778533860213315e-05, "loss": 0.2649, "step": 858 }, { "epoch": 0.16, "learning_rate": 2.8775049946861688e-05, "loss": 0.3682, "step": 859 }, { "epoch": 0.16, "learning_rate": 2.8771561283600387e-05, "loss": 0.3729, "step": 860 }, { "epoch": 0.16, "learning_rate": 2.876806787163237e-05, "loss": 0.3811, "step": 861 }, { "epoch": 0.16, "learning_rate": 2.8764569712162237e-05, "loss": 0.3556, "step": 862 }, { "epoch": 0.16, "learning_rate": 2.876106680639622e-05, "loss": 0.3033, "step": 863 }, { "epoch": 0.16, "learning_rate": 2.8757559155542193e-05, "loss": 0.4045, "step": 864 }, { "epoch": 0.16, "learning_rate": 2.8754046760809664e-05, "loss": 0.342, "step": 865 }, { "epoch": 0.16, "learning_rate": 2.8750529623409766e-05, "loss": 0.3655, "step": 866 }, { "epoch": 0.16, "learning_rate": 2.8747007744555293e-05, "loss": 0.3535, "step": 867 }, { "epoch": 0.16, "learning_rate": 2.8743481125460648e-05, "loss": 0.2811, "step": 868 }, { "epoch": 0.16, "learning_rate": 2.8739949767341878e-05, "loss": 0.3983, "step": 869 }, { "epoch": 0.16, "learning_rate": 2.8736413671416675e-05, "loss": 0.3179, "step": 870 }, { "epoch": 0.16, "learning_rate": 2.8732872838904345e-05, "loss": 0.3926, "step": 871 }, { "epoch": 0.16, "learning_rate": 2.8729327271025847e-05, "loss": 0.3197, "step": 872 }, { "epoch": 0.16, "learning_rate": 2.8725776969003753e-05, "loss": 0.3477, "step": 873 }, { "epoch": 0.16, "learning_rate": 2.8722221934062284e-05, "loss": 0.3748, "step": 874 }, { "epoch": 0.16, "learning_rate": 2.871866216742728e-05, "loss": 0.4115, "step": 875 }, { "epoch": 0.16, "learning_rate": 2.8715097670326222e-05, "loss": 0.3289, "step": 876 }, { "epoch": 0.16, "learning_rate": 2.871152844398822e-05, "loss": 0.3362, "step": 877 }, { "epoch": 0.16, "learning_rate": 2.8707954489644015e-05, "loss": 0.3271, "step": 878 }, { "epoch": 0.16, "learning_rate": 2.8704375808525976e-05, "loss": 0.3275, "step": 879 }, { "epoch": 0.16, "learning_rate": 2.8700792401868103e-05, "loss": 0.2829, "step": 880 }, { "epoch": 0.16, "learning_rate": 2.869720427090602e-05, "loss": 0.4241, "step": 881 }, { "epoch": 0.16, "learning_rate": 2.8693611416877e-05, "loss": 0.3948, "step": 882 }, { "epoch": 0.16, "learning_rate": 2.8690013841019915e-05, "loss": 0.3308, "step": 883 }, { "epoch": 0.16, "learning_rate": 2.8686411544575283e-05, "loss": 0.3333, "step": 884 }, { "epoch": 0.16, "learning_rate": 2.8682804528785258e-05, "loss": 0.4283, "step": 885 }, { "epoch": 0.16, "learning_rate": 2.8679192794893597e-05, "loss": 0.3788, "step": 886 }, { "epoch": 0.16, "learning_rate": 2.8675576344145707e-05, "loss": 0.3252, "step": 887 }, { "epoch": 0.16, "learning_rate": 2.8671955177788603e-05, "loss": 0.3906, "step": 888 }, { "epoch": 0.16, "learning_rate": 2.8668329297070942e-05, "loss": 0.4368, "step": 889 }, { "epoch": 0.16, "learning_rate": 2.8664698703242997e-05, "loss": 0.2887, "step": 890 }, { "epoch": 0.16, "learning_rate": 2.8661063397556667e-05, "loss": 0.3759, "step": 891 }, { "epoch": 0.16, "learning_rate": 2.865742338126548e-05, "loss": 0.3268, "step": 892 }, { "epoch": 0.16, "learning_rate": 2.8653778655624585e-05, "loss": 0.4326, "step": 893 }, { "epoch": 0.16, "learning_rate": 2.865012922189075e-05, "loss": 0.3383, "step": 894 }, { "epoch": 0.16, "learning_rate": 2.8646475081322382e-05, "loss": 0.4536, "step": 895 }, { "epoch": 0.16, "learning_rate": 2.8642816235179497e-05, "loss": 0.3088, "step": 896 }, { "epoch": 0.16, "learning_rate": 2.863915268472373e-05, "loss": 0.3206, "step": 897 }, { "epoch": 0.16, "learning_rate": 2.8635484431218358e-05, "loss": 0.326, "step": 898 }, { "epoch": 0.16, "learning_rate": 2.8631811475928256e-05, "loss": 0.2992, "step": 899 }, { "epoch": 0.16, "learning_rate": 2.8628133820119937e-05, "loss": 0.3679, "step": 900 }, { "epoch": 0.16, "learning_rate": 2.8624451465061535e-05, "loss": 0.3672, "step": 901 }, { "epoch": 0.16, "learning_rate": 2.8620764412022786e-05, "loss": 0.3965, "step": 902 }, { "epoch": 0.16, "learning_rate": 2.861707266227507e-05, "loss": 0.3781, "step": 903 }, { "epoch": 0.16, "learning_rate": 2.861337621709137e-05, "loss": 0.3057, "step": 904 }, { "epoch": 0.16, "learning_rate": 2.8609675077746294e-05, "loss": 0.3425, "step": 905 }, { "epoch": 0.16, "learning_rate": 2.8605969245516073e-05, "loss": 0.3245, "step": 906 }, { "epoch": 0.16, "learning_rate": 2.8602258721678543e-05, "loss": 0.3185, "step": 907 }, { "epoch": 0.16, "learning_rate": 2.8598543507513172e-05, "loss": 0.3376, "step": 908 }, { "epoch": 0.16, "learning_rate": 2.8594823604301035e-05, "loss": 0.3253, "step": 909 }, { "epoch": 0.16, "learning_rate": 2.859109901332483e-05, "loss": 0.3049, "step": 910 }, { "epoch": 0.17, "learning_rate": 2.858736973586887e-05, "loss": 0.3184, "step": 911 }, { "epoch": 0.17, "learning_rate": 2.858363577321909e-05, "loss": 0.3254, "step": 912 }, { "epoch": 0.17, "learning_rate": 2.8579897126663022e-05, "loss": 0.3671, "step": 913 }, { "epoch": 0.17, "learning_rate": 2.857615379748983e-05, "loss": 0.2767, "step": 914 }, { "epoch": 0.17, "learning_rate": 2.8572405786990293e-05, "loss": 0.3203, "step": 915 }, { "epoch": 0.17, "learning_rate": 2.8568653096456793e-05, "loss": 0.3569, "step": 916 }, { "epoch": 0.17, "learning_rate": 2.8564895727183336e-05, "loss": 0.3547, "step": 917 }, { "epoch": 0.17, "learning_rate": 2.8561133680465538e-05, "loss": 0.299, "step": 918 }, { "epoch": 0.17, "learning_rate": 2.855736695760062e-05, "loss": 0.3481, "step": 919 }, { "epoch": 0.17, "learning_rate": 2.8553595559887426e-05, "loss": 0.3477, "step": 920 }, { "epoch": 0.17, "learning_rate": 2.8549819488626417e-05, "loss": 0.4158, "step": 921 }, { "epoch": 0.17, "learning_rate": 2.8546038745119645e-05, "loss": 0.3225, "step": 922 }, { "epoch": 0.17, "learning_rate": 2.8542253330670788e-05, "loss": 0.254, "step": 923 }, { "epoch": 0.17, "learning_rate": 2.8538463246585135e-05, "loss": 0.3776, "step": 924 }, { "epoch": 0.17, "learning_rate": 2.853466849416958e-05, "loss": 0.3419, "step": 925 }, { "epoch": 0.17, "learning_rate": 2.8530869074732633e-05, "loss": 0.3879, "step": 926 }, { "epoch": 0.17, "learning_rate": 2.8527064989584403e-05, "loss": 0.4397, "step": 927 }, { "epoch": 0.17, "learning_rate": 2.8523256240036616e-05, "loss": 0.2614, "step": 928 }, { "epoch": 0.17, "learning_rate": 2.8519442827402605e-05, "loss": 0.3293, "step": 929 }, { "epoch": 0.17, "learning_rate": 2.8515624752997305e-05, "loss": 0.3408, "step": 930 }, { "epoch": 0.17, "learning_rate": 2.8511802018137275e-05, "loss": 0.3579, "step": 931 }, { "epoch": 0.17, "learning_rate": 2.8507974624140657e-05, "loss": 0.3586, "step": 932 }, { "epoch": 0.17, "learning_rate": 2.850414257232722e-05, "loss": 0.406, "step": 933 }, { "epoch": 0.17, "learning_rate": 2.8500305864018325e-05, "loss": 0.3223, "step": 934 }, { "epoch": 0.17, "learning_rate": 2.8496464500536955e-05, "loss": 0.3141, "step": 935 }, { "epoch": 0.17, "learning_rate": 2.849261848320768e-05, "loss": 0.269, "step": 936 }, { "epoch": 0.17, "learning_rate": 2.8488767813356683e-05, "loss": 0.3228, "step": 937 }, { "epoch": 0.17, "learning_rate": 2.8484912492311753e-05, "loss": 0.387, "step": 938 }, { "epoch": 0.17, "learning_rate": 2.848105252140228e-05, "loss": 0.4028, "step": 939 }, { "epoch": 0.17, "learning_rate": 2.847718790195926e-05, "loss": 0.3277, "step": 940 }, { "epoch": 0.17, "learning_rate": 2.8473318635315293e-05, "loss": 0.3577, "step": 941 }, { "epoch": 0.17, "learning_rate": 2.846944472280457e-05, "loss": 0.3303, "step": 942 }, { "epoch": 0.17, "learning_rate": 2.8465566165762902e-05, "loss": 0.3145, "step": 943 }, { "epoch": 0.17, "learning_rate": 2.8461682965527686e-05, "loss": 0.3706, "step": 944 }, { "epoch": 0.17, "learning_rate": 2.845779512343793e-05, "loss": 0.3215, "step": 945 }, { "epoch": 0.17, "learning_rate": 2.8453902640834232e-05, "loss": 0.3043, "step": 946 }, { "epoch": 0.17, "learning_rate": 2.8450005519058806e-05, "loss": 0.343, "step": 947 }, { "epoch": 0.17, "learning_rate": 2.844610375945545e-05, "loss": 0.3561, "step": 948 }, { "epoch": 0.17, "learning_rate": 2.8442197363369574e-05, "loss": 0.324, "step": 949 }, { "epoch": 0.17, "learning_rate": 2.843828633214817e-05, "loss": 0.3695, "step": 950 }, { "epoch": 0.17, "learning_rate": 2.8434370667139854e-05, "loss": 0.4331, "step": 951 }, { "epoch": 0.17, "learning_rate": 2.8430450369694812e-05, "loss": 0.3282, "step": 952 }, { "epoch": 0.17, "learning_rate": 2.842652544116485e-05, "loss": 0.3479, "step": 953 }, { "epoch": 0.17, "learning_rate": 2.842259588290335e-05, "loss": 0.4327, "step": 954 }, { "epoch": 0.17, "learning_rate": 2.841866169626531e-05, "loss": 0.3517, "step": 955 }, { "epoch": 0.17, "learning_rate": 2.8414722882607316e-05, "loss": 0.2996, "step": 956 }, { "epoch": 0.17, "learning_rate": 2.8410779443287542e-05, "loss": 0.3236, "step": 957 }, { "epoch": 0.17, "learning_rate": 2.8406831379665766e-05, "loss": 0.3187, "step": 958 }, { "epoch": 0.17, "learning_rate": 2.8402878693103363e-05, "loss": 0.3499, "step": 959 }, { "epoch": 0.17, "learning_rate": 2.839892138496329e-05, "loss": 0.3267, "step": 960 }, { "epoch": 0.17, "learning_rate": 2.839495945661011e-05, "loss": 0.3065, "step": 961 }, { "epoch": 0.17, "learning_rate": 2.8390992909409976e-05, "loss": 0.3226, "step": 962 }, { "epoch": 0.17, "learning_rate": 2.8387021744730634e-05, "loss": 0.3873, "step": 963 }, { "epoch": 0.17, "learning_rate": 2.838304596394141e-05, "loss": 0.3621, "step": 964 }, { "epoch": 0.17, "learning_rate": 2.8379065568413238e-05, "loss": 0.2726, "step": 965 }, { "epoch": 0.18, "learning_rate": 2.8375080559518636e-05, "loss": 0.2919, "step": 966 }, { "epoch": 0.18, "learning_rate": 2.837109093863172e-05, "loss": 0.3353, "step": 967 }, { "epoch": 0.18, "learning_rate": 2.836709670712818e-05, "loss": 0.2792, "step": 968 }, { "epoch": 0.18, "learning_rate": 2.836309786638531e-05, "loss": 0.4244, "step": 969 }, { "epoch": 0.18, "learning_rate": 2.8359094417781994e-05, "loss": 0.2991, "step": 970 }, { "epoch": 0.18, "learning_rate": 2.8355086362698696e-05, "loss": 0.3372, "step": 971 }, { "epoch": 0.18, "learning_rate": 2.835107370251747e-05, "loss": 0.311, "step": 972 }, { "epoch": 0.18, "learning_rate": 2.8347056438621966e-05, "loss": 0.3052, "step": 973 }, { "epoch": 0.18, "learning_rate": 2.834303457239741e-05, "loss": 0.3547, "step": 974 }, { "epoch": 0.18, "learning_rate": 2.833900810523063e-05, "loss": 0.3672, "step": 975 }, { "epoch": 0.18, "learning_rate": 2.833497703851002e-05, "loss": 0.286, "step": 976 }, { "epoch": 0.18, "learning_rate": 2.8330941373625585e-05, "loss": 0.3014, "step": 977 }, { "epoch": 0.18, "learning_rate": 2.8326901111968893e-05, "loss": 0.3003, "step": 978 }, { "epoch": 0.18, "learning_rate": 2.8322856254933108e-05, "loss": 0.4193, "step": 979 }, { "epoch": 0.18, "learning_rate": 2.8318806803912977e-05, "loss": 0.2711, "step": 980 }, { "epoch": 0.18, "learning_rate": 2.831475276030483e-05, "loss": 0.3516, "step": 981 }, { "epoch": 0.18, "learning_rate": 2.831069412550658e-05, "loss": 0.3562, "step": 982 }, { "epoch": 0.18, "learning_rate": 2.8306630900917727e-05, "loss": 0.34, "step": 983 }, { "epoch": 0.18, "learning_rate": 2.830256308793935e-05, "loss": 0.3916, "step": 984 }, { "epoch": 0.18, "learning_rate": 2.8298490687974115e-05, "loss": 0.3661, "step": 985 }, { "epoch": 0.18, "learning_rate": 2.8294413702426263e-05, "loss": 0.3168, "step": 986 }, { "epoch": 0.18, "learning_rate": 2.8290332132701616e-05, "loss": 0.3904, "step": 987 }, { "epoch": 0.18, "learning_rate": 2.8286245980207587e-05, "loss": 0.3354, "step": 988 }, { "epoch": 0.18, "learning_rate": 2.8282155246353155e-05, "loss": 0.3402, "step": 989 }, { "epoch": 0.18, "learning_rate": 2.8278059932548893e-05, "loss": 0.3579, "step": 990 }, { "epoch": 0.18, "learning_rate": 2.8273960040206944e-05, "loss": 0.3608, "step": 991 }, { "epoch": 0.18, "learning_rate": 2.8269855570741024e-05, "loss": 0.368, "step": 992 }, { "epoch": 0.18, "learning_rate": 2.826574652556645e-05, "loss": 0.3418, "step": 993 }, { "epoch": 0.18, "learning_rate": 2.8261632906100087e-05, "loss": 0.3127, "step": 994 }, { "epoch": 0.18, "learning_rate": 2.8257514713760407e-05, "loss": 0.2797, "step": 995 }, { "epoch": 0.18, "learning_rate": 2.825339194996743e-05, "loss": 0.3506, "step": 996 }, { "epoch": 0.18, "learning_rate": 2.8249264616142776e-05, "loss": 0.3604, "step": 997 }, { "epoch": 0.18, "learning_rate": 2.8245132713709625e-05, "loss": 0.2837, "step": 998 }, { "epoch": 0.18, "learning_rate": 2.8240996244092746e-05, "loss": 0.3318, "step": 999 }, { "epoch": 0.18, "learning_rate": 2.823685520871847e-05, "loss": 0.3539, "step": 1000 }, { "epoch": 0.18, "learning_rate": 2.823270960901471e-05, "loss": 0.3513, "step": 1001 }, { "epoch": 0.18, "learning_rate": 2.8228559446410948e-05, "loss": 0.3226, "step": 1002 }, { "epoch": 0.18, "learning_rate": 2.8224404722338247e-05, "loss": 0.3965, "step": 1003 }, { "epoch": 0.18, "learning_rate": 2.8220245438229237e-05, "loss": 0.3025, "step": 1004 }, { "epoch": 0.18, "learning_rate": 2.8216081595518115e-05, "loss": 0.3452, "step": 1005 }, { "epoch": 0.18, "learning_rate": 2.8211913195640662e-05, "loss": 0.3143, "step": 1006 }, { "epoch": 0.18, "learning_rate": 2.8207740240034226e-05, "loss": 0.3401, "step": 1007 }, { "epoch": 0.18, "learning_rate": 2.8203562730137726e-05, "loss": 0.3228, "step": 1008 }, { "epoch": 0.18, "learning_rate": 2.819938066739164e-05, "loss": 0.3423, "step": 1009 }, { "epoch": 0.18, "learning_rate": 2.8195194053238038e-05, "loss": 0.3134, "step": 1010 }, { "epoch": 0.18, "learning_rate": 2.8191002889120538e-05, "loss": 0.3591, "step": 1011 }, { "epoch": 0.18, "learning_rate": 2.818680717648434e-05, "loss": 0.3108, "step": 1012 }, { "epoch": 0.18, "learning_rate": 2.818260691677621e-05, "loss": 0.2738, "step": 1013 }, { "epoch": 0.18, "learning_rate": 2.8178402111444484e-05, "loss": 0.267, "step": 1014 }, { "epoch": 0.18, "learning_rate": 2.8174192761939052e-05, "loss": 0.3111, "step": 1015 }, { "epoch": 0.18, "learning_rate": 2.8169978869711387e-05, "loss": 0.329, "step": 1016 }, { "epoch": 0.18, "learning_rate": 2.8165760436214522e-05, "loss": 0.2606, "step": 1017 }, { "epoch": 0.18, "learning_rate": 2.816153746290306e-05, "loss": 0.3406, "step": 1018 }, { "epoch": 0.18, "learning_rate": 2.8157309951233155e-05, "loss": 0.3865, "step": 1019 }, { "epoch": 0.18, "learning_rate": 2.8153077902662548e-05, "loss": 0.4026, "step": 1020 }, { "epoch": 0.19, "learning_rate": 2.814884131865053e-05, "loss": 0.2983, "step": 1021 }, { "epoch": 0.19, "learning_rate": 2.8144600200657953e-05, "loss": 0.3187, "step": 1022 }, { "epoch": 0.19, "learning_rate": 2.814035455014725e-05, "loss": 0.2607, "step": 1023 }, { "epoch": 0.19, "learning_rate": 2.813610436858239e-05, "loss": 0.28, "step": 1024 }, { "epoch": 0.19, "learning_rate": 2.8131849657428933e-05, "loss": 0.3129, "step": 1025 }, { "epoch": 0.19, "learning_rate": 2.812759041815398e-05, "loss": 0.3075, "step": 1026 }, { "epoch": 0.19, "learning_rate": 2.8123326652226205e-05, "loss": 0.3268, "step": 1027 }, { "epoch": 0.19, "learning_rate": 2.811905836111584e-05, "loss": 0.3062, "step": 1028 }, { "epoch": 0.19, "learning_rate": 2.8114785546294668e-05, "loss": 0.3928, "step": 1029 }, { "epoch": 0.19, "learning_rate": 2.811050820923605e-05, "loss": 0.3201, "step": 1030 }, { "epoch": 0.19, "learning_rate": 2.8106226351414888e-05, "loss": 0.3756, "step": 1031 }, { "epoch": 0.19, "learning_rate": 2.8101939974307657e-05, "loss": 0.337, "step": 1032 }, { "epoch": 0.19, "learning_rate": 2.8097649079392382e-05, "loss": 0.3959, "step": 1033 }, { "epoch": 0.19, "learning_rate": 2.809335366814865e-05, "loss": 0.333, "step": 1034 }, { "epoch": 0.19, "learning_rate": 2.8089053742057603e-05, "loss": 0.3065, "step": 1035 }, { "epoch": 0.19, "learning_rate": 2.8084749302601937e-05, "loss": 0.2604, "step": 1036 }, { "epoch": 0.19, "learning_rate": 2.8080440351265918e-05, "loss": 0.3262, "step": 1037 }, { "epoch": 0.19, "learning_rate": 2.8076126889535344e-05, "loss": 0.324, "step": 1038 }, { "epoch": 0.19, "learning_rate": 2.8071808918897594e-05, "loss": 0.3667, "step": 1039 }, { "epoch": 0.19, "learning_rate": 2.8067486440841584e-05, "loss": 0.3468, "step": 1040 }, { "epoch": 0.19, "learning_rate": 2.806315945685779e-05, "loss": 0.3259, "step": 1041 }, { "epoch": 0.19, "learning_rate": 2.805882796843824e-05, "loss": 0.3478, "step": 1042 }, { "epoch": 0.19, "learning_rate": 2.8054491977076522e-05, "loss": 0.3612, "step": 1043 }, { "epoch": 0.19, "learning_rate": 2.8050151484267768e-05, "loss": 0.3167, "step": 1044 }, { "epoch": 0.19, "learning_rate": 2.8045806491508667e-05, "loss": 0.3242, "step": 1045 }, { "epoch": 0.19, "learning_rate": 2.8041457000297456e-05, "loss": 0.364, "step": 1046 }, { "epoch": 0.19, "learning_rate": 2.8037103012133928e-05, "loss": 0.2957, "step": 1047 }, { "epoch": 0.19, "learning_rate": 2.803274452851943e-05, "loss": 0.3472, "step": 1048 }, { "epoch": 0.19, "learning_rate": 2.8028381550956838e-05, "loss": 0.3866, "step": 1049 }, { "epoch": 0.19, "learning_rate": 2.8024014080950603e-05, "loss": 0.3457, "step": 1050 }, { "epoch": 0.19, "learning_rate": 2.801964212000672e-05, "loss": 0.2985, "step": 1051 }, { "epoch": 0.19, "learning_rate": 2.8015265669632715e-05, "loss": 0.3329, "step": 1052 }, { "epoch": 0.19, "learning_rate": 2.8010884731337685e-05, "loss": 0.3232, "step": 1053 }, { "epoch": 0.19, "learning_rate": 2.800649930663226e-05, "loss": 0.34, "step": 1054 }, { "epoch": 0.19, "learning_rate": 2.8002109397028624e-05, "loss": 0.3392, "step": 1055 }, { "epoch": 0.19, "learning_rate": 2.7997715004040495e-05, "loss": 0.3542, "step": 1056 }, { "epoch": 0.19, "learning_rate": 2.799331612918316e-05, "loss": 0.3708, "step": 1057 }, { "epoch": 0.19, "learning_rate": 2.7988912773973433e-05, "loss": 0.2976, "step": 1058 }, { "epoch": 0.19, "learning_rate": 2.7984504939929673e-05, "loss": 0.2957, "step": 1059 }, { "epoch": 0.19, "learning_rate": 2.7980092628571795e-05, "loss": 0.3351, "step": 1060 }, { "epoch": 0.19, "learning_rate": 2.797567584142125e-05, "loss": 0.3562, "step": 1061 }, { "epoch": 0.19, "learning_rate": 2.797125458000103e-05, "loss": 0.3427, "step": 1062 }, { "epoch": 0.19, "learning_rate": 2.7966828845835674e-05, "loss": 0.3011, "step": 1063 }, { "epoch": 0.19, "learning_rate": 2.7962398640451267e-05, "loss": 0.3057, "step": 1064 }, { "epoch": 0.19, "learning_rate": 2.7957963965375428e-05, "loss": 0.3131, "step": 1065 }, { "epoch": 0.19, "learning_rate": 2.795352482213732e-05, "loss": 0.2631, "step": 1066 }, { "epoch": 0.19, "learning_rate": 2.7949081212267647e-05, "loss": 0.3943, "step": 1067 }, { "epoch": 0.19, "learning_rate": 2.7944633137298657e-05, "loss": 0.3042, "step": 1068 }, { "epoch": 0.19, "learning_rate": 2.7940180598764134e-05, "loss": 0.4091, "step": 1069 }, { "epoch": 0.19, "learning_rate": 2.79357235981994e-05, "loss": 0.2692, "step": 1070 }, { "epoch": 0.19, "learning_rate": 2.7931262137141316e-05, "loss": 0.3282, "step": 1071 }, { "epoch": 0.19, "learning_rate": 2.7926796217128277e-05, "loss": 0.3578, "step": 1072 }, { "epoch": 0.19, "learning_rate": 2.7922325839700238e-05, "loss": 0.2742, "step": 1073 }, { "epoch": 0.19, "learning_rate": 2.7917851006398655e-05, "loss": 0.3203, "step": 1074 }, { "epoch": 0.19, "learning_rate": 2.7913371718766545e-05, "loss": 0.3853, "step": 1075 }, { "epoch": 0.2, "learning_rate": 2.7908887978348456e-05, "loss": 0.2819, "step": 1076 }, { "epoch": 0.2, "learning_rate": 2.7904399786690473e-05, "loss": 0.3575, "step": 1077 }, { "epoch": 0.2, "learning_rate": 2.789990714534021e-05, "loss": 0.371, "step": 1078 }, { "epoch": 0.2, "learning_rate": 2.7895410055846818e-05, "loss": 0.3623, "step": 1079 }, { "epoch": 0.2, "learning_rate": 2.7890908519760985e-05, "loss": 0.3459, "step": 1080 }, { "epoch": 0.2, "learning_rate": 2.788640253863493e-05, "loss": 0.3833, "step": 1081 }, { "epoch": 0.2, "learning_rate": 2.7881892114022398e-05, "loss": 0.3381, "step": 1082 }, { "epoch": 0.2, "learning_rate": 2.787737724747868e-05, "loss": 0.3671, "step": 1083 }, { "epoch": 0.2, "learning_rate": 2.7872857940560588e-05, "loss": 0.3606, "step": 1084 }, { "epoch": 0.2, "learning_rate": 2.7868334194826467e-05, "loss": 0.3617, "step": 1085 }, { "epoch": 0.2, "learning_rate": 2.78638060118362e-05, "loss": 0.3188, "step": 1086 }, { "epoch": 0.2, "learning_rate": 2.785927339315119e-05, "loss": 0.3348, "step": 1087 }, { "epoch": 0.2, "learning_rate": 2.785473634033437e-05, "loss": 0.3129, "step": 1088 }, { "epoch": 0.2, "learning_rate": 2.7850194854950208e-05, "loss": 0.3135, "step": 1089 }, { "epoch": 0.2, "learning_rate": 2.7845648938564704e-05, "loss": 0.3125, "step": 1090 }, { "epoch": 0.2, "learning_rate": 2.7841098592745374e-05, "loss": 0.2924, "step": 1091 }, { "epoch": 0.2, "learning_rate": 2.7836543819061264e-05, "loss": 0.4072, "step": 1092 }, { "epoch": 0.2, "learning_rate": 2.783198461908296e-05, "loss": 0.4026, "step": 1093 }, { "epoch": 0.2, "learning_rate": 2.7827420994382557e-05, "loss": 0.355, "step": 1094 }, { "epoch": 0.2, "learning_rate": 2.782285294653368e-05, "loss": 0.4048, "step": 1095 }, { "epoch": 0.2, "learning_rate": 2.781828047711149e-05, "loss": 0.2968, "step": 1096 }, { "epoch": 0.2, "learning_rate": 2.781370358769266e-05, "loss": 0.3136, "step": 1097 }, { "epoch": 0.2, "learning_rate": 2.78091222798554e-05, "loss": 0.3044, "step": 1098 }, { "epoch": 0.2, "learning_rate": 2.7804536555179427e-05, "loss": 0.3086, "step": 1099 }, { "epoch": 0.2, "learning_rate": 2.779994641524599e-05, "loss": 0.3534, "step": 1100 }, { "epoch": 0.2, "learning_rate": 2.7795351861637863e-05, "loss": 0.3058, "step": 1101 }, { "epoch": 0.2, "learning_rate": 2.779075289593934e-05, "loss": 0.2653, "step": 1102 }, { "epoch": 0.2, "learning_rate": 2.7786149519736227e-05, "loss": 0.3472, "step": 1103 }, { "epoch": 0.2, "learning_rate": 2.778154173461587e-05, "loss": 0.3566, "step": 1104 }, { "epoch": 0.2, "learning_rate": 2.777692954216712e-05, "loss": 0.3475, "step": 1105 }, { "epoch": 0.2, "learning_rate": 2.777231294398035e-05, "loss": 0.3213, "step": 1106 }, { "epoch": 0.2, "learning_rate": 2.7767691941647456e-05, "loss": 0.3209, "step": 1107 }, { "epoch": 0.2, "learning_rate": 2.7763066536761854e-05, "loss": 0.2352, "step": 1108 }, { "epoch": 0.2, "learning_rate": 2.775843673091847e-05, "loss": 0.3329, "step": 1109 }, { "epoch": 0.2, "learning_rate": 2.7753802525713757e-05, "loss": 0.3229, "step": 1110 }, { "epoch": 0.2, "learning_rate": 2.774916392274568e-05, "loss": 0.2699, "step": 1111 }, { "epoch": 0.2, "learning_rate": 2.7744520923613718e-05, "loss": 0.366, "step": 1112 }, { "epoch": 0.2, "learning_rate": 2.773987352991887e-05, "loss": 0.3175, "step": 1113 }, { "epoch": 0.2, "learning_rate": 2.7735221743263655e-05, "loss": 0.2882, "step": 1114 }, { "epoch": 0.2, "learning_rate": 2.773056556525209e-05, "loss": 0.3385, "step": 1115 }, { "epoch": 0.2, "learning_rate": 2.7725904997489725e-05, "loss": 0.3074, "step": 1116 }, { "epoch": 0.2, "learning_rate": 2.7721240041583616e-05, "loss": 0.3264, "step": 1117 }, { "epoch": 0.2, "learning_rate": 2.771657069914233e-05, "loss": 0.3123, "step": 1118 }, { "epoch": 0.2, "learning_rate": 2.7711896971775947e-05, "loss": 0.365, "step": 1119 }, { "epoch": 0.2, "learning_rate": 2.770721886109606e-05, "loss": 0.1991, "step": 1120 }, { "epoch": 0.2, "learning_rate": 2.7702536368715783e-05, "loss": 0.3077, "step": 1121 }, { "epoch": 0.2, "learning_rate": 2.769784949624972e-05, "loss": 0.3303, "step": 1122 }, { "epoch": 0.2, "learning_rate": 2.769315824531401e-05, "loss": 0.3184, "step": 1123 }, { "epoch": 0.2, "learning_rate": 2.7688462617526277e-05, "loss": 0.3091, "step": 1124 }, { "epoch": 0.2, "learning_rate": 2.768376261450567e-05, "loss": 0.355, "step": 1125 }, { "epoch": 0.2, "learning_rate": 2.7679058237872848e-05, "loss": 0.3506, "step": 1126 }, { "epoch": 0.2, "learning_rate": 2.7674349489249972e-05, "loss": 0.3323, "step": 1127 }, { "epoch": 0.2, "learning_rate": 2.7669636370260705e-05, "loss": 0.345, "step": 1128 }, { "epoch": 0.2, "learning_rate": 2.7664918882530227e-05, "loss": 0.2549, "step": 1129 }, { "epoch": 0.2, "learning_rate": 2.7660197027685226e-05, "loss": 0.3329, "step": 1130 }, { "epoch": 0.21, "learning_rate": 2.7655470807353883e-05, "loss": 0.3878, "step": 1131 }, { "epoch": 0.21, "learning_rate": 2.7650740223165896e-05, "loss": 0.3132, "step": 1132 }, { "epoch": 0.21, "learning_rate": 2.7646005276752467e-05, "loss": 0.2975, "step": 1133 }, { "epoch": 0.21, "learning_rate": 2.7641265969746293e-05, "loss": 0.3197, "step": 1134 }, { "epoch": 0.21, "learning_rate": 2.7636522303781586e-05, "loss": 0.3186, "step": 1135 }, { "epoch": 0.21, "learning_rate": 2.7631774280494057e-05, "loss": 0.3209, "step": 1136 }, { "epoch": 0.21, "learning_rate": 2.7627021901520912e-05, "loss": 0.2941, "step": 1137 }, { "epoch": 0.21, "learning_rate": 2.7622265168500866e-05, "loss": 0.3114, "step": 1138 }, { "epoch": 0.21, "learning_rate": 2.761750408307414e-05, "loss": 0.2904, "step": 1139 }, { "epoch": 0.21, "learning_rate": 2.7612738646882448e-05, "loss": 0.3291, "step": 1140 }, { "epoch": 0.21, "learning_rate": 2.760796886156901e-05, "loss": 0.286, "step": 1141 }, { "epoch": 0.21, "learning_rate": 2.7603194728778532e-05, "loss": 0.3533, "step": 1142 }, { "epoch": 0.21, "learning_rate": 2.7598416250157243e-05, "loss": 0.2766, "step": 1143 }, { "epoch": 0.21, "learning_rate": 2.759363342735284e-05, "loss": 0.2944, "step": 1144 }, { "epoch": 0.21, "learning_rate": 2.758884626201455e-05, "loss": 0.2754, "step": 1145 }, { "epoch": 0.21, "learning_rate": 2.758405475579308e-05, "loss": 0.3368, "step": 1146 }, { "epoch": 0.21, "learning_rate": 2.7579258910340627e-05, "loss": 0.304, "step": 1147 }, { "epoch": 0.21, "learning_rate": 2.7574458727310905e-05, "loss": 0.2952, "step": 1148 }, { "epoch": 0.21, "learning_rate": 2.7569654208359106e-05, "loss": 0.3146, "step": 1149 }, { "epoch": 0.21, "learning_rate": 2.7564845355141925e-05, "loss": 0.3181, "step": 1150 }, { "epoch": 0.21, "learning_rate": 2.7560032169317547e-05, "loss": 0.3152, "step": 1151 }, { "epoch": 0.21, "learning_rate": 2.7555214652545653e-05, "loss": 0.2949, "step": 1152 }, { "epoch": 0.21, "learning_rate": 2.7550392806487423e-05, "loss": 0.2891, "step": 1153 }, { "epoch": 0.21, "learning_rate": 2.7545566632805518e-05, "loss": 0.3331, "step": 1154 }, { "epoch": 0.21, "learning_rate": 2.7540736133164103e-05, "loss": 0.2499, "step": 1155 }, { "epoch": 0.21, "learning_rate": 2.7535901309228833e-05, "loss": 0.322, "step": 1156 }, { "epoch": 0.21, "learning_rate": 2.7531062162666847e-05, "loss": 0.3074, "step": 1157 }, { "epoch": 0.21, "learning_rate": 2.7526218695146777e-05, "loss": 0.2982, "step": 1158 }, { "epoch": 0.21, "learning_rate": 2.7521370908338748e-05, "loss": 0.3214, "step": 1159 }, { "epoch": 0.21, "learning_rate": 2.751651880391437e-05, "loss": 0.2393, "step": 1160 }, { "epoch": 0.21, "learning_rate": 2.751166238354675e-05, "loss": 0.2347, "step": 1161 }, { "epoch": 0.21, "learning_rate": 2.7506801648910476e-05, "loss": 0.4033, "step": 1162 }, { "epoch": 0.21, "learning_rate": 2.750193660168162e-05, "loss": 0.3744, "step": 1163 }, { "epoch": 0.21, "learning_rate": 2.7497067243537755e-05, "loss": 0.2903, "step": 1164 }, { "epoch": 0.21, "learning_rate": 2.7492193576157928e-05, "loss": 0.3607, "step": 1165 }, { "epoch": 0.21, "learning_rate": 2.7487315601222673e-05, "loss": 0.3069, "step": 1166 }, { "epoch": 0.21, "learning_rate": 2.7482433320414014e-05, "loss": 0.3154, "step": 1167 }, { "epoch": 0.21, "learning_rate": 2.7477546735415467e-05, "loss": 0.3658, "step": 1168 }, { "epoch": 0.21, "learning_rate": 2.747265584791201e-05, "loss": 0.291, "step": 1169 }, { "epoch": 0.21, "learning_rate": 2.746776065959012e-05, "loss": 0.3154, "step": 1170 }, { "epoch": 0.21, "learning_rate": 2.746286117213776e-05, "loss": 0.3022, "step": 1171 }, { "epoch": 0.21, "learning_rate": 2.745795738724437e-05, "loss": 0.3679, "step": 1172 }, { "epoch": 0.21, "learning_rate": 2.7453049306600872e-05, "loss": 0.3164, "step": 1173 }, { "epoch": 0.21, "learning_rate": 2.7448136931899666e-05, "loss": 0.2798, "step": 1174 }, { "epoch": 0.21, "learning_rate": 2.7443220264834636e-05, "loss": 0.3062, "step": 1175 }, { "epoch": 0.21, "learning_rate": 2.7438299307101145e-05, "loss": 0.3618, "step": 1176 }, { "epoch": 0.21, "learning_rate": 2.743337406039604e-05, "loss": 0.2921, "step": 1177 }, { "epoch": 0.21, "learning_rate": 2.742844452641764e-05, "loss": 0.3418, "step": 1178 }, { "epoch": 0.21, "learning_rate": 2.7423510706865752e-05, "loss": 0.2848, "step": 1179 }, { "epoch": 0.21, "learning_rate": 2.741857260344165e-05, "loss": 0.3197, "step": 1180 }, { "epoch": 0.21, "learning_rate": 2.741363021784809e-05, "loss": 0.2972, "step": 1181 }, { "epoch": 0.21, "learning_rate": 2.7408683551789304e-05, "loss": 0.2542, "step": 1182 }, { "epoch": 0.21, "learning_rate": 2.7403732606971e-05, "loss": 0.3152, "step": 1183 }, { "epoch": 0.21, "learning_rate": 2.7398777385100368e-05, "loss": 0.2778, "step": 1184 }, { "epoch": 0.21, "learning_rate": 2.7393817887886056e-05, "loss": 0.2583, "step": 1185 }, { "epoch": 0.22, "learning_rate": 2.7388854117038204e-05, "loss": 0.3367, "step": 1186 }, { "epoch": 0.22, "learning_rate": 2.7383886074268413e-05, "loss": 0.3416, "step": 1187 }, { "epoch": 0.22, "learning_rate": 2.737891376128977e-05, "loss": 0.3909, "step": 1188 }, { "epoch": 0.22, "learning_rate": 2.737393717981682e-05, "loss": 0.355, "step": 1189 }, { "epoch": 0.22, "learning_rate": 2.736895633156559e-05, "loss": 0.3544, "step": 1190 }, { "epoch": 0.22, "learning_rate": 2.7363971218253575e-05, "loss": 0.3394, "step": 1191 }, { "epoch": 0.22, "learning_rate": 2.735898184159974e-05, "loss": 0.3219, "step": 1192 }, { "epoch": 0.22, "learning_rate": 2.7353988203324523e-05, "loss": 0.3606, "step": 1193 }, { "epoch": 0.22, "learning_rate": 2.7348990305149824e-05, "loss": 0.3247, "step": 1194 }, { "epoch": 0.22, "learning_rate": 2.7343988148799018e-05, "loss": 0.257, "step": 1195 }, { "epoch": 0.22, "learning_rate": 2.733898173599695e-05, "loss": 0.2958, "step": 1196 }, { "epoch": 0.22, "learning_rate": 2.7333971068469932e-05, "loss": 0.3567, "step": 1197 }, { "epoch": 0.22, "learning_rate": 2.732895614794574e-05, "loss": 0.3379, "step": 1198 }, { "epoch": 0.22, "learning_rate": 2.7323936976153613e-05, "loss": 0.3264, "step": 1199 }, { "epoch": 0.22, "learning_rate": 2.731891355482427e-05, "loss": 0.3361, "step": 1200 }, { "epoch": 0.22, "learning_rate": 2.7313885885689876e-05, "loss": 0.3918, "step": 1201 }, { "epoch": 0.22, "learning_rate": 2.730885397048408e-05, "loss": 0.3411, "step": 1202 }, { "epoch": 0.22, "learning_rate": 2.7303817810941975e-05, "loss": 0.2894, "step": 1203 }, { "epoch": 0.22, "learning_rate": 2.7298777408800137e-05, "loss": 0.3317, "step": 1204 }, { "epoch": 0.22, "learning_rate": 2.7293732765796592e-05, "loss": 0.2806, "step": 1205 }, { "epoch": 0.22, "learning_rate": 2.728868388367084e-05, "loss": 0.3474, "step": 1206 }, { "epoch": 0.22, "learning_rate": 2.7283630764163827e-05, "loss": 0.3047, "step": 1207 }, { "epoch": 0.22, "learning_rate": 2.727857340901797e-05, "loss": 0.3601, "step": 1208 }, { "epoch": 0.22, "learning_rate": 2.727351181997715e-05, "loss": 0.2701, "step": 1209 }, { "epoch": 0.22, "learning_rate": 2.7268445998786694e-05, "loss": 0.3477, "step": 1210 }, { "epoch": 0.22, "learning_rate": 2.7263375947193406e-05, "loss": 0.236, "step": 1211 }, { "epoch": 0.22, "learning_rate": 2.725830166694554e-05, "loss": 0.2367, "step": 1212 }, { "epoch": 0.22, "learning_rate": 2.72532231597928e-05, "loss": 0.2867, "step": 1213 }, { "epoch": 0.22, "learning_rate": 2.7248140427486365e-05, "loss": 0.2673, "step": 1214 }, { "epoch": 0.22, "learning_rate": 2.7243053471778857e-05, "loss": 0.276, "step": 1215 }, { "epoch": 0.22, "learning_rate": 2.7237962294424358e-05, "loss": 0.3193, "step": 1216 }, { "epoch": 0.22, "learning_rate": 2.723286689717841e-05, "loss": 0.33, "step": 1217 }, { "epoch": 0.22, "learning_rate": 2.7227767281798002e-05, "loss": 0.2494, "step": 1218 }, { "epoch": 0.22, "learning_rate": 2.7222663450041593e-05, "loss": 0.3154, "step": 1219 }, { "epoch": 0.22, "learning_rate": 2.721755540366907e-05, "loss": 0.3512, "step": 1220 }, { "epoch": 0.22, "learning_rate": 2.7212443144441796e-05, "loss": 0.3611, "step": 1221 }, { "epoch": 0.22, "learning_rate": 2.720732667412258e-05, "loss": 0.3085, "step": 1222 }, { "epoch": 0.22, "learning_rate": 2.720220599447568e-05, "loss": 0.2802, "step": 1223 }, { "epoch": 0.22, "learning_rate": 2.719708110726681e-05, "loss": 0.2802, "step": 1224 }, { "epoch": 0.22, "learning_rate": 2.7191952014263132e-05, "loss": 0.3119, "step": 1225 }, { "epoch": 0.22, "learning_rate": 2.718681871723326e-05, "loss": 0.3424, "step": 1226 }, { "epoch": 0.22, "learning_rate": 2.7181681217947248e-05, "loss": 0.3242, "step": 1227 }, { "epoch": 0.22, "learning_rate": 2.7176539518176617e-05, "loss": 0.2517, "step": 1228 }, { "epoch": 0.22, "learning_rate": 2.717139361969432e-05, "loss": 0.2825, "step": 1229 }, { "epoch": 0.22, "learning_rate": 2.716624352427477e-05, "loss": 0.3124, "step": 1230 }, { "epoch": 0.22, "learning_rate": 2.7161089233693822e-05, "loss": 0.3245, "step": 1231 }, { "epoch": 0.22, "learning_rate": 2.7155930749728778e-05, "loss": 0.2714, "step": 1232 }, { "epoch": 0.22, "learning_rate": 2.715076807415837e-05, "loss": 0.3596, "step": 1233 }, { "epoch": 0.22, "learning_rate": 2.7145601208762816e-05, "loss": 0.254, "step": 1234 }, { "epoch": 0.22, "learning_rate": 2.7140430155323732e-05, "loss": 0.3286, "step": 1235 }, { "epoch": 0.22, "learning_rate": 2.7135254915624213e-05, "loss": 0.2886, "step": 1236 }, { "epoch": 0.22, "learning_rate": 2.7130075491448775e-05, "loss": 0.3004, "step": 1237 }, { "epoch": 0.22, "learning_rate": 2.7124891884583392e-05, "loss": 0.2765, "step": 1238 }, { "epoch": 0.22, "learning_rate": 2.711970409681547e-05, "loss": 0.2716, "step": 1239 }, { "epoch": 0.22, "learning_rate": 2.7114512129933867e-05, "loss": 0.1929, "step": 1240 }, { "epoch": 0.22, "learning_rate": 2.710931598572887e-05, "loss": 0.2913, "step": 1241 }, { "epoch": 0.23, "learning_rate": 2.710411566599221e-05, "loss": 0.3025, "step": 1242 }, { "epoch": 0.23, "learning_rate": 2.7098911172517063e-05, "loss": 0.2954, "step": 1243 }, { "epoch": 0.23, "learning_rate": 2.709370250709805e-05, "loss": 0.3602, "step": 1244 }, { "epoch": 0.23, "learning_rate": 2.7088489671531207e-05, "loss": 0.2942, "step": 1245 }, { "epoch": 0.23, "learning_rate": 2.7083272667614034e-05, "loss": 0.3082, "step": 1246 }, { "epoch": 0.23, "learning_rate": 2.707805149714545e-05, "loss": 0.3134, "step": 1247 }, { "epoch": 0.23, "learning_rate": 2.7072826161925825e-05, "loss": 0.3024, "step": 1248 }, { "epoch": 0.23, "learning_rate": 2.7067596663756953e-05, "loss": 0.2778, "step": 1249 }, { "epoch": 0.23, "learning_rate": 2.7062363004442067e-05, "loss": 0.2351, "step": 1250 }, { "epoch": 0.23, "learning_rate": 2.705712518578584e-05, "loss": 0.2589, "step": 1251 }, { "epoch": 0.23, "learning_rate": 2.7051883209594374e-05, "loss": 0.3158, "step": 1252 }, { "epoch": 0.23, "learning_rate": 2.7046637077675202e-05, "loss": 0.285, "step": 1253 }, { "epoch": 0.23, "learning_rate": 2.7041386791837302e-05, "loss": 0.2979, "step": 1254 }, { "epoch": 0.23, "learning_rate": 2.7036132353891075e-05, "loss": 0.2398, "step": 1255 }, { "epoch": 0.23, "learning_rate": 2.703087376564835e-05, "loss": 0.2104, "step": 1256 }, { "epoch": 0.23, "learning_rate": 2.7025611028922395e-05, "loss": 0.2856, "step": 1257 }, { "epoch": 0.23, "learning_rate": 2.7020344145527904e-05, "loss": 0.2637, "step": 1258 }, { "epoch": 0.23, "learning_rate": 2.7015073117281003e-05, "loss": 0.3582, "step": 1259 }, { "epoch": 0.23, "learning_rate": 2.7009797945999252e-05, "loss": 0.306, "step": 1260 }, { "epoch": 0.23, "learning_rate": 2.7004518633501628e-05, "loss": 0.2714, "step": 1261 }, { "epoch": 0.23, "learning_rate": 2.6999235181608546e-05, "loss": 0.2916, "step": 1262 }, { "epoch": 0.23, "learning_rate": 2.6993947592141842e-05, "loss": 0.3197, "step": 1263 }, { "epoch": 0.23, "learning_rate": 2.6988655866924782e-05, "loss": 0.3198, "step": 1264 }, { "epoch": 0.23, "learning_rate": 2.698336000778206e-05, "loss": 0.2814, "step": 1265 }, { "epoch": 0.23, "learning_rate": 2.697806001653979e-05, "loss": 0.3094, "step": 1266 }, { "epoch": 0.23, "learning_rate": 2.697275589502552e-05, "loss": 0.3195, "step": 1267 }, { "epoch": 0.23, "learning_rate": 2.6967447645068208e-05, "loss": 0.3347, "step": 1268 }, { "epoch": 0.23, "learning_rate": 2.6962135268498246e-05, "loss": 0.3239, "step": 1269 }, { "epoch": 0.23, "learning_rate": 2.6956818767147446e-05, "loss": 0.2999, "step": 1270 }, { "epoch": 0.23, "learning_rate": 2.6951498142849046e-05, "loss": 0.256, "step": 1271 }, { "epoch": 0.23, "learning_rate": 2.69461733974377e-05, "loss": 0.2979, "step": 1272 }, { "epoch": 0.23, "learning_rate": 2.6940844532749485e-05, "loss": 0.3196, "step": 1273 }, { "epoch": 0.23, "learning_rate": 2.6935511550621902e-05, "loss": 0.2766, "step": 1274 }, { "epoch": 0.23, "learning_rate": 2.693017445289387e-05, "loss": 0.3202, "step": 1275 }, { "epoch": 0.23, "learning_rate": 2.6924833241405715e-05, "loss": 0.3149, "step": 1276 }, { "epoch": 0.23, "learning_rate": 2.6919487917999204e-05, "loss": 0.3501, "step": 1277 }, { "epoch": 0.23, "learning_rate": 2.6914138484517507e-05, "loss": 0.3512, "step": 1278 }, { "epoch": 0.23, "learning_rate": 2.6908784942805213e-05, "loss": 0.3049, "step": 1279 }, { "epoch": 0.23, "learning_rate": 2.6903427294708327e-05, "loss": 0.365, "step": 1280 }, { "epoch": 0.23, "learning_rate": 2.6898065542074276e-05, "loss": 0.2509, "step": 1281 }, { "epoch": 0.23, "learning_rate": 2.68926996867519e-05, "loss": 0.2712, "step": 1282 }, { "epoch": 0.23, "learning_rate": 2.688732973059145e-05, "loss": 0.2429, "step": 1283 }, { "epoch": 0.23, "learning_rate": 2.688195567544459e-05, "loss": 0.23, "step": 1284 }, { "epoch": 0.23, "learning_rate": 2.6876577523164404e-05, "loss": 0.3025, "step": 1285 }, { "epoch": 0.23, "learning_rate": 2.687119527560538e-05, "loss": 0.3582, "step": 1286 }, { "epoch": 0.23, "learning_rate": 2.6865808934623433e-05, "loss": 0.3159, "step": 1287 }, { "epoch": 0.23, "learning_rate": 2.6860418502075866e-05, "loss": 0.3143, "step": 1288 }, { "epoch": 0.23, "learning_rate": 2.6855023979821424e-05, "loss": 0.303, "step": 1289 }, { "epoch": 0.23, "learning_rate": 2.6849625369720228e-05, "loss": 0.3375, "step": 1290 }, { "epoch": 0.23, "learning_rate": 2.684422267363384e-05, "loss": 0.3239, "step": 1291 }, { "epoch": 0.23, "learning_rate": 2.6838815893425212e-05, "loss": 0.3301, "step": 1292 }, { "epoch": 0.23, "learning_rate": 2.6833405030958702e-05, "loss": 0.3623, "step": 1293 }, { "epoch": 0.23, "learning_rate": 2.682799008810009e-05, "loss": 0.2821, "step": 1294 }, { "epoch": 0.23, "learning_rate": 2.682257106671655e-05, "loss": 0.3015, "step": 1295 }, { "epoch": 0.23, "learning_rate": 2.681714796867667e-05, "loss": 0.2959, "step": 1296 }, { "epoch": 0.24, "learning_rate": 2.6811720795850444e-05, "loss": 0.3115, "step": 1297 }, { "epoch": 0.24, "learning_rate": 2.680628955010927e-05, "loss": 0.2629, "step": 1298 }, { "epoch": 0.24, "learning_rate": 2.680085423332594e-05, "loss": 0.2758, "step": 1299 }, { "epoch": 0.24, "learning_rate": 2.6795414847374667e-05, "loss": 0.3154, "step": 1300 }, { "epoch": 0.24, "learning_rate": 2.6789971394131054e-05, "loss": 0.2805, "step": 1301 }, { "epoch": 0.24, "learning_rate": 2.6784523875472113e-05, "loss": 0.3092, "step": 1302 }, { "epoch": 0.24, "learning_rate": 2.6779072293276253e-05, "loss": 0.2877, "step": 1303 }, { "epoch": 0.24, "learning_rate": 2.6773616649423296e-05, "loss": 0.3224, "step": 1304 }, { "epoch": 0.24, "learning_rate": 2.676815694579445e-05, "loss": 0.3292, "step": 1305 }, { "epoch": 0.24, "learning_rate": 2.6762693184272334e-05, "loss": 0.2528, "step": 1306 }, { "epoch": 0.24, "learning_rate": 2.6757225366740946e-05, "loss": 0.3374, "step": 1307 }, { "epoch": 0.24, "learning_rate": 2.6751753495085718e-05, "loss": 0.3058, "step": 1308 }, { "epoch": 0.24, "learning_rate": 2.6746277571193445e-05, "loss": 0.3413, "step": 1309 }, { "epoch": 0.24, "learning_rate": 2.674079759695234e-05, "loss": 0.3201, "step": 1310 }, { "epoch": 0.24, "learning_rate": 2.6735313574252004e-05, "loss": 0.2799, "step": 1311 }, { "epoch": 0.24, "learning_rate": 2.6729825504983443e-05, "loss": 0.2488, "step": 1312 }, { "epoch": 0.24, "learning_rate": 2.672433339103904e-05, "loss": 0.2731, "step": 1313 }, { "epoch": 0.24, "learning_rate": 2.6718837234312593e-05, "loss": 0.319, "step": 1314 }, { "epoch": 0.24, "learning_rate": 2.6713337036699286e-05, "loss": 0.2799, "step": 1315 }, { "epoch": 0.24, "learning_rate": 2.670783280009569e-05, "loss": 0.3226, "step": 1316 }, { "epoch": 0.24, "learning_rate": 2.670232452639978e-05, "loss": 0.2753, "step": 1317 }, { "epoch": 0.24, "learning_rate": 2.6696812217510913e-05, "loss": 0.2585, "step": 1318 }, { "epoch": 0.24, "learning_rate": 2.6691295875329847e-05, "loss": 0.3494, "step": 1319 }, { "epoch": 0.24, "learning_rate": 2.6685775501758726e-05, "loss": 0.2794, "step": 1320 }, { "epoch": 0.24, "learning_rate": 2.668025109870108e-05, "loss": 0.3167, "step": 1321 }, { "epoch": 0.24, "learning_rate": 2.6674722668061832e-05, "loss": 0.2529, "step": 1322 }, { "epoch": 0.24, "learning_rate": 2.66691902117473e-05, "loss": 0.3025, "step": 1323 }, { "epoch": 0.24, "learning_rate": 2.6663653731665184e-05, "loss": 0.261, "step": 1324 }, { "epoch": 0.24, "learning_rate": 2.665811322972456e-05, "loss": 0.3524, "step": 1325 }, { "epoch": 0.24, "learning_rate": 2.665256870783592e-05, "loss": 0.2158, "step": 1326 }, { "epoch": 0.24, "learning_rate": 2.664702016791111e-05, "loss": 0.3137, "step": 1327 }, { "epoch": 0.24, "learning_rate": 2.6641467611863386e-05, "loss": 0.3456, "step": 1328 }, { "epoch": 0.24, "learning_rate": 2.6635911041607373e-05, "loss": 0.2635, "step": 1329 }, { "epoch": 0.24, "learning_rate": 2.6630350459059092e-05, "loss": 0.2416, "step": 1330 }, { "epoch": 0.24, "learning_rate": 2.6624785866135938e-05, "loss": 0.3204, "step": 1331 }, { "epoch": 0.24, "learning_rate": 2.6619217264756693e-05, "loss": 0.3972, "step": 1332 }, { "epoch": 0.24, "learning_rate": 2.661364465684152e-05, "loss": 0.2762, "step": 1333 }, { "epoch": 0.24, "learning_rate": 2.6608068044311966e-05, "loss": 0.2886, "step": 1334 }, { "epoch": 0.24, "learning_rate": 2.660248742909096e-05, "loss": 0.2562, "step": 1335 }, { "epoch": 0.24, "learning_rate": 2.65969028131028e-05, "loss": 0.2874, "step": 1336 }, { "epoch": 0.24, "learning_rate": 2.6591314198273187e-05, "loss": 0.2662, "step": 1337 }, { "epoch": 0.24, "learning_rate": 2.658572158652917e-05, "loss": 0.3242, "step": 1338 }, { "epoch": 0.24, "learning_rate": 2.65801249797992e-05, "loss": 0.2985, "step": 1339 }, { "epoch": 0.24, "learning_rate": 2.6574524380013095e-05, "loss": 0.3004, "step": 1340 }, { "epoch": 0.24, "learning_rate": 2.6568919789102054e-05, "loss": 0.3229, "step": 1341 }, { "epoch": 0.24, "learning_rate": 2.6563311208998655e-05, "loss": 0.2728, "step": 1342 }, { "epoch": 0.24, "learning_rate": 2.655769864163684e-05, "loss": 0.2478, "step": 1343 }, { "epoch": 0.24, "learning_rate": 2.6552082088951938e-05, "loss": 0.3226, "step": 1344 }, { "epoch": 0.24, "learning_rate": 2.654646155288064e-05, "loss": 0.2983, "step": 1345 }, { "epoch": 0.24, "learning_rate": 2.6540837035361033e-05, "loss": 0.2775, "step": 1346 }, { "epoch": 0.24, "learning_rate": 2.6535208538332543e-05, "loss": 0.2185, "step": 1347 }, { "epoch": 0.24, "learning_rate": 2.652957606373601e-05, "loss": 0.2999, "step": 1348 }, { "epoch": 0.24, "learning_rate": 2.65239396135136e-05, "loss": 0.3054, "step": 1349 }, { "epoch": 0.24, "learning_rate": 2.6518299189608885e-05, "loss": 0.3347, "step": 1350 }, { "epoch": 0.24, "learning_rate": 2.651265479396679e-05, "loss": 0.3633, "step": 1351 }, { "epoch": 0.25, "learning_rate": 2.6507006428533618e-05, "loss": 0.3154, "step": 1352 }, { "epoch": 0.25, "learning_rate": 2.650135409525703e-05, "loss": 0.2611, "step": 1353 }, { "epoch": 0.25, "learning_rate": 2.6495697796086073e-05, "loss": 0.2598, "step": 1354 }, { "epoch": 0.25, "learning_rate": 2.6490037532971145e-05, "loss": 0.377, "step": 1355 }, { "epoch": 0.25, "learning_rate": 2.6484373307864018e-05, "loss": 0.2477, "step": 1356 }, { "epoch": 0.25, "learning_rate": 2.6478705122717823e-05, "loss": 0.3733, "step": 1357 }, { "epoch": 0.25, "learning_rate": 2.647303297948707e-05, "loss": 0.2629, "step": 1358 }, { "epoch": 0.25, "learning_rate": 2.6467356880127622e-05, "loss": 0.2592, "step": 1359 }, { "epoch": 0.25, "learning_rate": 2.646167682659671e-05, "loss": 0.3474, "step": 1360 }, { "epoch": 0.25, "learning_rate": 2.6455992820852938e-05, "loss": 0.2823, "step": 1361 }, { "epoch": 0.25, "learning_rate": 2.6450304864856253e-05, "loss": 0.2787, "step": 1362 }, { "epoch": 0.25, "learning_rate": 2.6444612960567977e-05, "loss": 0.3047, "step": 1363 }, { "epoch": 0.25, "learning_rate": 2.6438917109950787e-05, "loss": 0.3029, "step": 1364 }, { "epoch": 0.25, "learning_rate": 2.6433217314968733e-05, "loss": 0.3042, "step": 1365 }, { "epoch": 0.25, "learning_rate": 2.642751357758722e-05, "loss": 0.2568, "step": 1366 }, { "epoch": 0.25, "learning_rate": 2.6421805899773e-05, "loss": 0.2954, "step": 1367 }, { "epoch": 0.25, "learning_rate": 2.6416094283494192e-05, "loss": 0.2558, "step": 1368 }, { "epoch": 0.25, "learning_rate": 2.641037873072028e-05, "loss": 0.3292, "step": 1369 }, { "epoch": 0.25, "learning_rate": 2.6404659243422103e-05, "loss": 0.2941, "step": 1370 }, { "epoch": 0.25, "learning_rate": 2.6398935823571847e-05, "loss": 0.302, "step": 1371 }, { "epoch": 0.25, "learning_rate": 2.639320847314306e-05, "loss": 0.2992, "step": 1372 }, { "epoch": 0.25, "learning_rate": 2.638747719411065e-05, "loss": 0.2945, "step": 1373 }, { "epoch": 0.25, "learning_rate": 2.6381741988450875e-05, "loss": 0.2933, "step": 1374 }, { "epoch": 0.25, "learning_rate": 2.637600285814134e-05, "loss": 0.1882, "step": 1375 }, { "epoch": 0.25, "learning_rate": 2.637025980516102e-05, "loss": 0.262, "step": 1376 }, { "epoch": 0.25, "learning_rate": 2.636451283149023e-05, "loss": 0.2703, "step": 1377 }, { "epoch": 0.25, "learning_rate": 2.635876193911064e-05, "loss": 0.2885, "step": 1378 }, { "epoch": 0.25, "learning_rate": 2.635300713000527e-05, "loss": 0.2349, "step": 1379 }, { "epoch": 0.25, "learning_rate": 2.6347248406158494e-05, "loss": 0.345, "step": 1380 }, { "epoch": 0.25, "learning_rate": 2.634148576955603e-05, "loss": 0.3435, "step": 1381 }, { "epoch": 0.25, "learning_rate": 2.6335719222184953e-05, "loss": 0.323, "step": 1382 }, { "epoch": 0.25, "learning_rate": 2.6329948766033685e-05, "loss": 0.2926, "step": 1383 }, { "epoch": 0.25, "learning_rate": 2.632417440309199e-05, "loss": 0.3143, "step": 1384 }, { "epoch": 0.25, "learning_rate": 2.631839613535098e-05, "loss": 0.3334, "step": 1385 }, { "epoch": 0.25, "learning_rate": 2.6312613964803122e-05, "loss": 0.3774, "step": 1386 }, { "epoch": 0.25, "learning_rate": 2.6306827893442215e-05, "loss": 0.3318, "step": 1387 }, { "epoch": 0.25, "learning_rate": 2.6301037923263418e-05, "loss": 0.3553, "step": 1388 }, { "epoch": 0.25, "learning_rate": 2.6295244056263227e-05, "loss": 0.2666, "step": 1389 }, { "epoch": 0.25, "learning_rate": 2.6289446294439473e-05, "loss": 0.2852, "step": 1390 }, { "epoch": 0.25, "learning_rate": 2.628364463979135e-05, "loss": 0.2452, "step": 1391 }, { "epoch": 0.25, "learning_rate": 2.6277839094319383e-05, "loss": 0.3062, "step": 1392 }, { "epoch": 0.25, "learning_rate": 2.6272029660025428e-05, "loss": 0.2869, "step": 1393 }, { "epoch": 0.25, "learning_rate": 2.62662163389127e-05, "loss": 0.2564, "step": 1394 }, { "epoch": 0.25, "learning_rate": 2.626039913298575e-05, "loss": 0.2629, "step": 1395 }, { "epoch": 0.25, "learning_rate": 2.625457804425046e-05, "loss": 0.3285, "step": 1396 }, { "epoch": 0.25, "learning_rate": 2.624875307471406e-05, "loss": 0.3669, "step": 1397 }, { "epoch": 0.25, "learning_rate": 2.624292422638511e-05, "loss": 0.2864, "step": 1398 }, { "epoch": 0.25, "learning_rate": 2.623709150127352e-05, "loss": 0.302, "step": 1399 }, { "epoch": 0.25, "learning_rate": 2.6231254901390526e-05, "loss": 0.2567, "step": 1400 }, { "epoch": 0.25, "learning_rate": 2.6225414428748694e-05, "loss": 0.3374, "step": 1401 }, { "epoch": 0.25, "learning_rate": 2.6219570085361948e-05, "loss": 0.3198, "step": 1402 }, { "epoch": 0.25, "learning_rate": 2.6213721873245524e-05, "loss": 0.327, "step": 1403 }, { "epoch": 0.25, "learning_rate": 2.620786979441601e-05, "loss": 0.3184, "step": 1404 }, { "epoch": 0.25, "learning_rate": 2.6202013850891307e-05, "loss": 0.3176, "step": 1405 }, { "epoch": 0.25, "learning_rate": 2.619615404469067e-05, "loss": 0.2416, "step": 1406 }, { "epoch": 0.26, "learning_rate": 2.619029037783467e-05, "loss": 0.1984, "step": 1407 }, { "epoch": 0.26, "learning_rate": 2.618442285234522e-05, "loss": 0.2494, "step": 1408 }, { "epoch": 0.26, "learning_rate": 2.6178551470245556e-05, "loss": 0.3137, "step": 1409 }, { "epoch": 0.26, "learning_rate": 2.6172676233560247e-05, "loss": 0.2518, "step": 1410 }, { "epoch": 0.26, "learning_rate": 2.6166797144315193e-05, "loss": 0.2988, "step": 1411 }, { "epoch": 0.26, "learning_rate": 2.6160914204537617e-05, "loss": 0.2788, "step": 1412 }, { "epoch": 0.26, "learning_rate": 2.615502741625607e-05, "loss": 0.2413, "step": 1413 }, { "epoch": 0.26, "learning_rate": 2.6149136781500443e-05, "loss": 0.2197, "step": 1414 }, { "epoch": 0.26, "learning_rate": 2.6143242302301934e-05, "loss": 0.2681, "step": 1415 }, { "epoch": 0.26, "learning_rate": 2.613734398069308e-05, "loss": 0.285, "step": 1416 }, { "epoch": 0.26, "learning_rate": 2.613144181870774e-05, "loss": 0.2791, "step": 1417 }, { "epoch": 0.26, "learning_rate": 2.6125535818381097e-05, "loss": 0.2931, "step": 1418 }, { "epoch": 0.26, "learning_rate": 2.611962598174965e-05, "loss": 0.2566, "step": 1419 }, { "epoch": 0.26, "learning_rate": 2.6113712310851235e-05, "loss": 0.3239, "step": 1420 }, { "epoch": 0.26, "learning_rate": 2.6107794807725e-05, "loss": 0.3005, "step": 1421 }, { "epoch": 0.26, "learning_rate": 2.610187347441142e-05, "loss": 0.3512, "step": 1422 }, { "epoch": 0.26, "learning_rate": 2.6095948312952277e-05, "loss": 0.3171, "step": 1423 }, { "epoch": 0.26, "learning_rate": 2.6090019325390693e-05, "loss": 0.2267, "step": 1424 }, { "epoch": 0.26, "learning_rate": 2.6084086513771102e-05, "loss": 0.2847, "step": 1425 }, { "epoch": 0.26, "learning_rate": 2.607814988013925e-05, "loss": 0.3236, "step": 1426 }, { "epoch": 0.26, "learning_rate": 2.607220942654221e-05, "loss": 0.2747, "step": 1427 }, { "epoch": 0.26, "learning_rate": 2.606626515502836e-05, "loss": 0.2144, "step": 1428 }, { "epoch": 0.26, "learning_rate": 2.6060317067647414e-05, "loss": 0.2841, "step": 1429 }, { "epoch": 0.26, "learning_rate": 2.605436516645038e-05, "loss": 0.3365, "step": 1430 }, { "epoch": 0.26, "learning_rate": 2.6048409453489596e-05, "loss": 0.3074, "step": 1431 }, { "epoch": 0.26, "learning_rate": 2.6042449930818706e-05, "loss": 0.2689, "step": 1432 }, { "epoch": 0.26, "learning_rate": 2.6036486600492677e-05, "loss": 0.2592, "step": 1433 }, { "epoch": 0.26, "learning_rate": 2.603051946456778e-05, "loss": 0.3119, "step": 1434 }, { "epoch": 0.26, "learning_rate": 2.60245485251016e-05, "loss": 0.3254, "step": 1435 }, { "epoch": 0.26, "learning_rate": 2.6018573784153038e-05, "loss": 0.3575, "step": 1436 }, { "epoch": 0.26, "learning_rate": 2.60125952437823e-05, "loss": 0.236, "step": 1437 }, { "epoch": 0.26, "learning_rate": 2.600661290605091e-05, "loss": 0.2488, "step": 1438 }, { "epoch": 0.26, "learning_rate": 2.600062677302169e-05, "loss": 0.3397, "step": 1439 }, { "epoch": 0.26, "learning_rate": 2.5994636846758778e-05, "loss": 0.2561, "step": 1440 }, { "epoch": 0.26, "learning_rate": 2.5988643129327624e-05, "loss": 0.2915, "step": 1441 }, { "epoch": 0.26, "learning_rate": 2.5982645622794975e-05, "loss": 0.3019, "step": 1442 }, { "epoch": 0.26, "learning_rate": 2.5976644329228894e-05, "loss": 0.2071, "step": 1443 }, { "epoch": 0.26, "learning_rate": 2.5970639250698745e-05, "loss": 0.2896, "step": 1444 }, { "epoch": 0.26, "learning_rate": 2.5964630389275195e-05, "loss": 0.3127, "step": 1445 }, { "epoch": 0.26, "learning_rate": 2.595861774703022e-05, "loss": 0.3522, "step": 1446 }, { "epoch": 0.26, "learning_rate": 2.5952601326037102e-05, "loss": 0.2603, "step": 1447 }, { "epoch": 0.26, "learning_rate": 2.5946581128370413e-05, "loss": 0.3085, "step": 1448 }, { "epoch": 0.26, "learning_rate": 2.5940557156106044e-05, "loss": 0.2742, "step": 1449 }, { "epoch": 0.26, "learning_rate": 2.5934529411321174e-05, "loss": 0.2283, "step": 1450 }, { "epoch": 0.26, "learning_rate": 2.5928497896094294e-05, "loss": 0.2529, "step": 1451 }, { "epoch": 0.26, "learning_rate": 2.5922462612505188e-05, "loss": 0.256, "step": 1452 }, { "epoch": 0.26, "learning_rate": 2.5916423562634938e-05, "loss": 0.2814, "step": 1453 }, { "epoch": 0.26, "learning_rate": 2.591038074856593e-05, "loss": 0.3049, "step": 1454 }, { "epoch": 0.26, "learning_rate": 2.5904334172381845e-05, "loss": 0.307, "step": 1455 }, { "epoch": 0.26, "learning_rate": 2.589828383616767e-05, "loss": 0.2723, "step": 1456 }, { "epoch": 0.26, "learning_rate": 2.5892229742009666e-05, "loss": 0.3027, "step": 1457 }, { "epoch": 0.26, "learning_rate": 2.5886171891995415e-05, "loss": 0.3119, "step": 1458 }, { "epoch": 0.26, "learning_rate": 2.5880110288213782e-05, "loss": 0.3035, "step": 1459 }, { "epoch": 0.26, "learning_rate": 2.5874044932754925e-05, "loss": 0.2502, "step": 1460 }, { "epoch": 0.26, "learning_rate": 2.58679758277103e-05, "loss": 0.3666, "step": 1461 }, { "epoch": 0.27, "learning_rate": 2.5861902975172657e-05, "loss": 0.396, "step": 1462 }, { "epoch": 0.27, "learning_rate": 2.5855826377236032e-05, "loss": 0.3435, "step": 1463 }, { "epoch": 0.27, "learning_rate": 2.584974603599576e-05, "loss": 0.2894, "step": 1464 }, { "epoch": 0.27, "learning_rate": 2.5843661953548453e-05, "loss": 0.2772, "step": 1465 }, { "epoch": 0.27, "learning_rate": 2.5837574131992033e-05, "loss": 0.2526, "step": 1466 }, { "epoch": 0.27, "learning_rate": 2.58314825734257e-05, "loss": 0.2667, "step": 1467 }, { "epoch": 0.27, "learning_rate": 2.5825387279949945e-05, "loss": 0.2947, "step": 1468 }, { "epoch": 0.27, "learning_rate": 2.5819288253666536e-05, "loss": 0.2976, "step": 1469 }, { "epoch": 0.27, "learning_rate": 2.5813185496678553e-05, "loss": 0.2412, "step": 1470 }, { "epoch": 0.27, "learning_rate": 2.5807079011090333e-05, "loss": 0.2909, "step": 1471 }, { "epoch": 0.27, "learning_rate": 2.5800968799007526e-05, "loss": 0.3926, "step": 1472 }, { "epoch": 0.27, "learning_rate": 2.5794854862537042e-05, "loss": 0.2867, "step": 1473 }, { "epoch": 0.27, "learning_rate": 2.5788737203787095e-05, "loss": 0.271, "step": 1474 }, { "epoch": 0.27, "learning_rate": 2.5782615824867173e-05, "loss": 0.2893, "step": 1475 }, { "epoch": 0.27, "learning_rate": 2.5776490727888045e-05, "loss": 0.2648, "step": 1476 }, { "epoch": 0.27, "learning_rate": 2.577036191496177e-05, "loss": 0.2375, "step": 1477 }, { "epoch": 0.27, "learning_rate": 2.576422938820168e-05, "loss": 0.3173, "step": 1478 }, { "epoch": 0.27, "learning_rate": 2.57580931497224e-05, "loss": 0.3059, "step": 1479 }, { "epoch": 0.27, "learning_rate": 2.5751953201639817e-05, "loss": 0.416, "step": 1480 }, { "epoch": 0.27, "learning_rate": 2.5745809546071107e-05, "loss": 0.2815, "step": 1481 }, { "epoch": 0.27, "learning_rate": 2.573966218513473e-05, "loss": 0.2397, "step": 1482 }, { "epoch": 0.27, "learning_rate": 2.573351112095041e-05, "loss": 0.2965, "step": 1483 }, { "epoch": 0.27, "learning_rate": 2.5727356355639167e-05, "loss": 0.2034, "step": 1484 }, { "epoch": 0.27, "learning_rate": 2.5721197891323272e-05, "loss": 0.3451, "step": 1485 }, { "epoch": 0.27, "learning_rate": 2.5715035730126296e-05, "loss": 0.2224, "step": 1486 }, { "epoch": 0.27, "learning_rate": 2.5708869874173074e-05, "loss": 0.3684, "step": 1487 }, { "epoch": 0.27, "learning_rate": 2.5702700325589704e-05, "loss": 0.2388, "step": 1488 }, { "epoch": 0.27, "learning_rate": 2.569652708650358e-05, "loss": 0.2374, "step": 1489 }, { "epoch": 0.27, "learning_rate": 2.569035015904335e-05, "loss": 0.2325, "step": 1490 }, { "epoch": 0.27, "learning_rate": 2.568416954533894e-05, "loss": 0.2037, "step": 1491 }, { "epoch": 0.27, "learning_rate": 2.567798524752156e-05, "loss": 0.3385, "step": 1492 }, { "epoch": 0.27, "learning_rate": 2.5671797267723664e-05, "loss": 0.3118, "step": 1493 }, { "epoch": 0.27, "learning_rate": 2.5665605608078994e-05, "loss": 0.2305, "step": 1494 }, { "epoch": 0.27, "learning_rate": 2.565941027072256e-05, "loss": 0.2681, "step": 1495 }, { "epoch": 0.27, "learning_rate": 2.5653211257790636e-05, "loss": 0.3044, "step": 1496 }, { "epoch": 0.27, "learning_rate": 2.564700857142076e-05, "loss": 0.2976, "step": 1497 }, { "epoch": 0.27, "learning_rate": 2.5640802213751748e-05, "loss": 0.2507, "step": 1498 }, { "epoch": 0.27, "learning_rate": 2.5634592186923666e-05, "loss": 0.3378, "step": 1499 }, { "epoch": 0.27, "learning_rate": 2.5628378493077862e-05, "loss": 0.3195, "step": 1500 }, { "epoch": 0.27, "learning_rate": 2.5622161134356935e-05, "loss": 0.2482, "step": 1501 }, { "epoch": 0.27, "learning_rate": 2.5615940112904758e-05, "loss": 0.3065, "step": 1502 }, { "epoch": 0.27, "learning_rate": 2.560971543086646e-05, "loss": 0.2401, "step": 1503 }, { "epoch": 0.27, "learning_rate": 2.5603487090388435e-05, "loss": 0.223, "step": 1504 }, { "epoch": 0.27, "learning_rate": 2.5597255093618333e-05, "loss": 0.2999, "step": 1505 }, { "epoch": 0.27, "learning_rate": 2.5591019442705078e-05, "loss": 0.2819, "step": 1506 }, { "epoch": 0.27, "learning_rate": 2.5584780139798842e-05, "loss": 0.3069, "step": 1507 }, { "epoch": 0.27, "learning_rate": 2.557853718705106e-05, "loss": 0.2246, "step": 1508 }, { "epoch": 0.27, "learning_rate": 2.5572290586614428e-05, "loss": 0.2908, "step": 1509 }, { "epoch": 0.27, "learning_rate": 2.556604034064289e-05, "loss": 0.2717, "step": 1510 }, { "epoch": 0.27, "learning_rate": 2.555978645129166e-05, "loss": 0.2875, "step": 1511 }, { "epoch": 0.27, "learning_rate": 2.5553528920717202e-05, "loss": 0.3411, "step": 1512 }, { "epoch": 0.27, "learning_rate": 2.554726775107724e-05, "loss": 0.2733, "step": 1513 }, { "epoch": 0.27, "learning_rate": 2.5541002944530746e-05, "loss": 0.3328, "step": 1514 }, { "epoch": 0.27, "learning_rate": 2.5534734503237945e-05, "loss": 0.3113, "step": 1515 }, { "epoch": 0.27, "learning_rate": 2.5528462429360324e-05, "loss": 0.254, "step": 1516 }, { "epoch": 0.28, "learning_rate": 2.552218672506062e-05, "loss": 0.3491, "step": 1517 }, { "epoch": 0.28, "learning_rate": 2.5515907392502816e-05, "loss": 0.3394, "step": 1518 }, { "epoch": 0.28, "learning_rate": 2.550962443385215e-05, "loss": 0.3203, "step": 1519 }, { "epoch": 0.28, "learning_rate": 2.5503337851275113e-05, "loss": 0.2981, "step": 1520 }, { "epoch": 0.28, "learning_rate": 2.549704764693944e-05, "loss": 0.2291, "step": 1521 }, { "epoch": 0.28, "learning_rate": 2.549075382301412e-05, "loss": 0.2397, "step": 1522 }, { "epoch": 0.28, "learning_rate": 2.5484456381669393e-05, "loss": 0.2494, "step": 1523 }, { "epoch": 0.28, "learning_rate": 2.5478155325076733e-05, "loss": 0.3149, "step": 1524 }, { "epoch": 0.28, "learning_rate": 2.547185065540887e-05, "loss": 0.2424, "step": 1525 }, { "epoch": 0.28, "learning_rate": 2.546554237483978e-05, "loss": 0.3345, "step": 1526 }, { "epoch": 0.28, "learning_rate": 2.5459230485544687e-05, "loss": 0.3065, "step": 1527 }, { "epoch": 0.28, "learning_rate": 2.5452914989700048e-05, "loss": 0.2256, "step": 1528 }, { "epoch": 0.28, "learning_rate": 2.5446595889483583e-05, "loss": 0.2759, "step": 1529 }, { "epoch": 0.28, "learning_rate": 2.544027318707423e-05, "loss": 0.2795, "step": 1530 }, { "epoch": 0.28, "learning_rate": 2.5433946884652184e-05, "loss": 0.3868, "step": 1531 }, { "epoch": 0.28, "learning_rate": 2.5427616984398883e-05, "loss": 0.2469, "step": 1532 }, { "epoch": 0.28, "learning_rate": 2.5421283488497003e-05, "loss": 0.2199, "step": 1533 }, { "epoch": 0.28, "learning_rate": 2.541494639913045e-05, "loss": 0.2756, "step": 1534 }, { "epoch": 0.28, "learning_rate": 2.540860571848439e-05, "loss": 0.3031, "step": 1535 }, { "epoch": 0.28, "learning_rate": 2.5402261448745207e-05, "loss": 0.2347, "step": 1536 }, { "epoch": 0.28, "learning_rate": 2.5395913592100534e-05, "loss": 0.274, "step": 1537 }, { "epoch": 0.28, "learning_rate": 2.5389562150739235e-05, "loss": 0.3224, "step": 1538 }, { "epoch": 0.28, "learning_rate": 2.5383207126851414e-05, "loss": 0.2847, "step": 1539 }, { "epoch": 0.28, "learning_rate": 2.5376848522628407e-05, "loss": 0.2446, "step": 1540 }, { "epoch": 0.28, "learning_rate": 2.5370486340262793e-05, "loss": 0.2351, "step": 1541 }, { "epoch": 0.28, "learning_rate": 2.5364120581948367e-05, "loss": 0.2463, "step": 1542 }, { "epoch": 0.28, "learning_rate": 2.535775124988018e-05, "loss": 0.2307, "step": 1543 }, { "epoch": 0.28, "learning_rate": 2.5351378346254498e-05, "loss": 0.2744, "step": 1544 }, { "epoch": 0.28, "learning_rate": 2.5345001873268822e-05, "loss": 0.3053, "step": 1545 }, { "epoch": 0.28, "learning_rate": 2.533862183312189e-05, "loss": 0.2408, "step": 1546 }, { "epoch": 0.28, "learning_rate": 2.5332238228013665e-05, "loss": 0.3307, "step": 1547 }, { "epoch": 0.28, "learning_rate": 2.532585106014534e-05, "loss": 0.3447, "step": 1548 }, { "epoch": 0.28, "learning_rate": 2.531946033171934e-05, "loss": 0.2423, "step": 1549 }, { "epoch": 0.28, "learning_rate": 2.531306604493931e-05, "loss": 0.2571, "step": 1550 }, { "epoch": 0.28, "learning_rate": 2.5306668202010126e-05, "loss": 0.2631, "step": 1551 }, { "epoch": 0.28, "learning_rate": 2.530026680513789e-05, "loss": 0.2598, "step": 1552 }, { "epoch": 0.28, "learning_rate": 2.5293861856529937e-05, "loss": 0.2546, "step": 1553 }, { "epoch": 0.28, "learning_rate": 2.5287453358394818e-05, "loss": 0.2819, "step": 1554 }, { "epoch": 0.28, "learning_rate": 2.528104131294231e-05, "loss": 0.3091, "step": 1555 }, { "epoch": 0.28, "learning_rate": 2.52746257223834e-05, "loss": 0.244, "step": 1556 }, { "epoch": 0.28, "learning_rate": 2.5268206588930332e-05, "loss": 0.2919, "step": 1557 }, { "epoch": 0.28, "learning_rate": 2.526178391479653e-05, "loss": 0.2703, "step": 1558 }, { "epoch": 0.28, "learning_rate": 2.525535770219668e-05, "loss": 0.2812, "step": 1559 }, { "epoch": 0.28, "learning_rate": 2.524892795334665e-05, "loss": 0.3601, "step": 1560 }, { "epoch": 0.28, "learning_rate": 2.5242494670463554e-05, "loss": 0.3185, "step": 1561 }, { "epoch": 0.28, "learning_rate": 2.523605785576571e-05, "loss": 0.3153, "step": 1562 }, { "epoch": 0.28, "learning_rate": 2.522961751147266e-05, "loss": 0.2777, "step": 1563 }, { "epoch": 0.28, "learning_rate": 2.5223173639805163e-05, "loss": 0.1722, "step": 1564 }, { "epoch": 0.28, "learning_rate": 2.5216726242985194e-05, "loss": 0.3507, "step": 1565 }, { "epoch": 0.28, "learning_rate": 2.5210275323235945e-05, "loss": 0.3037, "step": 1566 }, { "epoch": 0.28, "learning_rate": 2.5203820882781812e-05, "loss": 0.2419, "step": 1567 }, { "epoch": 0.28, "learning_rate": 2.5197362923848424e-05, "loss": 0.2046, "step": 1568 }, { "epoch": 0.28, "learning_rate": 2.5190901448662607e-05, "loss": 0.2976, "step": 1569 }, { "epoch": 0.28, "learning_rate": 2.5184436459452406e-05, "loss": 0.2666, "step": 1570 }, { "epoch": 0.28, "learning_rate": 2.5177967958447082e-05, "loss": 0.2551, "step": 1571 }, { "epoch": 0.28, "learning_rate": 2.5171495947877096e-05, "loss": 0.3459, "step": 1572 }, { "epoch": 0.29, "learning_rate": 2.5165020429974126e-05, "loss": 0.291, "step": 1573 }, { "epoch": 0.29, "learning_rate": 2.5158541406971063e-05, "loss": 0.2278, "step": 1574 }, { "epoch": 0.29, "learning_rate": 2.5152058881102e-05, "loss": 0.2538, "step": 1575 }, { "epoch": 0.29, "learning_rate": 2.5145572854602236e-05, "loss": 0.2557, "step": 1576 }, { "epoch": 0.29, "learning_rate": 2.513908332970829e-05, "loss": 0.3196, "step": 1577 }, { "epoch": 0.29, "learning_rate": 2.5132590308657867e-05, "loss": 0.3948, "step": 1578 }, { "epoch": 0.29, "learning_rate": 2.51260937936899e-05, "loss": 0.2107, "step": 1579 }, { "epoch": 0.29, "learning_rate": 2.5119593787044512e-05, "loss": 0.1872, "step": 1580 }, { "epoch": 0.29, "learning_rate": 2.5113090290963032e-05, "loss": 0.2253, "step": 1581 }, { "epoch": 0.29, "learning_rate": 2.5106583307687996e-05, "loss": 0.2854, "step": 1582 }, { "epoch": 0.29, "learning_rate": 2.5100072839463136e-05, "loss": 0.28, "step": 1583 }, { "epoch": 0.29, "learning_rate": 2.5093558888533403e-05, "loss": 0.2089, "step": 1584 }, { "epoch": 0.29, "learning_rate": 2.508704145714492e-05, "loss": 0.2308, "step": 1585 }, { "epoch": 0.29, "learning_rate": 2.508052054754503e-05, "loss": 0.2845, "step": 1586 }, { "epoch": 0.29, "learning_rate": 2.507399616198228e-05, "loss": 0.3225, "step": 1587 }, { "epoch": 0.29, "learning_rate": 2.5067468302706403e-05, "loss": 0.298, "step": 1588 }, { "epoch": 0.29, "learning_rate": 2.5060936971968333e-05, "loss": 0.2653, "step": 1589 }, { "epoch": 0.29, "learning_rate": 2.50544021720202e-05, "loss": 0.3385, "step": 1590 }, { "epoch": 0.29, "learning_rate": 2.5047863905115338e-05, "loss": 0.2764, "step": 1591 }, { "epoch": 0.29, "learning_rate": 2.5041322173508266e-05, "loss": 0.2611, "step": 1592 }, { "epoch": 0.29, "learning_rate": 2.5034776979454704e-05, "loss": 0.2653, "step": 1593 }, { "epoch": 0.29, "learning_rate": 2.502822832521156e-05, "loss": 0.3639, "step": 1594 }, { "epoch": 0.29, "learning_rate": 2.5021676213036947e-05, "loss": 0.3221, "step": 1595 }, { "epoch": 0.29, "learning_rate": 2.5015120645190158e-05, "loss": 0.3133, "step": 1596 }, { "epoch": 0.29, "learning_rate": 2.5008561623931685e-05, "loss": 0.2869, "step": 1597 }, { "epoch": 0.29, "learning_rate": 2.500199915152321e-05, "loss": 0.2971, "step": 1598 }, { "epoch": 0.29, "learning_rate": 2.4995433230227595e-05, "loss": 0.2782, "step": 1599 }, { "epoch": 0.29, "learning_rate": 2.4988863862308905e-05, "loss": 0.2795, "step": 1600 }, { "epoch": 0.29, "learning_rate": 2.498229105003239e-05, "loss": 0.3076, "step": 1601 }, { "epoch": 0.29, "learning_rate": 2.4975714795664485e-05, "loss": 0.2246, "step": 1602 }, { "epoch": 0.29, "learning_rate": 2.496913510147281e-05, "loss": 0.2441, "step": 1603 }, { "epoch": 0.29, "learning_rate": 2.4962551969726174e-05, "loss": 0.246, "step": 1604 }, { "epoch": 0.29, "learning_rate": 2.4955965402694575e-05, "loss": 0.226, "step": 1605 }, { "epoch": 0.29, "learning_rate": 2.4949375402649187e-05, "loss": 0.2569, "step": 1606 }, { "epoch": 0.29, "learning_rate": 2.4942781971862374e-05, "loss": 0.3342, "step": 1607 }, { "epoch": 0.29, "learning_rate": 2.493618511260768e-05, "loss": 0.2576, "step": 1608 }, { "epoch": 0.29, "learning_rate": 2.4929584827159832e-05, "loss": 0.2692, "step": 1609 }, { "epoch": 0.29, "learning_rate": 2.4922981117794747e-05, "loss": 0.2726, "step": 1610 }, { "epoch": 0.29, "learning_rate": 2.4916373986789504e-05, "loss": 0.2661, "step": 1611 }, { "epoch": 0.29, "learning_rate": 2.4909763436422375e-05, "loss": 0.2444, "step": 1612 }, { "epoch": 0.29, "learning_rate": 2.4903149468972815e-05, "loss": 0.2678, "step": 1613 }, { "epoch": 0.29, "learning_rate": 2.489653208672144e-05, "loss": 0.2859, "step": 1614 }, { "epoch": 0.29, "learning_rate": 2.488991129195007e-05, "loss": 0.3328, "step": 1615 }, { "epoch": 0.29, "learning_rate": 2.4883287086941668e-05, "loss": 0.2903, "step": 1616 }, { "epoch": 0.29, "learning_rate": 2.4876659473980403e-05, "loss": 0.2892, "step": 1617 }, { "epoch": 0.29, "learning_rate": 2.48700284553516e-05, "loss": 0.2617, "step": 1618 }, { "epoch": 0.29, "learning_rate": 2.486339403334177e-05, "loss": 0.3104, "step": 1619 }, { "epoch": 0.29, "learning_rate": 2.4856756210238597e-05, "loss": 0.2504, "step": 1620 }, { "epoch": 0.29, "learning_rate": 2.4850114988330917e-05, "loss": 0.2296, "step": 1621 }, { "epoch": 0.29, "learning_rate": 2.4843470369908776e-05, "loss": 0.2759, "step": 1622 }, { "epoch": 0.29, "learning_rate": 2.4836822357263355e-05, "loss": 0.2929, "step": 1623 }, { "epoch": 0.29, "learning_rate": 2.483017095268703e-05, "loss": 0.2882, "step": 1624 }, { "epoch": 0.29, "learning_rate": 2.4823516158473326e-05, "loss": 0.2707, "step": 1625 }, { "epoch": 0.29, "learning_rate": 2.4816857976916958e-05, "loss": 0.3783, "step": 1626 }, { "epoch": 0.29, "learning_rate": 2.4810196410313792e-05, "loss": 0.2281, "step": 1627 }, { "epoch": 0.3, "learning_rate": 2.4803531460960868e-05, "loss": 0.2668, "step": 1628 }, { "epoch": 0.3, "learning_rate": 2.4796863131156403e-05, "loss": 0.2816, "step": 1629 }, { "epoch": 0.3, "learning_rate": 2.4790191423199752e-05, "loss": 0.2593, "step": 1630 }, { "epoch": 0.3, "learning_rate": 2.4783516339391463e-05, "loss": 0.1911, "step": 1631 }, { "epoch": 0.3, "learning_rate": 2.477683788203323e-05, "loss": 0.1882, "step": 1632 }, { "epoch": 0.3, "learning_rate": 2.477015605342793e-05, "loss": 0.2545, "step": 1633 }, { "epoch": 0.3, "learning_rate": 2.476347085587958e-05, "loss": 0.2298, "step": 1634 }, { "epoch": 0.3, "learning_rate": 2.4756782291693367e-05, "loss": 0.2138, "step": 1635 }, { "epoch": 0.3, "learning_rate": 2.475009036317564e-05, "loss": 0.2672, "step": 1636 }, { "epoch": 0.3, "learning_rate": 2.4743395072633913e-05, "loss": 0.2804, "step": 1637 }, { "epoch": 0.3, "learning_rate": 2.4736696422376854e-05, "loss": 0.3269, "step": 1638 }, { "epoch": 0.3, "learning_rate": 2.472999441471429e-05, "loss": 0.3033, "step": 1639 }, { "epoch": 0.3, "learning_rate": 2.4723289051957205e-05, "loss": 0.256, "step": 1640 }, { "epoch": 0.3, "learning_rate": 2.4716580336417738e-05, "loss": 0.2635, "step": 1641 }, { "epoch": 0.3, "learning_rate": 2.470986827040919e-05, "loss": 0.2649, "step": 1642 }, { "epoch": 0.3, "learning_rate": 2.4703152856246012e-05, "loss": 0.2867, "step": 1643 }, { "epoch": 0.3, "learning_rate": 2.4696434096243816e-05, "loss": 0.3151, "step": 1644 }, { "epoch": 0.3, "learning_rate": 2.4689711992719353e-05, "loss": 0.29, "step": 1645 }, { "epoch": 0.3, "learning_rate": 2.4682986547990553e-05, "loss": 0.2536, "step": 1646 }, { "epoch": 0.3, "learning_rate": 2.4676257764376472e-05, "loss": 0.2508, "step": 1647 }, { "epoch": 0.3, "learning_rate": 2.4669525644197325e-05, "loss": 0.2765, "step": 1648 }, { "epoch": 0.3, "learning_rate": 2.466279018977449e-05, "loss": 0.2698, "step": 1649 }, { "epoch": 0.3, "learning_rate": 2.4656051403430475e-05, "loss": 0.2615, "step": 1650 }, { "epoch": 0.3, "learning_rate": 2.464930928748895e-05, "loss": 0.3206, "step": 1651 }, { "epoch": 0.3, "learning_rate": 2.4642563844274736e-05, "loss": 0.2908, "step": 1652 }, { "epoch": 0.3, "learning_rate": 2.4635815076113788e-05, "loss": 0.2992, "step": 1653 }, { "epoch": 0.3, "learning_rate": 2.462906298533322e-05, "loss": 0.3711, "step": 1654 }, { "epoch": 0.3, "learning_rate": 2.4622307574261283e-05, "loss": 0.3513, "step": 1655 }, { "epoch": 0.3, "learning_rate": 2.4615548845227378e-05, "loss": 0.2273, "step": 1656 }, { "epoch": 0.3, "learning_rate": 2.4608786800562045e-05, "loss": 0.3091, "step": 1657 }, { "epoch": 0.3, "learning_rate": 2.4602021442596975e-05, "loss": 0.2861, "step": 1658 }, { "epoch": 0.3, "learning_rate": 2.4595252773664997e-05, "loss": 0.2455, "step": 1659 }, { "epoch": 0.3, "learning_rate": 2.4588480796100076e-05, "loss": 0.2629, "step": 1660 }, { "epoch": 0.3, "learning_rate": 2.458170551223733e-05, "loss": 0.3307, "step": 1661 }, { "epoch": 0.3, "learning_rate": 2.457492692441301e-05, "loss": 0.2883, "step": 1662 }, { "epoch": 0.3, "learning_rate": 2.4568145034964503e-05, "loss": 0.302, "step": 1663 }, { "epoch": 0.3, "learning_rate": 2.4561359846230346e-05, "loss": 0.3254, "step": 1664 }, { "epoch": 0.3, "learning_rate": 2.4554571360550204e-05, "loss": 0.2438, "step": 1665 }, { "epoch": 0.3, "learning_rate": 2.4547779580264876e-05, "loss": 0.256, "step": 1666 }, { "epoch": 0.3, "learning_rate": 2.4540984507716306e-05, "loss": 0.3167, "step": 1667 }, { "epoch": 0.3, "learning_rate": 2.4534186145247565e-05, "loss": 0.2266, "step": 1668 }, { "epoch": 0.3, "learning_rate": 2.4527384495202872e-05, "loss": 0.2435, "step": 1669 }, { "epoch": 0.3, "learning_rate": 2.4520579559927565e-05, "loss": 0.2837, "step": 1670 }, { "epoch": 0.3, "learning_rate": 2.4513771341768117e-05, "loss": 0.309, "step": 1671 }, { "epoch": 0.3, "learning_rate": 2.4506959843072142e-05, "loss": 0.2076, "step": 1672 }, { "epoch": 0.3, "learning_rate": 2.4500145066188384e-05, "loss": 0.2791, "step": 1673 }, { "epoch": 0.3, "learning_rate": 2.4493327013466694e-05, "loss": 0.3405, "step": 1674 }, { "epoch": 0.3, "learning_rate": 2.448650568725809e-05, "loss": 0.2916, "step": 1675 }, { "epoch": 0.3, "learning_rate": 2.4479681089914692e-05, "loss": 0.2914, "step": 1676 }, { "epoch": 0.3, "learning_rate": 2.4472853223789756e-05, "loss": 0.2714, "step": 1677 }, { "epoch": 0.3, "learning_rate": 2.4466022091237672e-05, "loss": 0.3054, "step": 1678 }, { "epoch": 0.3, "learning_rate": 2.445918769461394e-05, "loss": 0.2893, "step": 1679 }, { "epoch": 0.3, "learning_rate": 2.4452350036275206e-05, "loss": 0.2588, "step": 1680 }, { "epoch": 0.3, "learning_rate": 2.4445509118579222e-05, "loss": 0.235, "step": 1681 }, { "epoch": 0.3, "learning_rate": 2.4438664943884868e-05, "loss": 0.2435, "step": 1682 }, { "epoch": 0.31, "learning_rate": 2.443181751455216e-05, "loss": 0.2515, "step": 1683 }, { "epoch": 0.31, "learning_rate": 2.4424966832942223e-05, "loss": 0.2393, "step": 1684 }, { "epoch": 0.31, "learning_rate": 2.441811290141731e-05, "loss": 0.2241, "step": 1685 }, { "epoch": 0.31, "learning_rate": 2.4411255722340785e-05, "loss": 0.2206, "step": 1686 }, { "epoch": 0.31, "learning_rate": 2.4404395298077148e-05, "loss": 0.2538, "step": 1687 }, { "epoch": 0.31, "learning_rate": 2.4397531630992006e-05, "loss": 0.2852, "step": 1688 }, { "epoch": 0.31, "learning_rate": 2.4390664723452085e-05, "loss": 0.3864, "step": 1689 }, { "epoch": 0.31, "learning_rate": 2.438379457782523e-05, "loss": 0.3113, "step": 1690 }, { "epoch": 0.31, "learning_rate": 2.4376921196480406e-05, "loss": 0.251, "step": 1691 }, { "epoch": 0.31, "learning_rate": 2.4370044581787693e-05, "loss": 0.3285, "step": 1692 }, { "epoch": 0.31, "learning_rate": 2.4363164736118275e-05, "loss": 0.2815, "step": 1693 }, { "epoch": 0.31, "learning_rate": 2.4356281661844464e-05, "loss": 0.2662, "step": 1694 }, { "epoch": 0.31, "learning_rate": 2.4349395361339682e-05, "loss": 0.2496, "step": 1695 }, { "epoch": 0.31, "learning_rate": 2.4342505836978463e-05, "loss": 0.2128, "step": 1696 }, { "epoch": 0.31, "learning_rate": 2.4335613091136442e-05, "loss": 0.1925, "step": 1697 }, { "epoch": 0.31, "learning_rate": 2.432871712619039e-05, "loss": 0.2335, "step": 1698 }, { "epoch": 0.31, "learning_rate": 2.4321817944518153e-05, "loss": 0.2236, "step": 1699 }, { "epoch": 0.31, "learning_rate": 2.4314915548498717e-05, "loss": 0.3038, "step": 1700 }, { "epoch": 0.31, "learning_rate": 2.4308009940512164e-05, "loss": 0.2836, "step": 1701 }, { "epoch": 0.31, "learning_rate": 2.4301101122939683e-05, "loss": 0.3073, "step": 1702 }, { "epoch": 0.31, "learning_rate": 2.4294189098163572e-05, "loss": 0.2155, "step": 1703 }, { "epoch": 0.31, "learning_rate": 2.428727386856723e-05, "loss": 0.257, "step": 1704 }, { "epoch": 0.31, "learning_rate": 2.4280355436535166e-05, "loss": 0.2402, "step": 1705 }, { "epoch": 0.31, "learning_rate": 2.4273433804452995e-05, "loss": 0.2531, "step": 1706 }, { "epoch": 0.31, "learning_rate": 2.4266508974707434e-05, "loss": 0.2538, "step": 1707 }, { "epoch": 0.31, "learning_rate": 2.4259580949686292e-05, "loss": 0.2924, "step": 1708 }, { "epoch": 0.31, "learning_rate": 2.42526497317785e-05, "loss": 0.3076, "step": 1709 }, { "epoch": 0.31, "learning_rate": 2.4245715323374077e-05, "loss": 0.2147, "step": 1710 }, { "epoch": 0.31, "learning_rate": 2.4238777726864135e-05, "loss": 0.2981, "step": 1711 }, { "epoch": 0.31, "learning_rate": 2.4231836944640903e-05, "loss": 0.2841, "step": 1712 }, { "epoch": 0.31, "learning_rate": 2.4224892979097704e-05, "loss": 0.2241, "step": 1713 }, { "epoch": 0.31, "learning_rate": 2.4217945832628942e-05, "loss": 0.201, "step": 1714 }, { "epoch": 0.31, "learning_rate": 2.4210995507630144e-05, "loss": 0.2429, "step": 1715 }, { "epoch": 0.31, "learning_rate": 2.420404200649791e-05, "loss": 0.2104, "step": 1716 }, { "epoch": 0.31, "learning_rate": 2.4197085331629948e-05, "loss": 0.2646, "step": 1717 }, { "epoch": 0.31, "learning_rate": 2.4190125485425054e-05, "loss": 0.2316, "step": 1718 }, { "epoch": 0.31, "learning_rate": 2.4183162470283126e-05, "loss": 0.3527, "step": 1719 }, { "epoch": 0.31, "learning_rate": 2.4176196288605145e-05, "loss": 0.2892, "step": 1720 }, { "epoch": 0.31, "learning_rate": 2.416922694279319e-05, "loss": 0.3192, "step": 1721 }, { "epoch": 0.31, "learning_rate": 2.4162254435250433e-05, "loss": 0.3107, "step": 1722 }, { "epoch": 0.31, "learning_rate": 2.4155278768381122e-05, "loss": 0.3113, "step": 1723 }, { "epoch": 0.31, "learning_rate": 2.414829994459062e-05, "loss": 0.3062, "step": 1724 }, { "epoch": 0.31, "learning_rate": 2.414131796628535e-05, "loss": 0.3038, "step": 1725 }, { "epoch": 0.31, "learning_rate": 2.4134332835872835e-05, "loss": 0.2216, "step": 1726 }, { "epoch": 0.31, "learning_rate": 2.4127344555761704e-05, "loss": 0.2324, "step": 1727 }, { "epoch": 0.31, "learning_rate": 2.4120353128361638e-05, "loss": 0.286, "step": 1728 }, { "epoch": 0.31, "learning_rate": 2.4113358556083422e-05, "loss": 0.2513, "step": 1729 }, { "epoch": 0.31, "learning_rate": 2.4106360841338925e-05, "loss": 0.2109, "step": 1730 }, { "epoch": 0.31, "learning_rate": 2.40993599865411e-05, "loss": 0.2765, "step": 1731 }, { "epoch": 0.31, "learning_rate": 2.409235599410398e-05, "loss": 0.2253, "step": 1732 }, { "epoch": 0.31, "learning_rate": 2.4085348866442673e-05, "loss": 0.2503, "step": 1733 }, { "epoch": 0.31, "learning_rate": 2.407833860597338e-05, "loss": 0.2921, "step": 1734 }, { "epoch": 0.31, "learning_rate": 2.407132521511338e-05, "loss": 0.2913, "step": 1735 }, { "epoch": 0.31, "learning_rate": 2.4064308696281026e-05, "loss": 0.2334, "step": 1736 }, { "epoch": 0.31, "learning_rate": 2.405728905189575e-05, "loss": 0.2662, "step": 1737 }, { "epoch": 0.32, "learning_rate": 2.405026628437807e-05, "loss": 0.3806, "step": 1738 }, { "epoch": 0.32, "learning_rate": 2.4043240396149574e-05, "loss": 0.2299, "step": 1739 }, { "epoch": 0.32, "learning_rate": 2.4036211389632922e-05, "loss": 0.2487, "step": 1740 }, { "epoch": 0.32, "learning_rate": 2.4029179267251854e-05, "loss": 0.2545, "step": 1741 }, { "epoch": 0.32, "learning_rate": 2.40221440314312e-05, "loss": 0.2286, "step": 1742 }, { "epoch": 0.32, "learning_rate": 2.4015105684596826e-05, "loss": 0.3662, "step": 1743 }, { "epoch": 0.32, "learning_rate": 2.400806422917571e-05, "loss": 0.2468, "step": 1744 }, { "epoch": 0.32, "learning_rate": 2.4001019667595876e-05, "loss": 0.2678, "step": 1745 }, { "epoch": 0.32, "learning_rate": 2.3993972002286434e-05, "loss": 0.2319, "step": 1746 }, { "epoch": 0.32, "learning_rate": 2.3986921235677556e-05, "loss": 0.2285, "step": 1747 }, { "epoch": 0.32, "learning_rate": 2.3979867370200487e-05, "loss": 0.2168, "step": 1748 }, { "epoch": 0.32, "learning_rate": 2.3972810408287535e-05, "loss": 0.2543, "step": 1749 }, { "epoch": 0.32, "learning_rate": 2.3965750352372086e-05, "loss": 0.2402, "step": 1750 }, { "epoch": 0.32, "learning_rate": 2.3958687204888582e-05, "loss": 0.3081, "step": 1751 }, { "epoch": 0.32, "learning_rate": 2.3951620968272535e-05, "loss": 0.2682, "step": 1752 }, { "epoch": 0.32, "learning_rate": 2.3944551644960533e-05, "loss": 0.3221, "step": 1753 }, { "epoch": 0.32, "learning_rate": 2.3937479237390208e-05, "loss": 0.2468, "step": 1754 }, { "epoch": 0.32, "learning_rate": 2.3930403748000273e-05, "loss": 0.2333, "step": 1755 }, { "epoch": 0.32, "learning_rate": 2.392332517923049e-05, "loss": 0.2814, "step": 1756 }, { "epoch": 0.32, "learning_rate": 2.3916243533521694e-05, "loss": 0.254, "step": 1757 }, { "epoch": 0.32, "learning_rate": 2.3909158813315774e-05, "loss": 0.2429, "step": 1758 }, { "epoch": 0.32, "learning_rate": 2.3902071021055687e-05, "loss": 0.2668, "step": 1759 }, { "epoch": 0.32, "learning_rate": 2.3894980159185435e-05, "loss": 0.2855, "step": 1760 }, { "epoch": 0.32, "learning_rate": 2.3887886230150094e-05, "loss": 0.288, "step": 1761 }, { "epoch": 0.32, "learning_rate": 2.3880789236395793e-05, "loss": 0.342, "step": 1762 }, { "epoch": 0.32, "learning_rate": 2.3873689180369708e-05, "loss": 0.2339, "step": 1763 }, { "epoch": 0.32, "learning_rate": 2.3866586064520083e-05, "loss": 0.3149, "step": 1764 }, { "epoch": 0.32, "learning_rate": 2.385947989129622e-05, "loss": 0.3425, "step": 1765 }, { "epoch": 0.32, "learning_rate": 2.3852370663148454e-05, "loss": 0.2704, "step": 1766 }, { "epoch": 0.32, "learning_rate": 2.3845258382528196e-05, "loss": 0.3425, "step": 1767 }, { "epoch": 0.32, "learning_rate": 2.38381430518879e-05, "loss": 0.2827, "step": 1768 }, { "epoch": 0.32, "learning_rate": 2.3831024673681073e-05, "loss": 0.2484, "step": 1769 }, { "epoch": 0.32, "learning_rate": 2.3823903250362267e-05, "loss": 0.2827, "step": 1770 }, { "epoch": 0.32, "learning_rate": 2.3816778784387097e-05, "loss": 0.2733, "step": 1771 }, { "epoch": 0.32, "learning_rate": 2.3809651278212216e-05, "loss": 0.2028, "step": 1772 }, { "epoch": 0.32, "learning_rate": 2.3802520734295335e-05, "loss": 0.2156, "step": 1773 }, { "epoch": 0.32, "learning_rate": 2.37953871550952e-05, "loss": 0.2885, "step": 1774 }, { "epoch": 0.32, "learning_rate": 2.378825054307161e-05, "loss": 0.2343, "step": 1775 }, { "epoch": 0.32, "learning_rate": 2.3781110900685416e-05, "loss": 0.2039, "step": 1776 }, { "epoch": 0.32, "learning_rate": 2.37739682303985e-05, "loss": 0.2846, "step": 1777 }, { "epoch": 0.32, "learning_rate": 2.3766822534673804e-05, "loss": 0.3082, "step": 1778 }, { "epoch": 0.32, "learning_rate": 2.3759673815975297e-05, "loss": 0.3085, "step": 1779 }, { "epoch": 0.32, "learning_rate": 2.3752522076768006e-05, "loss": 0.2863, "step": 1780 }, { "epoch": 0.32, "learning_rate": 2.3745367319517986e-05, "loss": 0.1911, "step": 1781 }, { "epoch": 0.32, "learning_rate": 2.373820954669234e-05, "loss": 0.1905, "step": 1782 }, { "epoch": 0.32, "learning_rate": 2.3731048760759214e-05, "loss": 0.227, "step": 1783 }, { "epoch": 0.32, "learning_rate": 2.372388496418779e-05, "loss": 0.2723, "step": 1784 }, { "epoch": 0.32, "learning_rate": 2.3716718159448276e-05, "loss": 0.3001, "step": 1785 }, { "epoch": 0.32, "learning_rate": 2.370954834901194e-05, "loss": 0.2576, "step": 1786 }, { "epoch": 0.32, "learning_rate": 2.3702375535351067e-05, "loss": 0.2449, "step": 1787 }, { "epoch": 0.32, "learning_rate": 2.3695199720938988e-05, "loss": 0.3156, "step": 1788 }, { "epoch": 0.32, "learning_rate": 2.3688020908250067e-05, "loss": 0.2874, "step": 1789 }, { "epoch": 0.32, "learning_rate": 2.3680839099759697e-05, "loss": 0.2219, "step": 1790 }, { "epoch": 0.32, "learning_rate": 2.3673654297944308e-05, "loss": 0.2831, "step": 1791 }, { "epoch": 0.32, "learning_rate": 2.3666466505281362e-05, "loss": 0.2637, "step": 1792 }, { "epoch": 0.33, "learning_rate": 2.3659275724249352e-05, "loss": 0.2828, "step": 1793 }, { "epoch": 0.33, "learning_rate": 2.3652081957327806e-05, "loss": 0.2797, "step": 1794 }, { "epoch": 0.33, "learning_rate": 2.364488520699727e-05, "loss": 0.3164, "step": 1795 }, { "epoch": 0.33, "learning_rate": 2.3637685475739332e-05, "loss": 0.3134, "step": 1796 }, { "epoch": 0.33, "learning_rate": 2.36304827660366e-05, "loss": 0.2899, "step": 1797 }, { "epoch": 0.33, "learning_rate": 2.362327708037271e-05, "loss": 0.2445, "step": 1798 }, { "epoch": 0.33, "learning_rate": 2.3616068421232326e-05, "loss": 0.2692, "step": 1799 }, { "epoch": 0.33, "learning_rate": 2.3608856791101132e-05, "loss": 0.2088, "step": 1800 }, { "epoch": 0.33, "learning_rate": 2.3601642192465844e-05, "loss": 0.2156, "step": 1801 }, { "epoch": 0.33, "learning_rate": 2.35944246278142e-05, "loss": 0.2667, "step": 1802 }, { "epoch": 0.33, "learning_rate": 2.358720409963496e-05, "loss": 0.2333, "step": 1803 }, { "epoch": 0.33, "learning_rate": 2.3579980610417898e-05, "loss": 0.3218, "step": 1804 }, { "epoch": 0.33, "learning_rate": 2.3572754162653822e-05, "loss": 0.2551, "step": 1805 }, { "epoch": 0.33, "learning_rate": 2.356552475883455e-05, "loss": 0.1975, "step": 1806 }, { "epoch": 0.33, "learning_rate": 2.3558292401452928e-05, "loss": 0.2821, "step": 1807 }, { "epoch": 0.33, "learning_rate": 2.355105709300281e-05, "loss": 0.282, "step": 1808 }, { "epoch": 0.33, "learning_rate": 2.3543818835979085e-05, "loss": 0.2834, "step": 1809 }, { "epoch": 0.33, "learning_rate": 2.3536577632877632e-05, "loss": 0.2368, "step": 1810 }, { "epoch": 0.33, "learning_rate": 2.3529333486195372e-05, "loss": 0.2937, "step": 1811 }, { "epoch": 0.33, "learning_rate": 2.3522086398430225e-05, "loss": 0.1788, "step": 1812 }, { "epoch": 0.33, "learning_rate": 2.351483637208113e-05, "loss": 0.2472, "step": 1813 }, { "epoch": 0.33, "learning_rate": 2.3507583409648046e-05, "loss": 0.2573, "step": 1814 }, { "epoch": 0.33, "learning_rate": 2.3500327513631926e-05, "loss": 0.245, "step": 1815 }, { "epoch": 0.33, "learning_rate": 2.349306868653476e-05, "loss": 0.2188, "step": 1816 }, { "epoch": 0.33, "learning_rate": 2.3485806930859523e-05, "loss": 0.2807, "step": 1817 }, { "epoch": 0.33, "learning_rate": 2.347854224911022e-05, "loss": 0.2719, "step": 1818 }, { "epoch": 0.33, "learning_rate": 2.3471274643791856e-05, "loss": 0.2395, "step": 1819 }, { "epoch": 0.33, "learning_rate": 2.346400411741044e-05, "loss": 0.2825, "step": 1820 }, { "epoch": 0.33, "learning_rate": 2.3456730672472996e-05, "loss": 0.2645, "step": 1821 }, { "epoch": 0.33, "learning_rate": 2.3449454311487558e-05, "loss": 0.2284, "step": 1822 }, { "epoch": 0.33, "learning_rate": 2.344217503696315e-05, "loss": 0.2343, "step": 1823 }, { "epoch": 0.33, "learning_rate": 2.3434892851409817e-05, "loss": 0.2655, "step": 1824 }, { "epoch": 0.33, "learning_rate": 2.3427607757338594e-05, "loss": 0.243, "step": 1825 }, { "epoch": 0.33, "learning_rate": 2.3420319757261528e-05, "loss": 0.2598, "step": 1826 }, { "epoch": 0.33, "learning_rate": 2.3413028853691667e-05, "loss": 0.2282, "step": 1827 }, { "epoch": 0.33, "learning_rate": 2.340573504914306e-05, "loss": 0.2236, "step": 1828 }, { "epoch": 0.33, "learning_rate": 2.3398438346130747e-05, "loss": 0.3567, "step": 1829 }, { "epoch": 0.33, "learning_rate": 2.339113874717079e-05, "loss": 0.2693, "step": 1830 }, { "epoch": 0.33, "learning_rate": 2.338383625478022e-05, "loss": 0.2921, "step": 1831 }, { "epoch": 0.33, "learning_rate": 2.337653087147709e-05, "loss": 0.2478, "step": 1832 }, { "epoch": 0.33, "learning_rate": 2.336922259978044e-05, "loss": 0.2563, "step": 1833 }, { "epoch": 0.33, "learning_rate": 2.3361911442210305e-05, "loss": 0.2883, "step": 1834 }, { "epoch": 0.33, "learning_rate": 2.335459740128771e-05, "loss": 0.2561, "step": 1835 }, { "epoch": 0.33, "learning_rate": 2.334728047953469e-05, "loss": 0.3024, "step": 1836 }, { "epoch": 0.33, "learning_rate": 2.3339960679474266e-05, "loss": 0.3334, "step": 1837 }, { "epoch": 0.33, "learning_rate": 2.333263800363045e-05, "loss": 0.3438, "step": 1838 }, { "epoch": 0.33, "learning_rate": 2.3325312454528235e-05, "loss": 0.2317, "step": 1839 }, { "epoch": 0.33, "learning_rate": 2.331798403469362e-05, "loss": 0.234, "step": 1840 }, { "epoch": 0.33, "learning_rate": 2.3310652746653586e-05, "loss": 0.3511, "step": 1841 }, { "epoch": 0.33, "learning_rate": 2.3303318592936118e-05, "loss": 0.2863, "step": 1842 }, { "epoch": 0.33, "learning_rate": 2.329598157607017e-05, "loss": 0.2628, "step": 1843 }, { "epoch": 0.33, "learning_rate": 2.3288641698585688e-05, "loss": 0.2623, "step": 1844 }, { "epoch": 0.33, "learning_rate": 2.3281298963013606e-05, "loss": 0.2728, "step": 1845 }, { "epoch": 0.33, "learning_rate": 2.327395337188585e-05, "loss": 0.2781, "step": 1846 }, { "epoch": 0.33, "learning_rate": 2.3266604927735325e-05, "loss": 0.2495, "step": 1847 }, { "epoch": 0.34, "learning_rate": 2.3259253633095922e-05, "loss": 0.2905, "step": 1848 }, { "epoch": 0.34, "learning_rate": 2.32518994905025e-05, "loss": 0.3496, "step": 1849 }, { "epoch": 0.34, "learning_rate": 2.3244542502490924e-05, "loss": 0.2805, "step": 1850 }, { "epoch": 0.34, "learning_rate": 2.3237182671598027e-05, "loss": 0.2874, "step": 1851 }, { "epoch": 0.34, "learning_rate": 2.3229820000361626e-05, "loss": 0.3262, "step": 1852 }, { "epoch": 0.34, "learning_rate": 2.3222454491320513e-05, "loss": 0.2347, "step": 1853 }, { "epoch": 0.34, "learning_rate": 2.3215086147014456e-05, "loss": 0.2311, "step": 1854 }, { "epoch": 0.34, "learning_rate": 2.3207714969984218e-05, "loss": 0.2001, "step": 1855 }, { "epoch": 0.34, "learning_rate": 2.3200340962771514e-05, "loss": 0.3353, "step": 1856 }, { "epoch": 0.34, "learning_rate": 2.319296412791906e-05, "loss": 0.2596, "step": 1857 }, { "epoch": 0.34, "learning_rate": 2.318558446797052e-05, "loss": 0.2289, "step": 1858 }, { "epoch": 0.34, "learning_rate": 2.3178201985470563e-05, "loss": 0.2737, "step": 1859 }, { "epoch": 0.34, "learning_rate": 2.3170816682964802e-05, "loss": 0.2559, "step": 1860 }, { "epoch": 0.34, "learning_rate": 2.3163428562999834e-05, "loss": 0.2777, "step": 1861 }, { "epoch": 0.34, "learning_rate": 2.3156037628123246e-05, "loss": 0.1727, "step": 1862 }, { "epoch": 0.34, "learning_rate": 2.3148643880883567e-05, "loss": 0.3474, "step": 1863 }, { "epoch": 0.34, "learning_rate": 2.31412473238303e-05, "loss": 0.2884, "step": 1864 }, { "epoch": 0.34, "learning_rate": 2.3133847959513936e-05, "loss": 0.2563, "step": 1865 }, { "epoch": 0.34, "learning_rate": 2.312644579048592e-05, "loss": 0.1749, "step": 1866 }, { "epoch": 0.34, "learning_rate": 2.3119040819298662e-05, "loss": 0.3767, "step": 1867 }, { "epoch": 0.34, "learning_rate": 2.311163304850555e-05, "loss": 0.212, "step": 1868 }, { "epoch": 0.34, "learning_rate": 2.3104222480660916e-05, "loss": 0.2576, "step": 1869 }, { "epoch": 0.34, "learning_rate": 2.3096809118320084e-05, "loss": 0.3002, "step": 1870 }, { "epoch": 0.34, "learning_rate": 2.3089392964039325e-05, "loss": 0.2833, "step": 1871 }, { "epoch": 0.34, "learning_rate": 2.308197402037587e-05, "loss": 0.2916, "step": 1872 }, { "epoch": 0.34, "learning_rate": 2.3074552289887922e-05, "loss": 0.2561, "step": 1873 }, { "epoch": 0.34, "learning_rate": 2.3067127775134645e-05, "loss": 0.25, "step": 1874 }, { "epoch": 0.34, "learning_rate": 2.305970047867615e-05, "loss": 0.2866, "step": 1875 }, { "epoch": 0.34, "learning_rate": 2.3052270403073514e-05, "loss": 0.2471, "step": 1876 }, { "epoch": 0.34, "learning_rate": 2.3044837550888783e-05, "loss": 0.2397, "step": 1877 }, { "epoch": 0.34, "learning_rate": 2.303740192468495e-05, "loss": 0.2594, "step": 1878 }, { "epoch": 0.34, "learning_rate": 2.302996352702596e-05, "loss": 0.2618, "step": 1879 }, { "epoch": 0.34, "learning_rate": 2.3022522360476723e-05, "loss": 0.2573, "step": 1880 }, { "epoch": 0.34, "learning_rate": 2.3015078427603105e-05, "loss": 0.2372, "step": 1881 }, { "epoch": 0.34, "learning_rate": 2.3007631730971918e-05, "loss": 0.2626, "step": 1882 }, { "epoch": 0.34, "learning_rate": 2.3000182273150922e-05, "loss": 0.2332, "step": 1883 }, { "epoch": 0.34, "learning_rate": 2.299273005670885e-05, "loss": 0.2826, "step": 1884 }, { "epoch": 0.34, "learning_rate": 2.2985275084215363e-05, "loss": 0.1702, "step": 1885 }, { "epoch": 0.34, "learning_rate": 2.297781735824109e-05, "loss": 0.2488, "step": 1886 }, { "epoch": 0.34, "learning_rate": 2.2970356881357608e-05, "loss": 0.2341, "step": 1887 }, { "epoch": 0.34, "learning_rate": 2.296289365613742e-05, "loss": 0.2502, "step": 1888 }, { "epoch": 0.34, "learning_rate": 2.295542768515401e-05, "loss": 0.3004, "step": 1889 }, { "epoch": 0.34, "learning_rate": 2.2947958970981782e-05, "loss": 0.2251, "step": 1890 }, { "epoch": 0.34, "learning_rate": 2.29404875161961e-05, "loss": 0.3583, "step": 1891 }, { "epoch": 0.34, "learning_rate": 2.293301332337327e-05, "loss": 0.2465, "step": 1892 }, { "epoch": 0.34, "learning_rate": 2.2925536395090537e-05, "loss": 0.2526, "step": 1893 }, { "epoch": 0.34, "learning_rate": 2.2918056733926107e-05, "loss": 0.1948, "step": 1894 }, { "epoch": 0.34, "learning_rate": 2.2910574342459096e-05, "loss": 0.3118, "step": 1895 }, { "epoch": 0.34, "learning_rate": 2.2903089223269595e-05, "loss": 0.2903, "step": 1896 }, { "epoch": 0.34, "learning_rate": 2.2895601378938618e-05, "loss": 0.2459, "step": 1897 }, { "epoch": 0.34, "learning_rate": 2.2888110812048118e-05, "loss": 0.1932, "step": 1898 }, { "epoch": 0.34, "learning_rate": 2.2880617525180992e-05, "loss": 0.3022, "step": 1899 }, { "epoch": 0.34, "learning_rate": 2.287312152092108e-05, "loss": 0.228, "step": 1900 }, { "epoch": 0.34, "learning_rate": 2.2865622801853142e-05, "loss": 0.2499, "step": 1901 }, { "epoch": 0.34, "learning_rate": 2.285812137056289e-05, "loss": 0.1984, "step": 1902 }, { "epoch": 0.34, "learning_rate": 2.2850617229636965e-05, "loss": 0.2505, "step": 1903 }, { "epoch": 0.35, "learning_rate": 2.2843110381662945e-05, "loss": 0.2731, "step": 1904 }, { "epoch": 0.35, "learning_rate": 2.283560082922934e-05, "loss": 0.2828, "step": 1905 }, { "epoch": 0.35, "learning_rate": 2.282808857492559e-05, "loss": 0.2894, "step": 1906 }, { "epoch": 0.35, "learning_rate": 2.2820573621342068e-05, "loss": 0.2243, "step": 1907 }, { "epoch": 0.35, "learning_rate": 2.281305597107008e-05, "loss": 0.2689, "step": 1908 }, { "epoch": 0.35, "learning_rate": 2.280553562670186e-05, "loss": 0.2707, "step": 1909 }, { "epoch": 0.35, "learning_rate": 2.2798012590830577e-05, "loss": 0.2646, "step": 1910 }, { "epoch": 0.35, "learning_rate": 2.2790486866050314e-05, "loss": 0.2877, "step": 1911 }, { "epoch": 0.35, "learning_rate": 2.278295845495609e-05, "loss": 0.2612, "step": 1912 }, { "epoch": 0.35, "learning_rate": 2.277542736014386e-05, "loss": 0.2489, "step": 1913 }, { "epoch": 0.35, "learning_rate": 2.2767893584210486e-05, "loss": 0.2024, "step": 1914 }, { "epoch": 0.35, "learning_rate": 2.2760357129753764e-05, "loss": 0.1995, "step": 1915 }, { "epoch": 0.35, "learning_rate": 2.2752817999372412e-05, "loss": 0.2354, "step": 1916 }, { "epoch": 0.35, "learning_rate": 2.274527619566608e-05, "loss": 0.2933, "step": 1917 }, { "epoch": 0.35, "learning_rate": 2.273773172123531e-05, "loss": 0.2838, "step": 1918 }, { "epoch": 0.35, "learning_rate": 2.273018457868161e-05, "loss": 0.2615, "step": 1919 }, { "epoch": 0.35, "learning_rate": 2.2722634770607375e-05, "loss": 0.2885, "step": 1920 }, { "epoch": 0.35, "learning_rate": 2.271508229961592e-05, "loss": 0.2849, "step": 1921 }, { "epoch": 0.35, "learning_rate": 2.2707527168311497e-05, "loss": 0.2623, "step": 1922 }, { "epoch": 0.35, "learning_rate": 2.269996937929926e-05, "loss": 0.2983, "step": 1923 }, { "epoch": 0.35, "learning_rate": 2.2692408935185287e-05, "loss": 0.2182, "step": 1924 }, { "epoch": 0.35, "learning_rate": 2.2684845838576567e-05, "loss": 0.1875, "step": 1925 }, { "epoch": 0.35, "learning_rate": 2.2677280092081e-05, "loss": 0.2605, "step": 1926 }, { "epoch": 0.35, "learning_rate": 2.266971169830742e-05, "loss": 0.2212, "step": 1927 }, { "epoch": 0.35, "learning_rate": 2.266214065986554e-05, "loss": 0.1995, "step": 1928 }, { "epoch": 0.35, "learning_rate": 2.265456697936602e-05, "loss": 0.2572, "step": 1929 }, { "epoch": 0.35, "learning_rate": 2.2646990659420402e-05, "loss": 0.2485, "step": 1930 }, { "epoch": 0.35, "learning_rate": 2.2639411702641157e-05, "loss": 0.2389, "step": 1931 }, { "epoch": 0.35, "learning_rate": 2.263183011164166e-05, "loss": 0.2657, "step": 1932 }, { "epoch": 0.35, "learning_rate": 2.2624245889036192e-05, "loss": 0.2451, "step": 1933 }, { "epoch": 0.35, "learning_rate": 2.2616659037439948e-05, "loss": 0.4142, "step": 1934 }, { "epoch": 0.35, "learning_rate": 2.2609069559469014e-05, "loss": 0.2258, "step": 1935 }, { "epoch": 0.35, "learning_rate": 2.26014774577404e-05, "loss": 0.2925, "step": 1936 }, { "epoch": 0.35, "learning_rate": 2.2593882734872014e-05, "loss": 0.3193, "step": 1937 }, { "epoch": 0.35, "learning_rate": 2.2586285393482657e-05, "loss": 0.2456, "step": 1938 }, { "epoch": 0.35, "learning_rate": 2.2578685436192058e-05, "loss": 0.2933, "step": 1939 }, { "epoch": 0.35, "learning_rate": 2.257108286562081e-05, "loss": 0.221, "step": 1940 }, { "epoch": 0.35, "learning_rate": 2.2563477684390457e-05, "loss": 0.2423, "step": 1941 }, { "epoch": 0.35, "learning_rate": 2.2555869895123396e-05, "loss": 0.2314, "step": 1942 }, { "epoch": 0.35, "learning_rate": 2.254825950044295e-05, "loss": 0.2847, "step": 1943 }, { "epoch": 0.35, "learning_rate": 2.2540646502973332e-05, "loss": 0.2574, "step": 1944 }, { "epoch": 0.35, "learning_rate": 2.2533030905339654e-05, "loss": 0.2694, "step": 1945 }, { "epoch": 0.35, "learning_rate": 2.2525412710167933e-05, "loss": 0.2139, "step": 1946 }, { "epoch": 0.35, "learning_rate": 2.251779192008506e-05, "loss": 0.2374, "step": 1947 }, { "epoch": 0.35, "learning_rate": 2.2510168537718842e-05, "loss": 0.2265, "step": 1948 }, { "epoch": 0.35, "learning_rate": 2.2502542565697962e-05, "loss": 0.2203, "step": 1949 }, { "epoch": 0.35, "learning_rate": 2.2494914006652026e-05, "loss": 0.2273, "step": 1950 }, { "epoch": 0.35, "learning_rate": 2.2487282863211498e-05, "loss": 0.2141, "step": 1951 }, { "epoch": 0.35, "learning_rate": 2.2479649138007747e-05, "loss": 0.1813, "step": 1952 }, { "epoch": 0.35, "learning_rate": 2.247201283367304e-05, "loss": 0.3182, "step": 1953 }, { "epoch": 0.35, "learning_rate": 2.2464373952840512e-05, "loss": 0.2553, "step": 1954 }, { "epoch": 0.35, "learning_rate": 2.2456732498144215e-05, "loss": 0.2048, "step": 1955 }, { "epoch": 0.35, "learning_rate": 2.244908847221907e-05, "loss": 0.2339, "step": 1956 }, { "epoch": 0.35, "learning_rate": 2.244144187770088e-05, "loss": 0.265, "step": 1957 }, { "epoch": 0.35, "learning_rate": 2.243379271722635e-05, "loss": 0.3309, "step": 1958 }, { "epoch": 0.36, "learning_rate": 2.242614099343306e-05, "loss": 0.2271, "step": 1959 }, { "epoch": 0.36, "learning_rate": 2.241848670895948e-05, "loss": 0.3195, "step": 1960 }, { "epoch": 0.36, "learning_rate": 2.2410829866444952e-05, "loss": 0.2517, "step": 1961 }, { "epoch": 0.36, "learning_rate": 2.240317046852971e-05, "loss": 0.2628, "step": 1962 }, { "epoch": 0.36, "learning_rate": 2.239550851785486e-05, "loss": 0.247, "step": 1963 }, { "epoch": 0.36, "learning_rate": 2.2387844017062404e-05, "loss": 0.28, "step": 1964 }, { "epoch": 0.36, "learning_rate": 2.2380176968795205e-05, "loss": 0.2427, "step": 1965 }, { "epoch": 0.36, "learning_rate": 2.2372507375697017e-05, "loss": 0.2622, "step": 1966 }, { "epoch": 0.36, "learning_rate": 2.236483524041247e-05, "loss": 0.2213, "step": 1967 }, { "epoch": 0.36, "learning_rate": 2.235716056558706e-05, "loss": 0.2566, "step": 1968 }, { "epoch": 0.36, "learning_rate": 2.2349483353867174e-05, "loss": 0.2668, "step": 1969 }, { "epoch": 0.36, "learning_rate": 2.2341803607900068e-05, "loss": 0.285, "step": 1970 }, { "epoch": 0.36, "learning_rate": 2.2334121330333858e-05, "loss": 0.1824, "step": 1971 }, { "epoch": 0.36, "learning_rate": 2.232643652381756e-05, "loss": 0.2203, "step": 1972 }, { "epoch": 0.36, "learning_rate": 2.2318749191001038e-05, "loss": 0.2518, "step": 1973 }, { "epoch": 0.36, "learning_rate": 2.231105933453504e-05, "loss": 0.2649, "step": 1974 }, { "epoch": 0.36, "learning_rate": 2.2303366957071178e-05, "loss": 0.2525, "step": 1975 }, { "epoch": 0.36, "learning_rate": 2.2295672061261936e-05, "loss": 0.2609, "step": 1976 }, { "epoch": 0.36, "learning_rate": 2.2287974649760678e-05, "loss": 0.2123, "step": 1977 }, { "epoch": 0.36, "learning_rate": 2.2280274725221603e-05, "loss": 0.2454, "step": 1978 }, { "epoch": 0.36, "learning_rate": 2.227257229029981e-05, "loss": 0.2251, "step": 1979 }, { "epoch": 0.36, "learning_rate": 2.2264867347651253e-05, "loss": 0.2694, "step": 1980 }, { "epoch": 0.36, "learning_rate": 2.2257159899932742e-05, "loss": 0.2642, "step": 1981 }, { "epoch": 0.36, "learning_rate": 2.2249449949801962e-05, "loss": 0.2308, "step": 1982 }, { "epoch": 0.36, "learning_rate": 2.2241737499917448e-05, "loss": 0.1879, "step": 1983 }, { "epoch": 0.36, "learning_rate": 2.223402255293862e-05, "loss": 0.3306, "step": 1984 }, { "epoch": 0.36, "learning_rate": 2.222630511152573e-05, "loss": 0.2733, "step": 1985 }, { "epoch": 0.36, "learning_rate": 2.221858517833991e-05, "loss": 0.2904, "step": 1986 }, { "epoch": 0.36, "learning_rate": 2.221086275604315e-05, "loss": 0.2362, "step": 1987 }, { "epoch": 0.36, "learning_rate": 2.220313784729829e-05, "loss": 0.251, "step": 1988 }, { "epoch": 0.36, "learning_rate": 2.2195410454769026e-05, "loss": 0.2582, "step": 1989 }, { "epoch": 0.36, "learning_rate": 2.2187680581119923e-05, "loss": 0.2468, "step": 1990 }, { "epoch": 0.36, "learning_rate": 2.2179948229016393e-05, "loss": 0.2572, "step": 1991 }, { "epoch": 0.36, "learning_rate": 2.21722134011247e-05, "loss": 0.3319, "step": 1992 }, { "epoch": 0.36, "learning_rate": 2.216447610011197e-05, "loss": 0.2336, "step": 1993 }, { "epoch": 0.36, "learning_rate": 2.2156736328646184e-05, "loss": 0.269, "step": 1994 }, { "epoch": 0.36, "learning_rate": 2.2148994089396153e-05, "loss": 0.2156, "step": 1995 }, { "epoch": 0.36, "learning_rate": 2.2141249385031564e-05, "loss": 0.2251, "step": 1996 }, { "epoch": 0.36, "learning_rate": 2.213350221822294e-05, "loss": 0.3066, "step": 1997 }, { "epoch": 0.36, "learning_rate": 2.212575259164166e-05, "loss": 0.1854, "step": 1998 }, { "epoch": 0.36, "learning_rate": 2.2118000507959947e-05, "loss": 0.2386, "step": 1999 }, { "epoch": 0.36, "learning_rate": 2.211024596985088e-05, "loss": 0.2409, "step": 2000 }, { "epoch": 0.36, "learning_rate": 2.2102488979988366e-05, "loss": 0.326, "step": 2001 }, { "epoch": 0.36, "learning_rate": 2.2094729541047167e-05, "loss": 0.2328, "step": 2002 }, { "epoch": 0.36, "learning_rate": 2.2086967655702906e-05, "loss": 0.2424, "step": 2003 }, { "epoch": 0.36, "learning_rate": 2.207920332663202e-05, "loss": 0.3271, "step": 2004 }, { "epoch": 0.36, "learning_rate": 2.2071436556511816e-05, "loss": 0.1898, "step": 2005 }, { "epoch": 0.36, "learning_rate": 2.206366734802042e-05, "loss": 0.2314, "step": 2006 }, { "epoch": 0.36, "learning_rate": 2.205589570383681e-05, "loss": 0.3212, "step": 2007 }, { "epoch": 0.36, "learning_rate": 2.2048121626640806e-05, "loss": 0.2256, "step": 2008 }, { "epoch": 0.36, "learning_rate": 2.204034511911306e-05, "loss": 0.2268, "step": 2009 }, { "epoch": 0.36, "learning_rate": 2.203256618393507e-05, "loss": 0.1904, "step": 2010 }, { "epoch": 0.36, "learning_rate": 2.2024784823789163e-05, "loss": 0.223, "step": 2011 }, { "epoch": 0.36, "learning_rate": 2.2017001041358514e-05, "loss": 0.2394, "step": 2012 }, { "epoch": 0.36, "learning_rate": 2.200921483932711e-05, "loss": 0.2655, "step": 2013 }, { "epoch": 0.37, "learning_rate": 2.20014262203798e-05, "loss": 0.2865, "step": 2014 }, { "epoch": 0.37, "learning_rate": 2.1993635187202245e-05, "loss": 0.2402, "step": 2015 }, { "epoch": 0.37, "learning_rate": 2.1985841742480954e-05, "loss": 0.2103, "step": 2016 }, { "epoch": 0.37, "learning_rate": 2.197804588890326e-05, "loss": 0.2837, "step": 2017 }, { "epoch": 0.37, "learning_rate": 2.1970247629157324e-05, "loss": 0.2441, "step": 2018 }, { "epoch": 0.37, "learning_rate": 2.196244696593215e-05, "loss": 0.1938, "step": 2019 }, { "epoch": 0.37, "learning_rate": 2.195464390191755e-05, "loss": 0.3164, "step": 2020 }, { "epoch": 0.37, "learning_rate": 2.1946838439804174e-05, "loss": 0.2721, "step": 2021 }, { "epoch": 0.37, "learning_rate": 2.193903058228351e-05, "loss": 0.246, "step": 2022 }, { "epoch": 0.37, "learning_rate": 2.193122033204785e-05, "loss": 0.2705, "step": 2023 }, { "epoch": 0.37, "learning_rate": 2.192340769179033e-05, "loss": 0.2556, "step": 2024 }, { "epoch": 0.37, "learning_rate": 2.1915592664204903e-05, "loss": 0.2399, "step": 2025 }, { "epoch": 0.37, "learning_rate": 2.1907775251986348e-05, "loss": 0.261, "step": 2026 }, { "epoch": 0.37, "learning_rate": 2.1899955457830253e-05, "loss": 0.2379, "step": 2027 }, { "epoch": 0.37, "learning_rate": 2.189213328443304e-05, "loss": 0.2039, "step": 2028 }, { "epoch": 0.37, "learning_rate": 2.188430873449196e-05, "loss": 0.2526, "step": 2029 }, { "epoch": 0.37, "learning_rate": 2.1876481810705067e-05, "loss": 0.2651, "step": 2030 }, { "epoch": 0.37, "learning_rate": 2.1868652515771228e-05, "loss": 0.2159, "step": 2031 }, { "epoch": 0.37, "learning_rate": 2.1860820852390157e-05, "loss": 0.2511, "step": 2032 }, { "epoch": 0.37, "learning_rate": 2.185298682326235e-05, "loss": 0.2512, "step": 2033 }, { "epoch": 0.37, "learning_rate": 2.184515043108915e-05, "loss": 0.2302, "step": 2034 }, { "epoch": 0.37, "learning_rate": 2.183731167857269e-05, "loss": 0.2776, "step": 2035 }, { "epoch": 0.37, "learning_rate": 2.182947056841593e-05, "loss": 0.2141, "step": 2036 }, { "epoch": 0.37, "learning_rate": 2.182162710332264e-05, "loss": 0.2434, "step": 2037 }, { "epoch": 0.37, "learning_rate": 2.18137812859974e-05, "loss": 0.2015, "step": 2038 }, { "epoch": 0.37, "learning_rate": 2.1805933119145607e-05, "loss": 0.2299, "step": 2039 }, { "epoch": 0.37, "learning_rate": 2.179808260547346e-05, "loss": 0.2649, "step": 2040 }, { "epoch": 0.37, "learning_rate": 2.1790229747687973e-05, "loss": 0.2646, "step": 2041 }, { "epoch": 0.37, "learning_rate": 2.1782374548496963e-05, "loss": 0.2346, "step": 2042 }, { "epoch": 0.37, "learning_rate": 2.1774517010609067e-05, "loss": 0.3112, "step": 2043 }, { "epoch": 0.37, "learning_rate": 2.176665713673371e-05, "loss": 0.2544, "step": 2044 }, { "epoch": 0.37, "learning_rate": 2.175879492958114e-05, "loss": 0.2865, "step": 2045 }, { "epoch": 0.37, "learning_rate": 2.1750930391862396e-05, "loss": 0.2465, "step": 2046 }, { "epoch": 0.37, "learning_rate": 2.1743063526289326e-05, "loss": 0.2367, "step": 2047 }, { "epoch": 0.37, "learning_rate": 2.1735194335574584e-05, "loss": 0.2188, "step": 2048 }, { "epoch": 0.37, "learning_rate": 2.1727322822431622e-05, "loss": 0.2847, "step": 2049 }, { "epoch": 0.37, "learning_rate": 2.1719448989574693e-05, "loss": 0.3244, "step": 2050 }, { "epoch": 0.37, "learning_rate": 2.1711572839718852e-05, "loss": 0.3182, "step": 2051 }, { "epoch": 0.37, "learning_rate": 2.1703694375579942e-05, "loss": 0.2236, "step": 2052 }, { "epoch": 0.37, "learning_rate": 2.169581359987463e-05, "loss": 0.2764, "step": 2053 }, { "epoch": 0.37, "learning_rate": 2.1687930515320345e-05, "loss": 0.2625, "step": 2054 }, { "epoch": 0.37, "learning_rate": 2.1680045124635347e-05, "loss": 0.2816, "step": 2055 }, { "epoch": 0.37, "learning_rate": 2.1672157430538663e-05, "loss": 0.2943, "step": 2056 }, { "epoch": 0.37, "learning_rate": 2.166426743575013e-05, "loss": 0.2388, "step": 2057 }, { "epoch": 0.37, "learning_rate": 2.165637514299038e-05, "loss": 0.2443, "step": 2058 }, { "epoch": 0.37, "learning_rate": 2.1648480554980815e-05, "loss": 0.2438, "step": 2059 }, { "epoch": 0.37, "learning_rate": 2.1640583674443666e-05, "loss": 0.2147, "step": 2060 }, { "epoch": 0.37, "learning_rate": 2.163268450410192e-05, "loss": 0.2709, "step": 2061 }, { "epoch": 0.37, "learning_rate": 2.1624783046679373e-05, "loss": 0.2713, "step": 2062 }, { "epoch": 0.37, "learning_rate": 2.1616879304900598e-05, "loss": 0.349, "step": 2063 }, { "epoch": 0.37, "learning_rate": 2.1608973281490965e-05, "loss": 0.2185, "step": 2064 }, { "epoch": 0.37, "learning_rate": 2.160106497917663e-05, "loss": 0.2579, "step": 2065 }, { "epoch": 0.37, "learning_rate": 2.1593154400684524e-05, "loss": 0.2439, "step": 2066 }, { "epoch": 0.37, "learning_rate": 2.1585241548742384e-05, "loss": 0.2503, "step": 2067 }, { "epoch": 0.37, "learning_rate": 2.15773264260787e-05, "loss": 0.2374, "step": 2068 }, { "epoch": 0.38, "learning_rate": 2.1569409035422777e-05, "loss": 0.2354, "step": 2069 }, { "epoch": 0.38, "learning_rate": 2.156148937950469e-05, "loss": 0.2567, "step": 2070 }, { "epoch": 0.38, "learning_rate": 2.155356746105528e-05, "loss": 0.1479, "step": 2071 }, { "epoch": 0.38, "learning_rate": 2.154564328280619e-05, "loss": 0.1889, "step": 2072 }, { "epoch": 0.38, "learning_rate": 2.1537716847489833e-05, "loss": 0.213, "step": 2073 }, { "epoch": 0.38, "learning_rate": 2.1529788157839398e-05, "loss": 0.2841, "step": 2074 }, { "epoch": 0.38, "learning_rate": 2.1521857216588853e-05, "loss": 0.2357, "step": 2075 }, { "epoch": 0.38, "learning_rate": 2.1513924026472947e-05, "loss": 0.2567, "step": 2076 }, { "epoch": 0.38, "learning_rate": 2.1505988590227202e-05, "loss": 0.238, "step": 2077 }, { "epoch": 0.38, "learning_rate": 2.1498050910587906e-05, "loss": 0.2761, "step": 2078 }, { "epoch": 0.38, "learning_rate": 2.1490110990292135e-05, "loss": 0.2058, "step": 2079 }, { "epoch": 0.38, "learning_rate": 2.1482168832077732e-05, "loss": 0.2181, "step": 2080 }, { "epoch": 0.38, "learning_rate": 2.1474224438683297e-05, "loss": 0.218, "step": 2081 }, { "epoch": 0.38, "learning_rate": 2.1466277812848234e-05, "loss": 0.2059, "step": 2082 }, { "epoch": 0.38, "learning_rate": 2.1458328957312677e-05, "loss": 0.1841, "step": 2083 }, { "epoch": 0.38, "learning_rate": 2.1450377874817567e-05, "loss": 0.3046, "step": 2084 }, { "epoch": 0.38, "learning_rate": 2.1442424568104576e-05, "loss": 0.2968, "step": 2085 }, { "epoch": 0.38, "learning_rate": 2.1434469039916182e-05, "loss": 0.2377, "step": 2086 }, { "epoch": 0.38, "learning_rate": 2.1426511292995587e-05, "loss": 0.226, "step": 2087 }, { "epoch": 0.38, "learning_rate": 2.1418551330086793e-05, "loss": 0.1942, "step": 2088 }, { "epoch": 0.38, "learning_rate": 2.141058915393455e-05, "loss": 0.3628, "step": 2089 }, { "epoch": 0.38, "learning_rate": 2.140262476728437e-05, "loss": 0.2684, "step": 2090 }, { "epoch": 0.38, "learning_rate": 2.139465817288254e-05, "loss": 0.2357, "step": 2091 }, { "epoch": 0.38, "learning_rate": 2.138668937347609e-05, "loss": 0.207, "step": 2092 }, { "epoch": 0.38, "learning_rate": 2.1378718371812825e-05, "loss": 0.1755, "step": 2093 }, { "epoch": 0.38, "learning_rate": 2.1370745170641304e-05, "loss": 0.2473, "step": 2094 }, { "epoch": 0.38, "learning_rate": 2.1362769772710838e-05, "loss": 0.2546, "step": 2095 }, { "epoch": 0.38, "learning_rate": 2.1354792180771507e-05, "loss": 0.2377, "step": 2096 }, { "epoch": 0.38, "learning_rate": 2.1346812397574145e-05, "loss": 0.2155, "step": 2097 }, { "epoch": 0.38, "learning_rate": 2.1338830425870336e-05, "loss": 0.259, "step": 2098 }, { "epoch": 0.38, "learning_rate": 2.1330846268412423e-05, "loss": 0.2749, "step": 2099 }, { "epoch": 0.38, "learning_rate": 2.1322859927953494e-05, "loss": 0.2678, "step": 2100 }, { "epoch": 0.38, "learning_rate": 2.1314871407247412e-05, "loss": 0.2899, "step": 2101 }, { "epoch": 0.38, "learning_rate": 2.1306880709048762e-05, "loss": 0.2847, "step": 2102 }, { "epoch": 0.38, "learning_rate": 2.1298887836112905e-05, "loss": 0.281, "step": 2103 }, { "epoch": 0.38, "learning_rate": 2.1290892791195933e-05, "loss": 0.2535, "step": 2104 }, { "epoch": 0.38, "learning_rate": 2.1282895577054703e-05, "loss": 0.1804, "step": 2105 }, { "epoch": 0.38, "learning_rate": 2.1274896196446816e-05, "loss": 0.2298, "step": 2106 }, { "epoch": 0.38, "learning_rate": 2.1266894652130603e-05, "loss": 0.2078, "step": 2107 }, { "epoch": 0.38, "learning_rate": 2.125889094686517e-05, "loss": 0.2411, "step": 2108 }, { "epoch": 0.38, "learning_rate": 2.125088508341034e-05, "loss": 0.2541, "step": 2109 }, { "epoch": 0.38, "learning_rate": 2.12428770645267e-05, "loss": 0.209, "step": 2110 }, { "epoch": 0.38, "learning_rate": 2.123486689297557e-05, "loss": 0.1724, "step": 2111 }, { "epoch": 0.38, "learning_rate": 2.122685457151902e-05, "loss": 0.2379, "step": 2112 }, { "epoch": 0.38, "learning_rate": 2.1218840102919857e-05, "loss": 0.2777, "step": 2113 }, { "epoch": 0.38, "learning_rate": 2.121082348994162e-05, "loss": 0.2057, "step": 2114 }, { "epoch": 0.38, "learning_rate": 2.12028047353486e-05, "loss": 0.3227, "step": 2115 }, { "epoch": 0.38, "learning_rate": 2.1194783841905826e-05, "loss": 0.1875, "step": 2116 }, { "epoch": 0.38, "learning_rate": 2.1186760812379063e-05, "loss": 0.3074, "step": 2117 }, { "epoch": 0.38, "learning_rate": 2.1178735649534795e-05, "loss": 0.2147, "step": 2118 }, { "epoch": 0.38, "learning_rate": 2.1170708356140277e-05, "loss": 0.267, "step": 2119 }, { "epoch": 0.38, "learning_rate": 2.1162678934963464e-05, "loss": 0.2625, "step": 2120 }, { "epoch": 0.38, "learning_rate": 2.1154647388773063e-05, "loss": 0.2115, "step": 2121 }, { "epoch": 0.38, "learning_rate": 2.114661372033852e-05, "loss": 0.1924, "step": 2122 }, { "epoch": 0.38, "learning_rate": 2.1138577932429984e-05, "loss": 0.2881, "step": 2123 }, { "epoch": 0.39, "learning_rate": 2.1130540027818368e-05, "loss": 0.2238, "step": 2124 }, { "epoch": 0.39, "learning_rate": 2.1122500009275295e-05, "loss": 0.2006, "step": 2125 }, { "epoch": 0.39, "learning_rate": 2.111445787957312e-05, "loss": 0.2017, "step": 2126 }, { "epoch": 0.39, "learning_rate": 2.110641364148494e-05, "loss": 0.2552, "step": 2127 }, { "epoch": 0.39, "learning_rate": 2.109836729778455e-05, "loss": 0.2379, "step": 2128 }, { "epoch": 0.39, "learning_rate": 2.1090318851246502e-05, "loss": 0.2748, "step": 2129 }, { "epoch": 0.39, "learning_rate": 2.1082268304646057e-05, "loss": 0.2804, "step": 2130 }, { "epoch": 0.39, "learning_rate": 2.1074215660759194e-05, "loss": 0.2618, "step": 2131 }, { "epoch": 0.39, "learning_rate": 2.106616092236264e-05, "loss": 0.2565, "step": 2132 }, { "epoch": 0.39, "learning_rate": 2.1058104092233812e-05, "loss": 0.2626, "step": 2133 }, { "epoch": 0.39, "learning_rate": 2.1050045173150875e-05, "loss": 0.1786, "step": 2134 }, { "epoch": 0.39, "learning_rate": 2.1041984167892697e-05, "loss": 0.2399, "step": 2135 }, { "epoch": 0.39, "learning_rate": 2.1033921079238875e-05, "loss": 0.176, "step": 2136 }, { "epoch": 0.39, "learning_rate": 2.1025855909969723e-05, "loss": 0.2633, "step": 2137 }, { "epoch": 0.39, "learning_rate": 2.101778866286627e-05, "loss": 0.2322, "step": 2138 }, { "epoch": 0.39, "learning_rate": 2.1009719340710263e-05, "loss": 0.2678, "step": 2139 }, { "epoch": 0.39, "learning_rate": 2.100164794628416e-05, "loss": 0.2947, "step": 2140 }, { "epoch": 0.39, "learning_rate": 2.099357448237114e-05, "loss": 0.3043, "step": 2141 }, { "epoch": 0.39, "learning_rate": 2.098549895175509e-05, "loss": 0.311, "step": 2142 }, { "epoch": 0.39, "learning_rate": 2.097742135722062e-05, "loss": 0.2581, "step": 2143 }, { "epoch": 0.39, "learning_rate": 2.096934170155304e-05, "loss": 0.2649, "step": 2144 }, { "epoch": 0.39, "learning_rate": 2.0961259987538376e-05, "loss": 0.2189, "step": 2145 }, { "epoch": 0.39, "learning_rate": 2.095317621796336e-05, "loss": 0.2641, "step": 2146 }, { "epoch": 0.39, "learning_rate": 2.0945090395615435e-05, "loss": 0.179, "step": 2147 }, { "epoch": 0.39, "learning_rate": 2.093700252328276e-05, "loss": 0.2101, "step": 2148 }, { "epoch": 0.39, "learning_rate": 2.0928912603754187e-05, "loss": 0.1927, "step": 2149 }, { "epoch": 0.39, "learning_rate": 2.0920820639819275e-05, "loss": 0.2189, "step": 2150 }, { "epoch": 0.39, "learning_rate": 2.091272663426831e-05, "loss": 0.1973, "step": 2151 }, { "epoch": 0.39, "learning_rate": 2.090463058989225e-05, "loss": 0.2294, "step": 2152 }, { "epoch": 0.39, "learning_rate": 2.0896532509482776e-05, "loss": 0.3098, "step": 2153 }, { "epoch": 0.39, "learning_rate": 2.0888432395832268e-05, "loss": 0.229, "step": 2154 }, { "epoch": 0.39, "learning_rate": 2.0880330251733807e-05, "loss": 0.2231, "step": 2155 }, { "epoch": 0.39, "learning_rate": 2.0872226079981165e-05, "loss": 0.2715, "step": 2156 }, { "epoch": 0.39, "learning_rate": 2.0864119883368825e-05, "loss": 0.2158, "step": 2157 }, { "epoch": 0.39, "learning_rate": 2.0856011664691966e-05, "loss": 0.2357, "step": 2158 }, { "epoch": 0.39, "learning_rate": 2.084790142674646e-05, "loss": 0.236, "step": 2159 }, { "epoch": 0.39, "learning_rate": 2.0839789172328883e-05, "loss": 0.2601, "step": 2160 }, { "epoch": 0.39, "learning_rate": 2.083167490423649e-05, "loss": 0.2443, "step": 2161 }, { "epoch": 0.39, "learning_rate": 2.0823558625267247e-05, "loss": 0.249, "step": 2162 }, { "epoch": 0.39, "learning_rate": 2.081544033821981e-05, "loss": 0.2163, "step": 2163 }, { "epoch": 0.39, "learning_rate": 2.0807320045893517e-05, "loss": 0.2066, "step": 2164 }, { "epoch": 0.39, "learning_rate": 2.0799197751088415e-05, "loss": 0.2726, "step": 2165 }, { "epoch": 0.39, "learning_rate": 2.0791073456605224e-05, "loss": 0.1786, "step": 2166 }, { "epoch": 0.39, "learning_rate": 2.078294716524537e-05, "loss": 0.2051, "step": 2167 }, { "epoch": 0.39, "learning_rate": 2.0774818879810946e-05, "loss": 0.1873, "step": 2168 }, { "epoch": 0.39, "learning_rate": 2.076668860310476e-05, "loss": 0.2034, "step": 2169 }, { "epoch": 0.39, "learning_rate": 2.0758556337930287e-05, "loss": 0.25, "step": 2170 }, { "epoch": 0.39, "learning_rate": 2.0750422087091688e-05, "loss": 0.2504, "step": 2171 }, { "epoch": 0.39, "learning_rate": 2.0742285853393818e-05, "loss": 0.1791, "step": 2172 }, { "epoch": 0.39, "learning_rate": 2.0734147639642206e-05, "loss": 0.2253, "step": 2173 }, { "epoch": 0.39, "learning_rate": 2.0726007448643083e-05, "loss": 0.2347, "step": 2174 }, { "epoch": 0.39, "learning_rate": 2.0717865283203333e-05, "loss": 0.3226, "step": 2175 }, { "epoch": 0.39, "learning_rate": 2.0709721146130537e-05, "loss": 0.2692, "step": 2176 }, { "epoch": 0.39, "learning_rate": 2.070157504023297e-05, "loss": 0.2225, "step": 2177 }, { "epoch": 0.39, "learning_rate": 2.0693426968319546e-05, "loss": 0.2607, "step": 2178 }, { "epoch": 0.4, "learning_rate": 2.0685276933199902e-05, "loss": 0.2511, "step": 2179 }, { "epoch": 0.4, "learning_rate": 2.0677124937684325e-05, "loss": 0.1807, "step": 2180 }, { "epoch": 0.4, "learning_rate": 2.066897098458378e-05, "loss": 0.198, "step": 2181 }, { "epoch": 0.4, "learning_rate": 2.0660815076709914e-05, "loss": 0.2762, "step": 2182 }, { "epoch": 0.4, "learning_rate": 2.0652657216875046e-05, "loss": 0.2758, "step": 2183 }, { "epoch": 0.4, "learning_rate": 2.0644497407892165e-05, "loss": 0.2516, "step": 2184 }, { "epoch": 0.4, "learning_rate": 2.0636335652574945e-05, "loss": 0.2507, "step": 2185 }, { "epoch": 0.4, "learning_rate": 2.0628171953737707e-05, "loss": 0.2096, "step": 2186 }, { "epoch": 0.4, "learning_rate": 2.0620006314195453e-05, "loss": 0.1963, "step": 2187 }, { "epoch": 0.4, "learning_rate": 2.0611838736763876e-05, "loss": 0.2078, "step": 2188 }, { "epoch": 0.4, "learning_rate": 2.0603669224259304e-05, "loss": 0.269, "step": 2189 }, { "epoch": 0.4, "learning_rate": 2.059549777949875e-05, "loss": 0.2449, "step": 2190 }, { "epoch": 0.4, "learning_rate": 2.0587324405299893e-05, "loss": 0.2208, "step": 2191 }, { "epoch": 0.4, "learning_rate": 2.057914910448106e-05, "loss": 0.2148, "step": 2192 }, { "epoch": 0.4, "learning_rate": 2.057097187986128e-05, "loss": 0.2659, "step": 2193 }, { "epoch": 0.4, "learning_rate": 2.0562792734260204e-05, "loss": 0.2233, "step": 2194 }, { "epoch": 0.4, "learning_rate": 2.0554611670498164e-05, "loss": 0.2705, "step": 2195 }, { "epoch": 0.4, "learning_rate": 2.054642869139616e-05, "loss": 0.1904, "step": 2196 }, { "epoch": 0.4, "learning_rate": 2.053824379977584e-05, "loss": 0.2266, "step": 2197 }, { "epoch": 0.4, "learning_rate": 2.0530056998459522e-05, "loss": 0.1807, "step": 2198 }, { "epoch": 0.4, "learning_rate": 2.052186829027017e-05, "loss": 0.2206, "step": 2199 }, { "epoch": 0.4, "learning_rate": 2.0513677678031416e-05, "loss": 0.2616, "step": 2200 }, { "epoch": 0.4, "learning_rate": 2.050548516456755e-05, "loss": 0.2185, "step": 2201 }, { "epoch": 0.4, "learning_rate": 2.0497290752703503e-05, "loss": 0.2241, "step": 2202 }, { "epoch": 0.4, "learning_rate": 2.0489094445264875e-05, "loss": 0.2789, "step": 2203 }, { "epoch": 0.4, "learning_rate": 2.0480896245077912e-05, "loss": 0.1944, "step": 2204 }, { "epoch": 0.4, "learning_rate": 2.047269615496952e-05, "loss": 0.2529, "step": 2205 }, { "epoch": 0.4, "learning_rate": 2.0464494177767256e-05, "loss": 0.3013, "step": 2206 }, { "epoch": 0.4, "learning_rate": 2.0456290316299313e-05, "loss": 0.3014, "step": 2207 }, { "epoch": 0.4, "learning_rate": 2.0448084573394554e-05, "loss": 0.21, "step": 2208 }, { "epoch": 0.4, "learning_rate": 2.043987695188247e-05, "loss": 0.244, "step": 2209 }, { "epoch": 0.4, "learning_rate": 2.043166745459323e-05, "loss": 0.2619, "step": 2210 }, { "epoch": 0.4, "learning_rate": 2.0423456084357615e-05, "loss": 0.2222, "step": 2211 }, { "epoch": 0.4, "learning_rate": 2.0415242844007073e-05, "loss": 0.3145, "step": 2212 }, { "epoch": 0.4, "learning_rate": 2.0407027736373697e-05, "loss": 0.1602, "step": 2213 }, { "epoch": 0.4, "learning_rate": 2.0398810764290204e-05, "loss": 0.2805, "step": 2214 }, { "epoch": 0.4, "learning_rate": 2.039059193058998e-05, "loss": 0.3136, "step": 2215 }, { "epoch": 0.4, "learning_rate": 2.038237123810704e-05, "loss": 0.3035, "step": 2216 }, { "epoch": 0.4, "learning_rate": 2.037414868967604e-05, "loss": 0.2461, "step": 2217 }, { "epoch": 0.4, "learning_rate": 2.036592428813228e-05, "loss": 0.2042, "step": 2218 }, { "epoch": 0.4, "learning_rate": 2.0357698036311694e-05, "loss": 0.1913, "step": 2219 }, { "epoch": 0.4, "learning_rate": 2.0349469937050853e-05, "loss": 0.2515, "step": 2220 }, { "epoch": 0.4, "learning_rate": 2.0341239993186976e-05, "loss": 0.267, "step": 2221 }, { "epoch": 0.4, "learning_rate": 2.0333008207557904e-05, "loss": 0.2449, "step": 2222 }, { "epoch": 0.4, "learning_rate": 2.032477458300212e-05, "loss": 0.2349, "step": 2223 }, { "epoch": 0.4, "learning_rate": 2.0316539122358745e-05, "loss": 0.1795, "step": 2224 }, { "epoch": 0.4, "learning_rate": 2.030830182846753e-05, "loss": 0.2227, "step": 2225 }, { "epoch": 0.4, "learning_rate": 2.0300062704168845e-05, "loss": 0.2355, "step": 2226 }, { "epoch": 0.4, "learning_rate": 2.0291821752303715e-05, "loss": 0.2988, "step": 2227 }, { "epoch": 0.4, "learning_rate": 2.0283578975713776e-05, "loss": 0.2618, "step": 2228 }, { "epoch": 0.4, "learning_rate": 2.0275334377241307e-05, "loss": 0.3389, "step": 2229 }, { "epoch": 0.4, "learning_rate": 2.0267087959729206e-05, "loss": 0.1756, "step": 2230 }, { "epoch": 0.4, "learning_rate": 2.0258839726021e-05, "loss": 0.2822, "step": 2231 }, { "epoch": 0.4, "learning_rate": 2.025058967896084e-05, "loss": 0.2107, "step": 2232 }, { "epoch": 0.4, "learning_rate": 2.024233782139351e-05, "loss": 0.1624, "step": 2233 }, { "epoch": 0.41, "learning_rate": 2.023408415616441e-05, "loss": 0.2607, "step": 2234 }, { "epoch": 0.41, "learning_rate": 2.0225828686119576e-05, "loss": 0.3044, "step": 2235 }, { "epoch": 0.41, "learning_rate": 2.0217571414105645e-05, "loss": 0.2476, "step": 2236 }, { "epoch": 0.41, "learning_rate": 2.0209312342969894e-05, "loss": 0.2174, "step": 2237 }, { "epoch": 0.41, "learning_rate": 2.0201051475560216e-05, "loss": 0.1996, "step": 2238 }, { "epoch": 0.41, "learning_rate": 2.0192788814725116e-05, "loss": 0.2415, "step": 2239 }, { "epoch": 0.41, "learning_rate": 2.0184524363313724e-05, "loss": 0.2475, "step": 2240 }, { "epoch": 0.41, "learning_rate": 2.0176258124175793e-05, "loss": 0.1891, "step": 2241 }, { "epoch": 0.41, "learning_rate": 2.0167990100161677e-05, "loss": 0.3125, "step": 2242 }, { "epoch": 0.41, "learning_rate": 2.015972029412236e-05, "loss": 0.1802, "step": 2243 }, { "epoch": 0.41, "learning_rate": 2.0151448708909436e-05, "loss": 0.2057, "step": 2244 }, { "epoch": 0.41, "learning_rate": 2.01431753473751e-05, "loss": 0.1774, "step": 2245 }, { "epoch": 0.41, "learning_rate": 2.0134900212372183e-05, "loss": 0.2992, "step": 2246 }, { "epoch": 0.41, "learning_rate": 2.012662330675411e-05, "loss": 0.2838, "step": 2247 }, { "epoch": 0.41, "learning_rate": 2.0118344633374924e-05, "loss": 0.2477, "step": 2248 }, { "epoch": 0.41, "learning_rate": 2.0110064195089276e-05, "loss": 0.2301, "step": 2249 }, { "epoch": 0.41, "learning_rate": 2.0101781994752416e-05, "loss": 0.2567, "step": 2250 }, { "epoch": 0.41, "learning_rate": 2.0093498035220224e-05, "loss": 0.2548, "step": 2251 }, { "epoch": 0.41, "learning_rate": 2.0085212319349163e-05, "loss": 0.2096, "step": 2252 }, { "epoch": 0.41, "learning_rate": 2.0076924849996316e-05, "loss": 0.2405, "step": 2253 }, { "epoch": 0.41, "learning_rate": 2.0068635630019364e-05, "loss": 0.238, "step": 2254 }, { "epoch": 0.41, "learning_rate": 2.0060344662276595e-05, "loss": 0.1898, "step": 2255 }, { "epoch": 0.41, "learning_rate": 2.0052051949626903e-05, "loss": 0.3032, "step": 2256 }, { "epoch": 0.41, "learning_rate": 2.0043757494929772e-05, "loss": 0.2401, "step": 2257 }, { "epoch": 0.41, "learning_rate": 2.0035461301045303e-05, "loss": 0.1774, "step": 2258 }, { "epoch": 0.41, "learning_rate": 2.0027163370834178e-05, "loss": 0.2493, "step": 2259 }, { "epoch": 0.41, "learning_rate": 2.0018863707157692e-05, "loss": 0.2594, "step": 2260 }, { "epoch": 0.41, "learning_rate": 2.001056231287773e-05, "loss": 0.3102, "step": 2261 }, { "epoch": 0.41, "learning_rate": 2.0002259190856782e-05, "loss": 0.2167, "step": 2262 }, { "epoch": 0.41, "learning_rate": 1.999395434395793e-05, "loss": 0.2078, "step": 2263 }, { "epoch": 0.41, "learning_rate": 1.9985647775044843e-05, "loss": 0.1969, "step": 2264 }, { "epoch": 0.41, "learning_rate": 1.9977339486981792e-05, "loss": 0.2144, "step": 2265 }, { "epoch": 0.41, "learning_rate": 1.996902948263364e-05, "loss": 0.227, "step": 2266 }, { "epoch": 0.41, "learning_rate": 1.9960717764865842e-05, "loss": 0.2092, "step": 2267 }, { "epoch": 0.41, "learning_rate": 1.9952404336544445e-05, "loss": 0.2074, "step": 2268 }, { "epoch": 0.41, "learning_rate": 1.9944089200536078e-05, "loss": 0.2239, "step": 2269 }, { "epoch": 0.41, "learning_rate": 1.9935772359707967e-05, "loss": 0.2842, "step": 2270 }, { "epoch": 0.41, "learning_rate": 1.9927453816927922e-05, "loss": 0.2269, "step": 2271 }, { "epoch": 0.41, "learning_rate": 1.991913357506434e-05, "loss": 0.2291, "step": 2272 }, { "epoch": 0.41, "learning_rate": 1.9910811636986208e-05, "loss": 0.2418, "step": 2273 }, { "epoch": 0.41, "learning_rate": 1.9902488005563097e-05, "loss": 0.265, "step": 2274 }, { "epoch": 0.41, "learning_rate": 1.9894162683665157e-05, "loss": 0.3617, "step": 2275 }, { "epoch": 0.41, "learning_rate": 1.988583567416312e-05, "loss": 0.3318, "step": 2276 }, { "epoch": 0.41, "learning_rate": 1.9877506979928307e-05, "loss": 0.134, "step": 2277 }, { "epoch": 0.41, "learning_rate": 1.9869176603832614e-05, "loss": 0.3115, "step": 2278 }, { "epoch": 0.41, "learning_rate": 1.9860844548748528e-05, "loss": 0.317, "step": 2279 }, { "epoch": 0.41, "learning_rate": 1.98525108175491e-05, "loss": 0.2346, "step": 2280 }, { "epoch": 0.41, "learning_rate": 1.984417541310796e-05, "loss": 0.2133, "step": 2281 }, { "epoch": 0.41, "learning_rate": 1.983583833829933e-05, "loss": 0.168, "step": 2282 }, { "epoch": 0.41, "learning_rate": 1.9827499595997986e-05, "loss": 0.2137, "step": 2283 }, { "epoch": 0.41, "learning_rate": 1.9819159189079312e-05, "loss": 0.1804, "step": 2284 }, { "epoch": 0.41, "learning_rate": 1.9810817120419225e-05, "loss": 0.1578, "step": 2285 }, { "epoch": 0.41, "learning_rate": 1.9802473392894245e-05, "loss": 0.198, "step": 2286 }, { "epoch": 0.41, "learning_rate": 1.979412800938145e-05, "loss": 0.2269, "step": 2287 }, { "epoch": 0.41, "learning_rate": 1.9785780972758496e-05, "loss": 0.2128, "step": 2288 }, { "epoch": 0.41, "learning_rate": 1.977743228590361e-05, "loss": 0.2849, "step": 2289 }, { "epoch": 0.42, "learning_rate": 1.9769081951695575e-05, "loss": 0.294, "step": 2290 }, { "epoch": 0.42, "learning_rate": 1.9760729973013758e-05, "loss": 0.1948, "step": 2291 }, { "epoch": 0.42, "learning_rate": 1.9752376352738084e-05, "loss": 0.2192, "step": 2292 }, { "epoch": 0.42, "learning_rate": 1.974402109374905e-05, "loss": 0.2496, "step": 2293 }, { "epoch": 0.42, "learning_rate": 1.9735664198927713e-05, "loss": 0.192, "step": 2294 }, { "epoch": 0.42, "learning_rate": 1.972730567115569e-05, "loss": 0.2169, "step": 2295 }, { "epoch": 0.42, "learning_rate": 1.9718945513315178e-05, "loss": 0.2377, "step": 2296 }, { "epoch": 0.42, "learning_rate": 1.9710583728288907e-05, "loss": 0.2593, "step": 2297 }, { "epoch": 0.42, "learning_rate": 1.9702220318960208e-05, "loss": 0.2486, "step": 2298 }, { "epoch": 0.42, "learning_rate": 1.9693855288212937e-05, "loss": 0.2609, "step": 2299 }, { "epoch": 0.42, "learning_rate": 1.9685488638931518e-05, "loss": 0.2313, "step": 2300 }, { "epoch": 0.42, "learning_rate": 1.967712037400095e-05, "loss": 0.2901, "step": 2301 }, { "epoch": 0.42, "learning_rate": 1.9668750496306762e-05, "loss": 0.1689, "step": 2302 }, { "epoch": 0.42, "learning_rate": 1.9660379008735066e-05, "loss": 0.2914, "step": 2303 }, { "epoch": 0.42, "learning_rate": 1.9652005914172514e-05, "loss": 0.239, "step": 2304 }, { "epoch": 0.42, "learning_rate": 1.9643631215506304e-05, "loss": 0.2885, "step": 2305 }, { "epoch": 0.42, "learning_rate": 1.963525491562421e-05, "loss": 0.2341, "step": 2306 }, { "epoch": 0.42, "learning_rate": 1.962687701741454e-05, "loss": 0.2657, "step": 2307 }, { "epoch": 0.42, "learning_rate": 1.9618497523766164e-05, "loss": 0.2657, "step": 2308 }, { "epoch": 0.42, "learning_rate": 1.9610116437568494e-05, "loss": 0.2617, "step": 2309 }, { "epoch": 0.42, "learning_rate": 1.9601733761711494e-05, "loss": 0.2568, "step": 2310 }, { "epoch": 0.42, "learning_rate": 1.959334949908567e-05, "loss": 0.204, "step": 2311 }, { "epoch": 0.42, "learning_rate": 1.9584963652582092e-05, "loss": 0.252, "step": 2312 }, { "epoch": 0.42, "learning_rate": 1.9576576225092364e-05, "loss": 0.1315, "step": 2313 }, { "epoch": 0.42, "learning_rate": 1.9568187219508634e-05, "loss": 0.1602, "step": 2314 }, { "epoch": 0.42, "learning_rate": 1.9559796638723596e-05, "loss": 0.2142, "step": 2315 }, { "epoch": 0.42, "learning_rate": 1.955140448563049e-05, "loss": 0.2362, "step": 2316 }, { "epoch": 0.42, "learning_rate": 1.9543010763123095e-05, "loss": 0.27, "step": 2317 }, { "epoch": 0.42, "learning_rate": 1.9534615474095737e-05, "loss": 0.2357, "step": 2318 }, { "epoch": 0.42, "learning_rate": 1.952621862144327e-05, "loss": 0.3027, "step": 2319 }, { "epoch": 0.42, "learning_rate": 1.9517820208061103e-05, "loss": 0.2302, "step": 2320 }, { "epoch": 0.42, "learning_rate": 1.9509420236845165e-05, "loss": 0.3176, "step": 2321 }, { "epoch": 0.42, "learning_rate": 1.9501018710691947e-05, "loss": 0.2097, "step": 2322 }, { "epoch": 0.42, "learning_rate": 1.949261563249845e-05, "loss": 0.2542, "step": 2323 }, { "epoch": 0.42, "learning_rate": 1.948421100516222e-05, "loss": 0.1954, "step": 2324 }, { "epoch": 0.42, "learning_rate": 1.947580483158135e-05, "loss": 0.2274, "step": 2325 }, { "epoch": 0.42, "learning_rate": 1.9467397114654445e-05, "loss": 0.2058, "step": 2326 }, { "epoch": 0.42, "learning_rate": 1.945898785728066e-05, "loss": 0.2606, "step": 2327 }, { "epoch": 0.42, "learning_rate": 1.945057706235966e-05, "loss": 0.3335, "step": 2328 }, { "epoch": 0.42, "learning_rate": 1.944216473279167e-05, "loss": 0.1949, "step": 2329 }, { "epoch": 0.42, "learning_rate": 1.9433750871477417e-05, "loss": 0.2131, "step": 2330 }, { "epoch": 0.42, "learning_rate": 1.9425335481318166e-05, "loss": 0.2499, "step": 2331 }, { "epoch": 0.42, "learning_rate": 1.9416918565215717e-05, "loss": 0.2892, "step": 2332 }, { "epoch": 0.42, "learning_rate": 1.9408500126072383e-05, "loss": 0.2664, "step": 2333 }, { "epoch": 0.42, "learning_rate": 1.940008016679102e-05, "loss": 0.1863, "step": 2334 }, { "epoch": 0.42, "learning_rate": 1.9391658690274973e-05, "loss": 0.2658, "step": 2335 }, { "epoch": 0.42, "learning_rate": 1.938323569942816e-05, "loss": 0.2632, "step": 2336 }, { "epoch": 0.42, "learning_rate": 1.937481119715498e-05, "loss": 0.2165, "step": 2337 }, { "epoch": 0.42, "learning_rate": 1.9366385186360364e-05, "loss": 0.2039, "step": 2338 }, { "epoch": 0.42, "learning_rate": 1.935795766994978e-05, "loss": 0.1942, "step": 2339 }, { "epoch": 0.42, "learning_rate": 1.9349528650829195e-05, "loss": 0.2545, "step": 2340 }, { "epoch": 0.42, "learning_rate": 1.9341098131905105e-05, "loss": 0.2737, "step": 2341 }, { "epoch": 0.42, "learning_rate": 1.933266611608451e-05, "loss": 0.2689, "step": 2342 }, { "epoch": 0.42, "learning_rate": 1.9324232606274953e-05, "loss": 0.2509, "step": 2343 }, { "epoch": 0.42, "learning_rate": 1.931579760538446e-05, "loss": 0.2472, "step": 2344 }, { "epoch": 0.43, "learning_rate": 1.9307361116321592e-05, "loss": 0.1744, "step": 2345 }, { "epoch": 0.43, "learning_rate": 1.929892314199542e-05, "loss": 0.2528, "step": 2346 }, { "epoch": 0.43, "learning_rate": 1.9290483685315517e-05, "loss": 0.2485, "step": 2347 }, { "epoch": 0.43, "learning_rate": 1.9282042749191983e-05, "loss": 0.2302, "step": 2348 }, { "epoch": 0.43, "learning_rate": 1.9273600336535414e-05, "loss": 0.2324, "step": 2349 }, { "epoch": 0.43, "learning_rate": 1.926515645025692e-05, "loss": 0.2708, "step": 2350 }, { "epoch": 0.43, "learning_rate": 1.9256711093268128e-05, "loss": 0.2678, "step": 2351 }, { "epoch": 0.43, "learning_rate": 1.924826426848115e-05, "loss": 0.2651, "step": 2352 }, { "epoch": 0.43, "learning_rate": 1.9239815978808634e-05, "loss": 0.2043, "step": 2353 }, { "epoch": 0.43, "learning_rate": 1.923136622716371e-05, "loss": 0.2147, "step": 2354 }, { "epoch": 0.43, "learning_rate": 1.9222915016460018e-05, "loss": 0.1865, "step": 2355 }, { "epoch": 0.43, "learning_rate": 1.9214462349611704e-05, "loss": 0.1768, "step": 2356 }, { "epoch": 0.43, "learning_rate": 1.920600822953341e-05, "loss": 0.2617, "step": 2357 }, { "epoch": 0.43, "learning_rate": 1.9197552659140297e-05, "loss": 0.3026, "step": 2358 }, { "epoch": 0.43, "learning_rate": 1.9189095641348003e-05, "loss": 0.2109, "step": 2359 }, { "epoch": 0.43, "learning_rate": 1.9180637179072677e-05, "loss": 0.2284, "step": 2360 }, { "epoch": 0.43, "learning_rate": 1.9172177275230962e-05, "loss": 0.1607, "step": 2361 }, { "epoch": 0.43, "learning_rate": 1.9163715932740005e-05, "loss": 0.1921, "step": 2362 }, { "epoch": 0.43, "learning_rate": 1.915525315451744e-05, "loss": 0.266, "step": 2363 }, { "epoch": 0.43, "learning_rate": 1.9146788943481396e-05, "loss": 0.2355, "step": 2364 }, { "epoch": 0.43, "learning_rate": 1.913832330255051e-05, "loss": 0.264, "step": 2365 }, { "epoch": 0.43, "learning_rate": 1.91298562346439e-05, "loss": 0.3121, "step": 2366 }, { "epoch": 0.43, "learning_rate": 1.9121387742681176e-05, "loss": 0.2812, "step": 2367 }, { "epoch": 0.43, "learning_rate": 1.9112917829582443e-05, "loss": 0.1817, "step": 2368 }, { "epoch": 0.43, "learning_rate": 1.910444649826829e-05, "loss": 0.1738, "step": 2369 }, { "epoch": 0.43, "learning_rate": 1.9095973751659806e-05, "loss": 0.2427, "step": 2370 }, { "epoch": 0.43, "learning_rate": 1.908749959267856e-05, "loss": 0.2386, "step": 2371 }, { "epoch": 0.43, "learning_rate": 1.907902402424661e-05, "loss": 0.1862, "step": 2372 }, { "epoch": 0.43, "learning_rate": 1.9070547049286494e-05, "loss": 0.1927, "step": 2373 }, { "epoch": 0.43, "learning_rate": 1.9062068670721248e-05, "loss": 0.1598, "step": 2374 }, { "epoch": 0.43, "learning_rate": 1.9053588891474385e-05, "loss": 0.2311, "step": 2375 }, { "epoch": 0.43, "learning_rate": 1.9045107714469893e-05, "loss": 0.2727, "step": 2376 }, { "epoch": 0.43, "learning_rate": 1.9036625142632252e-05, "loss": 0.2157, "step": 2377 }, { "epoch": 0.43, "learning_rate": 1.9028141178886423e-05, "loss": 0.2084, "step": 2378 }, { "epoch": 0.43, "learning_rate": 1.9019655826157848e-05, "loss": 0.2658, "step": 2379 }, { "epoch": 0.43, "learning_rate": 1.9011169087372438e-05, "loss": 0.2593, "step": 2380 }, { "epoch": 0.43, "learning_rate": 1.900268096545659e-05, "loss": 0.2238, "step": 2381 }, { "epoch": 0.43, "learning_rate": 1.8994191463337177e-05, "loss": 0.2235, "step": 2382 }, { "epoch": 0.43, "learning_rate": 1.8985700583941553e-05, "loss": 0.3284, "step": 2383 }, { "epoch": 0.43, "learning_rate": 1.8977208330197533e-05, "loss": 0.2747, "step": 2384 }, { "epoch": 0.43, "learning_rate": 1.896871470503341e-05, "loss": 0.2499, "step": 2385 }, { "epoch": 0.43, "learning_rate": 1.896021971137797e-05, "loss": 0.193, "step": 2386 }, { "epoch": 0.43, "learning_rate": 1.895172335216044e-05, "loss": 0.2247, "step": 2387 }, { "epoch": 0.43, "learning_rate": 1.8943225630310535e-05, "loss": 0.1884, "step": 2388 }, { "epoch": 0.43, "learning_rate": 1.8934726548758445e-05, "loss": 0.2539, "step": 2389 }, { "epoch": 0.43, "learning_rate": 1.8926226110434814e-05, "loss": 0.1882, "step": 2390 }, { "epoch": 0.43, "learning_rate": 1.8917724318270762e-05, "loss": 0.2386, "step": 2391 }, { "epoch": 0.43, "learning_rate": 1.8909221175197882e-05, "loss": 0.2369, "step": 2392 }, { "epoch": 0.43, "learning_rate": 1.890071668414822e-05, "loss": 0.2735, "step": 2393 }, { "epoch": 0.43, "learning_rate": 1.88922108480543e-05, "loss": 0.2896, "step": 2394 }, { "epoch": 0.43, "learning_rate": 1.888370366984909e-05, "loss": 0.2506, "step": 2395 }, { "epoch": 0.43, "learning_rate": 1.887519515246604e-05, "loss": 0.2408, "step": 2396 }, { "epoch": 0.43, "learning_rate": 1.886668529883905e-05, "loss": 0.2355, "step": 2397 }, { "epoch": 0.43, "learning_rate": 1.8858174111902503e-05, "loss": 0.2577, "step": 2398 }, { "epoch": 0.43, "learning_rate": 1.8849661594591203e-05, "loss": 0.2742, "step": 2399 }, { "epoch": 0.44, "learning_rate": 1.8841147749840447e-05, "loss": 0.2417, "step": 2400 }, { "epoch": 0.44, "learning_rate": 1.883263258058597e-05, "loss": 0.2102, "step": 2401 }, { "epoch": 0.44, "learning_rate": 1.8824116089763976e-05, "loss": 0.3619, "step": 2402 }, { "epoch": 0.44, "learning_rate": 1.881559828031112e-05, "loss": 0.2397, "step": 2403 }, { "epoch": 0.44, "learning_rate": 1.880707915516451e-05, "loss": 0.2384, "step": 2404 }, { "epoch": 0.44, "learning_rate": 1.8798558717261708e-05, "loss": 0.2266, "step": 2405 }, { "epoch": 0.44, "learning_rate": 1.8790036969540728e-05, "loss": 0.2725, "step": 2406 }, { "epoch": 0.44, "learning_rate": 1.8781513914940038e-05, "loss": 0.2634, "step": 2407 }, { "epoch": 0.44, "learning_rate": 1.877298955639856e-05, "loss": 0.2465, "step": 2408 }, { "epoch": 0.44, "learning_rate": 1.8764463896855654e-05, "loss": 0.1499, "step": 2409 }, { "epoch": 0.44, "learning_rate": 1.8755936939251145e-05, "loss": 0.224, "step": 2410 }, { "epoch": 0.44, "learning_rate": 1.8747408686525284e-05, "loss": 0.3086, "step": 2411 }, { "epoch": 0.44, "learning_rate": 1.8738879141618793e-05, "loss": 0.2923, "step": 2412 }, { "epoch": 0.44, "learning_rate": 1.8730348307472828e-05, "loss": 0.2323, "step": 2413 }, { "epoch": 0.44, "learning_rate": 1.8721816187028976e-05, "loss": 0.2324, "step": 2414 }, { "epoch": 0.44, "learning_rate": 1.8713282783229293e-05, "loss": 0.2186, "step": 2415 }, { "epoch": 0.44, "learning_rate": 1.8704748099016263e-05, "loss": 0.1435, "step": 2416 }, { "epoch": 0.44, "learning_rate": 1.8696212137332815e-05, "loss": 0.2191, "step": 2417 }, { "epoch": 0.44, "learning_rate": 1.8687674901122316e-05, "loss": 0.213, "step": 2418 }, { "epoch": 0.44, "learning_rate": 1.8679136393328566e-05, "loss": 0.2388, "step": 2419 }, { "epoch": 0.44, "learning_rate": 1.8670596616895826e-05, "loss": 0.2684, "step": 2420 }, { "epoch": 0.44, "learning_rate": 1.866205557476877e-05, "loss": 0.2493, "step": 2421 }, { "epoch": 0.44, "learning_rate": 1.8653513269892523e-05, "loss": 0.2137, "step": 2422 }, { "epoch": 0.44, "learning_rate": 1.8644969705212636e-05, "loss": 0.1925, "step": 2423 }, { "epoch": 0.44, "learning_rate": 1.8636424883675104e-05, "loss": 0.2086, "step": 2424 }, { "epoch": 0.44, "learning_rate": 1.8627878808226348e-05, "loss": 0.2438, "step": 2425 }, { "epoch": 0.44, "learning_rate": 1.861933148181322e-05, "loss": 0.2594, "step": 2426 }, { "epoch": 0.44, "learning_rate": 1.8610782907383017e-05, "loss": 0.2279, "step": 2427 }, { "epoch": 0.44, "learning_rate": 1.8602233087883446e-05, "loss": 0.2624, "step": 2428 }, { "epoch": 0.44, "learning_rate": 1.8593682026262662e-05, "loss": 0.2456, "step": 2429 }, { "epoch": 0.44, "learning_rate": 1.8585129725469237e-05, "loss": 0.2681, "step": 2430 }, { "epoch": 0.44, "learning_rate": 1.8576576188452168e-05, "loss": 0.1767, "step": 2431 }, { "epoch": 0.44, "learning_rate": 1.856802141816089e-05, "loss": 0.2534, "step": 2432 }, { "epoch": 0.44, "learning_rate": 1.8559465417545266e-05, "loss": 0.215, "step": 2433 }, { "epoch": 0.44, "learning_rate": 1.8550908189555556e-05, "loss": 0.173, "step": 2434 }, { "epoch": 0.44, "learning_rate": 1.854234973714247e-05, "loss": 0.2423, "step": 2435 }, { "epoch": 0.44, "learning_rate": 1.853379006325713e-05, "loss": 0.1901, "step": 2436 }, { "epoch": 0.44, "learning_rate": 1.852522917085109e-05, "loss": 0.2159, "step": 2437 }, { "epoch": 0.44, "learning_rate": 1.8516667062876297e-05, "loss": 0.2311, "step": 2438 }, { "epoch": 0.44, "learning_rate": 1.850810374228515e-05, "loss": 0.2527, "step": 2439 }, { "epoch": 0.44, "learning_rate": 1.8499539212030444e-05, "loss": 0.2061, "step": 2440 }, { "epoch": 0.44, "learning_rate": 1.8490973475065407e-05, "loss": 0.2896, "step": 2441 }, { "epoch": 0.44, "learning_rate": 1.848240653434366e-05, "loss": 0.2436, "step": 2442 }, { "epoch": 0.44, "learning_rate": 1.847383839281927e-05, "loss": 0.283, "step": 2443 }, { "epoch": 0.44, "learning_rate": 1.8465269053446694e-05, "loss": 0.2711, "step": 2444 }, { "epoch": 0.44, "learning_rate": 1.8456698519180805e-05, "loss": 0.1415, "step": 2445 }, { "epoch": 0.44, "learning_rate": 1.8448126792976902e-05, "loss": 0.2337, "step": 2446 }, { "epoch": 0.44, "learning_rate": 1.8439553877790672e-05, "loss": 0.288, "step": 2447 }, { "epoch": 0.44, "learning_rate": 1.8430979776578244e-05, "loss": 0.2169, "step": 2448 }, { "epoch": 0.44, "learning_rate": 1.8422404492296128e-05, "loss": 0.2367, "step": 2449 }, { "epoch": 0.44, "learning_rate": 1.8413828027901248e-05, "loss": 0.2657, "step": 2450 }, { "epoch": 0.44, "learning_rate": 1.840525038635095e-05, "loss": 0.2715, "step": 2451 }, { "epoch": 0.44, "learning_rate": 1.839667157060296e-05, "loss": 0.2393, "step": 2452 }, { "epoch": 0.44, "learning_rate": 1.838809158361543e-05, "loss": 0.2976, "step": 2453 }, { "epoch": 0.44, "learning_rate": 1.8379510428346913e-05, "loss": 0.1906, "step": 2454 }, { "epoch": 0.45, "learning_rate": 1.8370928107756357e-05, "loss": 0.1294, "step": 2455 }, { "epoch": 0.45, "learning_rate": 1.836234462480312e-05, "loss": 0.1676, "step": 2456 }, { "epoch": 0.45, "learning_rate": 1.8353759982446944e-05, "loss": 0.2156, "step": 2457 }, { "epoch": 0.45, "learning_rate": 1.8345174183648006e-05, "loss": 0.2146, "step": 2458 }, { "epoch": 0.45, "learning_rate": 1.8336587231366837e-05, "loss": 0.2583, "step": 2459 }, { "epoch": 0.45, "learning_rate": 1.83279991285644e-05, "loss": 0.2526, "step": 2460 }, { "epoch": 0.45, "learning_rate": 1.8319409878202036e-05, "loss": 0.2214, "step": 2461 }, { "epoch": 0.45, "learning_rate": 1.8310819483241498e-05, "loss": 0.2196, "step": 2462 }, { "epoch": 0.45, "learning_rate": 1.8302227946644916e-05, "loss": 0.1713, "step": 2463 }, { "epoch": 0.45, "learning_rate": 1.829363527137482e-05, "loss": 0.207, "step": 2464 }, { "epoch": 0.45, "learning_rate": 1.8285041460394148e-05, "loss": 0.1889, "step": 2465 }, { "epoch": 0.45, "learning_rate": 1.8276446516666194e-05, "loss": 0.2117, "step": 2466 }, { "epoch": 0.45, "learning_rate": 1.8267850443154687e-05, "loss": 0.2598, "step": 2467 }, { "epoch": 0.45, "learning_rate": 1.825925324282371e-05, "loss": 0.2658, "step": 2468 }, { "epoch": 0.45, "learning_rate": 1.8250654918637747e-05, "loss": 0.2023, "step": 2469 }, { "epoch": 0.45, "learning_rate": 1.824205547356168e-05, "loss": 0.1862, "step": 2470 }, { "epoch": 0.45, "learning_rate": 1.823345491056076e-05, "loss": 0.249, "step": 2471 }, { "epoch": 0.45, "learning_rate": 1.822485323260063e-05, "loss": 0.1872, "step": 2472 }, { "epoch": 0.45, "learning_rate": 1.8216250442647326e-05, "loss": 0.2146, "step": 2473 }, { "epoch": 0.45, "learning_rate": 1.820764654366725e-05, "loss": 0.1827, "step": 2474 }, { "epoch": 0.45, "learning_rate": 1.8199041538627213e-05, "loss": 0.2584, "step": 2475 }, { "epoch": 0.45, "learning_rate": 1.819043543049437e-05, "loss": 0.2153, "step": 2476 }, { "epoch": 0.45, "learning_rate": 1.8181828222236294e-05, "loss": 0.2477, "step": 2477 }, { "epoch": 0.45, "learning_rate": 1.8173219916820913e-05, "loss": 0.2734, "step": 2478 }, { "epoch": 0.45, "learning_rate": 1.816461051721654e-05, "loss": 0.1881, "step": 2479 }, { "epoch": 0.45, "learning_rate": 1.815600002639187e-05, "loss": 0.2821, "step": 2480 }, { "epoch": 0.45, "learning_rate": 1.8147388447315967e-05, "loss": 0.203, "step": 2481 }, { "epoch": 0.45, "learning_rate": 1.8138775782958284e-05, "loss": 0.1869, "step": 2482 }, { "epoch": 0.45, "learning_rate": 1.8130162036288624e-05, "loss": 0.217, "step": 2483 }, { "epoch": 0.45, "learning_rate": 1.812154721027718e-05, "loss": 0.2205, "step": 2484 }, { "epoch": 0.45, "learning_rate": 1.811293130789452e-05, "loss": 0.1696, "step": 2485 }, { "epoch": 0.45, "learning_rate": 1.8104314332111577e-05, "loss": 0.2464, "step": 2486 }, { "epoch": 0.45, "learning_rate": 1.8095696285899657e-05, "loss": 0.2598, "step": 2487 }, { "epoch": 0.45, "learning_rate": 1.808707717223042e-05, "loss": 0.2527, "step": 2488 }, { "epoch": 0.45, "learning_rate": 1.8078456994075918e-05, "loss": 0.2148, "step": 2489 }, { "epoch": 0.45, "learning_rate": 1.8069835754408556e-05, "loss": 0.2305, "step": 2490 }, { "epoch": 0.45, "learning_rate": 1.806121345620111e-05, "loss": 0.2358, "step": 2491 }, { "epoch": 0.45, "learning_rate": 1.8052590102426717e-05, "loss": 0.2459, "step": 2492 }, { "epoch": 0.45, "learning_rate": 1.8043965696058882e-05, "loss": 0.3254, "step": 2493 }, { "epoch": 0.45, "learning_rate": 1.803534024007147e-05, "loss": 0.1783, "step": 2494 }, { "epoch": 0.45, "learning_rate": 1.8026713737438702e-05, "loss": 0.2066, "step": 2495 }, { "epoch": 0.45, "learning_rate": 1.8018086191135178e-05, "loss": 0.2174, "step": 2496 }, { "epoch": 0.45, "learning_rate": 1.8009457604135834e-05, "loss": 0.2349, "step": 2497 }, { "epoch": 0.45, "learning_rate": 1.8000827979415994e-05, "loss": 0.2822, "step": 2498 }, { "epoch": 0.45, "learning_rate": 1.799219731995131e-05, "loss": 0.1607, "step": 2499 }, { "epoch": 0.45, "learning_rate": 1.7983565628717804e-05, "loss": 0.2816, "step": 2500 }, { "epoch": 0.45, "learning_rate": 1.7974932908691868e-05, "loss": 0.2114, "step": 2501 }, { "epoch": 0.45, "learning_rate": 1.7966299162850217e-05, "loss": 0.188, "step": 2502 }, { "epoch": 0.45, "learning_rate": 1.7957664394169944e-05, "loss": 0.2305, "step": 2503 }, { "epoch": 0.45, "learning_rate": 1.7949028605628493e-05, "loss": 0.1955, "step": 2504 }, { "epoch": 0.45, "learning_rate": 1.794039180020365e-05, "loss": 0.2389, "step": 2505 }, { "epoch": 0.45, "learning_rate": 1.7931753980873556e-05, "loss": 0.2523, "step": 2506 }, { "epoch": 0.45, "learning_rate": 1.7923115150616707e-05, "loss": 0.2551, "step": 2507 }, { "epoch": 0.45, "learning_rate": 1.7914475312411935e-05, "loss": 0.2756, "step": 2508 }, { "epoch": 0.45, "learning_rate": 1.7905834469238428e-05, "loss": 0.2534, "step": 2509 }, { "epoch": 0.46, "learning_rate": 1.7897192624075725e-05, "loss": 0.188, "step": 2510 }, { "epoch": 0.46, "learning_rate": 1.78885497799037e-05, "loss": 0.2403, "step": 2511 }, { "epoch": 0.46, "learning_rate": 1.7879905939702582e-05, "loss": 0.1979, "step": 2512 }, { "epoch": 0.46, "learning_rate": 1.7871261106452935e-05, "loss": 0.2949, "step": 2513 }, { "epoch": 0.46, "learning_rate": 1.7862615283135665e-05, "loss": 0.1545, "step": 2514 }, { "epoch": 0.46, "learning_rate": 1.7853968472732033e-05, "loss": 0.2145, "step": 2515 }, { "epoch": 0.46, "learning_rate": 1.7845320678223616e-05, "loss": 0.2187, "step": 2516 }, { "epoch": 0.46, "learning_rate": 1.7836671902592358e-05, "loss": 0.2465, "step": 2517 }, { "epoch": 0.46, "learning_rate": 1.782802214882052e-05, "loss": 0.2368, "step": 2518 }, { "epoch": 0.46, "learning_rate": 1.781937141989071e-05, "loss": 0.2287, "step": 2519 }, { "epoch": 0.46, "learning_rate": 1.781071971878587e-05, "loss": 0.2067, "step": 2520 }, { "epoch": 0.46, "learning_rate": 1.780206704848928e-05, "loss": 0.1817, "step": 2521 }, { "epoch": 0.46, "learning_rate": 1.779341341198455e-05, "loss": 0.2013, "step": 2522 }, { "epoch": 0.46, "learning_rate": 1.7784758812255626e-05, "loss": 0.1987, "step": 2523 }, { "epoch": 0.46, "learning_rate": 1.777610325228678e-05, "loss": 0.198, "step": 2524 }, { "epoch": 0.46, "learning_rate": 1.776744673506263e-05, "loss": 0.2054, "step": 2525 }, { "epoch": 0.46, "learning_rate": 1.7758789263568105e-05, "loss": 0.2314, "step": 2526 }, { "epoch": 0.46, "learning_rate": 1.775013084078848e-05, "loss": 0.3157, "step": 2527 }, { "epoch": 0.46, "learning_rate": 1.7741471469709344e-05, "loss": 0.2317, "step": 2528 }, { "epoch": 0.46, "learning_rate": 1.7732811153316623e-05, "loss": 0.1844, "step": 2529 }, { "epoch": 0.46, "learning_rate": 1.7724149894596567e-05, "loss": 0.2736, "step": 2530 }, { "epoch": 0.46, "learning_rate": 1.7715487696535746e-05, "loss": 0.2127, "step": 2531 }, { "epoch": 0.46, "learning_rate": 1.7706824562121066e-05, "loss": 0.1879, "step": 2532 }, { "epoch": 0.46, "learning_rate": 1.7698160494339727e-05, "loss": 0.2645, "step": 2533 }, { "epoch": 0.46, "learning_rate": 1.768949549617929e-05, "loss": 0.1973, "step": 2534 }, { "epoch": 0.46, "learning_rate": 1.7680829570627605e-05, "loss": 0.1838, "step": 2535 }, { "epoch": 0.46, "learning_rate": 1.7672162720672866e-05, "loss": 0.217, "step": 2536 }, { "epoch": 0.46, "learning_rate": 1.7663494949303568e-05, "loss": 0.2397, "step": 2537 }, { "epoch": 0.46, "learning_rate": 1.7654826259508527e-05, "loss": 0.1838, "step": 2538 }, { "epoch": 0.46, "learning_rate": 1.764615665427689e-05, "loss": 0.1735, "step": 2539 }, { "epoch": 0.46, "learning_rate": 1.7637486136598093e-05, "loss": 0.2717, "step": 2540 }, { "epoch": 0.46, "learning_rate": 1.7628814709461913e-05, "loss": 0.1679, "step": 2541 }, { "epoch": 0.46, "learning_rate": 1.762014237585843e-05, "loss": 0.226, "step": 2542 }, { "epoch": 0.46, "learning_rate": 1.7611469138778028e-05, "loss": 0.2498, "step": 2543 }, { "epoch": 0.46, "learning_rate": 1.760279500121142e-05, "loss": 0.2883, "step": 2544 }, { "epoch": 0.46, "learning_rate": 1.759411996614961e-05, "loss": 0.1718, "step": 2545 }, { "epoch": 0.46, "learning_rate": 1.7585444036583932e-05, "loss": 0.2876, "step": 2546 }, { "epoch": 0.46, "learning_rate": 1.7576767215506006e-05, "loss": 0.2279, "step": 2547 }, { "epoch": 0.46, "learning_rate": 1.7568089505907787e-05, "loss": 0.2522, "step": 2548 }, { "epoch": 0.46, "learning_rate": 1.7559410910781508e-05, "loss": 0.2003, "step": 2549 }, { "epoch": 0.46, "learning_rate": 1.7550731433119726e-05, "loss": 0.1988, "step": 2550 }, { "epoch": 0.46, "learning_rate": 1.7542051075915296e-05, "loss": 0.2303, "step": 2551 }, { "epoch": 0.46, "learning_rate": 1.7533369842161375e-05, "loss": 0.1649, "step": 2552 }, { "epoch": 0.46, "learning_rate": 1.752468773485143e-05, "loss": 0.1955, "step": 2553 }, { "epoch": 0.46, "learning_rate": 1.7516004756979217e-05, "loss": 0.2228, "step": 2554 }, { "epoch": 0.46, "learning_rate": 1.7507320911538798e-05, "loss": 0.1841, "step": 2555 }, { "epoch": 0.46, "learning_rate": 1.7498636201524538e-05, "loss": 0.2399, "step": 2556 }, { "epoch": 0.46, "learning_rate": 1.7489950629931095e-05, "loss": 0.23, "step": 2557 }, { "epoch": 0.46, "learning_rate": 1.748126419975343e-05, "loss": 0.2083, "step": 2558 }, { "epoch": 0.46, "learning_rate": 1.747257691398679e-05, "loss": 0.2865, "step": 2559 }, { "epoch": 0.46, "learning_rate": 1.7463888775626726e-05, "loss": 0.2438, "step": 2560 }, { "epoch": 0.46, "learning_rate": 1.745519978766908e-05, "loss": 0.3015, "step": 2561 }, { "epoch": 0.46, "learning_rate": 1.7446509953109984e-05, "loss": 0.2782, "step": 2562 }, { "epoch": 0.46, "learning_rate": 1.743781927494587e-05, "loss": 0.1961, "step": 2563 }, { "epoch": 0.46, "learning_rate": 1.742912775617345e-05, "loss": 0.3119, "step": 2564 }, { "epoch": 0.47, "learning_rate": 1.7420435399789738e-05, "loss": 0.2054, "step": 2565 }, { "epoch": 0.47, "learning_rate": 1.7411742208792025e-05, "loss": 0.1655, "step": 2566 }, { "epoch": 0.47, "learning_rate": 1.7403048186177897e-05, "loss": 0.2176, "step": 2567 }, { "epoch": 0.47, "learning_rate": 1.739435333494523e-05, "loss": 0.213, "step": 2568 }, { "epoch": 0.47, "learning_rate": 1.7385657658092166e-05, "loss": 0.1795, "step": 2569 }, { "epoch": 0.47, "learning_rate": 1.7376961158617165e-05, "loss": 0.2709, "step": 2570 }, { "epoch": 0.47, "learning_rate": 1.7368263839518935e-05, "loss": 0.1915, "step": 2571 }, { "epoch": 0.47, "learning_rate": 1.73595657037965e-05, "loss": 0.1796, "step": 2572 }, { "epoch": 0.47, "learning_rate": 1.735086675444914e-05, "loss": 0.2212, "step": 2573 }, { "epoch": 0.47, "learning_rate": 1.7342166994476418e-05, "loss": 0.2156, "step": 2574 }, { "epoch": 0.47, "learning_rate": 1.73334664268782e-05, "loss": 0.1834, "step": 2575 }, { "epoch": 0.47, "learning_rate": 1.73247650546546e-05, "loss": 0.2446, "step": 2576 }, { "epoch": 0.47, "learning_rate": 1.731606288080603e-05, "loss": 0.2108, "step": 2577 }, { "epoch": 0.47, "learning_rate": 1.730735990833317e-05, "loss": 0.2072, "step": 2578 }, { "epoch": 0.47, "learning_rate": 1.7298656140236978e-05, "loss": 0.2036, "step": 2579 }, { "epoch": 0.47, "learning_rate": 1.7289951579518683e-05, "loss": 0.2215, "step": 2580 }, { "epoch": 0.47, "learning_rate": 1.7281246229179793e-05, "loss": 0.1847, "step": 2581 }, { "epoch": 0.47, "learning_rate": 1.7272540092222078e-05, "loss": 0.3033, "step": 2582 }, { "epoch": 0.47, "learning_rate": 1.7263833171647594e-05, "loss": 0.2208, "step": 2583 }, { "epoch": 0.47, "learning_rate": 1.725512547045866e-05, "loss": 0.2401, "step": 2584 }, { "epoch": 0.47, "learning_rate": 1.724641699165785e-05, "loss": 0.2385, "step": 2585 }, { "epoch": 0.47, "learning_rate": 1.723770773824804e-05, "loss": 0.1887, "step": 2586 }, { "epoch": 0.47, "learning_rate": 1.722899771323234e-05, "loss": 0.166, "step": 2587 }, { "epoch": 0.47, "learning_rate": 1.722028691961414e-05, "loss": 0.2882, "step": 2588 }, { "epoch": 0.47, "learning_rate": 1.72115753603971e-05, "loss": 0.2068, "step": 2589 }, { "epoch": 0.47, "learning_rate": 1.720286303858513e-05, "loss": 0.2184, "step": 2590 }, { "epoch": 0.47, "learning_rate": 1.7194149957182412e-05, "loss": 0.205, "step": 2591 }, { "epoch": 0.47, "learning_rate": 1.7185436119193394e-05, "loss": 0.211, "step": 2592 }, { "epoch": 0.47, "learning_rate": 1.7176721527622778e-05, "loss": 0.1429, "step": 2593 }, { "epoch": 0.47, "learning_rate": 1.7168006185475525e-05, "loss": 0.2603, "step": 2594 }, { "epoch": 0.47, "learning_rate": 1.7159290095756854e-05, "loss": 0.2567, "step": 2595 }, { "epoch": 0.47, "learning_rate": 1.7150573261472258e-05, "loss": 0.2344, "step": 2596 }, { "epoch": 0.47, "learning_rate": 1.714185568562746e-05, "loss": 0.2379, "step": 2597 }, { "epoch": 0.47, "learning_rate": 1.7133137371228464e-05, "loss": 0.1719, "step": 2598 }, { "epoch": 0.47, "learning_rate": 1.7124418321281512e-05, "loss": 0.2473, "step": 2599 }, { "epoch": 0.47, "learning_rate": 1.7115698538793098e-05, "loss": 0.2491, "step": 2600 }, { "epoch": 0.47, "learning_rate": 1.7106978026769986e-05, "loss": 0.2141, "step": 2601 }, { "epoch": 0.47, "learning_rate": 1.7098256788219172e-05, "loss": 0.2587, "step": 2602 }, { "epoch": 0.47, "learning_rate": 1.7089534826147923e-05, "loss": 0.2033, "step": 2603 }, { "epoch": 0.47, "learning_rate": 1.7080812143563733e-05, "loss": 0.2239, "step": 2604 }, { "epoch": 0.47, "learning_rate": 1.707208874347436e-05, "loss": 0.1996, "step": 2605 }, { "epoch": 0.47, "learning_rate": 1.7063364628887812e-05, "loss": 0.2317, "step": 2606 }, { "epoch": 0.47, "learning_rate": 1.7054639802812323e-05, "loss": 0.2448, "step": 2607 }, { "epoch": 0.47, "learning_rate": 1.70459142682564e-05, "loss": 0.2482, "step": 2608 }, { "epoch": 0.47, "learning_rate": 1.7037188028228756e-05, "loss": 0.3171, "step": 2609 }, { "epoch": 0.47, "learning_rate": 1.70284610857384e-05, "loss": 0.2661, "step": 2610 }, { "epoch": 0.47, "learning_rate": 1.7019733443794533e-05, "loss": 0.2381, "step": 2611 }, { "epoch": 0.47, "learning_rate": 1.7011005105406634e-05, "loss": 0.2309, "step": 2612 }, { "epoch": 0.47, "learning_rate": 1.7002276073584394e-05, "loss": 0.2026, "step": 2613 }, { "epoch": 0.47, "learning_rate": 1.6993546351337762e-05, "loss": 0.2296, "step": 2614 }, { "epoch": 0.47, "learning_rate": 1.698481594167692e-05, "loss": 0.1591, "step": 2615 }, { "epoch": 0.47, "learning_rate": 1.6976084847612282e-05, "loss": 0.1474, "step": 2616 }, { "epoch": 0.47, "learning_rate": 1.6967353072154506e-05, "loss": 0.2234, "step": 2617 }, { "epoch": 0.47, "learning_rate": 1.695862061831448e-05, "loss": 0.2238, "step": 2618 }, { "epoch": 0.47, "learning_rate": 1.6949887489103322e-05, "loss": 0.2814, "step": 2619 }, { "epoch": 0.47, "learning_rate": 1.6941153687532392e-05, "loss": 0.2041, "step": 2620 }, { "epoch": 0.48, "learning_rate": 1.693241921661328e-05, "loss": 0.2354, "step": 2621 }, { "epoch": 0.48, "learning_rate": 1.6923684079357803e-05, "loss": 0.1593, "step": 2622 }, { "epoch": 0.48, "learning_rate": 1.6914948278778005e-05, "loss": 0.1943, "step": 2623 }, { "epoch": 0.48, "learning_rate": 1.690621181788616e-05, "loss": 0.1249, "step": 2624 }, { "epoch": 0.48, "learning_rate": 1.6897474699694782e-05, "loss": 0.2103, "step": 2625 }, { "epoch": 0.48, "learning_rate": 1.6888736927216594e-05, "loss": 0.1711, "step": 2626 }, { "epoch": 0.48, "learning_rate": 1.6879998503464565e-05, "loss": 0.2743, "step": 2627 }, { "epoch": 0.48, "learning_rate": 1.6871259431451865e-05, "loss": 0.2181, "step": 2628 }, { "epoch": 0.48, "learning_rate": 1.6862519714191898e-05, "loss": 0.249, "step": 2629 }, { "epoch": 0.48, "learning_rate": 1.6853779354698296e-05, "loss": 0.2571, "step": 2630 }, { "epoch": 0.48, "learning_rate": 1.684503835598491e-05, "loss": 0.1804, "step": 2631 }, { "epoch": 0.48, "learning_rate": 1.6836296721065808e-05, "loss": 0.2357, "step": 2632 }, { "epoch": 0.48, "learning_rate": 1.6827554452955272e-05, "loss": 0.2461, "step": 2633 }, { "epoch": 0.48, "learning_rate": 1.6818811554667815e-05, "loss": 0.2681, "step": 2634 }, { "epoch": 0.48, "learning_rate": 1.681006802921816e-05, "loss": 0.1406, "step": 2635 }, { "epoch": 0.48, "learning_rate": 1.680132387962125e-05, "loss": 0.2328, "step": 2636 }, { "epoch": 0.48, "learning_rate": 1.6792579108892237e-05, "loss": 0.1843, "step": 2637 }, { "epoch": 0.48, "learning_rate": 1.678383372004649e-05, "loss": 0.2198, "step": 2638 }, { "epoch": 0.48, "learning_rate": 1.67750877160996e-05, "loss": 0.2539, "step": 2639 }, { "epoch": 0.48, "learning_rate": 1.676634110006735e-05, "loss": 0.1901, "step": 2640 }, { "epoch": 0.48, "learning_rate": 1.6757593874965754e-05, "loss": 0.2156, "step": 2641 }, { "epoch": 0.48, "learning_rate": 1.674884604381103e-05, "loss": 0.2478, "step": 2642 }, { "epoch": 0.48, "learning_rate": 1.6740097609619595e-05, "loss": 0.2263, "step": 2643 }, { "epoch": 0.48, "learning_rate": 1.6731348575408094e-05, "loss": 0.2053, "step": 2644 }, { "epoch": 0.48, "learning_rate": 1.672259894419335e-05, "loss": 0.2614, "step": 2645 }, { "epoch": 0.48, "learning_rate": 1.6713848718992432e-05, "loss": 0.2756, "step": 2646 }, { "epoch": 0.48, "learning_rate": 1.6705097902822568e-05, "loss": 0.1476, "step": 2647 }, { "epoch": 0.48, "learning_rate": 1.669634649870123e-05, "loss": 0.1868, "step": 2648 }, { "epoch": 0.48, "learning_rate": 1.6687594509646063e-05, "loss": 0.3185, "step": 2649 }, { "epoch": 0.48, "learning_rate": 1.6678841938674932e-05, "loss": 0.3058, "step": 2650 }, { "epoch": 0.48, "learning_rate": 1.6670088788805895e-05, "loss": 0.1733, "step": 2651 }, { "epoch": 0.48, "learning_rate": 1.6661335063057207e-05, "loss": 0.2842, "step": 2652 }, { "epoch": 0.48, "learning_rate": 1.665258076444734e-05, "loss": 0.273, "step": 2653 }, { "epoch": 0.48, "learning_rate": 1.6643825895994934e-05, "loss": 0.1979, "step": 2654 }, { "epoch": 0.48, "learning_rate": 1.663507046071884e-05, "loss": 0.2366, "step": 2655 }, { "epoch": 0.48, "learning_rate": 1.6626314461638125e-05, "loss": 0.2327, "step": 2656 }, { "epoch": 0.48, "learning_rate": 1.6617557901772005e-05, "loss": 0.2142, "step": 2657 }, { "epoch": 0.48, "learning_rate": 1.660880078413993e-05, "loss": 0.2115, "step": 2658 }, { "epoch": 0.48, "learning_rate": 1.660004311176153e-05, "loss": 0.3016, "step": 2659 }, { "epoch": 0.48, "learning_rate": 1.6591284887656615e-05, "loss": 0.2389, "step": 2660 }, { "epoch": 0.48, "learning_rate": 1.6582526114845196e-05, "loss": 0.2687, "step": 2661 }, { "epoch": 0.48, "learning_rate": 1.657376679634747e-05, "loss": 0.1862, "step": 2662 }, { "epoch": 0.48, "learning_rate": 1.656500693518383e-05, "loss": 0.2496, "step": 2663 }, { "epoch": 0.48, "learning_rate": 1.655624653437484e-05, "loss": 0.2252, "step": 2664 }, { "epoch": 0.48, "learning_rate": 1.6547485596941268e-05, "loss": 0.2218, "step": 2665 }, { "epoch": 0.48, "learning_rate": 1.6538724125904052e-05, "loss": 0.2196, "step": 2666 }, { "epoch": 0.48, "learning_rate": 1.6529962124284326e-05, "loss": 0.2206, "step": 2667 }, { "epoch": 0.48, "learning_rate": 1.65211995951034e-05, "loss": 0.1736, "step": 2668 }, { "epoch": 0.48, "learning_rate": 1.651243654138276e-05, "loss": 0.1945, "step": 2669 }, { "epoch": 0.48, "learning_rate": 1.6503672966144095e-05, "loss": 0.2439, "step": 2670 }, { "epoch": 0.48, "learning_rate": 1.6494908872409247e-05, "loss": 0.2183, "step": 2671 }, { "epoch": 0.48, "learning_rate": 1.648614426320025e-05, "loss": 0.2023, "step": 2672 }, { "epoch": 0.48, "learning_rate": 1.6477379141539322e-05, "loss": 0.1425, "step": 2673 }, { "epoch": 0.48, "learning_rate": 1.646861351044884e-05, "loss": 0.1551, "step": 2674 }, { "epoch": 0.48, "learning_rate": 1.6459847372951375e-05, "loss": 0.2188, "step": 2675 }, { "epoch": 0.49, "learning_rate": 1.6451080732069656e-05, "loss": 0.2174, "step": 2676 }, { "epoch": 0.49, "learning_rate": 1.6442313590826608e-05, "loss": 0.2158, "step": 2677 }, { "epoch": 0.49, "learning_rate": 1.6433545952245306e-05, "loss": 0.2523, "step": 2678 }, { "epoch": 0.49, "learning_rate": 1.6424777819349e-05, "loss": 0.1806, "step": 2679 }, { "epoch": 0.49, "learning_rate": 1.6416009195161126e-05, "loss": 0.2191, "step": 2680 }, { "epoch": 0.49, "learning_rate": 1.6407240082705273e-05, "loss": 0.2098, "step": 2681 }, { "epoch": 0.49, "learning_rate": 1.639847048500521e-05, "loss": 0.2833, "step": 2682 }, { "epoch": 0.49, "learning_rate": 1.6389700405084857e-05, "loss": 0.2471, "step": 2683 }, { "epoch": 0.49, "learning_rate": 1.6380929845968325e-05, "loss": 0.2057, "step": 2684 }, { "epoch": 0.49, "learning_rate": 1.6372158810679863e-05, "loss": 0.2301, "step": 2685 }, { "epoch": 0.49, "learning_rate": 1.6363387302243905e-05, "loss": 0.1966, "step": 2686 }, { "epoch": 0.49, "learning_rate": 1.6354615323685044e-05, "loss": 0.2219, "step": 2687 }, { "epoch": 0.49, "learning_rate": 1.6345842878028017e-05, "loss": 0.1956, "step": 2688 }, { "epoch": 0.49, "learning_rate": 1.6337069968297757e-05, "loss": 0.2941, "step": 2689 }, { "epoch": 0.49, "learning_rate": 1.6328296597519318e-05, "loss": 0.2331, "step": 2690 }, { "epoch": 0.49, "learning_rate": 1.6319522768717947e-05, "loss": 0.2263, "step": 2691 }, { "epoch": 0.49, "learning_rate": 1.6310748484919024e-05, "loss": 0.2531, "step": 2692 }, { "epoch": 0.49, "learning_rate": 1.6301973749148096e-05, "loss": 0.1774, "step": 2693 }, { "epoch": 0.49, "learning_rate": 1.629319856443087e-05, "loss": 0.2251, "step": 2694 }, { "epoch": 0.49, "learning_rate": 1.62844229337932e-05, "loss": 0.1522, "step": 2695 }, { "epoch": 0.49, "learning_rate": 1.6275646860261098e-05, "loss": 0.1452, "step": 2696 }, { "epoch": 0.49, "learning_rate": 1.626687034686073e-05, "loss": 0.1367, "step": 2697 }, { "epoch": 0.49, "learning_rate": 1.6258093396618407e-05, "loss": 0.2335, "step": 2698 }, { "epoch": 0.49, "learning_rate": 1.62493160125606e-05, "loss": 0.288, "step": 2699 }, { "epoch": 0.49, "learning_rate": 1.6240538197713918e-05, "loss": 0.1512, "step": 2700 }, { "epoch": 0.49, "learning_rate": 1.6231759955105133e-05, "loss": 0.2147, "step": 2701 }, { "epoch": 0.49, "learning_rate": 1.622298128776115e-05, "loss": 0.2036, "step": 2702 }, { "epoch": 0.49, "learning_rate": 1.6214202198709034e-05, "loss": 0.2269, "step": 2703 }, { "epoch": 0.49, "learning_rate": 1.6205422690975983e-05, "loss": 0.2667, "step": 2704 }, { "epoch": 0.49, "learning_rate": 1.6196642767589346e-05, "loss": 0.3026, "step": 2705 }, { "epoch": 0.49, "learning_rate": 1.6187862431576613e-05, "loss": 0.2222, "step": 2706 }, { "epoch": 0.49, "learning_rate": 1.6179081685965418e-05, "loss": 0.2247, "step": 2707 }, { "epoch": 0.49, "learning_rate": 1.617030053378354e-05, "loss": 0.1916, "step": 2708 }, { "epoch": 0.49, "learning_rate": 1.616151897805889e-05, "loss": 0.3033, "step": 2709 }, { "epoch": 0.49, "learning_rate": 1.6152737021819525e-05, "loss": 0.1967, "step": 2710 }, { "epoch": 0.49, "learning_rate": 1.6143954668093633e-05, "loss": 0.2126, "step": 2711 }, { "epoch": 0.49, "learning_rate": 1.6135171919909543e-05, "loss": 0.166, "step": 2712 }, { "epoch": 0.49, "learning_rate": 1.6126388780295725e-05, "loss": 0.1608, "step": 2713 }, { "epoch": 0.49, "learning_rate": 1.6117605252280774e-05, "loss": 0.2114, "step": 2714 }, { "epoch": 0.49, "learning_rate": 1.610882133889343e-05, "loss": 0.2151, "step": 2715 }, { "epoch": 0.49, "learning_rate": 1.610003704316256e-05, "loss": 0.2378, "step": 2716 }, { "epoch": 0.49, "learning_rate": 1.6091252368117156e-05, "loss": 0.2334, "step": 2717 }, { "epoch": 0.49, "learning_rate": 1.6082467316786355e-05, "loss": 0.1993, "step": 2718 }, { "epoch": 0.49, "learning_rate": 1.607368189219941e-05, "loss": 0.2165, "step": 2719 }, { "epoch": 0.49, "learning_rate": 1.6064896097385717e-05, "loss": 0.2744, "step": 2720 }, { "epoch": 0.49, "learning_rate": 1.6056109935374782e-05, "loss": 0.2626, "step": 2721 }, { "epoch": 0.49, "learning_rate": 1.6047323409196258e-05, "loss": 0.168, "step": 2722 }, { "epoch": 0.49, "learning_rate": 1.6038536521879907e-05, "loss": 0.1588, "step": 2723 }, { "epoch": 0.49, "learning_rate": 1.6029749276455617e-05, "loss": 0.1414, "step": 2724 }, { "epoch": 0.49, "learning_rate": 1.6020961675953415e-05, "loss": 0.2416, "step": 2725 }, { "epoch": 0.49, "learning_rate": 1.601217372340343e-05, "loss": 0.2552, "step": 2726 }, { "epoch": 0.49, "learning_rate": 1.6003385421835925e-05, "loss": 0.1486, "step": 2727 }, { "epoch": 0.49, "learning_rate": 1.5994596774281287e-05, "loss": 0.2764, "step": 2728 }, { "epoch": 0.49, "learning_rate": 1.5985807783770002e-05, "loss": 0.224, "step": 2729 }, { "epoch": 0.49, "learning_rate": 1.5977018453332696e-05, "loss": 0.2318, "step": 2730 }, { "epoch": 0.5, "learning_rate": 1.5968228786000107e-05, "loss": 0.2314, "step": 2731 }, { "epoch": 0.5, "learning_rate": 1.5959438784803086e-05, "loss": 0.2557, "step": 2732 }, { "epoch": 0.5, "learning_rate": 1.5950648452772586e-05, "loss": 0.2283, "step": 2733 }, { "epoch": 0.5, "learning_rate": 1.5941857792939702e-05, "loss": 0.2507, "step": 2734 }, { "epoch": 0.5, "learning_rate": 1.5933066808335622e-05, "loss": 0.1869, "step": 2735 }, { "epoch": 0.5, "learning_rate": 1.592427550199165e-05, "loss": 0.2407, "step": 2736 }, { "epoch": 0.5, "learning_rate": 1.5915483876939208e-05, "loss": 0.2014, "step": 2737 }, { "epoch": 0.5, "learning_rate": 1.590669193620981e-05, "loss": 0.2041, "step": 2738 }, { "epoch": 0.5, "learning_rate": 1.5897899682835105e-05, "loss": 0.152, "step": 2739 }, { "epoch": 0.5, "learning_rate": 1.588910711984683e-05, "loss": 0.2302, "step": 2740 }, { "epoch": 0.5, "learning_rate": 1.5880314250276832e-05, "loss": 0.2086, "step": 2741 }, { "epoch": 0.5, "learning_rate": 1.5871521077157074e-05, "loss": 0.1997, "step": 2742 }, { "epoch": 0.5, "learning_rate": 1.5862727603519605e-05, "loss": 0.1653, "step": 2743 }, { "epoch": 0.5, "learning_rate": 1.5853933832396595e-05, "loss": 0.2475, "step": 2744 }, { "epoch": 0.5, "learning_rate": 1.584513976682031e-05, "loss": 0.1912, "step": 2745 }, { "epoch": 0.5, "learning_rate": 1.5836345409823125e-05, "loss": 0.1857, "step": 2746 }, { "epoch": 0.5, "learning_rate": 1.5827550764437495e-05, "loss": 0.25, "step": 2747 }, { "epoch": 0.5, "learning_rate": 1.5818755833695992e-05, "loss": 0.2004, "step": 2748 }, { "epoch": 0.5, "learning_rate": 1.580996062063129e-05, "loss": 0.22, "step": 2749 }, { "epoch": 0.5, "learning_rate": 1.5801165128276138e-05, "loss": 0.2487, "step": 2750 }, { "epoch": 0.5, "learning_rate": 1.5792369359663406e-05, "loss": 0.25, "step": 2751 }, { "epoch": 0.5, "learning_rate": 1.578357331782605e-05, "loss": 0.2202, "step": 2752 }, { "epoch": 0.5, "learning_rate": 1.577477700579711e-05, "loss": 0.1441, "step": 2753 }, { "epoch": 0.5, "learning_rate": 1.576598042660974e-05, "loss": 0.2883, "step": 2754 }, { "epoch": 0.5, "learning_rate": 1.575718358329717e-05, "loss": 0.2184, "step": 2755 }, { "epoch": 0.5, "learning_rate": 1.5748386478892725e-05, "loss": 0.2514, "step": 2756 }, { "epoch": 0.5, "learning_rate": 1.5739589116429812e-05, "loss": 0.2583, "step": 2757 }, { "epoch": 0.5, "learning_rate": 1.5730791498941946e-05, "loss": 0.2828, "step": 2758 }, { "epoch": 0.5, "learning_rate": 1.572199362946272e-05, "loss": 0.2076, "step": 2759 }, { "epoch": 0.5, "learning_rate": 1.5713195511025806e-05, "loss": 0.2278, "step": 2760 }, { "epoch": 0.5, "learning_rate": 1.570439714666498e-05, "loss": 0.2357, "step": 2761 }, { "epoch": 0.5, "learning_rate": 1.569559853941408e-05, "loss": 0.1837, "step": 2762 }, { "epoch": 0.5, "learning_rate": 1.5686799692307052e-05, "loss": 0.2307, "step": 2763 }, { "epoch": 0.5, "learning_rate": 1.5678000608377895e-05, "loss": 0.1521, "step": 2764 }, { "epoch": 0.5, "learning_rate": 1.566920129066073e-05, "loss": 0.1665, "step": 2765 }, { "epoch": 0.5, "learning_rate": 1.5660401742189716e-05, "loss": 0.1847, "step": 2766 }, { "epoch": 0.5, "learning_rate": 1.5651601965999125e-05, "loss": 0.2502, "step": 2767 }, { "epoch": 0.5, "learning_rate": 1.564280196512329e-05, "loss": 0.1603, "step": 2768 }, { "epoch": 0.5, "learning_rate": 1.5634001742596613e-05, "loss": 0.2852, "step": 2769 }, { "epoch": 0.5, "learning_rate": 1.5625201301453605e-05, "loss": 0.2215, "step": 2770 }, { "epoch": 0.5, "learning_rate": 1.561640064472882e-05, "loss": 0.1965, "step": 2771 }, { "epoch": 0.5, "learning_rate": 1.56075997754569e-05, "loss": 0.209, "step": 2772 }, { "epoch": 0.5, "learning_rate": 1.559879869667256e-05, "loss": 0.2104, "step": 2773 }, { "epoch": 0.5, "learning_rate": 1.5589997411410587e-05, "loss": 0.2005, "step": 2774 }, { "epoch": 0.5, "learning_rate": 1.5581195922705836e-05, "loss": 0.2097, "step": 2775 }, { "epoch": 0.5, "learning_rate": 1.5572394233593236e-05, "loss": 0.2079, "step": 2776 }, { "epoch": 0.5, "learning_rate": 1.5563592347107782e-05, "loss": 0.1942, "step": 2777 }, { "epoch": 0.5, "learning_rate": 1.555479026628454e-05, "loss": 0.2482, "step": 2778 }, { "epoch": 0.5, "learning_rate": 1.554598799415864e-05, "loss": 0.1649, "step": 2779 }, { "epoch": 0.5, "learning_rate": 1.553718553376529e-05, "loss": 0.2482, "step": 2780 }, { "epoch": 0.5, "learning_rate": 1.552838288813973e-05, "loss": 0.217, "step": 2781 }, { "epoch": 0.5, "learning_rate": 1.5519580060317306e-05, "loss": 0.133, "step": 2782 }, { "epoch": 0.5, "learning_rate": 1.55107770533334e-05, "loss": 0.1944, "step": 2783 }, { "epoch": 0.5, "learning_rate": 1.5501973870223464e-05, "loss": 0.2365, "step": 2784 }, { "epoch": 0.5, "learning_rate": 1.5493170514023005e-05, "loss": 0.2229, "step": 2785 }, { "epoch": 0.51, "learning_rate": 1.54843669877676e-05, "loss": 0.1418, "step": 2786 }, { "epoch": 0.51, "learning_rate": 1.547556329449288e-05, "loss": 0.1865, "step": 2787 }, { "epoch": 0.51, "learning_rate": 1.5466759437234528e-05, "loss": 0.2144, "step": 2788 }, { "epoch": 0.51, "learning_rate": 1.5457955419028294e-05, "loss": 0.1808, "step": 2789 }, { "epoch": 0.51, "learning_rate": 1.5449151242909968e-05, "loss": 0.1868, "step": 2790 }, { "epoch": 0.51, "learning_rate": 1.544034691191541e-05, "loss": 0.2287, "step": 2791 }, { "epoch": 0.51, "learning_rate": 1.5431542429080536e-05, "loss": 0.1669, "step": 2792 }, { "epoch": 0.51, "learning_rate": 1.5422737797441284e-05, "loss": 0.2686, "step": 2793 }, { "epoch": 0.51, "learning_rate": 1.541393302003369e-05, "loss": 0.2026, "step": 2794 }, { "epoch": 0.51, "learning_rate": 1.5405128099893796e-05, "loss": 0.2166, "step": 2795 }, { "epoch": 0.51, "learning_rate": 1.5396323040057723e-05, "loss": 0.1786, "step": 2796 }, { "epoch": 0.51, "learning_rate": 1.5387517843561628e-05, "loss": 0.202, "step": 2797 }, { "epoch": 0.51, "learning_rate": 1.537871251344171e-05, "loss": 0.135, "step": 2798 }, { "epoch": 0.51, "learning_rate": 1.5369907052734234e-05, "loss": 0.227, "step": 2799 }, { "epoch": 0.51, "learning_rate": 1.5361101464475487e-05, "loss": 0.1907, "step": 2800 }, { "epoch": 0.51, "learning_rate": 1.5352295751701817e-05, "loss": 0.1928, "step": 2801 }, { "epoch": 0.51, "learning_rate": 1.5343489917449598e-05, "loss": 0.2462, "step": 2802 }, { "epoch": 0.51, "learning_rate": 1.5334683964755268e-05, "loss": 0.1981, "step": 2803 }, { "epoch": 0.51, "learning_rate": 1.5325877896655284e-05, "loss": 0.2379, "step": 2804 }, { "epoch": 0.51, "learning_rate": 1.5317071716186154e-05, "loss": 0.2229, "step": 2805 }, { "epoch": 0.51, "learning_rate": 1.5308265426384427e-05, "loss": 0.2623, "step": 2806 }, { "epoch": 0.51, "learning_rate": 1.5299459030286682e-05, "loss": 0.1767, "step": 2807 }, { "epoch": 0.51, "learning_rate": 1.5290652530929543e-05, "loss": 0.1695, "step": 2808 }, { "epoch": 0.51, "learning_rate": 1.528184593134966e-05, "loss": 0.1471, "step": 2809 }, { "epoch": 0.51, "learning_rate": 1.5273039234583726e-05, "loss": 0.2092, "step": 2810 }, { "epoch": 0.51, "learning_rate": 1.5264232443668464e-05, "loss": 0.1836, "step": 2811 }, { "epoch": 0.51, "learning_rate": 1.5255425561640625e-05, "loss": 0.2867, "step": 2812 }, { "epoch": 0.51, "learning_rate": 1.5246618591537007e-05, "loss": 0.2322, "step": 2813 }, { "epoch": 0.51, "learning_rate": 1.5237811536394415e-05, "loss": 0.2192, "step": 2814 }, { "epoch": 0.51, "learning_rate": 1.5229004399249707e-05, "loss": 0.1931, "step": 2815 }, { "epoch": 0.51, "learning_rate": 1.5220197183139749e-05, "loss": 0.1937, "step": 2816 }, { "epoch": 0.51, "learning_rate": 1.5211389891101452e-05, "loss": 0.236, "step": 2817 }, { "epoch": 0.51, "learning_rate": 1.5202582526171734e-05, "loss": 0.2042, "step": 2818 }, { "epoch": 0.51, "learning_rate": 1.5193775091387555e-05, "loss": 0.2495, "step": 2819 }, { "epoch": 0.51, "learning_rate": 1.5184967589785894e-05, "loss": 0.1783, "step": 2820 }, { "epoch": 0.51, "learning_rate": 1.5176160024403749e-05, "loss": 0.176, "step": 2821 }, { "epoch": 0.51, "learning_rate": 1.5167352398278143e-05, "loss": 0.1948, "step": 2822 }, { "epoch": 0.51, "learning_rate": 1.5158544714446122e-05, "loss": 0.1867, "step": 2823 }, { "epoch": 0.51, "learning_rate": 1.5149736975944745e-05, "loss": 0.1916, "step": 2824 }, { "epoch": 0.51, "learning_rate": 1.5140929185811099e-05, "loss": 0.2079, "step": 2825 }, { "epoch": 0.51, "learning_rate": 1.5132121347082278e-05, "loss": 0.2111, "step": 2826 }, { "epoch": 0.51, "learning_rate": 1.5123313462795408e-05, "loss": 0.307, "step": 2827 }, { "epoch": 0.51, "learning_rate": 1.5114505535987617e-05, "loss": 0.1886, "step": 2828 }, { "epoch": 0.51, "learning_rate": 1.5105697569696053e-05, "loss": 0.301, "step": 2829 }, { "epoch": 0.51, "learning_rate": 1.5096889566957876e-05, "loss": 0.2314, "step": 2830 }, { "epoch": 0.51, "learning_rate": 1.5088081530810254e-05, "loss": 0.2433, "step": 2831 }, { "epoch": 0.51, "learning_rate": 1.5079273464290384e-05, "loss": 0.2191, "step": 2832 }, { "epoch": 0.51, "learning_rate": 1.5070465370435453e-05, "loss": 0.2579, "step": 2833 }, { "epoch": 0.51, "learning_rate": 1.5061657252282669e-05, "loss": 0.1997, "step": 2834 }, { "epoch": 0.51, "learning_rate": 1.5052849112869244e-05, "loss": 0.1691, "step": 2835 }, { "epoch": 0.51, "learning_rate": 1.50440409552324e-05, "loss": 0.2473, "step": 2836 }, { "epoch": 0.51, "learning_rate": 1.5035232782409369e-05, "loss": 0.2747, "step": 2837 }, { "epoch": 0.51, "learning_rate": 1.502642459743737e-05, "loss": 0.203, "step": 2838 }, { "epoch": 0.51, "learning_rate": 1.5017616403353651e-05, "loss": 0.1924, "step": 2839 }, { "epoch": 0.51, "learning_rate": 1.5008808203195448e-05, "loss": 0.192, "step": 2840 }, { "epoch": 0.52, "learning_rate": 1.5e-05, "loss": 0.212, "step": 2841 }, { "epoch": 0.52, "learning_rate": 1.4991191796804556e-05, "loss": 0.1662, "step": 2842 }, { "epoch": 0.52, "learning_rate": 1.498238359664635e-05, "loss": 0.2111, "step": 2843 }, { "epoch": 0.52, "learning_rate": 1.4973575402562632e-05, "loss": 0.1706, "step": 2844 }, { "epoch": 0.52, "learning_rate": 1.4964767217590634e-05, "loss": 0.1963, "step": 2845 }, { "epoch": 0.52, "learning_rate": 1.49559590447676e-05, "loss": 0.2115, "step": 2846 }, { "epoch": 0.52, "learning_rate": 1.4947150887130758e-05, "loss": 0.206, "step": 2847 }, { "epoch": 0.52, "learning_rate": 1.4938342747717333e-05, "loss": 0.2609, "step": 2848 }, { "epoch": 0.52, "learning_rate": 1.4929534629564551e-05, "loss": 0.1873, "step": 2849 }, { "epoch": 0.52, "learning_rate": 1.4920726535709618e-05, "loss": 0.2098, "step": 2850 }, { "epoch": 0.52, "learning_rate": 1.4911918469189748e-05, "loss": 0.1471, "step": 2851 }, { "epoch": 0.52, "learning_rate": 1.4903110433042129e-05, "loss": 0.1918, "step": 2852 }, { "epoch": 0.52, "learning_rate": 1.489430243030395e-05, "loss": 0.2822, "step": 2853 }, { "epoch": 0.52, "learning_rate": 1.4885494464012386e-05, "loss": 0.2067, "step": 2854 }, { "epoch": 0.52, "learning_rate": 1.4876686537204591e-05, "loss": 0.1823, "step": 2855 }, { "epoch": 0.52, "learning_rate": 1.4867878652917723e-05, "loss": 0.2681, "step": 2856 }, { "epoch": 0.52, "learning_rate": 1.4859070814188902e-05, "loss": 0.234, "step": 2857 }, { "epoch": 0.52, "learning_rate": 1.4850263024055256e-05, "loss": 0.2529, "step": 2858 }, { "epoch": 0.52, "learning_rate": 1.484145528555388e-05, "loss": 0.2094, "step": 2859 }, { "epoch": 0.52, "learning_rate": 1.4832647601721856e-05, "loss": 0.2488, "step": 2860 }, { "epoch": 0.52, "learning_rate": 1.4823839975596252e-05, "loss": 0.2615, "step": 2861 }, { "epoch": 0.52, "learning_rate": 1.4815032410214103e-05, "loss": 0.2192, "step": 2862 }, { "epoch": 0.52, "learning_rate": 1.4806224908612444e-05, "loss": 0.2159, "step": 2863 }, { "epoch": 0.52, "learning_rate": 1.4797417473828267e-05, "loss": 0.1945, "step": 2864 }, { "epoch": 0.52, "learning_rate": 1.4788610108898549e-05, "loss": 0.2114, "step": 2865 }, { "epoch": 0.52, "learning_rate": 1.4779802816860254e-05, "loss": 0.3046, "step": 2866 }, { "epoch": 0.52, "learning_rate": 1.4770995600750297e-05, "loss": 0.257, "step": 2867 }, { "epoch": 0.52, "learning_rate": 1.4762188463605587e-05, "loss": 0.1632, "step": 2868 }, { "epoch": 0.52, "learning_rate": 1.4753381408462999e-05, "loss": 0.1726, "step": 2869 }, { "epoch": 0.52, "learning_rate": 1.4744574438359378e-05, "loss": 0.2686, "step": 2870 }, { "epoch": 0.52, "learning_rate": 1.4735767556331541e-05, "loss": 0.1577, "step": 2871 }, { "epoch": 0.52, "learning_rate": 1.4726960765416282e-05, "loss": 0.1755, "step": 2872 }, { "epoch": 0.52, "learning_rate": 1.4718154068650345e-05, "loss": 0.2657, "step": 2873 }, { "epoch": 0.52, "learning_rate": 1.4709347469070463e-05, "loss": 0.2587, "step": 2874 }, { "epoch": 0.52, "learning_rate": 1.4700540969713325e-05, "loss": 0.1804, "step": 2875 }, { "epoch": 0.52, "learning_rate": 1.4691734573615579e-05, "loss": 0.149, "step": 2876 }, { "epoch": 0.52, "learning_rate": 1.4682928283813852e-05, "loss": 0.2034, "step": 2877 }, { "epoch": 0.52, "learning_rate": 1.4674122103344722e-05, "loss": 0.1517, "step": 2878 }, { "epoch": 0.52, "learning_rate": 1.4665316035244738e-05, "loss": 0.2152, "step": 2879 }, { "epoch": 0.52, "learning_rate": 1.4656510082550406e-05, "loss": 0.3186, "step": 2880 }, { "epoch": 0.52, "learning_rate": 1.4647704248298187e-05, "loss": 0.2194, "step": 2881 }, { "epoch": 0.52, "learning_rate": 1.4638898535524513e-05, "loss": 0.2049, "step": 2882 }, { "epoch": 0.52, "learning_rate": 1.4630092947265765e-05, "loss": 0.1935, "step": 2883 }, { "epoch": 0.52, "learning_rate": 1.462128748655829e-05, "loss": 0.3096, "step": 2884 }, { "epoch": 0.52, "learning_rate": 1.4612482156438373e-05, "loss": 0.2214, "step": 2885 }, { "epoch": 0.52, "learning_rate": 1.4603676959942278e-05, "loss": 0.202, "step": 2886 }, { "epoch": 0.52, "learning_rate": 1.4594871900106208e-05, "loss": 0.2228, "step": 2887 }, { "epoch": 0.52, "learning_rate": 1.4586066979966313e-05, "loss": 0.2378, "step": 2888 }, { "epoch": 0.52, "learning_rate": 1.4577262202558715e-05, "loss": 0.1574, "step": 2889 }, { "epoch": 0.52, "learning_rate": 1.4568457570919468e-05, "loss": 0.189, "step": 2890 }, { "epoch": 0.52, "learning_rate": 1.455965308808459e-05, "loss": 0.2738, "step": 2891 }, { "epoch": 0.52, "learning_rate": 1.4550848757090035e-05, "loss": 0.2147, "step": 2892 }, { "epoch": 0.52, "learning_rate": 1.454204458097171e-05, "loss": 0.17, "step": 2893 }, { "epoch": 0.52, "learning_rate": 1.4533240562765474e-05, "loss": 0.2448, "step": 2894 }, { "epoch": 0.52, "learning_rate": 1.4524436705507121e-05, "loss": 0.181, "step": 2895 }, { "epoch": 0.53, "learning_rate": 1.45156330122324e-05, "loss": 0.1423, "step": 2896 }, { "epoch": 0.53, "learning_rate": 1.4506829485976999e-05, "loss": 0.2219, "step": 2897 }, { "epoch": 0.53, "learning_rate": 1.449802612977654e-05, "loss": 0.2222, "step": 2898 }, { "epoch": 0.53, "learning_rate": 1.4489222946666604e-05, "loss": 0.2225, "step": 2899 }, { "epoch": 0.53, "learning_rate": 1.4480419939682695e-05, "loss": 0.2859, "step": 2900 }, { "epoch": 0.53, "learning_rate": 1.447161711186027e-05, "loss": 0.2745, "step": 2901 }, { "epoch": 0.53, "learning_rate": 1.4462814466234715e-05, "loss": 0.2015, "step": 2902 }, { "epoch": 0.53, "learning_rate": 1.4454012005841358e-05, "loss": 0.1595, "step": 2903 }, { "epoch": 0.53, "learning_rate": 1.4445209733715461e-05, "loss": 0.201, "step": 2904 }, { "epoch": 0.53, "learning_rate": 1.4436407652892219e-05, "loss": 0.2601, "step": 2905 }, { "epoch": 0.53, "learning_rate": 1.4427605766406767e-05, "loss": 0.1362, "step": 2906 }, { "epoch": 0.53, "learning_rate": 1.4418804077294163e-05, "loss": 0.1505, "step": 2907 }, { "epoch": 0.53, "learning_rate": 1.4410002588589415e-05, "loss": 0.2073, "step": 2908 }, { "epoch": 0.53, "learning_rate": 1.4401201303327441e-05, "loss": 0.169, "step": 2909 }, { "epoch": 0.53, "learning_rate": 1.4392400224543101e-05, "loss": 0.2253, "step": 2910 }, { "epoch": 0.53, "learning_rate": 1.4383599355271183e-05, "loss": 0.278, "step": 2911 }, { "epoch": 0.53, "learning_rate": 1.4374798698546394e-05, "loss": 0.2347, "step": 2912 }, { "epoch": 0.53, "learning_rate": 1.4365998257403384e-05, "loss": 0.2919, "step": 2913 }, { "epoch": 0.53, "learning_rate": 1.4357198034876715e-05, "loss": 0.2045, "step": 2914 }, { "epoch": 0.53, "learning_rate": 1.4348398034000874e-05, "loss": 0.2249, "step": 2915 }, { "epoch": 0.53, "learning_rate": 1.4339598257810285e-05, "loss": 0.1655, "step": 2916 }, { "epoch": 0.53, "learning_rate": 1.4330798709339276e-05, "loss": 0.1401, "step": 2917 }, { "epoch": 0.53, "learning_rate": 1.4321999391622104e-05, "loss": 0.1266, "step": 2918 }, { "epoch": 0.53, "learning_rate": 1.4313200307692955e-05, "loss": 0.2384, "step": 2919 }, { "epoch": 0.53, "learning_rate": 1.4304401460585924e-05, "loss": 0.1894, "step": 2920 }, { "epoch": 0.53, "learning_rate": 1.4295602853335024e-05, "loss": 0.275, "step": 2921 }, { "epoch": 0.53, "learning_rate": 1.4286804488974197e-05, "loss": 0.2665, "step": 2922 }, { "epoch": 0.53, "learning_rate": 1.4278006370537284e-05, "loss": 0.2083, "step": 2923 }, { "epoch": 0.53, "learning_rate": 1.4269208501058058e-05, "loss": 0.1747, "step": 2924 }, { "epoch": 0.53, "learning_rate": 1.4260410883570195e-05, "loss": 0.1926, "step": 2925 }, { "epoch": 0.53, "learning_rate": 1.4251613521107285e-05, "loss": 0.2546, "step": 2926 }, { "epoch": 0.53, "learning_rate": 1.4242816416702836e-05, "loss": 0.2317, "step": 2927 }, { "epoch": 0.53, "learning_rate": 1.4234019573390261e-05, "loss": 0.1942, "step": 2928 }, { "epoch": 0.53, "learning_rate": 1.422522299420289e-05, "loss": 0.1483, "step": 2929 }, { "epoch": 0.53, "learning_rate": 1.4216426682173957e-05, "loss": 0.2122, "step": 2930 }, { "epoch": 0.53, "learning_rate": 1.4207630640336596e-05, "loss": 0.1953, "step": 2931 }, { "epoch": 0.53, "learning_rate": 1.4198834871723866e-05, "loss": 0.236, "step": 2932 }, { "epoch": 0.53, "learning_rate": 1.4190039379368716e-05, "loss": 0.1401, "step": 2933 }, { "epoch": 0.53, "learning_rate": 1.4181244166304009e-05, "loss": 0.1783, "step": 2934 }, { "epoch": 0.53, "learning_rate": 1.4172449235562508e-05, "loss": 0.2549, "step": 2935 }, { "epoch": 0.53, "learning_rate": 1.416365459017688e-05, "loss": 0.2415, "step": 2936 }, { "epoch": 0.53, "learning_rate": 1.415486023317969e-05, "loss": 0.2053, "step": 2937 }, { "epoch": 0.53, "learning_rate": 1.4146066167603404e-05, "loss": 0.1479, "step": 2938 }, { "epoch": 0.53, "learning_rate": 1.4137272396480398e-05, "loss": 0.2276, "step": 2939 }, { "epoch": 0.53, "learning_rate": 1.412847892284293e-05, "loss": 0.1751, "step": 2940 }, { "epoch": 0.53, "learning_rate": 1.411968574972317e-05, "loss": 0.2402, "step": 2941 }, { "epoch": 0.53, "learning_rate": 1.4110892880153174e-05, "loss": 0.2128, "step": 2942 }, { "epoch": 0.53, "learning_rate": 1.4102100317164896e-05, "loss": 0.1807, "step": 2943 }, { "epoch": 0.53, "learning_rate": 1.4093308063790191e-05, "loss": 0.2817, "step": 2944 }, { "epoch": 0.53, "learning_rate": 1.4084516123060796e-05, "loss": 0.2569, "step": 2945 }, { "epoch": 0.53, "learning_rate": 1.4075724498008353e-05, "loss": 0.3168, "step": 2946 }, { "epoch": 0.53, "learning_rate": 1.4066933191664384e-05, "loss": 0.1774, "step": 2947 }, { "epoch": 0.53, "learning_rate": 1.40581422070603e-05, "loss": 0.1829, "step": 2948 }, { "epoch": 0.53, "learning_rate": 1.4049351547227415e-05, "loss": 0.2976, "step": 2949 }, { "epoch": 0.53, "learning_rate": 1.4040561215196917e-05, "loss": 0.2341, "step": 2950 }, { "epoch": 0.53, "learning_rate": 1.4031771213999892e-05, "loss": 0.2313, "step": 2951 }, { "epoch": 0.54, "learning_rate": 1.4022981546667303e-05, "loss": 0.2473, "step": 2952 }, { "epoch": 0.54, "learning_rate": 1.4014192216229998e-05, "loss": 0.2278, "step": 2953 }, { "epoch": 0.54, "learning_rate": 1.4005403225718717e-05, "loss": 0.2318, "step": 2954 }, { "epoch": 0.54, "learning_rate": 1.3996614578164072e-05, "loss": 0.192, "step": 2955 }, { "epoch": 0.54, "learning_rate": 1.398782627659657e-05, "loss": 0.2121, "step": 2956 }, { "epoch": 0.54, "learning_rate": 1.3979038324046586e-05, "loss": 0.2338, "step": 2957 }, { "epoch": 0.54, "learning_rate": 1.3970250723544384e-05, "loss": 0.1774, "step": 2958 }, { "epoch": 0.54, "learning_rate": 1.3961463478120097e-05, "loss": 0.2184, "step": 2959 }, { "epoch": 0.54, "learning_rate": 1.3952676590803743e-05, "loss": 0.2507, "step": 2960 }, { "epoch": 0.54, "learning_rate": 1.3943890064625217e-05, "loss": 0.166, "step": 2961 }, { "epoch": 0.54, "learning_rate": 1.3935103902614282e-05, "loss": 0.2836, "step": 2962 }, { "epoch": 0.54, "learning_rate": 1.3926318107800587e-05, "loss": 0.1964, "step": 2963 }, { "epoch": 0.54, "learning_rate": 1.3917532683213646e-05, "loss": 0.1691, "step": 2964 }, { "epoch": 0.54, "learning_rate": 1.3908747631882848e-05, "loss": 0.2009, "step": 2965 }, { "epoch": 0.54, "learning_rate": 1.3899962956837443e-05, "loss": 0.2141, "step": 2966 }, { "epoch": 0.54, "learning_rate": 1.389117866110657e-05, "loss": 0.2334, "step": 2967 }, { "epoch": 0.54, "learning_rate": 1.3882394747719227e-05, "loss": 0.1821, "step": 2968 }, { "epoch": 0.54, "learning_rate": 1.3873611219704279e-05, "loss": 0.1597, "step": 2969 }, { "epoch": 0.54, "learning_rate": 1.3864828080090463e-05, "loss": 0.2495, "step": 2970 }, { "epoch": 0.54, "learning_rate": 1.3856045331906373e-05, "loss": 0.2472, "step": 2971 }, { "epoch": 0.54, "learning_rate": 1.3847262978180483e-05, "loss": 0.1538, "step": 2972 }, { "epoch": 0.54, "learning_rate": 1.3838481021941114e-05, "loss": 0.1732, "step": 2973 }, { "epoch": 0.54, "learning_rate": 1.3829699466216465e-05, "loss": 0.206, "step": 2974 }, { "epoch": 0.54, "learning_rate": 1.3820918314034586e-05, "loss": 0.2908, "step": 2975 }, { "epoch": 0.54, "learning_rate": 1.3812137568423391e-05, "loss": 0.1567, "step": 2976 }, { "epoch": 0.54, "learning_rate": 1.380335723241066e-05, "loss": 0.162, "step": 2977 }, { "epoch": 0.54, "learning_rate": 1.379457730902402e-05, "loss": 0.1735, "step": 2978 }, { "epoch": 0.54, "learning_rate": 1.378579780129097e-05, "loss": 0.2402, "step": 2979 }, { "epoch": 0.54, "learning_rate": 1.3777018712238851e-05, "loss": 0.1967, "step": 2980 }, { "epoch": 0.54, "learning_rate": 1.376824004489487e-05, "loss": 0.2108, "step": 2981 }, { "epoch": 0.54, "learning_rate": 1.3759461802286083e-05, "loss": 0.1495, "step": 2982 }, { "epoch": 0.54, "learning_rate": 1.3750683987439403e-05, "loss": 0.1907, "step": 2983 }, { "epoch": 0.54, "learning_rate": 1.3741906603381595e-05, "loss": 0.1663, "step": 2984 }, { "epoch": 0.54, "learning_rate": 1.3733129653139274e-05, "loss": 0.2971, "step": 2985 }, { "epoch": 0.54, "learning_rate": 1.3724353139738903e-05, "loss": 0.2236, "step": 2986 }, { "epoch": 0.54, "learning_rate": 1.3715577066206802e-05, "loss": 0.1814, "step": 2987 }, { "epoch": 0.54, "learning_rate": 1.3706801435569132e-05, "loss": 0.2054, "step": 2988 }, { "epoch": 0.54, "learning_rate": 1.3698026250851909e-05, "loss": 0.1672, "step": 2989 }, { "epoch": 0.54, "learning_rate": 1.368925151508098e-05, "loss": 0.1447, "step": 2990 }, { "epoch": 0.54, "learning_rate": 1.3680477231282058e-05, "loss": 0.2523, "step": 2991 }, { "epoch": 0.54, "learning_rate": 1.3671703402480685e-05, "loss": 0.2189, "step": 2992 }, { "epoch": 0.54, "learning_rate": 1.3662930031702247e-05, "loss": 0.2006, "step": 2993 }, { "epoch": 0.54, "learning_rate": 1.3654157121971983e-05, "loss": 0.1866, "step": 2994 }, { "epoch": 0.54, "learning_rate": 1.364538467631496e-05, "loss": 0.2349, "step": 2995 }, { "epoch": 0.54, "learning_rate": 1.3636612697756096e-05, "loss": 0.2198, "step": 2996 }, { "epoch": 0.54, "learning_rate": 1.362784118932014e-05, "loss": 0.1746, "step": 2997 }, { "epoch": 0.54, "learning_rate": 1.3619070154031679e-05, "loss": 0.2034, "step": 2998 }, { "epoch": 0.54, "learning_rate": 1.3610299594915144e-05, "loss": 0.2371, "step": 2999 }, { "epoch": 0.54, "learning_rate": 1.3601529514994793e-05, "loss": 0.2059, "step": 3000 }, { "epoch": 0.54, "learning_rate": 1.3592759917294728e-05, "loss": 0.1963, "step": 3001 }, { "epoch": 0.54, "learning_rate": 1.3583990804838877e-05, "loss": 0.2164, "step": 3002 }, { "epoch": 0.54, "learning_rate": 1.3575222180651e-05, "loss": 0.1943, "step": 3003 }, { "epoch": 0.54, "learning_rate": 1.35664540477547e-05, "loss": 0.2537, "step": 3004 }, { "epoch": 0.54, "learning_rate": 1.3557686409173391e-05, "loss": 0.2509, "step": 3005 }, { "epoch": 0.54, "learning_rate": 1.3548919267930342e-05, "loss": 0.19, "step": 3006 }, { "epoch": 0.55, "learning_rate": 1.3540152627048626e-05, "loss": 0.177, "step": 3007 }, { "epoch": 0.55, "learning_rate": 1.3531386489551161e-05, "loss": 0.1437, "step": 3008 }, { "epoch": 0.55, "learning_rate": 1.3522620858460682e-05, "loss": 0.231, "step": 3009 }, { "epoch": 0.55, "learning_rate": 1.3513855736799749e-05, "loss": 0.2061, "step": 3010 }, { "epoch": 0.55, "learning_rate": 1.3505091127590756e-05, "loss": 0.2192, "step": 3011 }, { "epoch": 0.55, "learning_rate": 1.3496327033855906e-05, "loss": 0.227, "step": 3012 }, { "epoch": 0.55, "learning_rate": 1.3487563458617236e-05, "loss": 0.2509, "step": 3013 }, { "epoch": 0.55, "learning_rate": 1.3478800404896603e-05, "loss": 0.1959, "step": 3014 }, { "epoch": 0.55, "learning_rate": 1.347003787571568e-05, "loss": 0.198, "step": 3015 }, { "epoch": 0.55, "learning_rate": 1.346127587409595e-05, "loss": 0.2311, "step": 3016 }, { "epoch": 0.55, "learning_rate": 1.3452514403058738e-05, "loss": 0.2523, "step": 3017 }, { "epoch": 0.55, "learning_rate": 1.3443753465625166e-05, "loss": 0.1708, "step": 3018 }, { "epoch": 0.55, "learning_rate": 1.3434993064816178e-05, "loss": 0.2308, "step": 3019 }, { "epoch": 0.55, "learning_rate": 1.3426233203652535e-05, "loss": 0.243, "step": 3020 }, { "epoch": 0.55, "learning_rate": 1.341747388515481e-05, "loss": 0.3026, "step": 3021 }, { "epoch": 0.55, "learning_rate": 1.3408715112343395e-05, "loss": 0.2026, "step": 3022 }, { "epoch": 0.55, "learning_rate": 1.3399956888238476e-05, "loss": 0.235, "step": 3023 }, { "epoch": 0.55, "learning_rate": 1.3391199215860072e-05, "loss": 0.2001, "step": 3024 }, { "epoch": 0.55, "learning_rate": 1.3382442098228001e-05, "loss": 0.1489, "step": 3025 }, { "epoch": 0.55, "learning_rate": 1.3373685538361884e-05, "loss": 0.1523, "step": 3026 }, { "epoch": 0.55, "learning_rate": 1.3364929539281162e-05, "loss": 0.3124, "step": 3027 }, { "epoch": 0.55, "learning_rate": 1.3356174104005071e-05, "loss": 0.2305, "step": 3028 }, { "epoch": 0.55, "learning_rate": 1.3347419235552665e-05, "loss": 0.2229, "step": 3029 }, { "epoch": 0.55, "learning_rate": 1.3338664936942794e-05, "loss": 0.1757, "step": 3030 }, { "epoch": 0.55, "learning_rate": 1.3329911211194109e-05, "loss": 0.2477, "step": 3031 }, { "epoch": 0.55, "learning_rate": 1.3321158061325072e-05, "loss": 0.1958, "step": 3032 }, { "epoch": 0.55, "learning_rate": 1.331240549035394e-05, "loss": 0.2832, "step": 3033 }, { "epoch": 0.55, "learning_rate": 1.3303653501298775e-05, "loss": 0.2087, "step": 3034 }, { "epoch": 0.55, "learning_rate": 1.3294902097177433e-05, "loss": 0.1796, "step": 3035 }, { "epoch": 0.55, "learning_rate": 1.328615128100757e-05, "loss": 0.1628, "step": 3036 }, { "epoch": 0.55, "learning_rate": 1.3277401055806648e-05, "loss": 0.2335, "step": 3037 }, { "epoch": 0.55, "learning_rate": 1.326865142459191e-05, "loss": 0.2012, "step": 3038 }, { "epoch": 0.55, "learning_rate": 1.3259902390380406e-05, "loss": 0.2477, "step": 3039 }, { "epoch": 0.55, "learning_rate": 1.3251153956188973e-05, "loss": 0.2518, "step": 3040 }, { "epoch": 0.55, "learning_rate": 1.3242406125034249e-05, "loss": 0.2081, "step": 3041 }, { "epoch": 0.55, "learning_rate": 1.3233658899932654e-05, "loss": 0.2378, "step": 3042 }, { "epoch": 0.55, "learning_rate": 1.3224912283900406e-05, "loss": 0.2008, "step": 3043 }, { "epoch": 0.55, "learning_rate": 1.3216166279953513e-05, "loss": 0.2113, "step": 3044 }, { "epoch": 0.55, "learning_rate": 1.3207420891107766e-05, "loss": 0.223, "step": 3045 }, { "epoch": 0.55, "learning_rate": 1.3198676120378753e-05, "loss": 0.166, "step": 3046 }, { "epoch": 0.55, "learning_rate": 1.3189931970781842e-05, "loss": 0.1061, "step": 3047 }, { "epoch": 0.55, "learning_rate": 1.3181188445332186e-05, "loss": 0.2131, "step": 3048 }, { "epoch": 0.55, "learning_rate": 1.3172445547044732e-05, "loss": 0.1829, "step": 3049 }, { "epoch": 0.55, "learning_rate": 1.3163703278934194e-05, "loss": 0.2063, "step": 3050 }, { "epoch": 0.55, "learning_rate": 1.3154961644015091e-05, "loss": 0.2097, "step": 3051 }, { "epoch": 0.55, "learning_rate": 1.3146220645301705e-05, "loss": 0.2532, "step": 3052 }, { "epoch": 0.55, "learning_rate": 1.3137480285808101e-05, "loss": 0.259, "step": 3053 }, { "epoch": 0.55, "learning_rate": 1.312874056854814e-05, "loss": 0.1782, "step": 3054 }, { "epoch": 0.55, "learning_rate": 1.3120001496535434e-05, "loss": 0.2476, "step": 3055 }, { "epoch": 0.55, "learning_rate": 1.3111263072783403e-05, "loss": 0.2329, "step": 3056 }, { "epoch": 0.55, "learning_rate": 1.3102525300305217e-05, "loss": 0.1744, "step": 3057 }, { "epoch": 0.55, "learning_rate": 1.3093788182113839e-05, "loss": 0.2003, "step": 3058 }, { "epoch": 0.55, "learning_rate": 1.3085051721222e-05, "loss": 0.185, "step": 3059 }, { "epoch": 0.55, "learning_rate": 1.3076315920642199e-05, "loss": 0.2693, "step": 3060 }, { "epoch": 0.55, "learning_rate": 1.306758078338672e-05, "loss": 0.2294, "step": 3061 }, { "epoch": 0.56, "learning_rate": 1.3058846312467603e-05, "loss": 0.2175, "step": 3062 }, { "epoch": 0.56, "learning_rate": 1.3050112510896677e-05, "loss": 0.1501, "step": 3063 }, { "epoch": 0.56, "learning_rate": 1.3041379381685526e-05, "loss": 0.2556, "step": 3064 }, { "epoch": 0.56, "learning_rate": 1.30326469278455e-05, "loss": 0.2043, "step": 3065 }, { "epoch": 0.56, "learning_rate": 1.302391515238772e-05, "loss": 0.2775, "step": 3066 }, { "epoch": 0.56, "learning_rate": 1.3015184058323085e-05, "loss": 0.2269, "step": 3067 }, { "epoch": 0.56, "learning_rate": 1.3006453648662244e-05, "loss": 0.2206, "step": 3068 }, { "epoch": 0.56, "learning_rate": 1.2997723926415612e-05, "loss": 0.2367, "step": 3069 }, { "epoch": 0.56, "learning_rate": 1.2988994894593375e-05, "loss": 0.2404, "step": 3070 }, { "epoch": 0.56, "learning_rate": 1.298026655620547e-05, "loss": 0.2693, "step": 3071 }, { "epoch": 0.56, "learning_rate": 1.2971538914261607e-05, "loss": 0.1633, "step": 3072 }, { "epoch": 0.56, "learning_rate": 1.2962811971771245e-05, "loss": 0.2525, "step": 3073 }, { "epoch": 0.56, "learning_rate": 1.295408573174361e-05, "loss": 0.1598, "step": 3074 }, { "epoch": 0.56, "learning_rate": 1.2945360197187681e-05, "loss": 0.2178, "step": 3075 }, { "epoch": 0.56, "learning_rate": 1.2936635371112192e-05, "loss": 0.2742, "step": 3076 }, { "epoch": 0.56, "learning_rate": 1.292791125652564e-05, "loss": 0.168, "step": 3077 }, { "epoch": 0.56, "learning_rate": 1.2919187856436266e-05, "loss": 0.2535, "step": 3078 }, { "epoch": 0.56, "learning_rate": 1.291046517385208e-05, "loss": 0.2279, "step": 3079 }, { "epoch": 0.56, "learning_rate": 1.290174321178083e-05, "loss": 0.1824, "step": 3080 }, { "epoch": 0.56, "learning_rate": 1.2893021973230019e-05, "loss": 0.2249, "step": 3081 }, { "epoch": 0.56, "learning_rate": 1.2884301461206906e-05, "loss": 0.2893, "step": 3082 }, { "epoch": 0.56, "learning_rate": 1.2875581678718494e-05, "loss": 0.2653, "step": 3083 }, { "epoch": 0.56, "learning_rate": 1.286686262877154e-05, "loss": 0.2372, "step": 3084 }, { "epoch": 0.56, "learning_rate": 1.2858144314372541e-05, "loss": 0.1672, "step": 3085 }, { "epoch": 0.56, "learning_rate": 1.2849426738527744e-05, "loss": 0.1842, "step": 3086 }, { "epoch": 0.56, "learning_rate": 1.2840709904243145e-05, "loss": 0.1815, "step": 3087 }, { "epoch": 0.56, "learning_rate": 1.2831993814524478e-05, "loss": 0.2154, "step": 3088 }, { "epoch": 0.56, "learning_rate": 1.2823278472377226e-05, "loss": 0.1904, "step": 3089 }, { "epoch": 0.56, "learning_rate": 1.2814563880806607e-05, "loss": 0.2227, "step": 3090 }, { "epoch": 0.56, "learning_rate": 1.280585004281759e-05, "loss": 0.2664, "step": 3091 }, { "epoch": 0.56, "learning_rate": 1.2797136961414875e-05, "loss": 0.2727, "step": 3092 }, { "epoch": 0.56, "learning_rate": 1.2788424639602905e-05, "loss": 0.183, "step": 3093 }, { "epoch": 0.56, "learning_rate": 1.2779713080385862e-05, "loss": 0.2686, "step": 3094 }, { "epoch": 0.56, "learning_rate": 1.2771002286767662e-05, "loss": 0.1859, "step": 3095 }, { "epoch": 0.56, "learning_rate": 1.2762292261751964e-05, "loss": 0.1854, "step": 3096 }, { "epoch": 0.56, "learning_rate": 1.2753583008342151e-05, "loss": 0.163, "step": 3097 }, { "epoch": 0.56, "learning_rate": 1.2744874529541346e-05, "loss": 0.2485, "step": 3098 }, { "epoch": 0.56, "learning_rate": 1.2736166828352409e-05, "loss": 0.271, "step": 3099 }, { "epoch": 0.56, "learning_rate": 1.2727459907777923e-05, "loss": 0.1978, "step": 3100 }, { "epoch": 0.56, "learning_rate": 1.2718753770820212e-05, "loss": 0.1605, "step": 3101 }, { "epoch": 0.56, "learning_rate": 1.271004842048132e-05, "loss": 0.1548, "step": 3102 }, { "epoch": 0.56, "learning_rate": 1.2701343859763023e-05, "loss": 0.1971, "step": 3103 }, { "epoch": 0.56, "learning_rate": 1.2692640091666831e-05, "loss": 0.1978, "step": 3104 }, { "epoch": 0.56, "learning_rate": 1.2683937119193968e-05, "loss": 0.1684, "step": 3105 }, { "epoch": 0.56, "learning_rate": 1.26752349453454e-05, "loss": 0.1148, "step": 3106 }, { "epoch": 0.56, "learning_rate": 1.26665335731218e-05, "loss": 0.2031, "step": 3107 }, { "epoch": 0.56, "learning_rate": 1.265783300552358e-05, "loss": 0.2191, "step": 3108 }, { "epoch": 0.56, "learning_rate": 1.2649133245550864e-05, "loss": 0.2129, "step": 3109 }, { "epoch": 0.56, "learning_rate": 1.26404342962035e-05, "loss": 0.1829, "step": 3110 }, { "epoch": 0.56, "learning_rate": 1.2631736160481063e-05, "loss": 0.2289, "step": 3111 }, { "epoch": 0.56, "learning_rate": 1.2623038841382836e-05, "loss": 0.2828, "step": 3112 }, { "epoch": 0.56, "learning_rate": 1.2614342341907831e-05, "loss": 0.1928, "step": 3113 }, { "epoch": 0.56, "learning_rate": 1.2605646665054779e-05, "loss": 0.1731, "step": 3114 }, { "epoch": 0.56, "learning_rate": 1.2596951813822108e-05, "loss": 0.1677, "step": 3115 }, { "epoch": 0.56, "learning_rate": 1.2588257791207979e-05, "loss": 0.2762, "step": 3116 }, { "epoch": 0.57, "learning_rate": 1.2579564600210268e-05, "loss": 0.2059, "step": 3117 }, { "epoch": 0.57, "learning_rate": 1.2570872243826554e-05, "loss": 0.2057, "step": 3118 }, { "epoch": 0.57, "learning_rate": 1.2562180725054135e-05, "loss": 0.2049, "step": 3119 }, { "epoch": 0.57, "learning_rate": 1.2553490046890021e-05, "loss": 0.1553, "step": 3120 }, { "epoch": 0.57, "learning_rate": 1.2544800212330927e-05, "loss": 0.1815, "step": 3121 }, { "epoch": 0.57, "learning_rate": 1.253611122437328e-05, "loss": 0.259, "step": 3122 }, { "epoch": 0.57, "learning_rate": 1.2527423086013214e-05, "loss": 0.1987, "step": 3123 }, { "epoch": 0.57, "learning_rate": 1.2518735800246576e-05, "loss": 0.205, "step": 3124 }, { "epoch": 0.57, "learning_rate": 1.251004937006891e-05, "loss": 0.212, "step": 3125 }, { "epoch": 0.57, "learning_rate": 1.2501363798475465e-05, "loss": 0.1514, "step": 3126 }, { "epoch": 0.57, "learning_rate": 1.2492679088461208e-05, "loss": 0.2177, "step": 3127 }, { "epoch": 0.57, "learning_rate": 1.2483995243020787e-05, "loss": 0.248, "step": 3128 }, { "epoch": 0.57, "learning_rate": 1.2475312265148574e-05, "loss": 0.2317, "step": 3129 }, { "epoch": 0.57, "learning_rate": 1.2466630157838627e-05, "loss": 0.185, "step": 3130 }, { "epoch": 0.57, "learning_rate": 1.2457948924084706e-05, "loss": 0.2086, "step": 3131 }, { "epoch": 0.57, "learning_rate": 1.2449268566880277e-05, "loss": 0.2297, "step": 3132 }, { "epoch": 0.57, "learning_rate": 1.2440589089218493e-05, "loss": 0.246, "step": 3133 }, { "epoch": 0.57, "learning_rate": 1.2431910494092219e-05, "loss": 0.1448, "step": 3134 }, { "epoch": 0.57, "learning_rate": 1.2423232784493997e-05, "loss": 0.1874, "step": 3135 }, { "epoch": 0.57, "learning_rate": 1.2414555963416074e-05, "loss": 0.2348, "step": 3136 }, { "epoch": 0.57, "learning_rate": 1.2405880033850394e-05, "loss": 0.2005, "step": 3137 }, { "epoch": 0.57, "learning_rate": 1.2397204998788584e-05, "loss": 0.2012, "step": 3138 }, { "epoch": 0.57, "learning_rate": 1.2388530861221974e-05, "loss": 0.2167, "step": 3139 }, { "epoch": 0.57, "learning_rate": 1.2379857624141573e-05, "loss": 0.2196, "step": 3140 }, { "epoch": 0.57, "learning_rate": 1.2371185290538088e-05, "loss": 0.1806, "step": 3141 }, { "epoch": 0.57, "learning_rate": 1.236251386340191e-05, "loss": 0.2407, "step": 3142 }, { "epoch": 0.57, "learning_rate": 1.2353843345723114e-05, "loss": 0.2257, "step": 3143 }, { "epoch": 0.57, "learning_rate": 1.2345173740491474e-05, "loss": 0.1738, "step": 3144 }, { "epoch": 0.57, "learning_rate": 1.2336505050696433e-05, "loss": 0.1819, "step": 3145 }, { "epoch": 0.57, "learning_rate": 1.2327837279327136e-05, "loss": 0.1591, "step": 3146 }, { "epoch": 0.57, "learning_rate": 1.2319170429372397e-05, "loss": 0.1725, "step": 3147 }, { "epoch": 0.57, "learning_rate": 1.2310504503820712e-05, "loss": 0.2394, "step": 3148 }, { "epoch": 0.57, "learning_rate": 1.2301839505660277e-05, "loss": 0.174, "step": 3149 }, { "epoch": 0.57, "learning_rate": 1.229317543787894e-05, "loss": 0.2201, "step": 3150 }, { "epoch": 0.57, "learning_rate": 1.2284512303464251e-05, "loss": 0.168, "step": 3151 }, { "epoch": 0.57, "learning_rate": 1.2275850105403432e-05, "loss": 0.1972, "step": 3152 }, { "epoch": 0.57, "learning_rate": 1.2267188846683372e-05, "loss": 0.2565, "step": 3153 }, { "epoch": 0.57, "learning_rate": 1.2258528530290655e-05, "loss": 0.2581, "step": 3154 }, { "epoch": 0.57, "learning_rate": 1.2249869159211518e-05, "loss": 0.1326, "step": 3155 }, { "epoch": 0.57, "learning_rate": 1.2241210736431893e-05, "loss": 0.1652, "step": 3156 }, { "epoch": 0.57, "learning_rate": 1.2232553264937371e-05, "loss": 0.1902, "step": 3157 }, { "epoch": 0.57, "learning_rate": 1.2223896747713217e-05, "loss": 0.1945, "step": 3158 }, { "epoch": 0.57, "learning_rate": 1.2215241187744377e-05, "loss": 0.1861, "step": 3159 }, { "epoch": 0.57, "learning_rate": 1.220658658801545e-05, "loss": 0.2388, "step": 3160 }, { "epoch": 0.57, "learning_rate": 1.219793295151072e-05, "loss": 0.219, "step": 3161 }, { "epoch": 0.57, "learning_rate": 1.2189280281214128e-05, "loss": 0.2477, "step": 3162 }, { "epoch": 0.57, "learning_rate": 1.2180628580109298e-05, "loss": 0.2696, "step": 3163 }, { "epoch": 0.57, "learning_rate": 1.2171977851179484e-05, "loss": 0.1758, "step": 3164 }, { "epoch": 0.57, "learning_rate": 1.216332809740765e-05, "loss": 0.1915, "step": 3165 }, { "epoch": 0.57, "learning_rate": 1.2154679321776385e-05, "loss": 0.2361, "step": 3166 }, { "epoch": 0.57, "learning_rate": 1.2146031527267974e-05, "loss": 0.1643, "step": 3167 }, { "epoch": 0.57, "learning_rate": 1.2137384716864339e-05, "loss": 0.1685, "step": 3168 }, { "epoch": 0.57, "learning_rate": 1.2128738893547069e-05, "loss": 0.2192, "step": 3169 }, { "epoch": 0.57, "learning_rate": 1.2120094060297422e-05, "loss": 0.2368, "step": 3170 }, { "epoch": 0.57, "learning_rate": 1.2111450220096301e-05, "loss": 0.2335, "step": 3171 }, { "epoch": 0.58, "learning_rate": 1.210280737592428e-05, "loss": 0.2014, "step": 3172 }, { "epoch": 0.58, "learning_rate": 1.2094165530761574e-05, "loss": 0.2479, "step": 3173 }, { "epoch": 0.58, "learning_rate": 1.2085524687588072e-05, "loss": 0.2045, "step": 3174 }, { "epoch": 0.58, "learning_rate": 1.20768848493833e-05, "loss": 0.1943, "step": 3175 }, { "epoch": 0.58, "learning_rate": 1.2068246019126446e-05, "loss": 0.2792, "step": 3176 }, { "epoch": 0.58, "learning_rate": 1.2059608199796354e-05, "loss": 0.1525, "step": 3177 }, { "epoch": 0.58, "learning_rate": 1.2050971394371507e-05, "loss": 0.1808, "step": 3178 }, { "epoch": 0.58, "learning_rate": 1.2042335605830055e-05, "loss": 0.2015, "step": 3179 }, { "epoch": 0.58, "learning_rate": 1.2033700837149789e-05, "loss": 0.1279, "step": 3180 }, { "epoch": 0.58, "learning_rate": 1.2025067091308137e-05, "loss": 0.2506, "step": 3181 }, { "epoch": 0.58, "learning_rate": 1.2016434371282195e-05, "loss": 0.2128, "step": 3182 }, { "epoch": 0.58, "learning_rate": 1.2007802680048692e-05, "loss": 0.194, "step": 3183 }, { "epoch": 0.58, "learning_rate": 1.199917202058401e-05, "loss": 0.1552, "step": 3184 }, { "epoch": 0.58, "learning_rate": 1.1990542395864166e-05, "loss": 0.1676, "step": 3185 }, { "epoch": 0.58, "learning_rate": 1.1981913808864828e-05, "loss": 0.1796, "step": 3186 }, { "epoch": 0.58, "learning_rate": 1.19732862625613e-05, "loss": 0.2266, "step": 3187 }, { "epoch": 0.58, "learning_rate": 1.1964659759928535e-05, "loss": 0.2234, "step": 3188 }, { "epoch": 0.58, "learning_rate": 1.1956034303941122e-05, "loss": 0.1743, "step": 3189 }, { "epoch": 0.58, "learning_rate": 1.1947409897573285e-05, "loss": 0.1697, "step": 3190 }, { "epoch": 0.58, "learning_rate": 1.1938786543798891e-05, "loss": 0.1338, "step": 3191 }, { "epoch": 0.58, "learning_rate": 1.1930164245591446e-05, "loss": 0.1764, "step": 3192 }, { "epoch": 0.58, "learning_rate": 1.1921543005924083e-05, "loss": 0.1829, "step": 3193 }, { "epoch": 0.58, "learning_rate": 1.1912922827769583e-05, "loss": 0.16, "step": 3194 }, { "epoch": 0.58, "learning_rate": 1.1904303714100347e-05, "loss": 0.226, "step": 3195 }, { "epoch": 0.58, "learning_rate": 1.1895685667888422e-05, "loss": 0.1604, "step": 3196 }, { "epoch": 0.58, "learning_rate": 1.188706869210548e-05, "loss": 0.2453, "step": 3197 }, { "epoch": 0.58, "learning_rate": 1.187845278972282e-05, "loss": 0.1879, "step": 3198 }, { "epoch": 0.58, "learning_rate": 1.186983796371138e-05, "loss": 0.2332, "step": 3199 }, { "epoch": 0.58, "learning_rate": 1.186122421704172e-05, "loss": 0.2484, "step": 3200 }, { "epoch": 0.58, "learning_rate": 1.185261155268403e-05, "loss": 0.1705, "step": 3201 }, { "epoch": 0.58, "learning_rate": 1.1843999973608133e-05, "loss": 0.1396, "step": 3202 }, { "epoch": 0.58, "learning_rate": 1.183538948278346e-05, "loss": 0.2468, "step": 3203 }, { "epoch": 0.58, "learning_rate": 1.182678008317909e-05, "loss": 0.2017, "step": 3204 }, { "epoch": 0.58, "learning_rate": 1.1818171777763706e-05, "loss": 0.1688, "step": 3205 }, { "epoch": 0.58, "learning_rate": 1.1809564569505629e-05, "loss": 0.2668, "step": 3206 }, { "epoch": 0.58, "learning_rate": 1.1800958461372791e-05, "loss": 0.1841, "step": 3207 }, { "epoch": 0.58, "learning_rate": 1.1792353456332746e-05, "loss": 0.1517, "step": 3208 }, { "epoch": 0.58, "learning_rate": 1.1783749557352676e-05, "loss": 0.2041, "step": 3209 }, { "epoch": 0.58, "learning_rate": 1.1775146767399368e-05, "loss": 0.1873, "step": 3210 }, { "epoch": 0.58, "learning_rate": 1.1766545089439242e-05, "loss": 0.218, "step": 3211 }, { "epoch": 0.58, "learning_rate": 1.175794452643832e-05, "loss": 0.2365, "step": 3212 }, { "epoch": 0.58, "learning_rate": 1.1749345081362257e-05, "loss": 0.2463, "step": 3213 }, { "epoch": 0.58, "learning_rate": 1.1740746757176297e-05, "loss": 0.1869, "step": 3214 }, { "epoch": 0.58, "learning_rate": 1.173214955684532e-05, "loss": 0.1957, "step": 3215 }, { "epoch": 0.58, "learning_rate": 1.1723553483333807e-05, "loss": 0.2175, "step": 3216 }, { "epoch": 0.58, "learning_rate": 1.1714958539605862e-05, "loss": 0.1884, "step": 3217 }, { "epoch": 0.58, "learning_rate": 1.1706364728625183e-05, "loss": 0.2298, "step": 3218 }, { "epoch": 0.58, "learning_rate": 1.169777205335509e-05, "loss": 0.1621, "step": 3219 }, { "epoch": 0.58, "learning_rate": 1.1689180516758508e-05, "loss": 0.1443, "step": 3220 }, { "epoch": 0.58, "learning_rate": 1.1680590121797965e-05, "loss": 0.1977, "step": 3221 }, { "epoch": 0.58, "learning_rate": 1.1672000871435605e-05, "loss": 0.1582, "step": 3222 }, { "epoch": 0.58, "learning_rate": 1.1663412768633169e-05, "loss": 0.1978, "step": 3223 }, { "epoch": 0.58, "learning_rate": 1.1654825816352e-05, "loss": 0.1588, "step": 3224 }, { "epoch": 0.58, "learning_rate": 1.1646240017553055e-05, "loss": 0.2931, "step": 3225 }, { "epoch": 0.58, "learning_rate": 1.1637655375196885e-05, "loss": 0.1475, "step": 3226 }, { "epoch": 0.59, "learning_rate": 1.1629071892243645e-05, "loss": 0.2493, "step": 3227 }, { "epoch": 0.59, "learning_rate": 1.1620489571653086e-05, "loss": 0.1976, "step": 3228 }, { "epoch": 0.59, "learning_rate": 1.1611908416384571e-05, "loss": 0.2306, "step": 3229 }, { "epoch": 0.59, "learning_rate": 1.1603328429397045e-05, "loss": 0.2065, "step": 3230 }, { "epoch": 0.59, "learning_rate": 1.1594749613649055e-05, "loss": 0.1938, "step": 3231 }, { "epoch": 0.59, "learning_rate": 1.1586171972098753e-05, "loss": 0.2079, "step": 3232 }, { "epoch": 0.59, "learning_rate": 1.1577595507703873e-05, "loss": 0.2029, "step": 3233 }, { "epoch": 0.59, "learning_rate": 1.1569020223421755e-05, "loss": 0.1739, "step": 3234 }, { "epoch": 0.59, "learning_rate": 1.1560446122209327e-05, "loss": 0.1943, "step": 3235 }, { "epoch": 0.59, "learning_rate": 1.1551873207023102e-05, "loss": 0.2477, "step": 3236 }, { "epoch": 0.59, "learning_rate": 1.1543301480819197e-05, "loss": 0.1724, "step": 3237 }, { "epoch": 0.59, "learning_rate": 1.1534730946553308e-05, "loss": 0.1994, "step": 3238 }, { "epoch": 0.59, "learning_rate": 1.1526161607180733e-05, "loss": 0.259, "step": 3239 }, { "epoch": 0.59, "learning_rate": 1.151759346565634e-05, "loss": 0.2058, "step": 3240 }, { "epoch": 0.59, "learning_rate": 1.1509026524934597e-05, "loss": 0.2199, "step": 3241 }, { "epoch": 0.59, "learning_rate": 1.1500460787969558e-05, "loss": 0.1809, "step": 3242 }, { "epoch": 0.59, "learning_rate": 1.1491896257714853e-05, "loss": 0.2473, "step": 3243 }, { "epoch": 0.59, "learning_rate": 1.1483332937123707e-05, "loss": 0.2946, "step": 3244 }, { "epoch": 0.59, "learning_rate": 1.1474770829148916e-05, "loss": 0.2138, "step": 3245 }, { "epoch": 0.59, "learning_rate": 1.146620993674287e-05, "loss": 0.2208, "step": 3246 }, { "epoch": 0.59, "learning_rate": 1.1457650262857534e-05, "loss": 0.2001, "step": 3247 }, { "epoch": 0.59, "learning_rate": 1.1449091810444447e-05, "loss": 0.2298, "step": 3248 }, { "epoch": 0.59, "learning_rate": 1.1440534582454738e-05, "loss": 0.1938, "step": 3249 }, { "epoch": 0.59, "learning_rate": 1.1431978581839105e-05, "loss": 0.1555, "step": 3250 }, { "epoch": 0.59, "learning_rate": 1.1423423811547831e-05, "loss": 0.1574, "step": 3251 }, { "epoch": 0.59, "learning_rate": 1.1414870274530767e-05, "loss": 0.1711, "step": 3252 }, { "epoch": 0.59, "learning_rate": 1.1406317973737339e-05, "loss": 0.1854, "step": 3253 }, { "epoch": 0.59, "learning_rate": 1.1397766912116555e-05, "loss": 0.1954, "step": 3254 }, { "epoch": 0.59, "learning_rate": 1.1389217092616984e-05, "loss": 0.1589, "step": 3255 }, { "epoch": 0.59, "learning_rate": 1.138066851818678e-05, "loss": 0.2239, "step": 3256 }, { "epoch": 0.59, "learning_rate": 1.1372121191773655e-05, "loss": 0.1953, "step": 3257 }, { "epoch": 0.59, "learning_rate": 1.1363575116324897e-05, "loss": 0.1753, "step": 3258 }, { "epoch": 0.59, "learning_rate": 1.1355030294787363e-05, "loss": 0.1604, "step": 3259 }, { "epoch": 0.59, "learning_rate": 1.1346486730107476e-05, "loss": 0.196, "step": 3260 }, { "epoch": 0.59, "learning_rate": 1.133794442523123e-05, "loss": 0.209, "step": 3261 }, { "epoch": 0.59, "learning_rate": 1.1329403383104173e-05, "loss": 0.1488, "step": 3262 }, { "epoch": 0.59, "learning_rate": 1.1320863606671437e-05, "loss": 0.1987, "step": 3263 }, { "epoch": 0.59, "learning_rate": 1.1312325098877692e-05, "loss": 0.1922, "step": 3264 }, { "epoch": 0.59, "learning_rate": 1.1303787862667191e-05, "loss": 0.1698, "step": 3265 }, { "epoch": 0.59, "learning_rate": 1.1295251900983741e-05, "loss": 0.1735, "step": 3266 }, { "epoch": 0.59, "learning_rate": 1.1286717216770712e-05, "loss": 0.1966, "step": 3267 }, { "epoch": 0.59, "learning_rate": 1.1278183812971032e-05, "loss": 0.2438, "step": 3268 }, { "epoch": 0.59, "learning_rate": 1.1269651692527181e-05, "loss": 0.2026, "step": 3269 }, { "epoch": 0.59, "learning_rate": 1.1261120858381213e-05, "loss": 0.2499, "step": 3270 }, { "epoch": 0.59, "learning_rate": 1.1252591313474719e-05, "loss": 0.1525, "step": 3271 }, { "epoch": 0.59, "learning_rate": 1.1244063060748864e-05, "loss": 0.2167, "step": 3272 }, { "epoch": 0.59, "learning_rate": 1.1235536103144352e-05, "loss": 0.1819, "step": 3273 }, { "epoch": 0.59, "learning_rate": 1.1227010443601444e-05, "loss": 0.2526, "step": 3274 }, { "epoch": 0.59, "learning_rate": 1.1218486085059964e-05, "loss": 0.2043, "step": 3275 }, { "epoch": 0.59, "learning_rate": 1.1209963030459274e-05, "loss": 0.2297, "step": 3276 }, { "epoch": 0.59, "learning_rate": 1.1201441282738296e-05, "loss": 0.1711, "step": 3277 }, { "epoch": 0.59, "learning_rate": 1.119292084483549e-05, "loss": 0.1931, "step": 3278 }, { "epoch": 0.59, "learning_rate": 1.118440171968888e-05, "loss": 0.2003, "step": 3279 }, { "epoch": 0.59, "learning_rate": 1.1175883910236024e-05, "loss": 0.217, "step": 3280 }, { "epoch": 0.59, "learning_rate": 1.116736741941403e-05, "loss": 0.1675, "step": 3281 }, { "epoch": 0.59, "learning_rate": 1.1158852250159558e-05, "loss": 0.1819, "step": 3282 }, { "epoch": 0.6, "learning_rate": 1.11503384054088e-05, "loss": 0.2005, "step": 3283 }, { "epoch": 0.6, "learning_rate": 1.1141825888097503e-05, "loss": 0.1834, "step": 3284 }, { "epoch": 0.6, "learning_rate": 1.113331470116095e-05, "loss": 0.1846, "step": 3285 }, { "epoch": 0.6, "learning_rate": 1.1124804847533964e-05, "loss": 0.184, "step": 3286 }, { "epoch": 0.6, "learning_rate": 1.1116296330150917e-05, "loss": 0.2356, "step": 3287 }, { "epoch": 0.6, "learning_rate": 1.1107789151945705e-05, "loss": 0.2188, "step": 3288 }, { "epoch": 0.6, "learning_rate": 1.109928331585178e-05, "loss": 0.2368, "step": 3289 }, { "epoch": 0.6, "learning_rate": 1.1090778824802119e-05, "loss": 0.1454, "step": 3290 }, { "epoch": 0.6, "learning_rate": 1.1082275681729237e-05, "loss": 0.1821, "step": 3291 }, { "epoch": 0.6, "learning_rate": 1.107377388956519e-05, "loss": 0.1983, "step": 3292 }, { "epoch": 0.6, "learning_rate": 1.1065273451241558e-05, "loss": 0.2361, "step": 3293 }, { "epoch": 0.6, "learning_rate": 1.1056774369689469e-05, "loss": 0.1907, "step": 3294 }, { "epoch": 0.6, "learning_rate": 1.1048276647839565e-05, "loss": 0.1933, "step": 3295 }, { "epoch": 0.6, "learning_rate": 1.1039780288622036e-05, "loss": 0.2224, "step": 3296 }, { "epoch": 0.6, "learning_rate": 1.1031285294966592e-05, "loss": 0.1607, "step": 3297 }, { "epoch": 0.6, "learning_rate": 1.1022791669802473e-05, "loss": 0.2816, "step": 3298 }, { "epoch": 0.6, "learning_rate": 1.101429941605845e-05, "loss": 0.2416, "step": 3299 }, { "epoch": 0.6, "learning_rate": 1.1005808536662819e-05, "loss": 0.2113, "step": 3300 }, { "epoch": 0.6, "learning_rate": 1.0997319034543409e-05, "loss": 0.2212, "step": 3301 }, { "epoch": 0.6, "learning_rate": 1.0988830912627563e-05, "loss": 0.2357, "step": 3302 }, { "epoch": 0.6, "learning_rate": 1.0980344173842151e-05, "loss": 0.262, "step": 3303 }, { "epoch": 0.6, "learning_rate": 1.0971858821113576e-05, "loss": 0.2095, "step": 3304 }, { "epoch": 0.6, "learning_rate": 1.0963374857367745e-05, "loss": 0.2166, "step": 3305 }, { "epoch": 0.6, "learning_rate": 1.095489228553011e-05, "loss": 0.1799, "step": 3306 }, { "epoch": 0.6, "learning_rate": 1.094641110852562e-05, "loss": 0.1652, "step": 3307 }, { "epoch": 0.6, "learning_rate": 1.093793132927875e-05, "loss": 0.2329, "step": 3308 }, { "epoch": 0.6, "learning_rate": 1.0929452950713505e-05, "loss": 0.1226, "step": 3309 }, { "epoch": 0.6, "learning_rate": 1.092097597575339e-05, "loss": 0.1162, "step": 3310 }, { "epoch": 0.6, "learning_rate": 1.091250040732144e-05, "loss": 0.2258, "step": 3311 }, { "epoch": 0.6, "learning_rate": 1.0904026248340198e-05, "loss": 0.1368, "step": 3312 }, { "epoch": 0.6, "learning_rate": 1.0895553501731715e-05, "loss": 0.1783, "step": 3313 }, { "epoch": 0.6, "learning_rate": 1.0887082170417564e-05, "loss": 0.2176, "step": 3314 }, { "epoch": 0.6, "learning_rate": 1.0878612257318832e-05, "loss": 0.1768, "step": 3315 }, { "epoch": 0.6, "learning_rate": 1.0870143765356105e-05, "loss": 0.2369, "step": 3316 }, { "epoch": 0.6, "learning_rate": 1.0861676697449496e-05, "loss": 0.1872, "step": 3317 }, { "epoch": 0.6, "learning_rate": 1.0853211056518609e-05, "loss": 0.1573, "step": 3318 }, { "epoch": 0.6, "learning_rate": 1.0844746845482567e-05, "loss": 0.1263, "step": 3319 }, { "epoch": 0.6, "learning_rate": 1.0836284067260004e-05, "loss": 0.1909, "step": 3320 }, { "epoch": 0.6, "learning_rate": 1.0827822724769042e-05, "loss": 0.1865, "step": 3321 }, { "epoch": 0.6, "learning_rate": 1.0819362820927329e-05, "loss": 0.1978, "step": 3322 }, { "epoch": 0.6, "learning_rate": 1.0810904358652003e-05, "loss": 0.2324, "step": 3323 }, { "epoch": 0.6, "learning_rate": 1.0802447340859704e-05, "loss": 0.144, "step": 3324 }, { "epoch": 0.6, "learning_rate": 1.0793991770466589e-05, "loss": 0.1091, "step": 3325 }, { "epoch": 0.6, "learning_rate": 1.0785537650388299e-05, "loss": 0.2277, "step": 3326 }, { "epoch": 0.6, "learning_rate": 1.0777084983539988e-05, "loss": 0.2574, "step": 3327 }, { "epoch": 0.6, "learning_rate": 1.0768633772836294e-05, "loss": 0.1778, "step": 3328 }, { "epoch": 0.6, "learning_rate": 1.0760184021191369e-05, "loss": 0.1623, "step": 3329 }, { "epoch": 0.6, "learning_rate": 1.0751735731518853e-05, "loss": 0.1527, "step": 3330 }, { "epoch": 0.6, "learning_rate": 1.0743288906731877e-05, "loss": 0.1268, "step": 3331 }, { "epoch": 0.6, "learning_rate": 1.0734843549743082e-05, "loss": 0.2224, "step": 3332 }, { "epoch": 0.6, "learning_rate": 1.0726399663464587e-05, "loss": 0.1636, "step": 3333 }, { "epoch": 0.6, "learning_rate": 1.0717957250808021e-05, "loss": 0.2031, "step": 3334 }, { "epoch": 0.6, "learning_rate": 1.0709516314684487e-05, "loss": 0.2169, "step": 3335 }, { "epoch": 0.6, "learning_rate": 1.0701076858004583e-05, "loss": 0.2013, "step": 3336 }, { "epoch": 0.6, "learning_rate": 1.0692638883678409e-05, "loss": 0.247, "step": 3337 }, { "epoch": 0.61, "learning_rate": 1.068420239461554e-05, "loss": 0.1708, "step": 3338 }, { "epoch": 0.61, "learning_rate": 1.0675767393725053e-05, "loss": 0.1726, "step": 3339 }, { "epoch": 0.61, "learning_rate": 1.0667333883915492e-05, "loss": 0.251, "step": 3340 }, { "epoch": 0.61, "learning_rate": 1.0658901868094901e-05, "loss": 0.2244, "step": 3341 }, { "epoch": 0.61, "learning_rate": 1.0650471349170811e-05, "loss": 0.1871, "step": 3342 }, { "epoch": 0.61, "learning_rate": 1.0642042330050222e-05, "loss": 0.1915, "step": 3343 }, { "epoch": 0.61, "learning_rate": 1.0633614813639638e-05, "loss": 0.2109, "step": 3344 }, { "epoch": 0.61, "learning_rate": 1.0625188802845026e-05, "loss": 0.1218, "step": 3345 }, { "epoch": 0.61, "learning_rate": 1.0616764300571845e-05, "loss": 0.332, "step": 3346 }, { "epoch": 0.61, "learning_rate": 1.0608341309725028e-05, "loss": 0.1856, "step": 3347 }, { "epoch": 0.61, "learning_rate": 1.0599919833208987e-05, "loss": 0.1405, "step": 3348 }, { "epoch": 0.61, "learning_rate": 1.0591499873927616e-05, "loss": 0.2169, "step": 3349 }, { "epoch": 0.61, "learning_rate": 1.0583081434784284e-05, "loss": 0.2002, "step": 3350 }, { "epoch": 0.61, "learning_rate": 1.0574664518681832e-05, "loss": 0.1375, "step": 3351 }, { "epoch": 0.61, "learning_rate": 1.0566249128522587e-05, "loss": 0.1782, "step": 3352 }, { "epoch": 0.61, "learning_rate": 1.055783526720833e-05, "loss": 0.2609, "step": 3353 }, { "epoch": 0.61, "learning_rate": 1.0549422937640339e-05, "loss": 0.1714, "step": 3354 }, { "epoch": 0.61, "learning_rate": 1.0541012142719342e-05, "loss": 0.159, "step": 3355 }, { "epoch": 0.61, "learning_rate": 1.0532602885345552e-05, "loss": 0.201, "step": 3356 }, { "epoch": 0.61, "learning_rate": 1.052419516841865e-05, "loss": 0.1837, "step": 3357 }, { "epoch": 0.61, "learning_rate": 1.0515788994837775e-05, "loss": 0.2205, "step": 3358 }, { "epoch": 0.61, "learning_rate": 1.0507384367501553e-05, "loss": 0.143, "step": 3359 }, { "epoch": 0.61, "learning_rate": 1.0498981289308052e-05, "loss": 0.1742, "step": 3360 }, { "epoch": 0.61, "learning_rate": 1.0490579763154832e-05, "loss": 0.197, "step": 3361 }, { "epoch": 0.61, "learning_rate": 1.0482179791938904e-05, "loss": 0.2201, "step": 3362 }, { "epoch": 0.61, "learning_rate": 1.0473781378556737e-05, "loss": 0.2582, "step": 3363 }, { "epoch": 0.61, "learning_rate": 1.0465384525904269e-05, "loss": 0.2862, "step": 3364 }, { "epoch": 0.61, "learning_rate": 1.0456989236876912e-05, "loss": 0.2462, "step": 3365 }, { "epoch": 0.61, "learning_rate": 1.0448595514369515e-05, "loss": 0.192, "step": 3366 }, { "epoch": 0.61, "learning_rate": 1.0440203361276412e-05, "loss": 0.153, "step": 3367 }, { "epoch": 0.61, "learning_rate": 1.0431812780491374e-05, "loss": 0.2021, "step": 3368 }, { "epoch": 0.61, "learning_rate": 1.042342377490764e-05, "loss": 0.2231, "step": 3369 }, { "epoch": 0.61, "learning_rate": 1.0415036347417912e-05, "loss": 0.1555, "step": 3370 }, { "epoch": 0.61, "learning_rate": 1.040665050091433e-05, "loss": 0.1555, "step": 3371 }, { "epoch": 0.61, "learning_rate": 1.0398266238288514e-05, "loss": 0.23, "step": 3372 }, { "epoch": 0.61, "learning_rate": 1.0389883562431512e-05, "loss": 0.2393, "step": 3373 }, { "epoch": 0.61, "learning_rate": 1.0381502476233837e-05, "loss": 0.201, "step": 3374 }, { "epoch": 0.61, "learning_rate": 1.037312298258546e-05, "loss": 0.2358, "step": 3375 }, { "epoch": 0.61, "learning_rate": 1.036474508437579e-05, "loss": 0.233, "step": 3376 }, { "epoch": 0.61, "learning_rate": 1.0356368784493698e-05, "loss": 0.2716, "step": 3377 }, { "epoch": 0.61, "learning_rate": 1.0347994085827489e-05, "loss": 0.1652, "step": 3378 }, { "epoch": 0.61, "learning_rate": 1.0339620991264935e-05, "loss": 0.22, "step": 3379 }, { "epoch": 0.61, "learning_rate": 1.033124950369324e-05, "loss": 0.1635, "step": 3380 }, { "epoch": 0.61, "learning_rate": 1.0322879625999056e-05, "loss": 0.1973, "step": 3381 }, { "epoch": 0.61, "learning_rate": 1.0314511361068484e-05, "loss": 0.2129, "step": 3382 }, { "epoch": 0.61, "learning_rate": 1.0306144711787068e-05, "loss": 0.1607, "step": 3383 }, { "epoch": 0.61, "learning_rate": 1.0297779681039796e-05, "loss": 0.202, "step": 3384 }, { "epoch": 0.61, "learning_rate": 1.0289416271711094e-05, "loss": 0.1699, "step": 3385 }, { "epoch": 0.61, "learning_rate": 1.0281054486684828e-05, "loss": 0.232, "step": 3386 }, { "epoch": 0.61, "learning_rate": 1.0272694328844312e-05, "loss": 0.2101, "step": 3387 }, { "epoch": 0.61, "learning_rate": 1.026433580107229e-05, "loss": 0.1891, "step": 3388 }, { "epoch": 0.61, "learning_rate": 1.0255978906250953e-05, "loss": 0.1938, "step": 3389 }, { "epoch": 0.61, "learning_rate": 1.0247623647261919e-05, "loss": 0.2024, "step": 3390 }, { "epoch": 0.61, "learning_rate": 1.0239270026986243e-05, "loss": 0.216, "step": 3391 }, { "epoch": 0.61, "learning_rate": 1.023091804830443e-05, "loss": 0.1671, "step": 3392 }, { "epoch": 0.62, "learning_rate": 1.0222567714096394e-05, "loss": 0.1618, "step": 3393 }, { "epoch": 0.62, "learning_rate": 1.0214219027241504e-05, "loss": 0.178, "step": 3394 }, { "epoch": 0.62, "learning_rate": 1.0205871990618555e-05, "loss": 0.1922, "step": 3395 }, { "epoch": 0.62, "learning_rate": 1.0197526607105759e-05, "loss": 0.173, "step": 3396 }, { "epoch": 0.62, "learning_rate": 1.0189182879580777e-05, "loss": 0.1887, "step": 3397 }, { "epoch": 0.62, "learning_rate": 1.0180840810920689e-05, "loss": 0.2001, "step": 3398 }, { "epoch": 0.62, "learning_rate": 1.0172500404002011e-05, "loss": 0.1316, "step": 3399 }, { "epoch": 0.62, "learning_rate": 1.0164161661700673e-05, "loss": 0.1708, "step": 3400 }, { "epoch": 0.62, "learning_rate": 1.0155824586892042e-05, "loss": 0.2054, "step": 3401 }, { "epoch": 0.62, "learning_rate": 1.0147489182450906e-05, "loss": 0.1371, "step": 3402 }, { "epoch": 0.62, "learning_rate": 1.0139155451251471e-05, "loss": 0.1183, "step": 3403 }, { "epoch": 0.62, "learning_rate": 1.0130823396167383e-05, "loss": 0.2149, "step": 3404 }, { "epoch": 0.62, "learning_rate": 1.0122493020071692e-05, "loss": 0.1424, "step": 3405 }, { "epoch": 0.62, "learning_rate": 1.011416432583688e-05, "loss": 0.2224, "step": 3406 }, { "epoch": 0.62, "learning_rate": 1.0105837316334848e-05, "loss": 0.125, "step": 3407 }, { "epoch": 0.62, "learning_rate": 1.0097511994436902e-05, "loss": 0.2067, "step": 3408 }, { "epoch": 0.62, "learning_rate": 1.0089188363013792e-05, "loss": 0.2002, "step": 3409 }, { "epoch": 0.62, "learning_rate": 1.0080866424935658e-05, "loss": 0.1321, "step": 3410 }, { "epoch": 0.62, "learning_rate": 1.0072546183072083e-05, "loss": 0.1836, "step": 3411 }, { "epoch": 0.62, "learning_rate": 1.006422764029204e-05, "loss": 0.1951, "step": 3412 }, { "epoch": 0.62, "learning_rate": 1.005591079946393e-05, "loss": 0.2239, "step": 3413 }, { "epoch": 0.62, "learning_rate": 1.0047595663455559e-05, "loss": 0.2025, "step": 3414 }, { "epoch": 0.62, "learning_rate": 1.0039282235134162e-05, "loss": 0.1503, "step": 3415 }, { "epoch": 0.62, "learning_rate": 1.0030970517366363e-05, "loss": 0.2034, "step": 3416 }, { "epoch": 0.62, "learning_rate": 1.0022660513018212e-05, "loss": 0.248, "step": 3417 }, { "epoch": 0.62, "learning_rate": 1.0014352224955165e-05, "loss": 0.1714, "step": 3418 }, { "epoch": 0.62, "learning_rate": 1.0006045656042076e-05, "loss": 0.2594, "step": 3419 }, { "epoch": 0.62, "learning_rate": 9.99774080914322e-06, "loss": 0.2068, "step": 3420 }, { "epoch": 0.62, "learning_rate": 9.989437687122272e-06, "loss": 0.2179, "step": 3421 }, { "epoch": 0.62, "learning_rate": 9.981136292842314e-06, "loss": 0.2357, "step": 3422 }, { "epoch": 0.62, "learning_rate": 9.97283662916583e-06, "loss": 0.1828, "step": 3423 }, { "epoch": 0.62, "learning_rate": 9.964538698954703e-06, "loss": 0.2644, "step": 3424 }, { "epoch": 0.62, "learning_rate": 9.956242505070232e-06, "loss": 0.2378, "step": 3425 }, { "epoch": 0.62, "learning_rate": 9.947948050373098e-06, "loss": 0.1851, "step": 3426 }, { "epoch": 0.62, "learning_rate": 9.939655337723406e-06, "loss": 0.1765, "step": 3427 }, { "epoch": 0.62, "learning_rate": 9.93136436998064e-06, "loss": 0.1811, "step": 3428 }, { "epoch": 0.62, "learning_rate": 9.923075150003687e-06, "loss": 0.2458, "step": 3429 }, { "epoch": 0.62, "learning_rate": 9.914787680650841e-06, "loss": 0.165, "step": 3430 }, { "epoch": 0.62, "learning_rate": 9.906501964779778e-06, "loss": 0.1602, "step": 3431 }, { "epoch": 0.62, "learning_rate": 9.898218005247585e-06, "loss": 0.1652, "step": 3432 }, { "epoch": 0.62, "learning_rate": 9.889935804910728e-06, "loss": 0.1704, "step": 3433 }, { "epoch": 0.62, "learning_rate": 9.881655366625076e-06, "loss": 0.2143, "step": 3434 }, { "epoch": 0.62, "learning_rate": 9.873376693245891e-06, "loss": 0.1275, "step": 3435 }, { "epoch": 0.62, "learning_rate": 9.865099787627817e-06, "loss": 0.1805, "step": 3436 }, { "epoch": 0.62, "learning_rate": 9.856824652624902e-06, "loss": 0.148, "step": 3437 }, { "epoch": 0.62, "learning_rate": 9.848551291090569e-06, "loss": 0.1548, "step": 3438 }, { "epoch": 0.62, "learning_rate": 9.840279705877641e-06, "loss": 0.2006, "step": 3439 }, { "epoch": 0.62, "learning_rate": 9.832009899838326e-06, "loss": 0.1439, "step": 3440 }, { "epoch": 0.62, "learning_rate": 9.82374187582421e-06, "loss": 0.2852, "step": 3441 }, { "epoch": 0.62, "learning_rate": 9.815475636686277e-06, "loss": 0.21, "step": 3442 }, { "epoch": 0.62, "learning_rate": 9.807211185274888e-06, "loss": 0.1876, "step": 3443 }, { "epoch": 0.62, "learning_rate": 9.798948524439789e-06, "loss": 0.2361, "step": 3444 }, { "epoch": 0.62, "learning_rate": 9.79068765703011e-06, "loss": 0.1996, "step": 3445 }, { "epoch": 0.62, "learning_rate": 9.782428585894356e-06, "loss": 0.1862, "step": 3446 }, { "epoch": 0.62, "learning_rate": 9.774171313880428e-06, "loss": 0.1671, "step": 3447 }, { "epoch": 0.63, "learning_rate": 9.765915843835588e-06, "loss": 0.2189, "step": 3448 }, { "epoch": 0.63, "learning_rate": 9.757662178606493e-06, "loss": 0.2326, "step": 3449 }, { "epoch": 0.63, "learning_rate": 9.74941032103916e-06, "loss": 0.1941, "step": 3450 }, { "epoch": 0.63, "learning_rate": 9.741160273979002e-06, "loss": 0.2375, "step": 3451 }, { "epoch": 0.63, "learning_rate": 9.732912040270797e-06, "loss": 0.2235, "step": 3452 }, { "epoch": 0.63, "learning_rate": 9.724665622758692e-06, "loss": 0.1725, "step": 3453 }, { "epoch": 0.63, "learning_rate": 9.716421024286223e-06, "loss": 0.118, "step": 3454 }, { "epoch": 0.63, "learning_rate": 9.708178247696286e-06, "loss": 0.1843, "step": 3455 }, { "epoch": 0.63, "learning_rate": 9.699937295831156e-06, "loss": 0.1963, "step": 3456 }, { "epoch": 0.63, "learning_rate": 9.691698171532476e-06, "loss": 0.2045, "step": 3457 }, { "epoch": 0.63, "learning_rate": 9.683460877641254e-06, "loss": 0.2012, "step": 3458 }, { "epoch": 0.63, "learning_rate": 9.675225416997878e-06, "loss": 0.1533, "step": 3459 }, { "epoch": 0.63, "learning_rate": 9.666991792442095e-06, "loss": 0.1655, "step": 3460 }, { "epoch": 0.63, "learning_rate": 9.658760006813032e-06, "loss": 0.1409, "step": 3461 }, { "epoch": 0.63, "learning_rate": 9.65053006294915e-06, "loss": 0.2928, "step": 3462 }, { "epoch": 0.63, "learning_rate": 9.642301963688314e-06, "loss": 0.1681, "step": 3463 }, { "epoch": 0.63, "learning_rate": 9.634075711867724e-06, "loss": 0.137, "step": 3464 }, { "epoch": 0.63, "learning_rate": 9.625851310323964e-06, "loss": 0.1918, "step": 3465 }, { "epoch": 0.63, "learning_rate": 9.617628761892964e-06, "loss": 0.2068, "step": 3466 }, { "epoch": 0.63, "learning_rate": 9.609408069410023e-06, "loss": 0.1061, "step": 3467 }, { "epoch": 0.63, "learning_rate": 9.601189235709805e-06, "loss": 0.1499, "step": 3468 }, { "epoch": 0.63, "learning_rate": 9.592972263626312e-06, "loss": 0.2056, "step": 3469 }, { "epoch": 0.63, "learning_rate": 9.58475715599293e-06, "loss": 0.2615, "step": 3470 }, { "epoch": 0.63, "learning_rate": 9.576543915642387e-06, "loss": 0.2417, "step": 3471 }, { "epoch": 0.63, "learning_rate": 9.568332545406773e-06, "loss": 0.1733, "step": 3472 }, { "epoch": 0.63, "learning_rate": 9.560123048117529e-06, "loss": 0.203, "step": 3473 }, { "epoch": 0.63, "learning_rate": 9.55191542660545e-06, "loss": 0.1948, "step": 3474 }, { "epoch": 0.63, "learning_rate": 9.543709683700691e-06, "loss": 0.2188, "step": 3475 }, { "epoch": 0.63, "learning_rate": 9.535505822232747e-06, "loss": 0.1198, "step": 3476 }, { "epoch": 0.63, "learning_rate": 9.52730384503048e-06, "loss": 0.2292, "step": 3477 }, { "epoch": 0.63, "learning_rate": 9.519103754922092e-06, "loss": 0.2188, "step": 3478 }, { "epoch": 0.63, "learning_rate": 9.51090555473513e-06, "loss": 0.2672, "step": 3479 }, { "epoch": 0.63, "learning_rate": 9.502709247296503e-06, "loss": 0.2977, "step": 3480 }, { "epoch": 0.63, "learning_rate": 9.494514835432455e-06, "loss": 0.2597, "step": 3481 }, { "epoch": 0.63, "learning_rate": 9.486322321968585e-06, "loss": 0.2095, "step": 3482 }, { "epoch": 0.63, "learning_rate": 9.478131709729831e-06, "loss": 0.1491, "step": 3483 }, { "epoch": 0.63, "learning_rate": 9.46994300154048e-06, "loss": 0.1808, "step": 3484 }, { "epoch": 0.63, "learning_rate": 9.461756200224163e-06, "loss": 0.2373, "step": 3485 }, { "epoch": 0.63, "learning_rate": 9.453571308603841e-06, "loss": 0.2157, "step": 3486 }, { "epoch": 0.63, "learning_rate": 9.445388329501839e-06, "loss": 0.2676, "step": 3487 }, { "epoch": 0.63, "learning_rate": 9.437207265739802e-06, "loss": 0.2844, "step": 3488 }, { "epoch": 0.63, "learning_rate": 9.429028120138726e-06, "loss": 0.1724, "step": 3489 }, { "epoch": 0.63, "learning_rate": 9.420850895518939e-06, "loss": 0.2161, "step": 3490 }, { "epoch": 0.63, "learning_rate": 9.412675594700113e-06, "loss": 0.1827, "step": 3491 }, { "epoch": 0.63, "learning_rate": 9.404502220501253e-06, "loss": 0.2309, "step": 3492 }, { "epoch": 0.63, "learning_rate": 9.396330775740695e-06, "loss": 0.1886, "step": 3493 }, { "epoch": 0.63, "learning_rate": 9.388161263236125e-06, "loss": 0.192, "step": 3494 }, { "epoch": 0.63, "learning_rate": 9.379993685804546e-06, "loss": 0.2365, "step": 3495 }, { "epoch": 0.63, "learning_rate": 9.371828046262299e-06, "loss": 0.1368, "step": 3496 }, { "epoch": 0.63, "learning_rate": 9.36366434742506e-06, "loss": 0.2333, "step": 3497 }, { "epoch": 0.63, "learning_rate": 9.355502592107832e-06, "loss": 0.2005, "step": 3498 }, { "epoch": 0.63, "learning_rate": 9.347342783124954e-06, "loss": 0.2544, "step": 3499 }, { "epoch": 0.63, "learning_rate": 9.339184923290085e-06, "loss": 0.1393, "step": 3500 }, { "epoch": 0.63, "learning_rate": 9.331029015416222e-06, "loss": 0.1991, "step": 3501 }, { "epoch": 0.63, "learning_rate": 9.322875062315677e-06, "loss": 0.2829, "step": 3502 }, { "epoch": 0.64, "learning_rate": 9.314723066800095e-06, "loss": 0.2193, "step": 3503 }, { "epoch": 0.64, "learning_rate": 9.306573031680451e-06, "loss": 0.2167, "step": 3504 }, { "epoch": 0.64, "learning_rate": 9.298424959767032e-06, "loss": 0.1118, "step": 3505 }, { "epoch": 0.64, "learning_rate": 9.290278853869458e-06, "loss": 0.2886, "step": 3506 }, { "epoch": 0.64, "learning_rate": 9.282134716796668e-06, "loss": 0.25, "step": 3507 }, { "epoch": 0.64, "learning_rate": 9.273992551356918e-06, "loss": 0.2905, "step": 3508 }, { "epoch": 0.64, "learning_rate": 9.26585236035779e-06, "loss": 0.1537, "step": 3509 }, { "epoch": 0.64, "learning_rate": 9.257714146606188e-06, "loss": 0.2023, "step": 3510 }, { "epoch": 0.64, "learning_rate": 9.24957791290832e-06, "loss": 0.2333, "step": 3511 }, { "epoch": 0.64, "learning_rate": 9.241443662069717e-06, "loss": 0.1718, "step": 3512 }, { "epoch": 0.64, "learning_rate": 9.233311396895243e-06, "loss": 0.125, "step": 3513 }, { "epoch": 0.64, "learning_rate": 9.225181120189053e-06, "loss": 0.1833, "step": 3514 }, { "epoch": 0.64, "learning_rate": 9.217052834754636e-06, "loss": 0.158, "step": 3515 }, { "epoch": 0.64, "learning_rate": 9.208926543394777e-06, "loss": 0.2176, "step": 3516 }, { "epoch": 0.64, "learning_rate": 9.20080224891159e-06, "loss": 0.1783, "step": 3517 }, { "epoch": 0.64, "learning_rate": 9.192679954106489e-06, "loss": 0.1753, "step": 3518 }, { "epoch": 0.64, "learning_rate": 9.184559661780195e-06, "loss": 0.2084, "step": 3519 }, { "epoch": 0.64, "learning_rate": 9.176441374732757e-06, "loss": 0.1884, "step": 3520 }, { "epoch": 0.64, "learning_rate": 9.168325095763514e-06, "loss": 0.1661, "step": 3521 }, { "epoch": 0.64, "learning_rate": 9.160210827671124e-06, "loss": 0.1837, "step": 3522 }, { "epoch": 0.64, "learning_rate": 9.152098573253543e-06, "loss": 0.2672, "step": 3523 }, { "epoch": 0.64, "learning_rate": 9.143988335308034e-06, "loss": 0.1573, "step": 3524 }, { "epoch": 0.64, "learning_rate": 9.135880116631177e-06, "loss": 0.1824, "step": 3525 }, { "epoch": 0.64, "learning_rate": 9.127773920018838e-06, "loss": 0.1837, "step": 3526 }, { "epoch": 0.64, "learning_rate": 9.119669748266199e-06, "loss": 0.2353, "step": 3527 }, { "epoch": 0.64, "learning_rate": 9.111567604167736e-06, "loss": 0.2123, "step": 3528 }, { "epoch": 0.64, "learning_rate": 9.103467490517225e-06, "loss": 0.2315, "step": 3529 }, { "epoch": 0.64, "learning_rate": 9.095369410107755e-06, "loss": 0.151, "step": 3530 }, { "epoch": 0.64, "learning_rate": 9.087273365731693e-06, "loss": 0.226, "step": 3531 }, { "epoch": 0.64, "learning_rate": 9.079179360180724e-06, "loss": 0.1534, "step": 3532 }, { "epoch": 0.64, "learning_rate": 9.071087396245817e-06, "loss": 0.1594, "step": 3533 }, { "epoch": 0.64, "learning_rate": 9.062997476717246e-06, "loss": 0.1504, "step": 3534 }, { "epoch": 0.64, "learning_rate": 9.054909604384569e-06, "loss": 0.2036, "step": 3535 }, { "epoch": 0.64, "learning_rate": 9.046823782036646e-06, "loss": 0.157, "step": 3536 }, { "epoch": 0.64, "learning_rate": 9.03874001246163e-06, "loss": 0.1888, "step": 3537 }, { "epoch": 0.64, "learning_rate": 9.030658298446961e-06, "loss": 0.1736, "step": 3538 }, { "epoch": 0.64, "learning_rate": 9.02257864277938e-06, "loss": 0.2018, "step": 3539 }, { "epoch": 0.64, "learning_rate": 9.01450104824491e-06, "loss": 0.242, "step": 3540 }, { "epoch": 0.64, "learning_rate": 9.006425517628864e-06, "loss": 0.1554, "step": 3541 }, { "epoch": 0.64, "learning_rate": 8.998352053715845e-06, "loss": 0.2686, "step": 3542 }, { "epoch": 0.64, "learning_rate": 8.990280659289739e-06, "loss": 0.1865, "step": 3543 }, { "epoch": 0.64, "learning_rate": 8.982211337133731e-06, "loss": 0.211, "step": 3544 }, { "epoch": 0.64, "learning_rate": 8.974144090030278e-06, "loss": 0.1409, "step": 3545 }, { "epoch": 0.64, "learning_rate": 8.966078920761125e-06, "loss": 0.2701, "step": 3546 }, { "epoch": 0.64, "learning_rate": 8.958015832107304e-06, "loss": 0.1984, "step": 3547 }, { "epoch": 0.64, "learning_rate": 8.949954826849128e-06, "loss": 0.1281, "step": 3548 }, { "epoch": 0.64, "learning_rate": 8.941895907766188e-06, "loss": 0.2349, "step": 3549 }, { "epoch": 0.64, "learning_rate": 8.933839077637362e-06, "loss": 0.1866, "step": 3550 }, { "epoch": 0.64, "learning_rate": 8.925784339240803e-06, "loss": 0.228, "step": 3551 }, { "epoch": 0.64, "learning_rate": 8.917731695353947e-06, "loss": 0.1609, "step": 3552 }, { "epoch": 0.64, "learning_rate": 8.909681148753497e-06, "loss": 0.1697, "step": 3553 }, { "epoch": 0.64, "learning_rate": 8.90163270221545e-06, "loss": 0.1717, "step": 3554 }, { "epoch": 0.64, "learning_rate": 8.893586358515063e-06, "loss": 0.1484, "step": 3555 }, { "epoch": 0.64, "learning_rate": 8.885542120426879e-06, "loss": 0.241, "step": 3556 }, { "epoch": 0.64, "learning_rate": 8.877499990724707e-06, "loss": 0.2245, "step": 3557 }, { "epoch": 0.65, "learning_rate": 8.869459972181633e-06, "loss": 0.1932, "step": 3558 }, { "epoch": 0.65, "learning_rate": 8.861422067570017e-06, "loss": 0.2052, "step": 3559 }, { "epoch": 0.65, "learning_rate": 8.85338627966149e-06, "loss": 0.1929, "step": 3560 }, { "epoch": 0.65, "learning_rate": 8.845352611226939e-06, "loss": 0.2003, "step": 3561 }, { "epoch": 0.65, "learning_rate": 8.83732106503654e-06, "loss": 0.185, "step": 3562 }, { "epoch": 0.65, "learning_rate": 8.829291643859729e-06, "loss": 0.1354, "step": 3563 }, { "epoch": 0.65, "learning_rate": 8.821264350465206e-06, "loss": 0.2029, "step": 3564 }, { "epoch": 0.65, "learning_rate": 8.813239187620944e-06, "loss": 0.2012, "step": 3565 }, { "epoch": 0.65, "learning_rate": 8.805216158094177e-06, "loss": 0.1432, "step": 3566 }, { "epoch": 0.65, "learning_rate": 8.797195264651403e-06, "loss": 0.2476, "step": 3567 }, { "epoch": 0.65, "learning_rate": 8.789176510058387e-06, "loss": 0.2419, "step": 3568 }, { "epoch": 0.65, "learning_rate": 8.781159897080151e-06, "loss": 0.1901, "step": 3569 }, { "epoch": 0.65, "learning_rate": 8.773145428480983e-06, "loss": 0.1945, "step": 3570 }, { "epoch": 0.65, "learning_rate": 8.765133107024431e-06, "loss": 0.1356, "step": 3571 }, { "epoch": 0.65, "learning_rate": 8.757122935473305e-06, "loss": 0.151, "step": 3572 }, { "epoch": 0.65, "learning_rate": 8.749114916589664e-06, "loss": 0.1667, "step": 3573 }, { "epoch": 0.65, "learning_rate": 8.741109053134837e-06, "loss": 0.2302, "step": 3574 }, { "epoch": 0.65, "learning_rate": 8.7331053478694e-06, "loss": 0.236, "step": 3575 }, { "epoch": 0.65, "learning_rate": 8.725103803553186e-06, "loss": 0.1449, "step": 3576 }, { "epoch": 0.65, "learning_rate": 8.717104422945293e-06, "loss": 0.1966, "step": 3577 }, { "epoch": 0.65, "learning_rate": 8.709107208804067e-06, "loss": 0.1742, "step": 3578 }, { "epoch": 0.65, "learning_rate": 8.701112163887099e-06, "loss": 0.1607, "step": 3579 }, { "epoch": 0.65, "learning_rate": 8.693119290951238e-06, "loss": 0.2027, "step": 3580 }, { "epoch": 0.65, "learning_rate": 8.685128592752595e-06, "loss": 0.2216, "step": 3581 }, { "epoch": 0.65, "learning_rate": 8.677140072046506e-06, "loss": 0.1943, "step": 3582 }, { "epoch": 0.65, "learning_rate": 8.66915373158758e-06, "loss": 0.148, "step": 3583 }, { "epoch": 0.65, "learning_rate": 8.66116957412967e-06, "loss": 0.2249, "step": 3584 }, { "epoch": 0.65, "learning_rate": 8.65318760242586e-06, "loss": 0.2531, "step": 3585 }, { "epoch": 0.65, "learning_rate": 8.645207819228493e-06, "loss": 0.2107, "step": 3586 }, { "epoch": 0.65, "learning_rate": 8.637230227289162e-06, "loss": 0.1718, "step": 3587 }, { "epoch": 0.65, "learning_rate": 8.629254829358703e-06, "loss": 0.1915, "step": 3588 }, { "epoch": 0.65, "learning_rate": 8.621281628187177e-06, "loss": 0.2045, "step": 3589 }, { "epoch": 0.65, "learning_rate": 8.61331062652391e-06, "loss": 0.1996, "step": 3590 }, { "epoch": 0.65, "learning_rate": 8.605341827117464e-06, "loss": 0.248, "step": 3591 }, { "epoch": 0.65, "learning_rate": 8.59737523271563e-06, "loss": 0.1959, "step": 3592 }, { "epoch": 0.65, "learning_rate": 8.58941084606545e-06, "loss": 0.1458, "step": 3593 }, { "epoch": 0.65, "learning_rate": 8.581448669913205e-06, "loss": 0.1448, "step": 3594 }, { "epoch": 0.65, "learning_rate": 8.573488707004417e-06, "loss": 0.2638, "step": 3595 }, { "epoch": 0.65, "learning_rate": 8.565530960083822e-06, "loss": 0.1947, "step": 3596 }, { "epoch": 0.65, "learning_rate": 8.557575431895421e-06, "loss": 0.1828, "step": 3597 }, { "epoch": 0.65, "learning_rate": 8.549622125182437e-06, "loss": 0.1773, "step": 3598 }, { "epoch": 0.65, "learning_rate": 8.54167104268732e-06, "loss": 0.2115, "step": 3599 }, { "epoch": 0.65, "learning_rate": 8.533722187151767e-06, "loss": 0.1788, "step": 3600 }, { "epoch": 0.65, "learning_rate": 8.525775561316702e-06, "loss": 0.2955, "step": 3601 }, { "epoch": 0.65, "learning_rate": 8.51783116792227e-06, "loss": 0.2044, "step": 3602 }, { "epoch": 0.65, "learning_rate": 8.509889009707861e-06, "loss": 0.2145, "step": 3603 }, { "epoch": 0.65, "learning_rate": 8.501949089412091e-06, "loss": 0.1932, "step": 3604 }, { "epoch": 0.65, "learning_rate": 8.4940114097728e-06, "loss": 0.1909, "step": 3605 }, { "epoch": 0.65, "learning_rate": 8.486075973527052e-06, "loss": 0.1826, "step": 3606 }, { "epoch": 0.65, "learning_rate": 8.478142783411143e-06, "loss": 0.175, "step": 3607 }, { "epoch": 0.65, "learning_rate": 8.470211842160604e-06, "loss": 0.1902, "step": 3608 }, { "epoch": 0.65, "learning_rate": 8.462283152510173e-06, "loss": 0.1335, "step": 3609 }, { "epoch": 0.65, "learning_rate": 8.454356717193814e-06, "loss": 0.1159, "step": 3610 }, { "epoch": 0.65, "learning_rate": 8.446432538944721e-06, "loss": 0.1548, "step": 3611 }, { "epoch": 0.65, "learning_rate": 8.438510620495318e-06, "loss": 0.2019, "step": 3612 }, { "epoch": 0.66, "learning_rate": 8.430590964577225e-06, "loss": 0.1819, "step": 3613 }, { "epoch": 0.66, "learning_rate": 8.422673573921301e-06, "loss": 0.2507, "step": 3614 }, { "epoch": 0.66, "learning_rate": 8.414758451257622e-06, "loss": 0.1687, "step": 3615 }, { "epoch": 0.66, "learning_rate": 8.406845599315482e-06, "loss": 0.189, "step": 3616 }, { "epoch": 0.66, "learning_rate": 8.398935020823376e-06, "loss": 0.1976, "step": 3617 }, { "epoch": 0.66, "learning_rate": 8.391026718509038e-06, "loss": 0.1374, "step": 3618 }, { "epoch": 0.66, "learning_rate": 8.383120695099412e-06, "loss": 0.218, "step": 3619 }, { "epoch": 0.66, "learning_rate": 8.375216953320635e-06, "loss": 0.1754, "step": 3620 }, { "epoch": 0.66, "learning_rate": 8.367315495898081e-06, "loss": 0.2239, "step": 3621 }, { "epoch": 0.66, "learning_rate": 8.35941632555634e-06, "loss": 0.1863, "step": 3622 }, { "epoch": 0.66, "learning_rate": 8.351519445019184e-06, "loss": 0.145, "step": 3623 }, { "epoch": 0.66, "learning_rate": 8.343624857009625e-06, "loss": 0.1716, "step": 3624 }, { "epoch": 0.66, "learning_rate": 8.335732564249868e-06, "loss": 0.1964, "step": 3625 }, { "epoch": 0.66, "learning_rate": 8.32784256946134e-06, "loss": 0.1497, "step": 3626 }, { "epoch": 0.66, "learning_rate": 8.319954875364655e-06, "loss": 0.1661, "step": 3627 }, { "epoch": 0.66, "learning_rate": 8.31206948467965e-06, "loss": 0.1565, "step": 3628 }, { "epoch": 0.66, "learning_rate": 8.304186400125377e-06, "loss": 0.1666, "step": 3629 }, { "epoch": 0.66, "learning_rate": 8.296305624420057e-06, "loss": 0.2653, "step": 3630 }, { "epoch": 0.66, "learning_rate": 8.28842716028115e-06, "loss": 0.0984, "step": 3631 }, { "epoch": 0.66, "learning_rate": 8.280551010425304e-06, "loss": 0.1858, "step": 3632 }, { "epoch": 0.66, "learning_rate": 8.27267717756838e-06, "loss": 0.1304, "step": 3633 }, { "epoch": 0.66, "learning_rate": 8.264805664425417e-06, "loss": 0.1827, "step": 3634 }, { "epoch": 0.66, "learning_rate": 8.256936473710674e-06, "loss": 0.121, "step": 3635 }, { "epoch": 0.66, "learning_rate": 8.249069608137608e-06, "loss": 0.2375, "step": 3636 }, { "epoch": 0.66, "learning_rate": 8.241205070418865e-06, "loss": 0.1759, "step": 3637 }, { "epoch": 0.66, "learning_rate": 8.23334286326629e-06, "loss": 0.1767, "step": 3638 }, { "epoch": 0.66, "learning_rate": 8.225482989390939e-06, "loss": 0.1553, "step": 3639 }, { "epoch": 0.66, "learning_rate": 8.217625451503041e-06, "loss": 0.1194, "step": 3640 }, { "epoch": 0.66, "learning_rate": 8.209770252312032e-06, "loss": 0.195, "step": 3641 }, { "epoch": 0.66, "learning_rate": 8.20191739452654e-06, "loss": 0.2149, "step": 3642 }, { "epoch": 0.66, "learning_rate": 8.194066880854399e-06, "loss": 0.1102, "step": 3643 }, { "epoch": 0.66, "learning_rate": 8.186218714002602e-06, "loss": 0.2276, "step": 3644 }, { "epoch": 0.66, "learning_rate": 8.17837289667736e-06, "loss": 0.1612, "step": 3645 }, { "epoch": 0.66, "learning_rate": 8.170529431584073e-06, "loss": 0.2845, "step": 3646 }, { "epoch": 0.66, "learning_rate": 8.16268832142731e-06, "loss": 0.1826, "step": 3647 }, { "epoch": 0.66, "learning_rate": 8.154849568910849e-06, "loss": 0.134, "step": 3648 }, { "epoch": 0.66, "learning_rate": 8.147013176737643e-06, "loss": 0.2105, "step": 3649 }, { "epoch": 0.66, "learning_rate": 8.139179147609846e-06, "loss": 0.1719, "step": 3650 }, { "epoch": 0.66, "learning_rate": 8.131347484228768e-06, "loss": 0.2107, "step": 3651 }, { "epoch": 0.66, "learning_rate": 8.123518189294935e-06, "loss": 0.2389, "step": 3652 }, { "epoch": 0.66, "learning_rate": 8.11569126550804e-06, "loss": 0.1951, "step": 3653 }, { "epoch": 0.66, "learning_rate": 8.107866715566957e-06, "loss": 0.2392, "step": 3654 }, { "epoch": 0.66, "learning_rate": 8.100044542169747e-06, "loss": 0.1638, "step": 3655 }, { "epoch": 0.66, "learning_rate": 8.092224748013658e-06, "loss": 0.1747, "step": 3656 }, { "epoch": 0.66, "learning_rate": 8.084407335795096e-06, "loss": 0.207, "step": 3657 }, { "epoch": 0.66, "learning_rate": 8.076592308209668e-06, "loss": 0.1696, "step": 3658 }, { "epoch": 0.66, "learning_rate": 8.068779667952155e-06, "loss": 0.1647, "step": 3659 }, { "epoch": 0.66, "learning_rate": 8.060969417716495e-06, "loss": 0.1527, "step": 3660 }, { "epoch": 0.66, "learning_rate": 8.05316156019583e-06, "loss": 0.1619, "step": 3661 }, { "epoch": 0.66, "learning_rate": 8.045356098082459e-06, "loss": 0.2422, "step": 3662 }, { "epoch": 0.66, "learning_rate": 8.037553034067855e-06, "loss": 0.1561, "step": 3663 }, { "epoch": 0.66, "learning_rate": 8.029752370842679e-06, "loss": 0.1604, "step": 3664 }, { "epoch": 0.66, "learning_rate": 8.021954111096744e-06, "loss": 0.1709, "step": 3665 }, { "epoch": 0.66, "learning_rate": 8.014158257519046e-06, "loss": 0.257, "step": 3666 }, { "epoch": 0.66, "learning_rate": 8.006364812797761e-06, "loss": 0.1198, "step": 3667 }, { "epoch": 0.66, "learning_rate": 7.998573779620208e-06, "loss": 0.1942, "step": 3668 }, { "epoch": 0.67, "learning_rate": 7.990785160672894e-06, "loss": 0.1744, "step": 3669 }, { "epoch": 0.67, "learning_rate": 7.98299895864149e-06, "loss": 0.1873, "step": 3670 }, { "epoch": 0.67, "learning_rate": 7.975215176210841e-06, "loss": 0.2028, "step": 3671 }, { "epoch": 0.67, "learning_rate": 7.967433816064933e-06, "loss": 0.1865, "step": 3672 }, { "epoch": 0.67, "learning_rate": 7.95965488088694e-06, "loss": 0.1273, "step": 3673 }, { "epoch": 0.67, "learning_rate": 7.9518783733592e-06, "loss": 0.1947, "step": 3674 }, { "epoch": 0.67, "learning_rate": 7.944104296163193e-06, "loss": 0.1785, "step": 3675 }, { "epoch": 0.67, "learning_rate": 7.936332651979584e-06, "loss": 0.1638, "step": 3676 }, { "epoch": 0.67, "learning_rate": 7.928563443488185e-06, "loss": 0.156, "step": 3677 }, { "epoch": 0.67, "learning_rate": 7.920796673367982e-06, "loss": 0.1767, "step": 3678 }, { "epoch": 0.67, "learning_rate": 7.913032344297097e-06, "loss": 0.2327, "step": 3679 }, { "epoch": 0.67, "learning_rate": 7.905270458952832e-06, "loss": 0.1557, "step": 3680 }, { "epoch": 0.67, "learning_rate": 7.897511020011643e-06, "loss": 0.1914, "step": 3681 }, { "epoch": 0.67, "learning_rate": 7.889754030149128e-06, "loss": 0.2133, "step": 3682 }, { "epoch": 0.67, "learning_rate": 7.881999492040054e-06, "loss": 0.173, "step": 3683 }, { "epoch": 0.67, "learning_rate": 7.874247408358347e-06, "loss": 0.2139, "step": 3684 }, { "epoch": 0.67, "learning_rate": 7.866497781777065e-06, "loss": 0.1555, "step": 3685 }, { "epoch": 0.67, "learning_rate": 7.858750614968439e-06, "loss": 0.1344, "step": 3686 }, { "epoch": 0.67, "learning_rate": 7.851005910603848e-06, "loss": 0.14, "step": 3687 }, { "epoch": 0.67, "learning_rate": 7.843263671353823e-06, "loss": 0.2027, "step": 3688 }, { "epoch": 0.67, "learning_rate": 7.835523899888028e-06, "loss": 0.1741, "step": 3689 }, { "epoch": 0.67, "learning_rate": 7.827786598875297e-06, "loss": 0.2048, "step": 3690 }, { "epoch": 0.67, "learning_rate": 7.820051770983611e-06, "loss": 0.118, "step": 3691 }, { "epoch": 0.67, "learning_rate": 7.812319418880078e-06, "loss": 0.1884, "step": 3692 }, { "epoch": 0.67, "learning_rate": 7.804589545230974e-06, "loss": 0.1678, "step": 3693 }, { "epoch": 0.67, "learning_rate": 7.796862152701712e-06, "loss": 0.155, "step": 3694 }, { "epoch": 0.67, "learning_rate": 7.789137243956854e-06, "loss": 0.1608, "step": 3695 }, { "epoch": 0.67, "learning_rate": 7.781414821660089e-06, "loss": 0.1694, "step": 3696 }, { "epoch": 0.67, "learning_rate": 7.773694888474268e-06, "loss": 0.1424, "step": 3697 }, { "epoch": 0.67, "learning_rate": 7.765977447061384e-06, "loss": 0.2572, "step": 3698 }, { "epoch": 0.67, "learning_rate": 7.758262500082549e-06, "loss": 0.1387, "step": 3699 }, { "epoch": 0.67, "learning_rate": 7.750550050198039e-06, "loss": 0.1619, "step": 3700 }, { "epoch": 0.67, "learning_rate": 7.74284010006726e-06, "loss": 0.1467, "step": 3701 }, { "epoch": 0.67, "learning_rate": 7.735132652348747e-06, "loss": 0.219, "step": 3702 }, { "epoch": 0.67, "learning_rate": 7.727427709700186e-06, "loss": 0.2385, "step": 3703 }, { "epoch": 0.67, "learning_rate": 7.719725274778394e-06, "loss": 0.224, "step": 3704 }, { "epoch": 0.67, "learning_rate": 7.712025350239326e-06, "loss": 0.1818, "step": 3705 }, { "epoch": 0.67, "learning_rate": 7.70432793873806e-06, "loss": 0.2083, "step": 3706 }, { "epoch": 0.67, "learning_rate": 7.69663304292882e-06, "loss": 0.2512, "step": 3707 }, { "epoch": 0.67, "learning_rate": 7.688940665464963e-06, "loss": 0.1584, "step": 3708 }, { "epoch": 0.67, "learning_rate": 7.681250808998968e-06, "loss": 0.1597, "step": 3709 }, { "epoch": 0.67, "learning_rate": 7.673563476182444e-06, "loss": 0.1796, "step": 3710 }, { "epoch": 0.67, "learning_rate": 7.665878669666142e-06, "loss": 0.2178, "step": 3711 }, { "epoch": 0.67, "learning_rate": 7.658196392099942e-06, "loss": 0.2367, "step": 3712 }, { "epoch": 0.67, "learning_rate": 7.650516646132829e-06, "loss": 0.1632, "step": 3713 }, { "epoch": 0.67, "learning_rate": 7.642839434412942e-06, "loss": 0.2664, "step": 3714 }, { "epoch": 0.67, "learning_rate": 7.635164759587532e-06, "loss": 0.2145, "step": 3715 }, { "epoch": 0.67, "learning_rate": 7.627492624302986e-06, "loss": 0.2188, "step": 3716 }, { "epoch": 0.67, "learning_rate": 7.619823031204799e-06, "loss": 0.2028, "step": 3717 }, { "epoch": 0.67, "learning_rate": 7.612155982937599e-06, "loss": 0.2761, "step": 3718 }, { "epoch": 0.67, "learning_rate": 7.604491482145146e-06, "loss": 0.1826, "step": 3719 }, { "epoch": 0.67, "learning_rate": 7.596829531470298e-06, "loss": 0.1786, "step": 3720 }, { "epoch": 0.67, "learning_rate": 7.5891701335550515e-06, "loss": 0.1617, "step": 3721 }, { "epoch": 0.67, "learning_rate": 7.581513291040526e-06, "loss": 0.1738, "step": 3722 }, { "epoch": 0.67, "learning_rate": 7.57385900656694e-06, "loss": 0.1871, "step": 3723 }, { "epoch": 0.68, "learning_rate": 7.56620728277365e-06, "loss": 0.1228, "step": 3724 }, { "epoch": 0.68, "learning_rate": 7.558558122299119e-06, "loss": 0.1104, "step": 3725 }, { "epoch": 0.68, "learning_rate": 7.550911527780937e-06, "loss": 0.2341, "step": 3726 }, { "epoch": 0.68, "learning_rate": 7.543267501855787e-06, "loss": 0.169, "step": 3727 }, { "epoch": 0.68, "learning_rate": 7.535626047159487e-06, "loss": 0.146, "step": 3728 }, { "epoch": 0.68, "learning_rate": 7.527987166326967e-06, "loss": 0.2226, "step": 3729 }, { "epoch": 0.68, "learning_rate": 7.520350861992255e-06, "loss": 0.164, "step": 3730 }, { "epoch": 0.68, "learning_rate": 7.512717136788505e-06, "loss": 0.2617, "step": 3731 }, { "epoch": 0.68, "learning_rate": 7.505085993347972e-06, "loss": 0.1978, "step": 3732 }, { "epoch": 0.68, "learning_rate": 7.497457434302038e-06, "loss": 0.1748, "step": 3733 }, { "epoch": 0.68, "learning_rate": 7.489831462281164e-06, "loss": 0.1662, "step": 3734 }, { "epoch": 0.68, "learning_rate": 7.482208079914944e-06, "loss": 0.1823, "step": 3735 }, { "epoch": 0.68, "learning_rate": 7.4745872898320755e-06, "loss": 0.2256, "step": 3736 }, { "epoch": 0.68, "learning_rate": 7.466969094660348e-06, "loss": 0.2573, "step": 3737 }, { "epoch": 0.68, "learning_rate": 7.45935349702667e-06, "loss": 0.2356, "step": 3738 }, { "epoch": 0.68, "learning_rate": 7.451740499557056e-06, "loss": 0.2144, "step": 3739 }, { "epoch": 0.68, "learning_rate": 7.444130104876608e-06, "loss": 0.2361, "step": 3740 }, { "epoch": 0.68, "learning_rate": 7.436522315609546e-06, "loss": 0.2502, "step": 3741 }, { "epoch": 0.68, "learning_rate": 7.428917134379186e-06, "loss": 0.2349, "step": 3742 }, { "epoch": 0.68, "learning_rate": 7.421314563807949e-06, "loss": 0.159, "step": 3743 }, { "epoch": 0.68, "learning_rate": 7.413714606517344e-06, "loss": 0.144, "step": 3744 }, { "epoch": 0.68, "learning_rate": 7.406117265127987e-06, "loss": 0.1922, "step": 3745 }, { "epoch": 0.68, "learning_rate": 7.398522542259602e-06, "loss": 0.1045, "step": 3746 }, { "epoch": 0.68, "learning_rate": 7.390930440530986e-06, "loss": 0.1738, "step": 3747 }, { "epoch": 0.68, "learning_rate": 7.3833409625600525e-06, "loss": 0.165, "step": 3748 }, { "epoch": 0.68, "learning_rate": 7.375754110963804e-06, "loss": 0.2279, "step": 3749 }, { "epoch": 0.68, "learning_rate": 7.368169888358339e-06, "loss": 0.1521, "step": 3750 }, { "epoch": 0.68, "learning_rate": 7.360588297358842e-06, "loss": 0.2205, "step": 3751 }, { "epoch": 0.68, "learning_rate": 7.353009340579596e-06, "loss": 0.1996, "step": 3752 }, { "epoch": 0.68, "learning_rate": 7.345433020633984e-06, "loss": 0.171, "step": 3753 }, { "epoch": 0.68, "learning_rate": 7.33785934013446e-06, "loss": 0.1584, "step": 3754 }, { "epoch": 0.68, "learning_rate": 7.33028830169258e-06, "loss": 0.2494, "step": 3755 }, { "epoch": 0.68, "learning_rate": 7.322719907918999e-06, "loss": 0.1602, "step": 3756 }, { "epoch": 0.68, "learning_rate": 7.315154161423434e-06, "loss": 0.1868, "step": 3757 }, { "epoch": 0.68, "learning_rate": 7.307591064814717e-06, "loss": 0.1951, "step": 3758 }, { "epoch": 0.68, "learning_rate": 7.300030620700742e-06, "loss": 0.1541, "step": 3759 }, { "epoch": 0.68, "learning_rate": 7.292472831688504e-06, "loss": 0.2001, "step": 3760 }, { "epoch": 0.68, "learning_rate": 7.284917700384085e-06, "loss": 0.1682, "step": 3761 }, { "epoch": 0.68, "learning_rate": 7.277365229392631e-06, "loss": 0.1821, "step": 3762 }, { "epoch": 0.68, "learning_rate": 7.269815421318391e-06, "loss": 0.165, "step": 3763 }, { "epoch": 0.68, "learning_rate": 7.262268278764693e-06, "loss": 0.2311, "step": 3764 }, { "epoch": 0.68, "learning_rate": 7.254723804333929e-06, "loss": 0.2099, "step": 3765 }, { "epoch": 0.68, "learning_rate": 7.2471820006275885e-06, "loss": 0.2334, "step": 3766 }, { "epoch": 0.68, "learning_rate": 7.239642870246243e-06, "loss": 0.128, "step": 3767 }, { "epoch": 0.68, "learning_rate": 7.232106415789518e-06, "loss": 0.2026, "step": 3768 }, { "epoch": 0.68, "learning_rate": 7.2245726398561426e-06, "loss": 0.2363, "step": 3769 }, { "epoch": 0.68, "learning_rate": 7.2170415450439084e-06, "loss": 0.2642, "step": 3770 }, { "epoch": 0.68, "learning_rate": 7.209513133949692e-06, "loss": 0.1715, "step": 3771 }, { "epoch": 0.68, "learning_rate": 7.201987409169426e-06, "loss": 0.1575, "step": 3772 }, { "epoch": 0.68, "learning_rate": 7.194464373298138e-06, "loss": 0.2136, "step": 3773 }, { "epoch": 0.68, "learning_rate": 7.186944028929924e-06, "loss": 0.1971, "step": 3774 }, { "epoch": 0.68, "learning_rate": 7.1794263786579345e-06, "loss": 0.1765, "step": 3775 }, { "epoch": 0.68, "learning_rate": 7.171911425074412e-06, "loss": 0.1947, "step": 3776 }, { "epoch": 0.68, "learning_rate": 7.164399170770665e-06, "loss": 0.1752, "step": 3777 }, { "epoch": 0.68, "learning_rate": 7.156889618337057e-06, "loss": 0.1654, "step": 3778 }, { "epoch": 0.69, "learning_rate": 7.149382770363036e-06, "loss": 0.1375, "step": 3779 }, { "epoch": 0.69, "learning_rate": 7.141878629437109e-06, "loss": 0.166, "step": 3780 }, { "epoch": 0.69, "learning_rate": 7.134377198146862e-06, "loss": 0.1344, "step": 3781 }, { "epoch": 0.69, "learning_rate": 7.126878479078924e-06, "loss": 0.1314, "step": 3782 }, { "epoch": 0.69, "learning_rate": 7.119382474819007e-06, "loss": 0.206, "step": 3783 }, { "epoch": 0.69, "learning_rate": 7.111889187951886e-06, "loss": 0.152, "step": 3784 }, { "epoch": 0.69, "learning_rate": 7.1043986210613846e-06, "loss": 0.1603, "step": 3785 }, { "epoch": 0.69, "learning_rate": 7.096910776730406e-06, "loss": 0.1913, "step": 3786 }, { "epoch": 0.69, "learning_rate": 7.089425657540901e-06, "loss": 0.1864, "step": 3787 }, { "epoch": 0.69, "learning_rate": 7.081943266073899e-06, "loss": 0.1869, "step": 3788 }, { "epoch": 0.69, "learning_rate": 7.074463604909462e-06, "loss": 0.1744, "step": 3789 }, { "epoch": 0.69, "learning_rate": 7.06698667662673e-06, "loss": 0.1205, "step": 3790 }, { "epoch": 0.69, "learning_rate": 7.059512483803904e-06, "loss": 0.1941, "step": 3791 }, { "epoch": 0.69, "learning_rate": 7.052041029018221e-06, "loss": 0.1734, "step": 3792 }, { "epoch": 0.69, "learning_rate": 7.0445723148459915e-06, "loss": 0.1606, "step": 3793 }, { "epoch": 0.69, "learning_rate": 7.0371063438625825e-06, "loss": 0.1711, "step": 3794 }, { "epoch": 0.69, "learning_rate": 7.029643118642396e-06, "loss": 0.2009, "step": 3795 }, { "epoch": 0.69, "learning_rate": 7.022182641758906e-06, "loss": 0.2008, "step": 3796 }, { "epoch": 0.69, "learning_rate": 7.0147249157846335e-06, "loss": 0.1469, "step": 3797 }, { "epoch": 0.69, "learning_rate": 7.007269943291153e-06, "loss": 0.1653, "step": 3798 }, { "epoch": 0.69, "learning_rate": 6.999817726849077e-06, "loss": 0.1617, "step": 3799 }, { "epoch": 0.69, "learning_rate": 6.9923682690280835e-06, "loss": 0.1474, "step": 3800 }, { "epoch": 0.69, "learning_rate": 6.9849215723968965e-06, "loss": 0.2384, "step": 3801 }, { "epoch": 0.69, "learning_rate": 6.9774776395232745e-06, "loss": 0.1764, "step": 3802 }, { "epoch": 0.69, "learning_rate": 6.970036472974038e-06, "loss": 0.1574, "step": 3803 }, { "epoch": 0.69, "learning_rate": 6.962598075315047e-06, "loss": 0.2029, "step": 3804 }, { "epoch": 0.69, "learning_rate": 6.955162449111217e-06, "loss": 0.215, "step": 3805 }, { "epoch": 0.69, "learning_rate": 6.9477295969264836e-06, "loss": 0.2008, "step": 3806 }, { "epoch": 0.69, "learning_rate": 6.940299521323857e-06, "loss": 0.1409, "step": 3807 }, { "epoch": 0.69, "learning_rate": 6.9328722248653595e-06, "loss": 0.1798, "step": 3808 }, { "epoch": 0.69, "learning_rate": 6.925447710112082e-06, "loss": 0.1638, "step": 3809 }, { "epoch": 0.69, "learning_rate": 6.9180259796241345e-06, "loss": 0.1682, "step": 3810 }, { "epoch": 0.69, "learning_rate": 6.910607035960678e-06, "loss": 0.2776, "step": 3811 }, { "epoch": 0.69, "learning_rate": 6.9031908816799215e-06, "loss": 0.2021, "step": 3812 }, { "epoch": 0.69, "learning_rate": 6.895777519339088e-06, "loss": 0.1407, "step": 3813 }, { "epoch": 0.69, "learning_rate": 6.888366951494456e-06, "loss": 0.1447, "step": 3814 }, { "epoch": 0.69, "learning_rate": 6.880959180701339e-06, "loss": 0.1869, "step": 3815 }, { "epoch": 0.69, "learning_rate": 6.873554209514085e-06, "loss": 0.2441, "step": 3816 }, { "epoch": 0.69, "learning_rate": 6.8661520404860666e-06, "loss": 0.1414, "step": 3817 }, { "epoch": 0.69, "learning_rate": 6.8587526761697e-06, "loss": 0.1718, "step": 3818 }, { "epoch": 0.69, "learning_rate": 6.851356119116441e-06, "loss": 0.2233, "step": 3819 }, { "epoch": 0.69, "learning_rate": 6.843962371876757e-06, "loss": 0.1827, "step": 3820 }, { "epoch": 0.69, "learning_rate": 6.836571437000164e-06, "loss": 0.1771, "step": 3821 }, { "epoch": 0.69, "learning_rate": 6.829183317035206e-06, "loss": 0.1984, "step": 3822 }, { "epoch": 0.69, "learning_rate": 6.821798014529444e-06, "loss": 0.1833, "step": 3823 }, { "epoch": 0.69, "learning_rate": 6.814415532029481e-06, "loss": 0.1724, "step": 3824 }, { "epoch": 0.69, "learning_rate": 6.807035872080944e-06, "loss": 0.248, "step": 3825 }, { "epoch": 0.69, "learning_rate": 6.799659037228489e-06, "loss": 0.1472, "step": 3826 }, { "epoch": 0.69, "learning_rate": 6.7922850300157864e-06, "loss": 0.1937, "step": 3827 }, { "epoch": 0.69, "learning_rate": 6.784913852985544e-06, "loss": 0.2164, "step": 3828 }, { "epoch": 0.69, "learning_rate": 6.777545508679494e-06, "loss": 0.1594, "step": 3829 }, { "epoch": 0.69, "learning_rate": 6.7701799996383765e-06, "loss": 0.2384, "step": 3830 }, { "epoch": 0.69, "learning_rate": 6.762817328401971e-06, "loss": 0.1702, "step": 3831 }, { "epoch": 0.69, "learning_rate": 6.755457497509074e-06, "loss": 0.2107, "step": 3832 }, { "epoch": 0.69, "learning_rate": 6.748100509497503e-06, "loss": 0.2283, "step": 3833 }, { "epoch": 0.7, "learning_rate": 6.740746366904081e-06, "loss": 0.171, "step": 3834 }, { "epoch": 0.7, "learning_rate": 6.733395072264673e-06, "loss": 0.2245, "step": 3835 }, { "epoch": 0.7, "learning_rate": 6.72604662811415e-06, "loss": 0.2156, "step": 3836 }, { "epoch": 0.7, "learning_rate": 6.718701036986394e-06, "loss": 0.2278, "step": 3837 }, { "epoch": 0.7, "learning_rate": 6.7113583014143135e-06, "loss": 0.1633, "step": 3838 }, { "epoch": 0.7, "learning_rate": 6.704018423929835e-06, "loss": 0.1735, "step": 3839 }, { "epoch": 0.7, "learning_rate": 6.696681407063885e-06, "loss": 0.1853, "step": 3840 }, { "epoch": 0.7, "learning_rate": 6.689347253346413e-06, "loss": 0.2185, "step": 3841 }, { "epoch": 0.7, "learning_rate": 6.682015965306382e-06, "loss": 0.1538, "step": 3842 }, { "epoch": 0.7, "learning_rate": 6.674687545471772e-06, "loss": 0.1968, "step": 3843 }, { "epoch": 0.7, "learning_rate": 6.667361996369555e-06, "loss": 0.188, "step": 3844 }, { "epoch": 0.7, "learning_rate": 6.66003932052573e-06, "loss": 0.1553, "step": 3845 }, { "epoch": 0.7, "learning_rate": 6.6527195204653094e-06, "loss": 0.1816, "step": 3846 }, { "epoch": 0.7, "learning_rate": 6.645402598712289e-06, "loss": 0.1859, "step": 3847 }, { "epoch": 0.7, "learning_rate": 6.638088557789696e-06, "loss": 0.2012, "step": 3848 }, { "epoch": 0.7, "learning_rate": 6.630777400219558e-06, "loss": 0.1907, "step": 3849 }, { "epoch": 0.7, "learning_rate": 6.623469128522911e-06, "loss": 0.1318, "step": 3850 }, { "epoch": 0.7, "learning_rate": 6.616163745219779e-06, "loss": 0.2686, "step": 3851 }, { "epoch": 0.7, "learning_rate": 6.608861252829209e-06, "loss": 0.1274, "step": 3852 }, { "epoch": 0.7, "learning_rate": 6.6015616538692525e-06, "loss": 0.3063, "step": 3853 }, { "epoch": 0.7, "learning_rate": 6.594264950856942e-06, "loss": 0.1931, "step": 3854 }, { "epoch": 0.7, "learning_rate": 6.586971146308332e-06, "loss": 0.1924, "step": 3855 }, { "epoch": 0.7, "learning_rate": 6.579680242738475e-06, "loss": 0.1743, "step": 3856 }, { "epoch": 0.7, "learning_rate": 6.572392242661414e-06, "loss": 0.1942, "step": 3857 }, { "epoch": 0.7, "learning_rate": 6.565107148590191e-06, "loss": 0.1579, "step": 3858 }, { "epoch": 0.7, "learning_rate": 6.557824963036852e-06, "loss": 0.1671, "step": 3859 }, { "epoch": 0.7, "learning_rate": 6.55054568851245e-06, "loss": 0.1501, "step": 3860 }, { "epoch": 0.7, "learning_rate": 6.543269327527006e-06, "loss": 0.2278, "step": 3861 }, { "epoch": 0.7, "learning_rate": 6.535995882589564e-06, "loss": 0.2577, "step": 3862 }, { "epoch": 0.7, "learning_rate": 6.528725356208148e-06, "loss": 0.186, "step": 3863 }, { "epoch": 0.7, "learning_rate": 6.521457750889786e-06, "loss": 0.1553, "step": 3864 }, { "epoch": 0.7, "learning_rate": 6.514193069140481e-06, "loss": 0.2166, "step": 3865 }, { "epoch": 0.7, "learning_rate": 6.506931313465245e-06, "loss": 0.2011, "step": 3866 }, { "epoch": 0.7, "learning_rate": 6.499672486368079e-06, "loss": 0.1564, "step": 3867 }, { "epoch": 0.7, "learning_rate": 6.492416590351961e-06, "loss": 0.1662, "step": 3868 }, { "epoch": 0.7, "learning_rate": 6.485163627918871e-06, "loss": 0.206, "step": 3869 }, { "epoch": 0.7, "learning_rate": 6.477913601569776e-06, "loss": 0.1169, "step": 3870 }, { "epoch": 0.7, "learning_rate": 6.4706665138046336e-06, "loss": 0.1734, "step": 3871 }, { "epoch": 0.7, "learning_rate": 6.463422367122369e-06, "loss": 0.1552, "step": 3872 }, { "epoch": 0.7, "learning_rate": 6.456181164020917e-06, "loss": 0.1847, "step": 3873 }, { "epoch": 0.7, "learning_rate": 6.4489429069971905e-06, "loss": 0.2402, "step": 3874 }, { "epoch": 0.7, "learning_rate": 6.441707598547074e-06, "loss": 0.197, "step": 3875 }, { "epoch": 0.7, "learning_rate": 6.43447524116545e-06, "loss": 0.1811, "step": 3876 }, { "epoch": 0.7, "learning_rate": 6.427245837346183e-06, "loss": 0.1935, "step": 3877 }, { "epoch": 0.7, "learning_rate": 6.420019389582106e-06, "loss": 0.1899, "step": 3878 }, { "epoch": 0.7, "learning_rate": 6.412795900365044e-06, "loss": 0.22, "step": 3879 }, { "epoch": 0.7, "learning_rate": 6.4055753721857985e-06, "loss": 0.1557, "step": 3880 }, { "epoch": 0.7, "learning_rate": 6.398357807534158e-06, "loss": 0.2014, "step": 3881 }, { "epoch": 0.7, "learning_rate": 6.39114320889887e-06, "loss": 0.1718, "step": 3882 }, { "epoch": 0.7, "learning_rate": 6.383931578767675e-06, "loss": 0.1953, "step": 3883 }, { "epoch": 0.7, "learning_rate": 6.376722919627293e-06, "loss": 0.2228, "step": 3884 }, { "epoch": 0.7, "learning_rate": 6.369517233963401e-06, "loss": 0.1307, "step": 3885 }, { "epoch": 0.7, "learning_rate": 6.3623145242606666e-06, "loss": 0.179, "step": 3886 }, { "epoch": 0.7, "learning_rate": 6.355114793002727e-06, "loss": 0.2182, "step": 3887 }, { "epoch": 0.7, "learning_rate": 6.3479180426721976e-06, "loss": 0.1628, "step": 3888 }, { "epoch": 0.71, "learning_rate": 6.340724275750649e-06, "loss": 0.1454, "step": 3889 }, { "epoch": 0.71, "learning_rate": 6.333533494718639e-06, "loss": 0.1683, "step": 3890 }, { "epoch": 0.71, "learning_rate": 6.326345702055698e-06, "loss": 0.1548, "step": 3891 }, { "epoch": 0.71, "learning_rate": 6.319160900240307e-06, "loss": 0.1719, "step": 3892 }, { "epoch": 0.71, "learning_rate": 6.3119790917499345e-06, "loss": 0.1576, "step": 3893 }, { "epoch": 0.71, "learning_rate": 6.304800279061015e-06, "loss": 0.1627, "step": 3894 }, { "epoch": 0.71, "learning_rate": 6.297624464648933e-06, "loss": 0.1819, "step": 3895 }, { "epoch": 0.71, "learning_rate": 6.29045165098806e-06, "loss": 0.1605, "step": 3896 }, { "epoch": 0.71, "learning_rate": 6.28328184055172e-06, "loss": 0.1509, "step": 3897 }, { "epoch": 0.71, "learning_rate": 6.276115035812213e-06, "loss": 0.179, "step": 3898 }, { "epoch": 0.71, "learning_rate": 6.268951239240784e-06, "loss": 0.1303, "step": 3899 }, { "epoch": 0.71, "learning_rate": 6.261790453307655e-06, "loss": 0.2084, "step": 3900 }, { "epoch": 0.71, "learning_rate": 6.254632680482017e-06, "loss": 0.1461, "step": 3901 }, { "epoch": 0.71, "learning_rate": 6.247477923231996e-06, "loss": 0.2482, "step": 3902 }, { "epoch": 0.71, "learning_rate": 6.240326184024703e-06, "loss": 0.1321, "step": 3903 }, { "epoch": 0.71, "learning_rate": 6.2331774653261955e-06, "loss": 0.1642, "step": 3904 }, { "epoch": 0.71, "learning_rate": 6.2260317696015e-06, "loss": 0.1559, "step": 3905 }, { "epoch": 0.71, "learning_rate": 6.218889099314585e-06, "loss": 0.1248, "step": 3906 }, { "epoch": 0.71, "learning_rate": 6.211749456928393e-06, "loss": 0.1512, "step": 3907 }, { "epoch": 0.71, "learning_rate": 6.204612844904802e-06, "loss": 0.2014, "step": 3908 }, { "epoch": 0.71, "learning_rate": 6.19747926570467e-06, "loss": 0.2028, "step": 3909 }, { "epoch": 0.71, "learning_rate": 6.190348721787784e-06, "loss": 0.1946, "step": 3910 }, { "epoch": 0.71, "learning_rate": 6.1832212156129045e-06, "loss": 0.1758, "step": 3911 }, { "epoch": 0.71, "learning_rate": 6.176096749637739e-06, "loss": 0.1138, "step": 3912 }, { "epoch": 0.71, "learning_rate": 6.168975326318935e-06, "loss": 0.1719, "step": 3913 }, { "epoch": 0.71, "learning_rate": 6.161856948112106e-06, "loss": 0.1712, "step": 3914 }, { "epoch": 0.71, "learning_rate": 6.154741617471807e-06, "loss": 0.1675, "step": 3915 }, { "epoch": 0.71, "learning_rate": 6.147629336851553e-06, "loss": 0.1738, "step": 3916 }, { "epoch": 0.71, "learning_rate": 6.140520108703787e-06, "loss": 0.1383, "step": 3917 }, { "epoch": 0.71, "learning_rate": 6.133413935479917e-06, "loss": 0.222, "step": 3918 }, { "epoch": 0.71, "learning_rate": 6.126310819630297e-06, "loss": 0.1866, "step": 3919 }, { "epoch": 0.71, "learning_rate": 6.1192107636042114e-06, "loss": 0.2078, "step": 3920 }, { "epoch": 0.71, "learning_rate": 6.112113769849906e-06, "loss": 0.217, "step": 3921 }, { "epoch": 0.71, "learning_rate": 6.10501984081457e-06, "loss": 0.1987, "step": 3922 }, { "epoch": 0.71, "learning_rate": 6.0979289789443185e-06, "loss": 0.1258, "step": 3923 }, { "epoch": 0.71, "learning_rate": 6.090841186684228e-06, "loss": 0.201, "step": 3924 }, { "epoch": 0.71, "learning_rate": 6.083756466478307e-06, "loss": 0.2709, "step": 3925 }, { "epoch": 0.71, "learning_rate": 6.0766748207695145e-06, "loss": 0.2424, "step": 3926 }, { "epoch": 0.71, "learning_rate": 6.06959625199973e-06, "loss": 0.1709, "step": 3927 }, { "epoch": 0.71, "learning_rate": 6.062520762609791e-06, "loss": 0.166, "step": 3928 }, { "epoch": 0.71, "learning_rate": 6.0554483550394716e-06, "loss": 0.1859, "step": 3929 }, { "epoch": 0.71, "learning_rate": 6.048379031727464e-06, "loss": 0.1756, "step": 3930 }, { "epoch": 0.71, "learning_rate": 6.0413127951114196e-06, "loss": 0.2403, "step": 3931 }, { "epoch": 0.71, "learning_rate": 6.034249647627915e-06, "loss": 0.1304, "step": 3932 }, { "epoch": 0.71, "learning_rate": 6.027189591712469e-06, "loss": 0.2075, "step": 3933 }, { "epoch": 0.71, "learning_rate": 6.020132629799516e-06, "loss": 0.1357, "step": 3934 }, { "epoch": 0.71, "learning_rate": 6.013078764322443e-06, "loss": 0.1818, "step": 3935 }, { "epoch": 0.71, "learning_rate": 6.006027997713569e-06, "loss": 0.2338, "step": 3936 }, { "epoch": 0.71, "learning_rate": 5.998980332404125e-06, "loss": 0.1407, "step": 3937 }, { "epoch": 0.71, "learning_rate": 5.991935770824292e-06, "loss": 0.147, "step": 3938 }, { "epoch": 0.71, "learning_rate": 5.984894315403178e-06, "loss": 0.1722, "step": 3939 }, { "epoch": 0.71, "learning_rate": 5.977855968568806e-06, "loss": 0.1775, "step": 3940 }, { "epoch": 0.71, "learning_rate": 5.970820732748144e-06, "loss": 0.2239, "step": 3941 }, { "epoch": 0.71, "learning_rate": 5.963788610367078e-06, "loss": 0.235, "step": 3942 }, { "epoch": 0.71, "learning_rate": 5.95675960385043e-06, "loss": 0.1582, "step": 3943 }, { "epoch": 0.72, "learning_rate": 5.94973371562193e-06, "loss": 0.1816, "step": 3944 }, { "epoch": 0.72, "learning_rate": 5.942710948104246e-06, "loss": 0.1826, "step": 3945 }, { "epoch": 0.72, "learning_rate": 5.935691303718977e-06, "loss": 0.1501, "step": 3946 }, { "epoch": 0.72, "learning_rate": 5.92867478488662e-06, "loss": 0.1046, "step": 3947 }, { "epoch": 0.72, "learning_rate": 5.921661394026617e-06, "loss": 0.153, "step": 3948 }, { "epoch": 0.72, "learning_rate": 5.914651133557329e-06, "loss": 0.2437, "step": 3949 }, { "epoch": 0.72, "learning_rate": 5.907644005896024e-06, "loss": 0.1969, "step": 3950 }, { "epoch": 0.72, "learning_rate": 5.900640013458898e-06, "loss": 0.1864, "step": 3951 }, { "epoch": 0.72, "learning_rate": 5.89363915866107e-06, "loss": 0.1464, "step": 3952 }, { "epoch": 0.72, "learning_rate": 5.886641443916579e-06, "loss": 0.1279, "step": 3953 }, { "epoch": 0.72, "learning_rate": 5.879646871638364e-06, "loss": 0.1436, "step": 3954 }, { "epoch": 0.72, "learning_rate": 5.872655444238296e-06, "loss": 0.1421, "step": 3955 }, { "epoch": 0.72, "learning_rate": 5.8656671641271645e-06, "loss": 0.2155, "step": 3956 }, { "epoch": 0.72, "learning_rate": 5.858682033714659e-06, "loss": 0.1836, "step": 3957 }, { "epoch": 0.72, "learning_rate": 5.851700055409387e-06, "loss": 0.1237, "step": 3958 }, { "epoch": 0.72, "learning_rate": 5.8447212316188784e-06, "loss": 0.1947, "step": 3959 }, { "epoch": 0.72, "learning_rate": 5.8377455647495764e-06, "loss": 0.1711, "step": 3960 }, { "epoch": 0.72, "learning_rate": 5.830773057206814e-06, "loss": 0.2126, "step": 3961 }, { "epoch": 0.72, "learning_rate": 5.8238037113948584e-06, "loss": 0.1688, "step": 3962 }, { "epoch": 0.72, "learning_rate": 5.816837529716877e-06, "loss": 0.15, "step": 3963 }, { "epoch": 0.72, "learning_rate": 5.809874514574952e-06, "loss": 0.1699, "step": 3964 }, { "epoch": 0.72, "learning_rate": 5.802914668370057e-06, "loss": 0.1774, "step": 3965 }, { "epoch": 0.72, "learning_rate": 5.795957993502093e-06, "loss": 0.1511, "step": 3966 }, { "epoch": 0.72, "learning_rate": 5.789004492369862e-06, "loss": 0.2373, "step": 3967 }, { "epoch": 0.72, "learning_rate": 5.782054167371058e-06, "loss": 0.1967, "step": 3968 }, { "epoch": 0.72, "learning_rate": 5.775107020902299e-06, "loss": 0.1886, "step": 3969 }, { "epoch": 0.72, "learning_rate": 5.768163055359094e-06, "loss": 0.1685, "step": 3970 }, { "epoch": 0.72, "learning_rate": 5.761222273135867e-06, "loss": 0.2445, "step": 3971 }, { "epoch": 0.72, "learning_rate": 5.7542846766259286e-06, "loss": 0.196, "step": 3972 }, { "epoch": 0.72, "learning_rate": 5.747350268221501e-06, "loss": 0.1986, "step": 3973 }, { "epoch": 0.72, "learning_rate": 5.740419050313712e-06, "loss": 0.1436, "step": 3974 }, { "epoch": 0.72, "learning_rate": 5.733491025292571e-06, "loss": 0.1426, "step": 3975 }, { "epoch": 0.72, "learning_rate": 5.726566195547006e-06, "loss": 0.1689, "step": 3976 }, { "epoch": 0.72, "learning_rate": 5.719644563464838e-06, "loss": 0.169, "step": 3977 }, { "epoch": 0.72, "learning_rate": 5.712726131432774e-06, "loss": 0.1646, "step": 3978 }, { "epoch": 0.72, "learning_rate": 5.705810901836432e-06, "loss": 0.1523, "step": 3979 }, { "epoch": 0.72, "learning_rate": 5.698898877060317e-06, "loss": 0.1702, "step": 3980 }, { "epoch": 0.72, "learning_rate": 5.69199005948784e-06, "loss": 0.2469, "step": 3981 }, { "epoch": 0.72, "learning_rate": 5.685084451501284e-06, "loss": 0.1725, "step": 3982 }, { "epoch": 0.72, "learning_rate": 5.678182055481847e-06, "loss": 0.2023, "step": 3983 }, { "epoch": 0.72, "learning_rate": 5.671282873809618e-06, "loss": 0.1936, "step": 3984 }, { "epoch": 0.72, "learning_rate": 5.664386908863557e-06, "loss": 0.2108, "step": 3985 }, { "epoch": 0.72, "learning_rate": 5.657494163021537e-06, "loss": 0.1754, "step": 3986 }, { "epoch": 0.72, "learning_rate": 5.650604638660315e-06, "loss": 0.1888, "step": 3987 }, { "epoch": 0.72, "learning_rate": 5.6437183381555365e-06, "loss": 0.1848, "step": 3988 }, { "epoch": 0.72, "learning_rate": 5.636835263881727e-06, "loss": 0.2167, "step": 3989 }, { "epoch": 0.72, "learning_rate": 5.629955418212308e-06, "loss": 0.1676, "step": 3990 }, { "epoch": 0.72, "learning_rate": 5.623078803519595e-06, "loss": 0.2043, "step": 3991 }, { "epoch": 0.72, "learning_rate": 5.61620542217477e-06, "loss": 0.199, "step": 3992 }, { "epoch": 0.72, "learning_rate": 5.609335276547915e-06, "loss": 0.1952, "step": 3993 }, { "epoch": 0.72, "learning_rate": 5.602468369007997e-06, "loss": 0.1738, "step": 3994 }, { "epoch": 0.72, "learning_rate": 5.59560470192285e-06, "loss": 0.1728, "step": 3995 }, { "epoch": 0.72, "learning_rate": 5.588744277659211e-06, "loss": 0.1911, "step": 3996 }, { "epoch": 0.72, "learning_rate": 5.581887098582688e-06, "loss": 0.1798, "step": 3997 }, { "epoch": 0.72, "learning_rate": 5.5750331670577775e-06, "loss": 0.2218, "step": 3998 }, { "epoch": 0.72, "learning_rate": 5.568182485447839e-06, "loss": 0.1451, "step": 3999 }, { "epoch": 0.73, "learning_rate": 5.56133505611513e-06, "loss": 0.1173, "step": 4000 }, { "epoch": 0.73, "learning_rate": 5.5544908814207825e-06, "loss": 0.1413, "step": 4001 }, { "epoch": 0.73, "learning_rate": 5.547649963724795e-06, "loss": 0.1434, "step": 4002 }, { "epoch": 0.73, "learning_rate": 5.540812305386057e-06, "loss": 0.1863, "step": 4003 }, { "epoch": 0.73, "learning_rate": 5.533977908762324e-06, "loss": 0.2473, "step": 4004 }, { "epoch": 0.73, "learning_rate": 5.5271467762102415e-06, "loss": 0.1668, "step": 4005 }, { "epoch": 0.73, "learning_rate": 5.520318910085313e-06, "loss": 0.2123, "step": 4006 }, { "epoch": 0.73, "learning_rate": 5.513494312741913e-06, "loss": 0.1724, "step": 4007 }, { "epoch": 0.73, "learning_rate": 5.506672986533307e-06, "loss": 0.204, "step": 4008 }, { "epoch": 0.73, "learning_rate": 5.499854933811626e-06, "loss": 0.2054, "step": 4009 }, { "epoch": 0.73, "learning_rate": 5.4930401569278595e-06, "loss": 0.1685, "step": 4010 }, { "epoch": 0.73, "learning_rate": 5.4862286582318825e-06, "loss": 0.2031, "step": 4011 }, { "epoch": 0.73, "learning_rate": 5.479420440072442e-06, "loss": 0.1746, "step": 4012 }, { "epoch": 0.73, "learning_rate": 5.472615504797132e-06, "loss": 0.1967, "step": 4013 }, { "epoch": 0.73, "learning_rate": 5.465813854752435e-06, "loss": 0.116, "step": 4014 }, { "epoch": 0.73, "learning_rate": 5.459015492283702e-06, "loss": 0.299, "step": 4015 }, { "epoch": 0.73, "learning_rate": 5.45222041973513e-06, "loss": 0.1688, "step": 4016 }, { "epoch": 0.73, "learning_rate": 5.4454286394498006e-06, "loss": 0.2104, "step": 4017 }, { "epoch": 0.73, "learning_rate": 5.438640153769654e-06, "loss": 0.1671, "step": 4018 }, { "epoch": 0.73, "learning_rate": 5.4318549650355e-06, "loss": 0.1948, "step": 4019 }, { "epoch": 0.73, "learning_rate": 5.425073075586994e-06, "loss": 0.1664, "step": 4020 }, { "epoch": 0.73, "learning_rate": 5.418294487762671e-06, "loss": 0.1514, "step": 4021 }, { "epoch": 0.73, "learning_rate": 5.41151920389993e-06, "loss": 0.2065, "step": 4022 }, { "epoch": 0.73, "learning_rate": 5.40474722633501e-06, "loss": 0.1733, "step": 4023 }, { "epoch": 0.73, "learning_rate": 5.397978557403029e-06, "loss": 0.2304, "step": 4024 }, { "epoch": 0.73, "learning_rate": 5.391213199437956e-06, "loss": 0.203, "step": 4025 }, { "epoch": 0.73, "learning_rate": 5.384451154772627e-06, "loss": 0.2378, "step": 4026 }, { "epoch": 0.73, "learning_rate": 5.377692425738719e-06, "loss": 0.2228, "step": 4027 }, { "epoch": 0.73, "learning_rate": 5.370937014666779e-06, "loss": 0.1988, "step": 4028 }, { "epoch": 0.73, "learning_rate": 5.364184923886213e-06, "loss": 0.1884, "step": 4029 }, { "epoch": 0.73, "learning_rate": 5.357436155725266e-06, "loss": 0.1779, "step": 4030 }, { "epoch": 0.73, "learning_rate": 5.3506907125110485e-06, "loss": 0.1464, "step": 4031 }, { "epoch": 0.73, "learning_rate": 5.343948596569531e-06, "loss": 0.2118, "step": 4032 }, { "epoch": 0.73, "learning_rate": 5.337209810225515e-06, "loss": 0.1586, "step": 4033 }, { "epoch": 0.73, "learning_rate": 5.330474355802677e-06, "loss": 0.1826, "step": 4034 }, { "epoch": 0.73, "learning_rate": 5.32374223562353e-06, "loss": 0.1806, "step": 4035 }, { "epoch": 0.73, "learning_rate": 5.3170134520094505e-06, "loss": 0.1873, "step": 4036 }, { "epoch": 0.73, "learning_rate": 5.310288007280646e-06, "loss": 0.154, "step": 4037 }, { "epoch": 0.73, "learning_rate": 5.303565903756186e-06, "loss": 0.1481, "step": 4038 }, { "epoch": 0.73, "learning_rate": 5.296847143753991e-06, "loss": 0.14, "step": 4039 }, { "epoch": 0.73, "learning_rate": 5.290131729590813e-06, "loss": 0.2407, "step": 4040 }, { "epoch": 0.73, "learning_rate": 5.283419663582263e-06, "loss": 0.2267, "step": 4041 }, { "epoch": 0.73, "learning_rate": 5.276710948042794e-06, "loss": 0.1802, "step": 4042 }, { "epoch": 0.73, "learning_rate": 5.270005585285712e-06, "loss": 0.2065, "step": 4043 }, { "epoch": 0.73, "learning_rate": 5.263303577623145e-06, "loss": 0.1909, "step": 4044 }, { "epoch": 0.73, "learning_rate": 5.256604927366083e-06, "loss": 0.1579, "step": 4045 }, { "epoch": 0.73, "learning_rate": 5.249909636824361e-06, "loss": 0.2143, "step": 4046 }, { "epoch": 0.73, "learning_rate": 5.243217708306635e-06, "loss": 0.1442, "step": 4047 }, { "epoch": 0.73, "learning_rate": 5.236529144120421e-06, "loss": 0.1776, "step": 4048 }, { "epoch": 0.73, "learning_rate": 5.229843946572072e-06, "loss": 0.129, "step": 4049 }, { "epoch": 0.73, "learning_rate": 5.223162117966767e-06, "loss": 0.2926, "step": 4050 }, { "epoch": 0.73, "learning_rate": 5.216483660608537e-06, "loss": 0.2078, "step": 4051 }, { "epoch": 0.73, "learning_rate": 5.2098085768002465e-06, "loss": 0.1593, "step": 4052 }, { "epoch": 0.73, "learning_rate": 5.203136868843602e-06, "loss": 0.1189, "step": 4053 }, { "epoch": 0.73, "learning_rate": 5.196468539039129e-06, "loss": 0.1656, "step": 4054 }, { "epoch": 0.74, "learning_rate": 5.189803589686212e-06, "loss": 0.235, "step": 4055 }, { "epoch": 0.74, "learning_rate": 5.183142023083045e-06, "loss": 0.2079, "step": 4056 }, { "epoch": 0.74, "learning_rate": 5.176483841526679e-06, "loss": 0.1616, "step": 4057 }, { "epoch": 0.74, "learning_rate": 5.169829047312975e-06, "loss": 0.1984, "step": 4058 }, { "epoch": 0.74, "learning_rate": 5.163177642736647e-06, "loss": 0.1808, "step": 4059 }, { "epoch": 0.74, "learning_rate": 5.15652963009123e-06, "loss": 0.2344, "step": 4060 }, { "epoch": 0.74, "learning_rate": 5.1498850116690836e-06, "loss": 0.1737, "step": 4061 }, { "epoch": 0.74, "learning_rate": 5.14324378976141e-06, "loss": 0.1905, "step": 4062 }, { "epoch": 0.74, "learning_rate": 5.13660596665823e-06, "loss": 0.1647, "step": 4063 }, { "epoch": 0.74, "learning_rate": 5.129971544648405e-06, "loss": 0.197, "step": 4064 }, { "epoch": 0.74, "learning_rate": 5.1233405260196024e-06, "loss": 0.2113, "step": 4065 }, { "epoch": 0.74, "learning_rate": 5.116712913058335e-06, "loss": 0.2391, "step": 4066 }, { "epoch": 0.74, "learning_rate": 5.110088708049939e-06, "loss": 0.1344, "step": 4067 }, { "epoch": 0.74, "learning_rate": 5.1034679132785615e-06, "loss": 0.2018, "step": 4068 }, { "epoch": 0.74, "learning_rate": 5.096850531027188e-06, "loss": 0.1799, "step": 4069 }, { "epoch": 0.74, "learning_rate": 5.0902365635776245e-06, "loss": 0.1866, "step": 4070 }, { "epoch": 0.74, "learning_rate": 5.083626013210501e-06, "loss": 0.185, "step": 4071 }, { "epoch": 0.74, "learning_rate": 5.077018882205257e-06, "loss": 0.1346, "step": 4072 }, { "epoch": 0.74, "learning_rate": 5.070415172840166e-06, "loss": 0.1434, "step": 4073 }, { "epoch": 0.74, "learning_rate": 5.0638148873923245e-06, "loss": 0.1671, "step": 4074 }, { "epoch": 0.74, "learning_rate": 5.05721802813763e-06, "loss": 0.2273, "step": 4075 }, { "epoch": 0.74, "learning_rate": 5.050624597350815e-06, "loss": 0.1908, "step": 4076 }, { "epoch": 0.74, "learning_rate": 5.0440345973054294e-06, "loss": 0.1766, "step": 4077 }, { "epoch": 0.74, "learning_rate": 5.037448030273827e-06, "loss": 0.2164, "step": 4078 }, { "epoch": 0.74, "learning_rate": 5.03086489852719e-06, "loss": 0.1776, "step": 4079 }, { "epoch": 0.74, "learning_rate": 5.0242852043355144e-06, "loss": 0.2072, "step": 4080 }, { "epoch": 0.74, "learning_rate": 5.017708949967612e-06, "loss": 0.1723, "step": 4081 }, { "epoch": 0.74, "learning_rate": 5.0111361376910945e-06, "loss": 0.249, "step": 4082 }, { "epoch": 0.74, "learning_rate": 5.004566769772406e-06, "loss": 0.1772, "step": 4083 }, { "epoch": 0.74, "learning_rate": 4.9980008484767965e-06, "loss": 0.2401, "step": 4084 }, { "epoch": 0.74, "learning_rate": 4.991438376068317e-06, "loss": 0.145, "step": 4085 }, { "epoch": 0.74, "learning_rate": 4.984879354809843e-06, "loss": 0.155, "step": 4086 }, { "epoch": 0.74, "learning_rate": 4.978323786963053e-06, "loss": 0.1609, "step": 4087 }, { "epoch": 0.74, "learning_rate": 4.971771674788443e-06, "loss": 0.1936, "step": 4088 }, { "epoch": 0.74, "learning_rate": 4.965223020545301e-06, "loss": 0.1718, "step": 4089 }, { "epoch": 0.74, "learning_rate": 4.958677826491735e-06, "loss": 0.1887, "step": 4090 }, { "epoch": 0.74, "learning_rate": 4.952136094884666e-06, "loss": 0.1571, "step": 4091 }, { "epoch": 0.74, "learning_rate": 4.945597827979801e-06, "loss": 0.2188, "step": 4092 }, { "epoch": 0.74, "learning_rate": 4.939063028031668e-06, "loss": 0.2001, "step": 4093 }, { "epoch": 0.74, "learning_rate": 4.932531697293599e-06, "loss": 0.1147, "step": 4094 }, { "epoch": 0.74, "learning_rate": 4.9260038380177204e-06, "loss": 0.0969, "step": 4095 }, { "epoch": 0.74, "learning_rate": 4.919479452454969e-06, "loss": 0.1785, "step": 4096 }, { "epoch": 0.74, "learning_rate": 4.9129585428550814e-06, "loss": 0.1703, "step": 4097 }, { "epoch": 0.74, "learning_rate": 4.906441111466603e-06, "loss": 0.2162, "step": 4098 }, { "epoch": 0.74, "learning_rate": 4.899927160536862e-06, "loss": 0.1647, "step": 4099 }, { "epoch": 0.74, "learning_rate": 4.893416692312004e-06, "loss": 0.1448, "step": 4100 }, { "epoch": 0.74, "learning_rate": 4.886909709036971e-06, "loss": 0.1111, "step": 4101 }, { "epoch": 0.74, "learning_rate": 4.880406212955488e-06, "loss": 0.2155, "step": 4102 }, { "epoch": 0.74, "learning_rate": 4.873906206310098e-06, "loss": 0.1642, "step": 4103 }, { "epoch": 0.74, "learning_rate": 4.867409691342128e-06, "loss": 0.1452, "step": 4104 }, { "epoch": 0.74, "learning_rate": 4.860916670291717e-06, "loss": 0.2325, "step": 4105 }, { "epoch": 0.74, "learning_rate": 4.854427145397768e-06, "loss": 0.1685, "step": 4106 }, { "epoch": 0.74, "learning_rate": 4.847941118898005e-06, "loss": 0.1625, "step": 4107 }, { "epoch": 0.74, "learning_rate": 4.841458593028938e-06, "loss": 0.1809, "step": 4108 }, { "epoch": 0.74, "learning_rate": 4.834979570025878e-06, "loss": 0.2417, "step": 4109 }, { "epoch": 0.75, "learning_rate": 4.828504052122908e-06, "loss": 0.1721, "step": 4110 }, { "epoch": 0.75, "learning_rate": 4.822032041552921e-06, "loss": 0.1531, "step": 4111 }, { "epoch": 0.75, "learning_rate": 4.815563540547598e-06, "loss": 0.2033, "step": 4112 }, { "epoch": 0.75, "learning_rate": 4.8090985513373975e-06, "loss": 0.1788, "step": 4113 }, { "epoch": 0.75, "learning_rate": 4.802637076151579e-06, "loss": 0.2026, "step": 4114 }, { "epoch": 0.75, "learning_rate": 4.796179117218194e-06, "loss": 0.1658, "step": 4115 }, { "epoch": 0.75, "learning_rate": 4.789724676764062e-06, "loss": 0.1731, "step": 4116 }, { "epoch": 0.75, "learning_rate": 4.783273757014809e-06, "loss": 0.1594, "step": 4117 }, { "epoch": 0.75, "learning_rate": 4.7768263601948375e-06, "loss": 0.156, "step": 4118 }, { "epoch": 0.75, "learning_rate": 4.770382488527345e-06, "loss": 0.2749, "step": 4119 }, { "epoch": 0.75, "learning_rate": 4.763942144234293e-06, "loss": 0.2656, "step": 4120 }, { "epoch": 0.75, "learning_rate": 4.757505329536448e-06, "loss": 0.1689, "step": 4121 }, { "epoch": 0.75, "learning_rate": 4.751072046653353e-06, "loss": 0.1595, "step": 4122 }, { "epoch": 0.75, "learning_rate": 4.7446422978033225e-06, "loss": 0.2911, "step": 4123 }, { "epoch": 0.75, "learning_rate": 4.738216085203467e-06, "loss": 0.1691, "step": 4124 }, { "epoch": 0.75, "learning_rate": 4.731793411069669e-06, "loss": 0.2147, "step": 4125 }, { "epoch": 0.75, "learning_rate": 4.7253742776166005e-06, "loss": 0.1795, "step": 4126 }, { "epoch": 0.75, "learning_rate": 4.718958687057696e-06, "loss": 0.1368, "step": 4127 }, { "epoch": 0.75, "learning_rate": 4.712546641605184e-06, "loss": 0.2474, "step": 4128 }, { "epoch": 0.75, "learning_rate": 4.706138143470065e-06, "loss": 0.2085, "step": 4129 }, { "epoch": 0.75, "learning_rate": 4.699733194862111e-06, "loss": 0.2321, "step": 4130 }, { "epoch": 0.75, "learning_rate": 4.693331797989878e-06, "loss": 0.2018, "step": 4131 }, { "epoch": 0.75, "learning_rate": 4.686933955060698e-06, "loss": 0.2064, "step": 4132 }, { "epoch": 0.75, "learning_rate": 4.680539668280664e-06, "loss": 0.1865, "step": 4133 }, { "epoch": 0.75, "learning_rate": 4.674148939854661e-06, "loss": 0.2676, "step": 4134 }, { "epoch": 0.75, "learning_rate": 4.667761771986335e-06, "loss": 0.1257, "step": 4135 }, { "epoch": 0.75, "learning_rate": 4.661378166878114e-06, "loss": 0.1616, "step": 4136 }, { "epoch": 0.75, "learning_rate": 4.6549981267311805e-06, "loss": 0.1406, "step": 4137 }, { "epoch": 0.75, "learning_rate": 4.648621653745504e-06, "loss": 0.1472, "step": 4138 }, { "epoch": 0.75, "learning_rate": 4.642248750119824e-06, "loss": 0.1558, "step": 4139 }, { "epoch": 0.75, "learning_rate": 4.635879418051633e-06, "loss": 0.1568, "step": 4140 }, { "epoch": 0.75, "learning_rate": 4.629513659737209e-06, "loss": 0.1331, "step": 4141 }, { "epoch": 0.75, "learning_rate": 4.62315147737159e-06, "loss": 0.2094, "step": 4142 }, { "epoch": 0.75, "learning_rate": 4.616792873148588e-06, "loss": 0.2041, "step": 4143 }, { "epoch": 0.75, "learning_rate": 4.610437849260765e-06, "loss": 0.187, "step": 4144 }, { "epoch": 0.75, "learning_rate": 4.604086407899465e-06, "loss": 0.2402, "step": 4145 }, { "epoch": 0.75, "learning_rate": 4.597738551254795e-06, "loss": 0.1846, "step": 4146 }, { "epoch": 0.75, "learning_rate": 4.591394281515609e-06, "loss": 0.2329, "step": 4147 }, { "epoch": 0.75, "learning_rate": 4.585053600869546e-06, "loss": 0.1948, "step": 4148 }, { "epoch": 0.75, "learning_rate": 4.578716511503001e-06, "loss": 0.1915, "step": 4149 }, { "epoch": 0.75, "learning_rate": 4.5723830156011185e-06, "loss": 0.1726, "step": 4150 }, { "epoch": 0.75, "learning_rate": 4.566053115347815e-06, "loss": 0.1823, "step": 4151 }, { "epoch": 0.75, "learning_rate": 4.559726812925771e-06, "loss": 0.1941, "step": 4152 }, { "epoch": 0.75, "learning_rate": 4.55340411051642e-06, "loss": 0.1844, "step": 4153 }, { "epoch": 0.75, "learning_rate": 4.5470850102999535e-06, "loss": 0.1463, "step": 4154 }, { "epoch": 0.75, "learning_rate": 4.540769514455317e-06, "loss": 0.1305, "step": 4155 }, { "epoch": 0.75, "learning_rate": 4.53445762516022e-06, "loss": 0.2036, "step": 4156 }, { "epoch": 0.75, "learning_rate": 4.528149344591137e-06, "loss": 0.1948, "step": 4157 }, { "epoch": 0.75, "learning_rate": 4.521844674923272e-06, "loss": 0.2605, "step": 4158 }, { "epoch": 0.75, "learning_rate": 4.515543618330611e-06, "loss": 0.1244, "step": 4159 }, { "epoch": 0.75, "learning_rate": 4.5092461769858826e-06, "loss": 0.1542, "step": 4160 }, { "epoch": 0.75, "learning_rate": 4.502952353060563e-06, "loss": 0.1948, "step": 4161 }, { "epoch": 0.75, "learning_rate": 4.496662148724892e-06, "loss": 0.1637, "step": 4162 }, { "epoch": 0.75, "learning_rate": 4.490375566147853e-06, "loss": 0.1967, "step": 4163 }, { "epoch": 0.75, "learning_rate": 4.48409260749719e-06, "loss": 0.2141, "step": 4164 }, { "epoch": 0.76, "learning_rate": 4.477813274939384e-06, "loss": 0.1926, "step": 4165 }, { "epoch": 0.76, "learning_rate": 4.471537570639677e-06, "loss": 0.266, "step": 4166 }, { "epoch": 0.76, "learning_rate": 4.46526549676206e-06, "loss": 0.1301, "step": 4167 }, { "epoch": 0.76, "learning_rate": 4.45899705546926e-06, "loss": 0.1989, "step": 4168 }, { "epoch": 0.76, "learning_rate": 4.452732248922761e-06, "loss": 0.1333, "step": 4169 }, { "epoch": 0.76, "learning_rate": 4.446471079282796e-06, "loss": 0.1976, "step": 4170 }, { "epoch": 0.76, "learning_rate": 4.440213548708342e-06, "loss": 0.1882, "step": 4171 }, { "epoch": 0.76, "learning_rate": 4.433959659357112e-06, "loss": 0.2079, "step": 4172 }, { "epoch": 0.76, "learning_rate": 4.427709413385575e-06, "loss": 0.261, "step": 4173 }, { "epoch": 0.76, "learning_rate": 4.421462812948943e-06, "loss": 0.1715, "step": 4174 }, { "epoch": 0.76, "learning_rate": 4.41521986020116e-06, "loss": 0.1428, "step": 4175 }, { "epoch": 0.76, "learning_rate": 4.408980557294922e-06, "loss": 0.2286, "step": 4176 }, { "epoch": 0.76, "learning_rate": 4.402744906381669e-06, "loss": 0.1534, "step": 4177 }, { "epoch": 0.76, "learning_rate": 4.396512909611568e-06, "loss": 0.1777, "step": 4178 }, { "epoch": 0.76, "learning_rate": 4.390284569133542e-06, "loss": 0.2112, "step": 4179 }, { "epoch": 0.76, "learning_rate": 4.384059887095241e-06, "loss": 0.2433, "step": 4180 }, { "epoch": 0.76, "learning_rate": 4.377838865643067e-06, "loss": 0.1283, "step": 4181 }, { "epoch": 0.76, "learning_rate": 4.371621506922141e-06, "loss": 0.1977, "step": 4182 }, { "epoch": 0.76, "learning_rate": 4.365407813076334e-06, "loss": 0.1553, "step": 4183 }, { "epoch": 0.76, "learning_rate": 4.359197786248258e-06, "loss": 0.2218, "step": 4184 }, { "epoch": 0.76, "learning_rate": 4.352991428579242e-06, "loss": 0.1796, "step": 4185 }, { "epoch": 0.76, "learning_rate": 4.346788742209367e-06, "loss": 0.1522, "step": 4186 }, { "epoch": 0.76, "learning_rate": 4.340589729277443e-06, "loss": 0.1865, "step": 4187 }, { "epoch": 0.76, "learning_rate": 4.334394391921009e-06, "loss": 0.1939, "step": 4188 }, { "epoch": 0.76, "learning_rate": 4.328202732276338e-06, "loss": 0.1587, "step": 4189 }, { "epoch": 0.76, "learning_rate": 4.322014752478441e-06, "loss": 0.1962, "step": 4190 }, { "epoch": 0.76, "learning_rate": 4.315830454661059e-06, "loss": 0.1531, "step": 4191 }, { "epoch": 0.76, "learning_rate": 4.309649840956652e-06, "loss": 0.2227, "step": 4192 }, { "epoch": 0.76, "learning_rate": 4.303472913496422e-06, "loss": 0.1618, "step": 4193 }, { "epoch": 0.76, "learning_rate": 4.2972996744103e-06, "loss": 0.1736, "step": 4194 }, { "epoch": 0.76, "learning_rate": 4.2911301258269305e-06, "loss": 0.1782, "step": 4195 }, { "epoch": 0.76, "learning_rate": 4.284964269873704e-06, "loss": 0.218, "step": 4196 }, { "epoch": 0.76, "learning_rate": 4.2788021086767235e-06, "loss": 0.1889, "step": 4197 }, { "epoch": 0.76, "learning_rate": 4.272643644360835e-06, "loss": 0.1516, "step": 4198 }, { "epoch": 0.76, "learning_rate": 4.2664888790495876e-06, "loss": 0.1317, "step": 4199 }, { "epoch": 0.76, "learning_rate": 4.26033781486527e-06, "loss": 0.2, "step": 4200 }, { "epoch": 0.76, "learning_rate": 4.254190453928894e-06, "loss": 0.2026, "step": 4201 }, { "epoch": 0.76, "learning_rate": 4.248046798360186e-06, "loss": 0.201, "step": 4202 }, { "epoch": 0.76, "learning_rate": 4.241906850277601e-06, "loss": 0.1947, "step": 4203 }, { "epoch": 0.76, "learning_rate": 4.23577061179832e-06, "loss": 0.1901, "step": 4204 }, { "epoch": 0.76, "learning_rate": 4.229638085038234e-06, "loss": 0.1453, "step": 4205 }, { "epoch": 0.76, "learning_rate": 4.2235092721119575e-06, "loss": 0.1262, "step": 4206 }, { "epoch": 0.76, "learning_rate": 4.217384175132831e-06, "loss": 0.1518, "step": 4207 }, { "epoch": 0.76, "learning_rate": 4.211262796212905e-06, "loss": 0.1633, "step": 4208 }, { "epoch": 0.76, "learning_rate": 4.2051451374629614e-06, "loss": 0.122, "step": 4209 }, { "epoch": 0.76, "learning_rate": 4.199031200992477e-06, "loss": 0.1831, "step": 4210 }, { "epoch": 0.76, "learning_rate": 4.1929209889096656e-06, "loss": 0.1703, "step": 4211 }, { "epoch": 0.76, "learning_rate": 4.186814503321451e-06, "loss": 0.1907, "step": 4212 }, { "epoch": 0.76, "learning_rate": 4.180711746333463e-06, "loss": 0.1229, "step": 4213 }, { "epoch": 0.76, "learning_rate": 4.174612720050057e-06, "loss": 0.2052, "step": 4214 }, { "epoch": 0.76, "learning_rate": 4.168517426574304e-06, "loss": 0.164, "step": 4215 }, { "epoch": 0.76, "learning_rate": 4.162425868007969e-06, "loss": 0.1494, "step": 4216 }, { "epoch": 0.76, "learning_rate": 4.1563380464515495e-06, "loss": 0.1971, "step": 4217 }, { "epoch": 0.76, "learning_rate": 4.150253964004244e-06, "loss": 0.2274, "step": 4218 }, { "epoch": 0.76, "learning_rate": 4.144173622763972e-06, "loss": 0.2965, "step": 4219 }, { "epoch": 0.77, "learning_rate": 4.138097024827347e-06, "loss": 0.1975, "step": 4220 }, { "epoch": 0.77, "learning_rate": 4.132024172289699e-06, "loss": 0.2158, "step": 4221 }, { "epoch": 0.77, "learning_rate": 4.1259550672450785e-06, "loss": 0.145, "step": 4222 }, { "epoch": 0.77, "learning_rate": 4.119889711786221e-06, "loss": 0.2, "step": 4223 }, { "epoch": 0.77, "learning_rate": 4.113828108004586e-06, "loss": 0.17, "step": 4224 }, { "epoch": 0.77, "learning_rate": 4.107770257990334e-06, "loss": 0.2013, "step": 4225 }, { "epoch": 0.77, "learning_rate": 4.101716163832337e-06, "loss": 0.1551, "step": 4226 }, { "epoch": 0.77, "learning_rate": 4.095665827618156e-06, "loss": 0.1943, "step": 4227 }, { "epoch": 0.77, "learning_rate": 4.089619251434072e-06, "loss": 0.212, "step": 4228 }, { "epoch": 0.77, "learning_rate": 4.083576437365067e-06, "loss": 0.2092, "step": 4229 }, { "epoch": 0.77, "learning_rate": 4.077537387494818e-06, "loss": 0.181, "step": 4230 }, { "epoch": 0.77, "learning_rate": 4.071502103905707e-06, "loss": 0.1896, "step": 4231 }, { "epoch": 0.77, "learning_rate": 4.06547058867883e-06, "loss": 0.1628, "step": 4232 }, { "epoch": 0.77, "learning_rate": 4.0594428438939596e-06, "loss": 0.1971, "step": 4233 }, { "epoch": 0.77, "learning_rate": 4.053418871629588e-06, "loss": 0.2336, "step": 4234 }, { "epoch": 0.77, "learning_rate": 4.0473986739629e-06, "loss": 0.1378, "step": 4235 }, { "epoch": 0.77, "learning_rate": 4.04138225296978e-06, "loss": 0.1727, "step": 4236 }, { "epoch": 0.77, "learning_rate": 4.035369610724805e-06, "loss": 0.1927, "step": 4237 }, { "epoch": 0.77, "learning_rate": 4.029360749301254e-06, "loss": 0.1634, "step": 4238 }, { "epoch": 0.77, "learning_rate": 4.023355670771106e-06, "loss": 0.1765, "step": 4239 }, { "epoch": 0.77, "learning_rate": 4.017354377205025e-06, "loss": 0.1888, "step": 4240 }, { "epoch": 0.77, "learning_rate": 4.011356870672375e-06, "loss": 0.2184, "step": 4241 }, { "epoch": 0.77, "learning_rate": 4.005363153241219e-06, "loss": 0.1866, "step": 4242 }, { "epoch": 0.77, "learning_rate": 3.999373226978312e-06, "loss": 0.2191, "step": 4243 }, { "epoch": 0.77, "learning_rate": 3.993387093949091e-06, "loss": 0.1979, "step": 4244 }, { "epoch": 0.77, "learning_rate": 3.987404756217698e-06, "loss": 0.2119, "step": 4245 }, { "epoch": 0.77, "learning_rate": 3.981426215846964e-06, "loss": 0.1736, "step": 4246 }, { "epoch": 0.77, "learning_rate": 3.9754514748984e-06, "loss": 0.229, "step": 4247 }, { "epoch": 0.77, "learning_rate": 3.96948053543222e-06, "loss": 0.1684, "step": 4248 }, { "epoch": 0.77, "learning_rate": 3.9635133995073256e-06, "loss": 0.1989, "step": 4249 }, { "epoch": 0.77, "learning_rate": 3.957550069181293e-06, "loss": 0.1422, "step": 4250 }, { "epoch": 0.77, "learning_rate": 3.951590546510404e-06, "loss": 0.1423, "step": 4251 }, { "epoch": 0.77, "learning_rate": 3.945634833549619e-06, "loss": 0.1941, "step": 4252 }, { "epoch": 0.77, "learning_rate": 3.939682932352594e-06, "loss": 0.1864, "step": 4253 }, { "epoch": 0.77, "learning_rate": 3.933734844971642e-06, "loss": 0.1407, "step": 4254 }, { "epoch": 0.77, "learning_rate": 3.927790573457794e-06, "loss": 0.205, "step": 4255 }, { "epoch": 0.77, "learning_rate": 3.9218501198607495e-06, "loss": 0.147, "step": 4256 }, { "epoch": 0.77, "learning_rate": 3.915913486228903e-06, "loss": 0.166, "step": 4257 }, { "epoch": 0.77, "learning_rate": 3.90998067460931e-06, "loss": 0.1105, "step": 4258 }, { "epoch": 0.77, "learning_rate": 3.904051687047726e-06, "loss": 0.1406, "step": 4259 }, { "epoch": 0.77, "learning_rate": 3.89812652558859e-06, "loss": 0.1708, "step": 4260 }, { "epoch": 0.77, "learning_rate": 3.8922051922750045e-06, "loss": 0.1462, "step": 4261 }, { "epoch": 0.77, "learning_rate": 3.886287689148767e-06, "loss": 0.2006, "step": 4262 }, { "epoch": 0.77, "learning_rate": 3.88037401825035e-06, "loss": 0.1852, "step": 4263 }, { "epoch": 0.77, "learning_rate": 3.874464181618909e-06, "loss": 0.1553, "step": 4264 }, { "epoch": 0.77, "learning_rate": 3.868558181292261e-06, "loss": 0.207, "step": 4265 }, { "epoch": 0.77, "learning_rate": 3.86265601930692e-06, "loss": 0.1973, "step": 4266 }, { "epoch": 0.77, "learning_rate": 3.856757697698071e-06, "loss": 0.1354, "step": 4267 }, { "epoch": 0.77, "learning_rate": 3.8508632184995614e-06, "loss": 0.1793, "step": 4268 }, { "epoch": 0.77, "learning_rate": 3.844972583743931e-06, "loss": 0.1362, "step": 4269 }, { "epoch": 0.77, "learning_rate": 3.83908579546239e-06, "loss": 0.2112, "step": 4270 }, { "epoch": 0.77, "learning_rate": 3.8332028556848125e-06, "loss": 0.1978, "step": 4271 }, { "epoch": 0.77, "learning_rate": 3.827323766439755e-06, "loss": 0.2163, "step": 4272 }, { "epoch": 0.77, "learning_rate": 3.821448529754445e-06, "loss": 0.2198, "step": 4273 }, { "epoch": 0.77, "learning_rate": 3.8155771476547825e-06, "loss": 0.1992, "step": 4274 }, { "epoch": 0.78, "learning_rate": 3.809709622165331e-06, "loss": 0.1849, "step": 4275 }, { "epoch": 0.78, "learning_rate": 3.8038459553093308e-06, "loss": 0.1654, "step": 4276 }, { "epoch": 0.78, "learning_rate": 3.797986149108696e-06, "loss": 0.164, "step": 4277 }, { "epoch": 0.78, "learning_rate": 3.792130205583995e-06, "loss": 0.1891, "step": 4278 }, { "epoch": 0.78, "learning_rate": 3.7862781267544765e-06, "loss": 0.1591, "step": 4279 }, { "epoch": 0.78, "learning_rate": 3.7804299146380527e-06, "loss": 0.2274, "step": 4280 }, { "epoch": 0.78, "learning_rate": 3.774585571251309e-06, "loss": 0.163, "step": 4281 }, { "epoch": 0.78, "learning_rate": 3.7687450986094795e-06, "loss": 0.2097, "step": 4282 }, { "epoch": 0.78, "learning_rate": 3.7629084987264804e-06, "loss": 0.1404, "step": 4283 }, { "epoch": 0.78, "learning_rate": 3.757075773614892e-06, "loss": 0.1535, "step": 4284 }, { "epoch": 0.78, "learning_rate": 3.751246925285943e-06, "loss": 0.1931, "step": 4285 }, { "epoch": 0.78, "learning_rate": 3.74542195574954e-06, "loss": 0.1964, "step": 4286 }, { "epoch": 0.78, "learning_rate": 3.739600867014254e-06, "loss": 0.1512, "step": 4287 }, { "epoch": 0.78, "learning_rate": 3.733783661087301e-06, "loss": 0.1986, "step": 4288 }, { "epoch": 0.78, "learning_rate": 3.7279703399745718e-06, "loss": 0.1827, "step": 4289 }, { "epoch": 0.78, "learning_rate": 3.7221609056806177e-06, "loss": 0.1505, "step": 4290 }, { "epoch": 0.78, "learning_rate": 3.7163553602086496e-06, "loss": 0.2253, "step": 4291 }, { "epoch": 0.78, "learning_rate": 3.7105537055605247e-06, "loss": 0.178, "step": 4292 }, { "epoch": 0.78, "learning_rate": 3.704755943736773e-06, "loss": 0.1303, "step": 4293 }, { "epoch": 0.78, "learning_rate": 3.698962076736583e-06, "loss": 0.1612, "step": 4294 }, { "epoch": 0.78, "learning_rate": 3.693172106557785e-06, "loss": 0.1893, "step": 4295 }, { "epoch": 0.78, "learning_rate": 3.687386035196879e-06, "loss": 0.1951, "step": 4296 }, { "epoch": 0.78, "learning_rate": 3.6816038646490175e-06, "loss": 0.1948, "step": 4297 }, { "epoch": 0.78, "learning_rate": 3.675825596908012e-06, "loss": 0.1316, "step": 4298 }, { "epoch": 0.78, "learning_rate": 3.6700512339663132e-06, "loss": 0.1348, "step": 4299 }, { "epoch": 0.78, "learning_rate": 3.664280777815043e-06, "loss": 0.2903, "step": 4300 }, { "epoch": 0.78, "learning_rate": 3.658514230443972e-06, "loss": 0.1709, "step": 4301 }, { "epoch": 0.78, "learning_rate": 3.652751593841508e-06, "loss": 0.1733, "step": 4302 }, { "epoch": 0.78, "learning_rate": 3.6469928699947354e-06, "loss": 0.1866, "step": 4303 }, { "epoch": 0.78, "learning_rate": 3.641238060889364e-06, "loss": 0.1877, "step": 4304 }, { "epoch": 0.78, "learning_rate": 3.6354871685097756e-06, "loss": 0.1003, "step": 4305 }, { "epoch": 0.78, "learning_rate": 3.6297401948389837e-06, "loss": 0.2874, "step": 4306 }, { "epoch": 0.78, "learning_rate": 3.623997141858662e-06, "loss": 0.1828, "step": 4307 }, { "epoch": 0.78, "learning_rate": 3.61825801154913e-06, "loss": 0.1796, "step": 4308 }, { "epoch": 0.78, "learning_rate": 3.6125228058893555e-06, "loss": 0.1633, "step": 4309 }, { "epoch": 0.78, "learning_rate": 3.606791526856944e-06, "loss": 0.1421, "step": 4310 }, { "epoch": 0.78, "learning_rate": 3.6010641764281567e-06, "loss": 0.1241, "step": 4311 }, { "epoch": 0.78, "learning_rate": 3.595340756577903e-06, "loss": 0.2819, "step": 4312 }, { "epoch": 0.78, "learning_rate": 3.5896212692797228e-06, "loss": 0.1455, "step": 4313 }, { "epoch": 0.78, "learning_rate": 3.5839057165058113e-06, "loss": 0.2121, "step": 4314 }, { "epoch": 0.78, "learning_rate": 3.5781941002270087e-06, "loss": 0.1799, "step": 4315 }, { "epoch": 0.78, "learning_rate": 3.572486422412787e-06, "loss": 0.1552, "step": 4316 }, { "epoch": 0.78, "learning_rate": 3.5667826850312666e-06, "loss": 0.1241, "step": 4317 }, { "epoch": 0.78, "learning_rate": 3.5610828900492123e-06, "loss": 0.1835, "step": 4318 }, { "epoch": 0.78, "learning_rate": 3.5553870394320304e-06, "loss": 0.2062, "step": 4319 }, { "epoch": 0.78, "learning_rate": 3.5496951351437507e-06, "loss": 0.2204, "step": 4320 }, { "epoch": 0.78, "learning_rate": 3.544007179147063e-06, "loss": 0.2195, "step": 4321 }, { "epoch": 0.78, "learning_rate": 3.53832317340329e-06, "loss": 0.1766, "step": 4322 }, { "epoch": 0.78, "learning_rate": 3.5326431198723792e-06, "loss": 0.1656, "step": 4323 }, { "epoch": 0.78, "learning_rate": 3.52696702051293e-06, "loss": 0.2248, "step": 4324 }, { "epoch": 0.78, "learning_rate": 3.5212948772821755e-06, "loss": 0.1836, "step": 4325 }, { "epoch": 0.78, "learning_rate": 3.5156266921359866e-06, "loss": 0.238, "step": 4326 }, { "epoch": 0.78, "learning_rate": 3.5099624670288573e-06, "loss": 0.1945, "step": 4327 }, { "epoch": 0.78, "learning_rate": 3.5043022039139254e-06, "loss": 0.1709, "step": 4328 }, { "epoch": 0.78, "learning_rate": 3.49864590474297e-06, "loss": 0.1239, "step": 4329 }, { "epoch": 0.78, "learning_rate": 3.4929935714663847e-06, "loss": 0.1529, "step": 4330 }, { "epoch": 0.79, "learning_rate": 3.4873452060332112e-06, "loss": 0.179, "step": 4331 }, { "epoch": 0.79, "learning_rate": 3.4817008103911202e-06, "loss": 0.155, "step": 4332 }, { "epoch": 0.79, "learning_rate": 3.4760603864864033e-06, "loss": 0.1794, "step": 4333 }, { "epoch": 0.79, "learning_rate": 3.470423936263993e-06, "loss": 0.2344, "step": 4334 }, { "epoch": 0.79, "learning_rate": 3.464791461667451e-06, "loss": 0.2362, "step": 4335 }, { "epoch": 0.79, "learning_rate": 3.4591629646389705e-06, "loss": 0.1474, "step": 4336 }, { "epoch": 0.79, "learning_rate": 3.453538447119358e-06, "loss": 0.1849, "step": 4337 }, { "epoch": 0.79, "learning_rate": 3.447917911048064e-06, "loss": 0.1392, "step": 4338 }, { "epoch": 0.79, "learning_rate": 3.442301358363163e-06, "loss": 0.1946, "step": 4339 }, { "epoch": 0.79, "learning_rate": 3.4366887910013482e-06, "loss": 0.2162, "step": 4340 }, { "epoch": 0.79, "learning_rate": 3.4310802108979456e-06, "loss": 0.1101, "step": 4341 }, { "epoch": 0.79, "learning_rate": 3.425475619986904e-06, "loss": 0.1303, "step": 4342 }, { "epoch": 0.79, "learning_rate": 3.4198750202008032e-06, "loss": 0.1771, "step": 4343 }, { "epoch": 0.79, "learning_rate": 3.4142784134708317e-06, "loss": 0.2001, "step": 4344 }, { "epoch": 0.79, "learning_rate": 3.4086858017268154e-06, "loss": 0.1338, "step": 4345 }, { "epoch": 0.79, "learning_rate": 3.4030971868972e-06, "loss": 0.1899, "step": 4346 }, { "epoch": 0.79, "learning_rate": 3.3975125709090427e-06, "loss": 0.2448, "step": 4347 }, { "epoch": 0.79, "learning_rate": 3.391931955688033e-06, "loss": 0.1546, "step": 4348 }, { "epoch": 0.79, "learning_rate": 3.386355343158483e-06, "loss": 0.1853, "step": 4349 }, { "epoch": 0.79, "learning_rate": 3.3807827352433103e-06, "loss": 0.1486, "step": 4350 }, { "epoch": 0.79, "learning_rate": 3.3752141338640636e-06, "loss": 0.1253, "step": 4351 }, { "epoch": 0.79, "learning_rate": 3.369649540940912e-06, "loss": 0.0958, "step": 4352 }, { "epoch": 0.79, "learning_rate": 3.3640889583926316e-06, "loss": 0.1862, "step": 4353 }, { "epoch": 0.79, "learning_rate": 3.358532388136619e-06, "loss": 0.1431, "step": 4354 }, { "epoch": 0.79, "learning_rate": 3.3529798320888927e-06, "loss": 0.1376, "step": 4355 }, { "epoch": 0.79, "learning_rate": 3.3474312921640838e-06, "loss": 0.1696, "step": 4356 }, { "epoch": 0.79, "learning_rate": 3.3418867702754418e-06, "loss": 0.1603, "step": 4357 }, { "epoch": 0.79, "learning_rate": 3.336346268334821e-06, "loss": 0.2053, "step": 4358 }, { "epoch": 0.79, "learning_rate": 3.3308097882527e-06, "loss": 0.1979, "step": 4359 }, { "epoch": 0.79, "learning_rate": 3.3252773319381697e-06, "loss": 0.2109, "step": 4360 }, { "epoch": 0.79, "learning_rate": 3.3197489012989226e-06, "loss": 0.1269, "step": 4361 }, { "epoch": 0.79, "learning_rate": 3.314224498241275e-06, "loss": 0.17, "step": 4362 }, { "epoch": 0.79, "learning_rate": 3.308704124670152e-06, "loss": 0.1478, "step": 4363 }, { "epoch": 0.79, "learning_rate": 3.3031877824890895e-06, "loss": 0.1996, "step": 4364 }, { "epoch": 0.79, "learning_rate": 3.2976754736002233e-06, "loss": 0.1771, "step": 4365 }, { "epoch": 0.79, "learning_rate": 3.2921671999043107e-06, "loss": 0.1948, "step": 4366 }, { "epoch": 0.79, "learning_rate": 3.2866629633007202e-06, "loss": 0.1682, "step": 4367 }, { "epoch": 0.79, "learning_rate": 3.2811627656874083e-06, "loss": 0.1755, "step": 4368 }, { "epoch": 0.79, "learning_rate": 3.2756666089609617e-06, "loss": 0.1784, "step": 4369 }, { "epoch": 0.79, "learning_rate": 3.2701744950165624e-06, "loss": 0.1729, "step": 4370 }, { "epoch": 0.79, "learning_rate": 3.2646864257479958e-06, "loss": 0.2253, "step": 4371 }, { "epoch": 0.79, "learning_rate": 3.2592024030476596e-06, "loss": 0.157, "step": 4372 }, { "epoch": 0.79, "learning_rate": 3.253722428806555e-06, "loss": 0.1887, "step": 4373 }, { "epoch": 0.79, "learning_rate": 3.2482465049142846e-06, "loss": 0.1245, "step": 4374 }, { "epoch": 0.79, "learning_rate": 3.2427746332590518e-06, "loss": 0.1703, "step": 4375 }, { "epoch": 0.79, "learning_rate": 3.2373068157276692e-06, "loss": 0.1711, "step": 4376 }, { "epoch": 0.79, "learning_rate": 3.2318430542055514e-06, "loss": 0.2, "step": 4377 }, { "epoch": 0.79, "learning_rate": 3.2263833505767038e-06, "loss": 0.1552, "step": 4378 }, { "epoch": 0.79, "learning_rate": 3.2209277067237446e-06, "loss": 0.2418, "step": 4379 }, { "epoch": 0.79, "learning_rate": 3.215476124527888e-06, "loss": 0.1812, "step": 4380 }, { "epoch": 0.79, "learning_rate": 3.2100286058689517e-06, "loss": 0.2595, "step": 4381 }, { "epoch": 0.79, "learning_rate": 3.204585152625338e-06, "loss": 0.2091, "step": 4382 }, { "epoch": 0.79, "learning_rate": 3.199145766674062e-06, "loss": 0.1776, "step": 4383 }, { "epoch": 0.79, "learning_rate": 3.1937104498907353e-06, "loss": 0.1681, "step": 4384 }, { "epoch": 0.79, "learning_rate": 3.188279204149557e-06, "loss": 0.2075, "step": 4385 }, { "epoch": 0.8, "learning_rate": 3.1828520313233296e-06, "loss": 0.1592, "step": 4386 }, { "epoch": 0.8, "learning_rate": 3.1774289332834533e-06, "loss": 0.2177, "step": 4387 }, { "epoch": 0.8, "learning_rate": 3.172009911899913e-06, "loss": 0.1998, "step": 4388 }, { "epoch": 0.8, "learning_rate": 3.166594969041299e-06, "loss": 0.1476, "step": 4389 }, { "epoch": 0.8, "learning_rate": 3.1611841065747886e-06, "loss": 0.1339, "step": 4390 }, { "epoch": 0.8, "learning_rate": 3.1557773263661605e-06, "loss": 0.1567, "step": 4391 }, { "epoch": 0.8, "learning_rate": 3.150374630279769e-06, "loss": 0.2032, "step": 4392 }, { "epoch": 0.8, "learning_rate": 3.1449760201785764e-06, "loss": 0.1384, "step": 4393 }, { "epoch": 0.8, "learning_rate": 3.139581497924135e-06, "loss": 0.1921, "step": 4394 }, { "epoch": 0.8, "learning_rate": 3.1341910653765715e-06, "loss": 0.1971, "step": 4395 }, { "epoch": 0.8, "learning_rate": 3.128804724394621e-06, "loss": 0.1402, "step": 4396 }, { "epoch": 0.8, "learning_rate": 3.123422476835598e-06, "loss": 0.1271, "step": 4397 }, { "epoch": 0.8, "learning_rate": 3.1180443245554125e-06, "loss": 0.194, "step": 4398 }, { "epoch": 0.8, "learning_rate": 3.1126702694085514e-06, "loss": 0.1464, "step": 4399 }, { "epoch": 0.8, "learning_rate": 3.1073003132480986e-06, "loss": 0.1086, "step": 4400 }, { "epoch": 0.8, "learning_rate": 3.1019344579257234e-06, "loss": 0.1917, "step": 4401 }, { "epoch": 0.8, "learning_rate": 3.096572705291676e-06, "loss": 0.1609, "step": 4402 }, { "epoch": 0.8, "learning_rate": 3.0912150571947918e-06, "loss": 0.1239, "step": 4403 }, { "epoch": 0.8, "learning_rate": 3.0858615154824948e-06, "loss": 0.1856, "step": 4404 }, { "epoch": 0.8, "learning_rate": 3.0805120820008004e-06, "loss": 0.1838, "step": 4405 }, { "epoch": 0.8, "learning_rate": 3.0751667585942878e-06, "loss": 0.1583, "step": 4406 }, { "epoch": 0.8, "learning_rate": 3.0698255471061343e-06, "loss": 0.1445, "step": 4407 }, { "epoch": 0.8, "learning_rate": 3.0644884493780985e-06, "loss": 0.1868, "step": 4408 }, { "epoch": 0.8, "learning_rate": 3.059155467250518e-06, "loss": 0.15, "step": 4409 }, { "epoch": 0.8, "learning_rate": 3.053826602562302e-06, "loss": 0.194, "step": 4410 }, { "epoch": 0.8, "learning_rate": 3.0485018571509552e-06, "loss": 0.1772, "step": 4411 }, { "epoch": 0.8, "learning_rate": 3.043181232852558e-06, "loss": 0.1319, "step": 4412 }, { "epoch": 0.8, "learning_rate": 3.0378647315017576e-06, "loss": 0.212, "step": 4413 }, { "epoch": 0.8, "learning_rate": 3.0325523549317957e-06, "loss": 0.2049, "step": 4414 }, { "epoch": 0.8, "learning_rate": 3.0272441049744855e-06, "loss": 0.0919, "step": 4415 }, { "epoch": 0.8, "learning_rate": 3.021939983460211e-06, "loss": 0.1889, "step": 4416 }, { "epoch": 0.8, "learning_rate": 3.0166399922179406e-06, "loss": 0.142, "step": 4417 }, { "epoch": 0.8, "learning_rate": 3.011344133075218e-06, "loss": 0.1821, "step": 4418 }, { "epoch": 0.8, "learning_rate": 3.006052407858162e-06, "loss": 0.1633, "step": 4419 }, { "epoch": 0.8, "learning_rate": 3.0007648183914565e-06, "loss": 0.2019, "step": 4420 }, { "epoch": 0.8, "learning_rate": 2.9954813664983733e-06, "loss": 0.1248, "step": 4421 }, { "epoch": 0.8, "learning_rate": 2.990202054000753e-06, "loss": 0.2298, "step": 4422 }, { "epoch": 0.8, "learning_rate": 2.984926882718998e-06, "loss": 0.168, "step": 4423 }, { "epoch": 0.8, "learning_rate": 2.9796558544720982e-06, "loss": 0.142, "step": 4424 }, { "epoch": 0.8, "learning_rate": 2.9743889710776113e-06, "loss": 0.1575, "step": 4425 }, { "epoch": 0.8, "learning_rate": 2.9691262343516544e-06, "loss": 0.2024, "step": 4426 }, { "epoch": 0.8, "learning_rate": 2.9638676461089283e-06, "loss": 0.1305, "step": 4427 }, { "epoch": 0.8, "learning_rate": 2.958613208162698e-06, "loss": 0.1364, "step": 4428 }, { "epoch": 0.8, "learning_rate": 2.9533629223248002e-06, "loss": 0.1739, "step": 4429 }, { "epoch": 0.8, "learning_rate": 2.94811679040563e-06, "loss": 0.1423, "step": 4430 }, { "epoch": 0.8, "learning_rate": 2.9428748142141614e-06, "loss": 0.1697, "step": 4431 }, { "epoch": 0.8, "learning_rate": 2.9376369955579373e-06, "loss": 0.1859, "step": 4432 }, { "epoch": 0.8, "learning_rate": 2.9324033362430513e-06, "loss": 0.1665, "step": 4433 }, { "epoch": 0.8, "learning_rate": 2.9271738380741762e-06, "loss": 0.1905, "step": 4434 }, { "epoch": 0.8, "learning_rate": 2.9219485028545486e-06, "loss": 0.1439, "step": 4435 }, { "epoch": 0.8, "learning_rate": 2.916727332385968e-06, "loss": 0.1577, "step": 4436 }, { "epoch": 0.8, "learning_rate": 2.9115103284687927e-06, "loss": 0.1728, "step": 4437 }, { "epoch": 0.8, "learning_rate": 2.906297492901951e-06, "loss": 0.1621, "step": 4438 }, { "epoch": 0.8, "learning_rate": 2.901088827482938e-06, "loss": 0.1944, "step": 4439 }, { "epoch": 0.8, "learning_rate": 2.8958843340077917e-06, "loss": 0.1432, "step": 4440 }, { "epoch": 0.81, "learning_rate": 2.890684014271134e-06, "loss": 0.1748, "step": 4441 }, { "epoch": 0.81, "learning_rate": 2.885487870066138e-06, "loss": 0.2324, "step": 4442 }, { "epoch": 0.81, "learning_rate": 2.880295903184531e-06, "loss": 0.2188, "step": 4443 }, { "epoch": 0.81, "learning_rate": 2.8751081154166078e-06, "loss": 0.2098, "step": 4444 }, { "epoch": 0.81, "learning_rate": 2.8699245085512234e-06, "loss": 0.1563, "step": 4445 }, { "epoch": 0.81, "learning_rate": 2.86474508437579e-06, "loss": 0.1993, "step": 4446 }, { "epoch": 0.81, "learning_rate": 2.8595698446762673e-06, "loss": 0.2094, "step": 4447 }, { "epoch": 0.81, "learning_rate": 2.854398791237185e-06, "loss": 0.2055, "step": 4448 }, { "epoch": 0.81, "learning_rate": 2.849231925841628e-06, "loss": 0.1609, "step": 4449 }, { "epoch": 0.81, "learning_rate": 2.8440692502712255e-06, "loss": 0.1851, "step": 4450 }, { "epoch": 0.81, "learning_rate": 2.83891076630618e-06, "loss": 0.1375, "step": 4451 }, { "epoch": 0.81, "learning_rate": 2.8337564757252305e-06, "loss": 0.1722, "step": 4452 }, { "epoch": 0.81, "learning_rate": 2.828606380305683e-06, "loss": 0.202, "step": 4453 }, { "epoch": 0.81, "learning_rate": 2.823460481823388e-06, "loss": 0.1718, "step": 4454 }, { "epoch": 0.81, "learning_rate": 2.8183187820527544e-06, "loss": 0.1395, "step": 4455 }, { "epoch": 0.81, "learning_rate": 2.8131812827667445e-06, "loss": 0.1602, "step": 4456 }, { "epoch": 0.81, "learning_rate": 2.808047985736873e-06, "loss": 0.1564, "step": 4457 }, { "epoch": 0.81, "learning_rate": 2.8029188927331915e-06, "loss": 0.1591, "step": 4458 }, { "epoch": 0.81, "learning_rate": 2.7977940055243213e-06, "loss": 0.1761, "step": 4459 }, { "epoch": 0.81, "learning_rate": 2.7926733258774246e-06, "loss": 0.1018, "step": 4460 }, { "epoch": 0.81, "learning_rate": 2.7875568555582075e-06, "loss": 0.1808, "step": 4461 }, { "epoch": 0.81, "learning_rate": 2.7824445963309343e-06, "loss": 0.174, "step": 4462 }, { "epoch": 0.81, "learning_rate": 2.777336549958411e-06, "loss": 0.1375, "step": 4463 }, { "epoch": 0.81, "learning_rate": 2.7722327182019985e-06, "loss": 0.1802, "step": 4464 }, { "epoch": 0.81, "learning_rate": 2.767133102821593e-06, "loss": 0.1684, "step": 4465 }, { "epoch": 0.81, "learning_rate": 2.7620377055756426e-06, "loss": 0.1505, "step": 4466 }, { "epoch": 0.81, "learning_rate": 2.7569465282211464e-06, "loss": 0.1482, "step": 4467 }, { "epoch": 0.81, "learning_rate": 2.7518595725136376e-06, "loss": 0.226, "step": 4468 }, { "epoch": 0.81, "learning_rate": 2.746776840207199e-06, "loss": 0.2084, "step": 4469 }, { "epoch": 0.81, "learning_rate": 2.741698333054466e-06, "loss": 0.1435, "step": 4470 }, { "epoch": 0.81, "learning_rate": 2.7366240528065954e-06, "loss": 0.1546, "step": 4471 }, { "epoch": 0.81, "learning_rate": 2.7315540012133068e-06, "loss": 0.1492, "step": 4472 }, { "epoch": 0.81, "learning_rate": 2.726488180022854e-06, "loss": 0.2237, "step": 4473 }, { "epoch": 0.81, "learning_rate": 2.721426590982034e-06, "loss": 0.1981, "step": 4474 }, { "epoch": 0.81, "learning_rate": 2.7163692358361768e-06, "loss": 0.1635, "step": 4475 }, { "epoch": 0.81, "learning_rate": 2.7113161163291627e-06, "loss": 0.2156, "step": 4476 }, { "epoch": 0.81, "learning_rate": 2.7062672342034096e-06, "loss": 0.1901, "step": 4477 }, { "epoch": 0.81, "learning_rate": 2.7012225911998663e-06, "loss": 0.1834, "step": 4478 }, { "epoch": 0.81, "learning_rate": 2.696182189058026e-06, "loss": 0.2007, "step": 4479 }, { "epoch": 0.81, "learning_rate": 2.691146029515923e-06, "loss": 0.2119, "step": 4480 }, { "epoch": 0.81, "learning_rate": 2.686114114310127e-06, "loss": 0.1733, "step": 4481 }, { "epoch": 0.81, "learning_rate": 2.681086445175732e-06, "loss": 0.1662, "step": 4482 }, { "epoch": 0.81, "learning_rate": 2.6760630238463853e-06, "loss": 0.174, "step": 4483 }, { "epoch": 0.81, "learning_rate": 2.6710438520542636e-06, "loss": 0.1106, "step": 4484 }, { "epoch": 0.81, "learning_rate": 2.6660289315300677e-06, "loss": 0.1608, "step": 4485 }, { "epoch": 0.81, "learning_rate": 2.6610182640030483e-06, "loss": 0.2385, "step": 4486 }, { "epoch": 0.81, "learning_rate": 2.6560118512009844e-06, "loss": 0.1385, "step": 4487 }, { "epoch": 0.81, "learning_rate": 2.6510096948501804e-06, "loss": 0.1346, "step": 4488 }, { "epoch": 0.81, "learning_rate": 2.6460117966754797e-06, "loss": 0.1752, "step": 4489 }, { "epoch": 0.81, "learning_rate": 2.641018158400259e-06, "loss": 0.2331, "step": 4490 }, { "epoch": 0.81, "learning_rate": 2.6360287817464257e-06, "loss": 0.1459, "step": 4491 }, { "epoch": 0.81, "learning_rate": 2.63104366843441e-06, "loss": 0.2175, "step": 4492 }, { "epoch": 0.81, "learning_rate": 2.6260628201831793e-06, "loss": 0.1453, "step": 4493 }, { "epoch": 0.81, "learning_rate": 2.6210862387102325e-06, "loss": 0.1544, "step": 4494 }, { "epoch": 0.81, "learning_rate": 2.616113925731587e-06, "loss": 0.1132, "step": 4495 }, { "epoch": 0.82, "learning_rate": 2.6111458829617962e-06, "loss": 0.1668, "step": 4496 }, { "epoch": 0.82, "learning_rate": 2.6061821121139433e-06, "loss": 0.1516, "step": 4497 }, { "epoch": 0.82, "learning_rate": 2.601222614899636e-06, "loss": 0.14, "step": 4498 }, { "epoch": 0.82, "learning_rate": 2.5962673930289976e-06, "loss": 0.1583, "step": 4499 }, { "epoch": 0.82, "learning_rate": 2.591316448210695e-06, "loss": 0.2256, "step": 4500 }, { "epoch": 0.82, "learning_rate": 2.586369782151912e-06, "loss": 0.1861, "step": 4501 }, { "epoch": 0.82, "learning_rate": 2.5814273965583534e-06, "loss": 0.1857, "step": 4502 }, { "epoch": 0.82, "learning_rate": 2.5764892931342503e-06, "loss": 0.1381, "step": 4503 }, { "epoch": 0.82, "learning_rate": 2.5715554735823595e-06, "loss": 0.1398, "step": 4504 }, { "epoch": 0.82, "learning_rate": 2.566625939603966e-06, "loss": 0.1657, "step": 4505 }, { "epoch": 0.82, "learning_rate": 2.561700692898859e-06, "loss": 0.2228, "step": 4506 }, { "epoch": 0.82, "learning_rate": 2.556779735165368e-06, "loss": 0.1582, "step": 4507 }, { "epoch": 0.82, "learning_rate": 2.551863068100341e-06, "loss": 0.2325, "step": 4508 }, { "epoch": 0.82, "learning_rate": 2.546950693399132e-06, "loss": 0.1351, "step": 4509 }, { "epoch": 0.82, "learning_rate": 2.542042612755629e-06, "loss": 0.1517, "step": 4510 }, { "epoch": 0.82, "learning_rate": 2.537138827862238e-06, "loss": 0.1712, "step": 4511 }, { "epoch": 0.82, "learning_rate": 2.5322393404098806e-06, "loss": 0.1384, "step": 4512 }, { "epoch": 0.82, "learning_rate": 2.5273441520879923e-06, "loss": 0.1686, "step": 4513 }, { "epoch": 0.82, "learning_rate": 2.5224532645845343e-06, "loss": 0.1549, "step": 4514 }, { "epoch": 0.82, "learning_rate": 2.517566679585985e-06, "loss": 0.1218, "step": 4515 }, { "epoch": 0.82, "learning_rate": 2.512684398777329e-06, "loss": 0.1557, "step": 4516 }, { "epoch": 0.82, "learning_rate": 2.507806423842074e-06, "loss": 0.1588, "step": 4517 }, { "epoch": 0.82, "learning_rate": 2.5029327564622445e-06, "loss": 0.205, "step": 4518 }, { "epoch": 0.82, "learning_rate": 2.4980633983183813e-06, "loss": 0.1875, "step": 4519 }, { "epoch": 0.82, "learning_rate": 2.493198351089527e-06, "loss": 0.1479, "step": 4520 }, { "epoch": 0.82, "learning_rate": 2.48833761645325e-06, "loss": 0.1796, "step": 4521 }, { "epoch": 0.82, "learning_rate": 2.4834811960856325e-06, "loss": 0.193, "step": 4522 }, { "epoch": 0.82, "learning_rate": 2.478629091661256e-06, "loss": 0.221, "step": 4523 }, { "epoch": 0.82, "learning_rate": 2.4737813048532255e-06, "loss": 0.1493, "step": 4524 }, { "epoch": 0.82, "learning_rate": 2.4689378373331572e-06, "loss": 0.1928, "step": 4525 }, { "epoch": 0.82, "learning_rate": 2.464098690771169e-06, "loss": 0.2328, "step": 4526 }, { "epoch": 0.82, "learning_rate": 2.4592638668358952e-06, "loss": 0.1705, "step": 4527 }, { "epoch": 0.82, "learning_rate": 2.4544333671944815e-06, "loss": 0.2, "step": 4528 }, { "epoch": 0.82, "learning_rate": 2.4496071935125815e-06, "loss": 0.154, "step": 4529 }, { "epoch": 0.82, "learning_rate": 2.444785347454349e-06, "loss": 0.1344, "step": 4530 }, { "epoch": 0.82, "learning_rate": 2.4399678306824565e-06, "loss": 0.2322, "step": 4531 }, { "epoch": 0.82, "learning_rate": 2.4351546448580793e-06, "loss": 0.1979, "step": 4532 }, { "epoch": 0.82, "learning_rate": 2.430345791640895e-06, "loss": 0.1666, "step": 4533 }, { "epoch": 0.82, "learning_rate": 2.425541272689094e-06, "loss": 0.1868, "step": 4534 }, { "epoch": 0.82, "learning_rate": 2.4207410896593697e-06, "loss": 0.2209, "step": 4535 }, { "epoch": 0.82, "learning_rate": 2.4159452442069224e-06, "loss": 0.1501, "step": 4536 }, { "epoch": 0.82, "learning_rate": 2.411153737985448e-06, "loss": 0.1704, "step": 4537 }, { "epoch": 0.82, "learning_rate": 2.4063665726471584e-06, "loss": 0.2138, "step": 4538 }, { "epoch": 0.82, "learning_rate": 2.401583749842763e-06, "loss": 0.1619, "step": 4539 }, { "epoch": 0.82, "learning_rate": 2.396805271221469e-06, "loss": 0.1382, "step": 4540 }, { "epoch": 0.82, "learning_rate": 2.3920311384309918e-06, "loss": 0.2096, "step": 4541 }, { "epoch": 0.82, "learning_rate": 2.387261353117553e-06, "loss": 0.2159, "step": 4542 }, { "epoch": 0.82, "learning_rate": 2.3824959169258587e-06, "loss": 0.1817, "step": 4543 }, { "epoch": 0.82, "learning_rate": 2.3777348314991327e-06, "loss": 0.2298, "step": 4544 }, { "epoch": 0.82, "learning_rate": 2.372978098479088e-06, "loss": 0.2237, "step": 4545 }, { "epoch": 0.82, "learning_rate": 2.3682257195059453e-06, "loss": 0.2141, "step": 4546 }, { "epoch": 0.82, "learning_rate": 2.363477696218413e-06, "loss": 0.2149, "step": 4547 }, { "epoch": 0.82, "learning_rate": 2.3587340302537045e-06, "loss": 0.1877, "step": 4548 }, { "epoch": 0.82, "learning_rate": 2.3539947232475355e-06, "loss": 0.1571, "step": 4549 }, { "epoch": 0.82, "learning_rate": 2.3492597768341033e-06, "loss": 0.1575, "step": 4550 }, { "epoch": 0.83, "learning_rate": 2.3445291926461203e-06, "loss": 0.1815, "step": 4551 }, { "epoch": 0.83, "learning_rate": 2.3398029723147784e-06, "loss": 0.1107, "step": 4552 }, { "epoch": 0.83, "learning_rate": 2.335081117469777e-06, "loss": 0.1881, "step": 4553 }, { "epoch": 0.83, "learning_rate": 2.3303636297392995e-06, "loss": 0.1205, "step": 4554 }, { "epoch": 0.83, "learning_rate": 2.3256505107500336e-06, "loss": 0.1617, "step": 4555 }, { "epoch": 0.83, "learning_rate": 2.3209417621271517e-06, "loss": 0.202, "step": 4556 }, { "epoch": 0.83, "learning_rate": 2.3162373854943313e-06, "loss": 0.132, "step": 4557 }, { "epoch": 0.83, "learning_rate": 2.311537382473727e-06, "loss": 0.2121, "step": 4558 }, { "epoch": 0.83, "learning_rate": 2.306841754685993e-06, "loss": 0.1052, "step": 4559 }, { "epoch": 0.83, "learning_rate": 2.302150503750282e-06, "loss": 0.1725, "step": 4560 }, { "epoch": 0.83, "learning_rate": 2.2974636312842214e-06, "loss": 0.1512, "step": 4561 }, { "epoch": 0.83, "learning_rate": 2.2927811389039394e-06, "loss": 0.204, "step": 4562 }, { "epoch": 0.83, "learning_rate": 2.288103028224055e-06, "loss": 0.1506, "step": 4563 }, { "epoch": 0.83, "learning_rate": 2.2834293008576763e-06, "loss": 0.1393, "step": 4564 }, { "epoch": 0.83, "learning_rate": 2.278759958416387e-06, "loss": 0.1677, "step": 4565 }, { "epoch": 0.83, "learning_rate": 2.274095002510276e-06, "loss": 0.1958, "step": 4566 }, { "epoch": 0.83, "learning_rate": 2.269434434747914e-06, "loss": 0.1548, "step": 4567 }, { "epoch": 0.83, "learning_rate": 2.2647782567363497e-06, "loss": 0.1975, "step": 4568 }, { "epoch": 0.83, "learning_rate": 2.2601264700811304e-06, "loss": 0.2076, "step": 4569 }, { "epoch": 0.83, "learning_rate": 2.2554790763862866e-06, "loss": 0.2212, "step": 4570 }, { "epoch": 0.83, "learning_rate": 2.250836077254323e-06, "loss": 0.1958, "step": 4571 }, { "epoch": 0.83, "learning_rate": 2.246197474286244e-06, "loss": 0.1667, "step": 4572 }, { "epoch": 0.83, "learning_rate": 2.2415632690815285e-06, "loss": 0.1547, "step": 4573 }, { "epoch": 0.83, "learning_rate": 2.2369334632381485e-06, "loss": 0.1362, "step": 4574 }, { "epoch": 0.83, "learning_rate": 2.2323080583525444e-06, "loss": 0.1938, "step": 4575 }, { "epoch": 0.83, "learning_rate": 2.2276870560196515e-06, "loss": 0.1736, "step": 4576 }, { "epoch": 0.83, "learning_rate": 2.2230704578328848e-06, "loss": 0.2833, "step": 4577 }, { "epoch": 0.83, "learning_rate": 2.218458265384132e-06, "loss": 0.2458, "step": 4578 }, { "epoch": 0.83, "learning_rate": 2.2138504802637737e-06, "loss": 0.1315, "step": 4579 }, { "epoch": 0.83, "learning_rate": 2.2092471040606634e-06, "loss": 0.1387, "step": 4580 }, { "epoch": 0.83, "learning_rate": 2.204648138362141e-06, "loss": 0.1998, "step": 4581 }, { "epoch": 0.83, "learning_rate": 2.2000535847540115e-06, "loss": 0.1575, "step": 4582 }, { "epoch": 0.83, "learning_rate": 2.1954634448205745e-06, "loss": 0.1423, "step": 4583 }, { "epoch": 0.83, "learning_rate": 2.190877720144602e-06, "loss": 0.2144, "step": 4584 }, { "epoch": 0.83, "learning_rate": 2.186296412307338e-06, "loss": 0.1431, "step": 4585 }, { "epoch": 0.83, "learning_rate": 2.1817195228885084e-06, "loss": 0.1718, "step": 4586 }, { "epoch": 0.83, "learning_rate": 2.1771470534663223e-06, "loss": 0.1738, "step": 4587 }, { "epoch": 0.83, "learning_rate": 2.1725790056174473e-06, "loss": 0.2012, "step": 4588 }, { "epoch": 0.83, "learning_rate": 2.168015380917043e-06, "loss": 0.1388, "step": 4589 }, { "epoch": 0.83, "learning_rate": 2.1634561809387336e-06, "loss": 0.1329, "step": 4590 }, { "epoch": 0.83, "learning_rate": 2.1589014072546294e-06, "loss": 0.1938, "step": 4591 }, { "epoch": 0.83, "learning_rate": 2.1543510614352978e-06, "loss": 0.2899, "step": 4592 }, { "epoch": 0.83, "learning_rate": 2.1498051450497895e-06, "loss": 0.1497, "step": 4593 }, { "epoch": 0.83, "learning_rate": 2.1452636596656324e-06, "loss": 0.1121, "step": 4594 }, { "epoch": 0.83, "learning_rate": 2.140726606848813e-06, "loss": 0.1523, "step": 4595 }, { "epoch": 0.83, "learning_rate": 2.1361939881638003e-06, "loss": 0.1866, "step": 4596 }, { "epoch": 0.83, "learning_rate": 2.1316658051735327e-06, "loss": 0.1848, "step": 4597 }, { "epoch": 0.83, "learning_rate": 2.1271420594394125e-06, "loss": 0.2035, "step": 4598 }, { "epoch": 0.83, "learning_rate": 2.1226227525213206e-06, "loss": 0.1208, "step": 4599 }, { "epoch": 0.83, "learning_rate": 2.1181078859776033e-06, "loss": 0.1851, "step": 4600 }, { "epoch": 0.83, "learning_rate": 2.113597461365072e-06, "loss": 0.166, "step": 4601 }, { "epoch": 0.83, "learning_rate": 2.109091480239018e-06, "loss": 0.1356, "step": 4602 }, { "epoch": 0.83, "learning_rate": 2.1045899441531835e-06, "loss": 0.1496, "step": 4603 }, { "epoch": 0.83, "learning_rate": 2.1000928546597897e-06, "loss": 0.1604, "step": 4604 }, { "epoch": 0.83, "learning_rate": 2.0956002133095297e-06, "loss": 0.1984, "step": 4605 }, { "epoch": 0.84, "learning_rate": 2.0911120216515446e-06, "loss": 0.1819, "step": 4606 }, { "epoch": 0.84, "learning_rate": 2.086628281233456e-06, "loss": 0.1773, "step": 4607 }, { "epoch": 0.84, "learning_rate": 2.0821489936013503e-06, "loss": 0.1773, "step": 4608 }, { "epoch": 0.84, "learning_rate": 2.077674160299766e-06, "loss": 0.1573, "step": 4609 }, { "epoch": 0.84, "learning_rate": 2.0732037828717204e-06, "loss": 0.1691, "step": 4610 }, { "epoch": 0.84, "learning_rate": 2.0687378628586865e-06, "loss": 0.1707, "step": 4611 }, { "epoch": 0.84, "learning_rate": 2.064276401800605e-06, "loss": 0.166, "step": 4612 }, { "epoch": 0.84, "learning_rate": 2.059819401235868e-06, "loss": 0.1381, "step": 4613 }, { "epoch": 0.84, "learning_rate": 2.055366862701343e-06, "loss": 0.1793, "step": 4614 }, { "epoch": 0.84, "learning_rate": 2.0509187877323563e-06, "loss": 0.1936, "step": 4615 }, { "epoch": 0.84, "learning_rate": 2.046475177862684e-06, "loss": 0.1282, "step": 4616 }, { "epoch": 0.84, "learning_rate": 2.042036034624576e-06, "loss": 0.1799, "step": 4617 }, { "epoch": 0.84, "learning_rate": 2.0376013595487348e-06, "loss": 0.1522, "step": 4618 }, { "epoch": 0.84, "learning_rate": 2.0331711541643287e-06, "loss": 0.1898, "step": 4619 }, { "epoch": 0.84, "learning_rate": 2.028745419998973e-06, "loss": 0.1995, "step": 4620 }, { "epoch": 0.84, "learning_rate": 2.0243241585787514e-06, "loss": 0.1757, "step": 4621 }, { "epoch": 0.84, "learning_rate": 2.019907371428208e-06, "loss": 0.1826, "step": 4622 }, { "epoch": 0.84, "learning_rate": 2.0154950600703278e-06, "loss": 0.2391, "step": 4623 }, { "epoch": 0.84, "learning_rate": 2.011087226026568e-06, "loss": 0.2349, "step": 4624 }, { "epoch": 0.84, "learning_rate": 2.0066838708168412e-06, "loss": 0.1543, "step": 4625 }, { "epoch": 0.84, "learning_rate": 2.0022849959595047e-06, "loss": 0.156, "step": 4626 }, { "epoch": 0.84, "learning_rate": 1.997890602971379e-06, "loss": 0.1471, "step": 4627 }, { "epoch": 0.84, "learning_rate": 1.99350069336774e-06, "loss": 0.1301, "step": 4628 }, { "epoch": 0.84, "learning_rate": 1.9891152686623176e-06, "loss": 0.2147, "step": 4629 }, { "epoch": 0.84, "learning_rate": 1.984734330367285e-06, "loss": 0.2011, "step": 4630 }, { "epoch": 0.84, "learning_rate": 1.980357879993282e-06, "loss": 0.1586, "step": 4631 }, { "epoch": 0.84, "learning_rate": 1.9759859190493986e-06, "loss": 0.1709, "step": 4632 }, { "epoch": 0.84, "learning_rate": 1.971618449043165e-06, "loss": 0.125, "step": 4633 }, { "epoch": 0.84, "learning_rate": 1.9672554714805753e-06, "loss": 0.1536, "step": 4634 }, { "epoch": 0.84, "learning_rate": 1.962896987866071e-06, "loss": 0.1443, "step": 4635 }, { "epoch": 0.84, "learning_rate": 1.9585429997025446e-06, "loss": 0.1856, "step": 4636 }, { "epoch": 0.84, "learning_rate": 1.9541935084913338e-06, "loss": 0.1859, "step": 4637 }, { "epoch": 0.84, "learning_rate": 1.9498485157322306e-06, "loss": 0.1644, "step": 4638 }, { "epoch": 0.84, "learning_rate": 1.9455080229234797e-06, "loss": 0.1168, "step": 4639 }, { "epoch": 0.84, "learning_rate": 1.9411720315617586e-06, "loss": 0.1176, "step": 4640 }, { "epoch": 0.84, "learning_rate": 1.9368405431422105e-06, "loss": 0.1866, "step": 4641 }, { "epoch": 0.84, "learning_rate": 1.932513559158418e-06, "loss": 0.1959, "step": 4642 }, { "epoch": 0.84, "learning_rate": 1.928191081102407e-06, "loss": 0.1823, "step": 4643 }, { "epoch": 0.84, "learning_rate": 1.923873110464654e-06, "loss": 0.2593, "step": 4644 }, { "epoch": 0.84, "learning_rate": 1.9195596487340823e-06, "loss": 0.163, "step": 4645 }, { "epoch": 0.84, "learning_rate": 1.9152506973980617e-06, "loss": 0.1619, "step": 4646 }, { "epoch": 0.84, "learning_rate": 1.9109462579423985e-06, "loss": 0.1638, "step": 4647 }, { "epoch": 0.84, "learning_rate": 1.9066463318513493e-06, "loss": 0.242, "step": 4648 }, { "epoch": 0.84, "learning_rate": 1.9023509206076195e-06, "loss": 0.1446, "step": 4649 }, { "epoch": 0.84, "learning_rate": 1.8980600256923474e-06, "loss": 0.1503, "step": 4650 }, { "epoch": 0.84, "learning_rate": 1.8937736485851148e-06, "loss": 0.1678, "step": 4651 }, { "epoch": 0.84, "learning_rate": 1.8894917907639542e-06, "loss": 0.1254, "step": 4652 }, { "epoch": 0.84, "learning_rate": 1.8852144537053362e-06, "loss": 0.1815, "step": 4653 }, { "epoch": 0.84, "learning_rate": 1.880941638884166e-06, "loss": 0.1935, "step": 4654 }, { "epoch": 0.84, "learning_rate": 1.876673347773797e-06, "loss": 0.1953, "step": 4655 }, { "epoch": 0.84, "learning_rate": 1.8724095818460197e-06, "loss": 0.1551, "step": 4656 }, { "epoch": 0.84, "learning_rate": 1.86815034257107e-06, "loss": 0.1412, "step": 4657 }, { "epoch": 0.84, "learning_rate": 1.8638956314176114e-06, "loss": 0.1698, "step": 4658 }, { "epoch": 0.84, "learning_rate": 1.8596454498527538e-06, "loss": 0.1373, "step": 4659 }, { "epoch": 0.84, "learning_rate": 1.8553997993420495e-06, "loss": 0.2223, "step": 4660 }, { "epoch": 0.84, "learning_rate": 1.851158681349473e-06, "loss": 0.2346, "step": 4661 }, { "epoch": 0.85, "learning_rate": 1.8469220973374523e-06, "loss": 0.1465, "step": 4662 }, { "epoch": 0.85, "learning_rate": 1.8426900487668474e-06, "loss": 0.186, "step": 4663 }, { "epoch": 0.85, "learning_rate": 1.838462537096946e-06, "loss": 0.1846, "step": 4664 }, { "epoch": 0.85, "learning_rate": 1.8342395637854786e-06, "loss": 0.1627, "step": 4665 }, { "epoch": 0.85, "learning_rate": 1.8300211302886138e-06, "loss": 0.1621, "step": 4666 }, { "epoch": 0.85, "learning_rate": 1.825807238060953e-06, "loss": 0.2128, "step": 4667 }, { "epoch": 0.85, "learning_rate": 1.8215978885555207e-06, "loss": 0.1394, "step": 4668 }, { "epoch": 0.85, "learning_rate": 1.817393083223789e-06, "loss": 0.1186, "step": 4669 }, { "epoch": 0.85, "learning_rate": 1.8131928235156613e-06, "loss": 0.138, "step": 4670 }, { "epoch": 0.85, "learning_rate": 1.8089971108794644e-06, "loss": 0.1591, "step": 4671 }, { "epoch": 0.85, "learning_rate": 1.8048059467619644e-06, "loss": 0.205, "step": 4672 }, { "epoch": 0.85, "learning_rate": 1.8006193326083598e-06, "loss": 0.1786, "step": 4673 }, { "epoch": 0.85, "learning_rate": 1.7964372698622777e-06, "loss": 0.2011, "step": 4674 }, { "epoch": 0.85, "learning_rate": 1.792259759965773e-06, "loss": 0.1076, "step": 4675 }, { "epoch": 0.85, "learning_rate": 1.7880868043593352e-06, "loss": 0.1822, "step": 4676 }, { "epoch": 0.85, "learning_rate": 1.7839184044818862e-06, "loss": 0.128, "step": 4677 }, { "epoch": 0.85, "learning_rate": 1.7797545617707666e-06, "loss": 0.2143, "step": 4678 }, { "epoch": 0.85, "learning_rate": 1.7755952776617529e-06, "loss": 0.1731, "step": 4679 }, { "epoch": 0.85, "learning_rate": 1.7714405535890544e-06, "loss": 0.1757, "step": 4680 }, { "epoch": 0.85, "learning_rate": 1.7672903909852917e-06, "loss": 0.2413, "step": 4681 }, { "epoch": 0.85, "learning_rate": 1.76314479128153e-06, "loss": 0.1657, "step": 4682 }, { "epoch": 0.85, "learning_rate": 1.7590037559072546e-06, "loss": 0.1635, "step": 4683 }, { "epoch": 0.85, "learning_rate": 1.7548672862903765e-06, "loss": 0.1339, "step": 4684 }, { "epoch": 0.85, "learning_rate": 1.7507353838572259e-06, "loss": 0.218, "step": 4685 }, { "epoch": 0.85, "learning_rate": 1.7466080500325704e-06, "loss": 0.1351, "step": 4686 }, { "epoch": 0.85, "learning_rate": 1.742485286239598e-06, "loss": 0.1749, "step": 4687 }, { "epoch": 0.85, "learning_rate": 1.738367093899913e-06, "loss": 0.173, "step": 4688 }, { "epoch": 0.85, "learning_rate": 1.7342534744335513e-06, "loss": 0.1743, "step": 4689 }, { "epoch": 0.85, "learning_rate": 1.730144429258972e-06, "loss": 0.1712, "step": 4690 }, { "epoch": 0.85, "learning_rate": 1.7260399597930592e-06, "loss": 0.115, "step": 4691 }, { "epoch": 0.85, "learning_rate": 1.7219400674511066e-06, "loss": 0.1083, "step": 4692 }, { "epoch": 0.85, "learning_rate": 1.7178447536468428e-06, "loss": 0.1492, "step": 4693 }, { "epoch": 0.85, "learning_rate": 1.7137540197924152e-06, "loss": 0.1229, "step": 4694 }, { "epoch": 0.85, "learning_rate": 1.7096678672983834e-06, "loss": 0.1981, "step": 4695 }, { "epoch": 0.85, "learning_rate": 1.7055862975737373e-06, "loss": 0.146, "step": 4696 }, { "epoch": 0.85, "learning_rate": 1.7015093120258862e-06, "loss": 0.1451, "step": 4697 }, { "epoch": 0.85, "learning_rate": 1.6974369120606504e-06, "loss": 0.1838, "step": 4698 }, { "epoch": 0.85, "learning_rate": 1.6933690990822765e-06, "loss": 0.1991, "step": 4699 }, { "epoch": 0.85, "learning_rate": 1.6893058744934242e-06, "loss": 0.156, "step": 4700 }, { "epoch": 0.85, "learning_rate": 1.6852472396951747e-06, "loss": 0.1918, "step": 4701 }, { "epoch": 0.85, "learning_rate": 1.681193196087028e-06, "loss": 0.2163, "step": 4702 }, { "epoch": 0.85, "learning_rate": 1.6771437450668946e-06, "loss": 0.2041, "step": 4703 }, { "epoch": 0.85, "learning_rate": 1.6730988880311076e-06, "loss": 0.154, "step": 4704 }, { "epoch": 0.85, "learning_rate": 1.6690586263744179e-06, "loss": 0.1748, "step": 4705 }, { "epoch": 0.85, "learning_rate": 1.6650229614899792e-06, "loss": 0.1791, "step": 4706 }, { "epoch": 0.85, "learning_rate": 1.660991894769372e-06, "loss": 0.2095, "step": 4707 }, { "epoch": 0.85, "learning_rate": 1.6569654276025926e-06, "loss": 0.1546, "step": 4708 }, { "epoch": 0.85, "learning_rate": 1.6529435613780387e-06, "loss": 0.1714, "step": 4709 }, { "epoch": 0.85, "learning_rate": 1.6489262974825326e-06, "loss": 0.1849, "step": 4710 }, { "epoch": 0.85, "learning_rate": 1.6449136373013063e-06, "loss": 0.1964, "step": 4711 }, { "epoch": 0.85, "learning_rate": 1.6409055822180097e-06, "loss": 0.1207, "step": 4712 }, { "epoch": 0.85, "learning_rate": 1.6369021336146905e-06, "loss": 0.2058, "step": 4713 }, { "epoch": 0.85, "learning_rate": 1.6329032928718213e-06, "loss": 0.176, "step": 4714 }, { "epoch": 0.85, "learning_rate": 1.6289090613682856e-06, "loss": 0.2211, "step": 4715 }, { "epoch": 0.85, "learning_rate": 1.6249194404813632e-06, "loss": 0.1918, "step": 4716 }, { "epoch": 0.86, "learning_rate": 1.6209344315867625e-06, "loss": 0.1297, "step": 4717 }, { "epoch": 0.86, "learning_rate": 1.6169540360585904e-06, "loss": 0.3009, "step": 4718 }, { "epoch": 0.86, "learning_rate": 1.6129782552693695e-06, "loss": 0.142, "step": 4719 }, { "epoch": 0.86, "learning_rate": 1.6090070905900229e-06, "loss": 0.2146, "step": 4720 }, { "epoch": 0.86, "learning_rate": 1.6050405433898885e-06, "loss": 0.1458, "step": 4721 }, { "epoch": 0.86, "learning_rate": 1.6010786150367124e-06, "loss": 0.1482, "step": 4722 }, { "epoch": 0.86, "learning_rate": 1.5971213068966417e-06, "loss": 0.1948, "step": 4723 }, { "epoch": 0.86, "learning_rate": 1.593168620334235e-06, "loss": 0.1416, "step": 4724 }, { "epoch": 0.86, "learning_rate": 1.5892205567124635e-06, "loss": 0.158, "step": 4725 }, { "epoch": 0.86, "learning_rate": 1.5852771173926883e-06, "loss": 0.1511, "step": 4726 }, { "epoch": 0.86, "learning_rate": 1.58133830373469e-06, "loss": 0.23, "step": 4727 }, { "epoch": 0.86, "learning_rate": 1.5774041170966491e-06, "loss": 0.1727, "step": 4728 }, { "epoch": 0.86, "learning_rate": 1.5734745588351534e-06, "loss": 0.1375, "step": 4729 }, { "epoch": 0.86, "learning_rate": 1.5695496303051877e-06, "loss": 0.1679, "step": 4730 }, { "epoch": 0.86, "learning_rate": 1.5656293328601473e-06, "loss": 0.1637, "step": 4731 }, { "epoch": 0.86, "learning_rate": 1.5617136678518296e-06, "loss": 0.1407, "step": 4732 }, { "epoch": 0.86, "learning_rate": 1.5578026366304292e-06, "loss": 0.1928, "step": 4733 }, { "epoch": 0.86, "learning_rate": 1.5538962405445511e-06, "loss": 0.1301, "step": 4734 }, { "epoch": 0.86, "learning_rate": 1.549994480941196e-06, "loss": 0.1299, "step": 4735 }, { "epoch": 0.86, "learning_rate": 1.5460973591657707e-06, "loss": 0.163, "step": 4736 }, { "epoch": 0.86, "learning_rate": 1.5422048765620738e-06, "loss": 0.2097, "step": 4737 }, { "epoch": 0.86, "learning_rate": 1.5383170344723152e-06, "loss": 0.1669, "step": 4738 }, { "epoch": 0.86, "learning_rate": 1.5344338342371006e-06, "loss": 0.2162, "step": 4739 }, { "epoch": 0.86, "learning_rate": 1.530555277195429e-06, "loss": 0.1355, "step": 4740 }, { "epoch": 0.86, "learning_rate": 1.5266813646847072e-06, "loss": 0.2161, "step": 4741 }, { "epoch": 0.86, "learning_rate": 1.52281209804074e-06, "loss": 0.1691, "step": 4742 }, { "epoch": 0.86, "learning_rate": 1.5189474785977203e-06, "loss": 0.1962, "step": 4743 }, { "epoch": 0.86, "learning_rate": 1.515087507688247e-06, "loss": 0.1389, "step": 4744 }, { "epoch": 0.86, "learning_rate": 1.5112321866433172e-06, "loss": 0.181, "step": 4745 }, { "epoch": 0.86, "learning_rate": 1.507381516792322e-06, "loss": 0.2121, "step": 4746 }, { "epoch": 0.86, "learning_rate": 1.5035354994630452e-06, "loss": 0.2301, "step": 4747 }, { "epoch": 0.86, "learning_rate": 1.4996941359816718e-06, "loss": 0.2015, "step": 4748 }, { "epoch": 0.86, "learning_rate": 1.4958574276727826e-06, "loss": 0.2258, "step": 4749 }, { "epoch": 0.86, "learning_rate": 1.4920253758593462e-06, "loss": 0.1673, "step": 4750 }, { "epoch": 0.86, "learning_rate": 1.4881979818627301e-06, "loss": 0.2025, "step": 4751 }, { "epoch": 0.86, "learning_rate": 1.4843752470026955e-06, "loss": 0.1407, "step": 4752 }, { "epoch": 0.86, "learning_rate": 1.4805571725973998e-06, "loss": 0.1927, "step": 4753 }, { "epoch": 0.86, "learning_rate": 1.4767437599633882e-06, "loss": 0.1702, "step": 4754 }, { "epoch": 0.86, "learning_rate": 1.4729350104156004e-06, "loss": 0.1721, "step": 4755 }, { "epoch": 0.86, "learning_rate": 1.469130925267369e-06, "loss": 0.1555, "step": 4756 }, { "epoch": 0.86, "learning_rate": 1.46533150583042e-06, "loss": 0.2699, "step": 4757 }, { "epoch": 0.86, "learning_rate": 1.4615367534148654e-06, "loss": 0.1715, "step": 4758 }, { "epoch": 0.86, "learning_rate": 1.4577466693292136e-06, "loss": 0.1602, "step": 4759 }, { "epoch": 0.86, "learning_rate": 1.4539612548803605e-06, "loss": 0.1539, "step": 4760 }, { "epoch": 0.86, "learning_rate": 1.4501805113735873e-06, "loss": 0.1927, "step": 4761 }, { "epoch": 0.86, "learning_rate": 1.4464044401125726e-06, "loss": 0.1357, "step": 4762 }, { "epoch": 0.86, "learning_rate": 1.4426330423993834e-06, "loss": 0.1671, "step": 4763 }, { "epoch": 0.86, "learning_rate": 1.438866319534466e-06, "loss": 0.232, "step": 4764 }, { "epoch": 0.86, "learning_rate": 1.4351042728166648e-06, "loss": 0.1733, "step": 4765 }, { "epoch": 0.86, "learning_rate": 1.4313469035432054e-06, "loss": 0.1211, "step": 4766 }, { "epoch": 0.86, "learning_rate": 1.4275942130097097e-06, "loss": 0.1348, "step": 4767 }, { "epoch": 0.86, "learning_rate": 1.4238462025101696e-06, "loss": 0.127, "step": 4768 }, { "epoch": 0.86, "learning_rate": 1.4201028733369793e-06, "loss": 0.2044, "step": 4769 }, { "epoch": 0.86, "learning_rate": 1.4163642267809146e-06, "loss": 0.2009, "step": 4770 }, { "epoch": 0.86, "learning_rate": 1.4126302641311295e-06, "loss": 0.1576, "step": 4771 }, { "epoch": 0.87, "learning_rate": 1.4089009866751705e-06, "loss": 0.2547, "step": 4772 }, { "epoch": 0.87, "learning_rate": 1.405176395698966e-06, "loss": 0.1843, "step": 4773 }, { "epoch": 0.87, "learning_rate": 1.4014564924868334e-06, "loss": 0.1122, "step": 4774 }, { "epoch": 0.87, "learning_rate": 1.39774127832146e-06, "loss": 0.1482, "step": 4775 }, { "epoch": 0.87, "learning_rate": 1.3940307544839293e-06, "loss": 0.1255, "step": 4776 }, { "epoch": 0.87, "learning_rate": 1.3903249222537078e-06, "loss": 0.1601, "step": 4777 }, { "epoch": 0.87, "learning_rate": 1.386623782908632e-06, "loss": 0.2326, "step": 4778 }, { "epoch": 0.87, "learning_rate": 1.3829273377249307e-06, "loss": 0.1447, "step": 4779 }, { "epoch": 0.87, "learning_rate": 1.379235587977216e-06, "loss": 0.188, "step": 4780 }, { "epoch": 0.87, "learning_rate": 1.37554853493847e-06, "loss": 0.1387, "step": 4781 }, { "epoch": 0.87, "learning_rate": 1.3718661798800636e-06, "loss": 0.1927, "step": 4782 }, { "epoch": 0.87, "learning_rate": 1.3681885240717445e-06, "loss": 0.1249, "step": 4783 }, { "epoch": 0.87, "learning_rate": 1.3645155687816468e-06, "loss": 0.0915, "step": 4784 }, { "epoch": 0.87, "learning_rate": 1.3608473152762708e-06, "loss": 0.1698, "step": 4785 }, { "epoch": 0.87, "learning_rate": 1.357183764820506e-06, "loss": 0.1572, "step": 4786 }, { "epoch": 0.87, "learning_rate": 1.3535249186776187e-06, "loss": 0.2219, "step": 4787 }, { "epoch": 0.87, "learning_rate": 1.3498707781092485e-06, "loss": 0.1883, "step": 4788 }, { "epoch": 0.87, "learning_rate": 1.3462213443754167e-06, "loss": 0.1629, "step": 4789 }, { "epoch": 0.87, "learning_rate": 1.342576618734519e-06, "loss": 0.1561, "step": 4790 }, { "epoch": 0.87, "learning_rate": 1.3389366024433347e-06, "loss": 0.184, "step": 4791 }, { "epoch": 0.87, "learning_rate": 1.3353012967570033e-06, "loss": 0.1364, "step": 4792 }, { "epoch": 0.87, "learning_rate": 1.331670702929057e-06, "loss": 0.1764, "step": 4793 }, { "epoch": 0.87, "learning_rate": 1.3280448222113978e-06, "loss": 0.1443, "step": 4794 }, { "epoch": 0.87, "learning_rate": 1.3244236558542955e-06, "loss": 0.163, "step": 4795 }, { "epoch": 0.87, "learning_rate": 1.320807205106403e-06, "loss": 0.1338, "step": 4796 }, { "epoch": 0.87, "learning_rate": 1.3171954712147443e-06, "loss": 0.1675, "step": 4797 }, { "epoch": 0.87, "learning_rate": 1.3135884554247186e-06, "loss": 0.1387, "step": 4798 }, { "epoch": 0.87, "learning_rate": 1.3099861589800888e-06, "loss": 0.129, "step": 4799 }, { "epoch": 0.87, "learning_rate": 1.3063885831230032e-06, "loss": 0.1523, "step": 4800 }, { "epoch": 0.87, "learning_rate": 1.302795729093978e-06, "loss": 0.1528, "step": 4801 }, { "epoch": 0.87, "learning_rate": 1.299207598131902e-06, "loss": 0.2068, "step": 4802 }, { "epoch": 0.87, "learning_rate": 1.2956241914740269e-06, "loss": 0.1567, "step": 4803 }, { "epoch": 0.87, "learning_rate": 1.2920455103559863e-06, "loss": 0.2451, "step": 4804 }, { "epoch": 0.87, "learning_rate": 1.2884715560117816e-06, "loss": 0.1217, "step": 4805 }, { "epoch": 0.87, "learning_rate": 1.28490232967378e-06, "loss": 0.1436, "step": 4806 }, { "epoch": 0.87, "learning_rate": 1.2813378325727238e-06, "loss": 0.1623, "step": 4807 }, { "epoch": 0.87, "learning_rate": 1.2777780659377214e-06, "loss": 0.1193, "step": 4808 }, { "epoch": 0.87, "learning_rate": 1.2742230309962494e-06, "loss": 0.2364, "step": 4809 }, { "epoch": 0.87, "learning_rate": 1.2706727289741554e-06, "loss": 0.2167, "step": 4810 }, { "epoch": 0.87, "learning_rate": 1.267127161095652e-06, "loss": 0.1698, "step": 4811 }, { "epoch": 0.87, "learning_rate": 1.263586328583326e-06, "loss": 0.2483, "step": 4812 }, { "epoch": 0.87, "learning_rate": 1.260050232658121e-06, "loss": 0.1957, "step": 4813 }, { "epoch": 0.87, "learning_rate": 1.256518874539353e-06, "loss": 0.138, "step": 4814 }, { "epoch": 0.87, "learning_rate": 1.25299225544471e-06, "loss": 0.168, "step": 4815 }, { "epoch": 0.87, "learning_rate": 1.2494703765902337e-06, "loss": 0.2124, "step": 4816 }, { "epoch": 0.87, "learning_rate": 1.24595323919034e-06, "loss": 0.2003, "step": 4817 }, { "epoch": 0.87, "learning_rate": 1.2424408444578068e-06, "loss": 0.2, "step": 4818 }, { "epoch": 0.87, "learning_rate": 1.23893319360378e-06, "loss": 0.1385, "step": 4819 }, { "epoch": 0.87, "learning_rate": 1.2354302878377644e-06, "loss": 0.1384, "step": 4820 }, { "epoch": 0.87, "learning_rate": 1.23193212836763e-06, "loss": 0.2003, "step": 4821 }, { "epoch": 0.87, "learning_rate": 1.2284387163996153e-06, "loss": 0.1899, "step": 4822 }, { "epoch": 0.87, "learning_rate": 1.2249500531383134e-06, "loss": 0.1802, "step": 4823 }, { "epoch": 0.87, "learning_rate": 1.221466139786687e-06, "loss": 0.1505, "step": 4824 }, { "epoch": 0.87, "learning_rate": 1.217986977546059e-06, "loss": 0.1333, "step": 4825 }, { "epoch": 0.87, "learning_rate": 1.2145125676161107e-06, "loss": 0.0836, "step": 4826 }, { "epoch": 0.88, "learning_rate": 1.211042911194889e-06, "loss": 0.1609, "step": 4827 }, { "epoch": 0.88, "learning_rate": 1.2075780094788002e-06, "loss": 0.168, "step": 4828 }, { "epoch": 0.88, "learning_rate": 1.2041178636626137e-06, "loss": 0.1853, "step": 4829 }, { "epoch": 0.88, "learning_rate": 1.2006624749394485e-06, "loss": 0.1537, "step": 4830 }, { "epoch": 0.88, "learning_rate": 1.197211844500798e-06, "loss": 0.1361, "step": 4831 }, { "epoch": 0.88, "learning_rate": 1.1937659735365069e-06, "loss": 0.1569, "step": 4832 }, { "epoch": 0.88, "learning_rate": 1.1903248632347752e-06, "loss": 0.1847, "step": 4833 }, { "epoch": 0.88, "learning_rate": 1.18688851478217e-06, "loss": 0.1699, "step": 4834 }, { "epoch": 0.88, "learning_rate": 1.1834569293636138e-06, "loss": 0.1555, "step": 4835 }, { "epoch": 0.88, "learning_rate": 1.18003010816238e-06, "loss": 0.1679, "step": 4836 }, { "epoch": 0.88, "learning_rate": 1.1766080523601047e-06, "loss": 0.204, "step": 4837 }, { "epoch": 0.88, "learning_rate": 1.1731907631367845e-06, "loss": 0.2635, "step": 4838 }, { "epoch": 0.88, "learning_rate": 1.169778241670768e-06, "loss": 0.172, "step": 4839 }, { "epoch": 0.88, "learning_rate": 1.166370489138756e-06, "loss": 0.1534, "step": 4840 }, { "epoch": 0.88, "learning_rate": 1.1629675067158119e-06, "loss": 0.0901, "step": 4841 }, { "epoch": 0.88, "learning_rate": 1.1595692955753518e-06, "loss": 0.1777, "step": 4842 }, { "epoch": 0.88, "learning_rate": 1.1561758568891433e-06, "loss": 0.175, "step": 4843 }, { "epoch": 0.88, "learning_rate": 1.1527871918273142e-06, "loss": 0.1388, "step": 4844 }, { "epoch": 0.88, "learning_rate": 1.149403301558341e-06, "loss": 0.1076, "step": 4845 }, { "epoch": 0.88, "learning_rate": 1.146024187249059e-06, "loss": 0.1393, "step": 4846 }, { "epoch": 0.88, "learning_rate": 1.142649850064651e-06, "loss": 0.1342, "step": 4847 }, { "epoch": 0.88, "learning_rate": 1.1392802911686561e-06, "loss": 0.1664, "step": 4848 }, { "epoch": 0.88, "learning_rate": 1.1359155117229636e-06, "loss": 0.1229, "step": 4849 }, { "epoch": 0.88, "learning_rate": 1.1325555128878184e-06, "loss": 0.2216, "step": 4850 }, { "epoch": 0.88, "learning_rate": 1.1292002958218124e-06, "loss": 0.2493, "step": 4851 }, { "epoch": 0.88, "learning_rate": 1.1258498616818897e-06, "loss": 0.1536, "step": 4852 }, { "epoch": 0.88, "learning_rate": 1.1225042116233514e-06, "loss": 0.197, "step": 4853 }, { "epoch": 0.88, "learning_rate": 1.119163346799838e-06, "loss": 0.1604, "step": 4854 }, { "epoch": 0.88, "learning_rate": 1.1158272683633496e-06, "loss": 0.127, "step": 4855 }, { "epoch": 0.88, "learning_rate": 1.1124959774642296e-06, "loss": 0.1601, "step": 4856 }, { "epoch": 0.88, "learning_rate": 1.1091694752511756e-06, "loss": 0.1349, "step": 4857 }, { "epoch": 0.88, "learning_rate": 1.1058477628712283e-06, "loss": 0.1303, "step": 4858 }, { "epoch": 0.88, "learning_rate": 1.1025308414697816e-06, "loss": 0.1458, "step": 4859 }, { "epoch": 0.88, "learning_rate": 1.099218712190579e-06, "loss": 0.1733, "step": 4860 }, { "epoch": 0.88, "learning_rate": 1.0959113761757e-06, "loss": 0.2176, "step": 4861 }, { "epoch": 0.88, "learning_rate": 1.0926088345655861e-06, "loss": 0.1835, "step": 4862 }, { "epoch": 0.88, "learning_rate": 1.0893110884990177e-06, "loss": 0.2353, "step": 4863 }, { "epoch": 0.88, "learning_rate": 1.0860181391131203e-06, "loss": 0.1756, "step": 4864 }, { "epoch": 0.88, "learning_rate": 1.0827299875433704e-06, "loss": 0.2283, "step": 4865 }, { "epoch": 0.88, "learning_rate": 1.0794466349235865e-06, "loss": 0.1415, "step": 4866 }, { "epoch": 0.88, "learning_rate": 1.076168082385936e-06, "loss": 0.2084, "step": 4867 }, { "epoch": 0.88, "learning_rate": 1.0728943310609262e-06, "loss": 0.2618, "step": 4868 }, { "epoch": 0.88, "learning_rate": 1.069625382077411e-06, "loss": 0.177, "step": 4869 }, { "epoch": 0.88, "learning_rate": 1.066361236562592e-06, "loss": 0.2286, "step": 4870 }, { "epoch": 0.88, "learning_rate": 1.0631018956420053e-06, "loss": 0.1663, "step": 4871 }, { "epoch": 0.88, "learning_rate": 1.0598473604395403e-06, "loss": 0.225, "step": 4872 }, { "epoch": 0.88, "learning_rate": 1.0565976320774246e-06, "loss": 0.1709, "step": 4873 }, { "epoch": 0.88, "learning_rate": 1.0533527116762298e-06, "loss": 0.2264, "step": 4874 }, { "epoch": 0.88, "learning_rate": 1.050112600354866e-06, "loss": 0.2037, "step": 4875 }, { "epoch": 0.88, "learning_rate": 1.046877299230588e-06, "loss": 0.1956, "step": 4876 }, { "epoch": 0.88, "learning_rate": 1.0436468094189966e-06, "loss": 0.2043, "step": 4877 }, { "epoch": 0.88, "learning_rate": 1.0404211320340224e-06, "loss": 0.1109, "step": 4878 }, { "epoch": 0.88, "learning_rate": 1.0372002681879438e-06, "loss": 0.1772, "step": 4879 }, { "epoch": 0.88, "learning_rate": 1.0339842189913812e-06, "loss": 0.1212, "step": 4880 }, { "epoch": 0.88, "learning_rate": 1.0307729855532888e-06, "loss": 0.2067, "step": 4881 }, { "epoch": 0.89, "learning_rate": 1.027566568980966e-06, "loss": 0.2, "step": 4882 }, { "epoch": 0.89, "learning_rate": 1.0243649703800468e-06, "loss": 0.1802, "step": 4883 }, { "epoch": 0.89, "learning_rate": 1.0211681908545079e-06, "loss": 0.1721, "step": 4884 }, { "epoch": 0.89, "learning_rate": 1.0179762315066592e-06, "loss": 0.1677, "step": 4885 }, { "epoch": 0.89, "learning_rate": 1.014789093437154e-06, "loss": 0.1397, "step": 4886 }, { "epoch": 0.89, "learning_rate": 1.011606777744981e-06, "loss": 0.1777, "step": 4887 }, { "epoch": 0.89, "learning_rate": 1.0084292855274614e-06, "loss": 0.1731, "step": 4888 }, { "epoch": 0.89, "learning_rate": 1.0052566178802597e-06, "loss": 0.1385, "step": 4889 }, { "epoch": 0.89, "learning_rate": 1.0020887758973746e-06, "loss": 0.2015, "step": 4890 }, { "epoch": 0.89, "learning_rate": 9.989257606711437e-07, "loss": 0.1702, "step": 4891 }, { "epoch": 0.89, "learning_rate": 9.957675732922334e-07, "loss": 0.1658, "step": 4892 }, { "epoch": 0.89, "learning_rate": 9.926142148496487e-07, "loss": 0.1515, "step": 4893 }, { "epoch": 0.89, "learning_rate": 9.894656864307332e-07, "loss": 0.1924, "step": 4894 }, { "epoch": 0.89, "learning_rate": 9.86321989121159e-07, "loss": 0.1986, "step": 4895 }, { "epoch": 0.89, "learning_rate": 9.83183124004935e-07, "loss": 0.1718, "step": 4896 }, { "epoch": 0.89, "learning_rate": 9.80049092164409e-07, "loss": 0.1319, "step": 4897 }, { "epoch": 0.89, "learning_rate": 9.769198946802516e-07, "loss": 0.2162, "step": 4898 }, { "epoch": 0.89, "learning_rate": 9.737955326314713e-07, "loss": 0.1092, "step": 4899 }, { "epoch": 0.89, "learning_rate": 9.70676007095413e-07, "loss": 0.1723, "step": 4900 }, { "epoch": 0.89, "learning_rate": 9.675613191477529e-07, "loss": 0.153, "step": 4901 }, { "epoch": 0.89, "learning_rate": 9.6445146986249e-07, "loss": 0.181, "step": 4902 }, { "epoch": 0.89, "learning_rate": 9.61346460311967e-07, "loss": 0.1851, "step": 4903 }, { "epoch": 0.89, "learning_rate": 9.582462915668521e-07, "loss": 0.1502, "step": 4904 }, { "epoch": 0.89, "learning_rate": 9.551509646961471e-07, "loss": 0.2075, "step": 4905 }, { "epoch": 0.89, "learning_rate": 9.520604807671745e-07, "loss": 0.1747, "step": 4906 }, { "epoch": 0.89, "learning_rate": 9.489748408456005e-07, "loss": 0.1427, "step": 4907 }, { "epoch": 0.89, "learning_rate": 9.458940459954158e-07, "loss": 0.1932, "step": 4908 }, { "epoch": 0.89, "learning_rate": 9.428180972789336e-07, "loss": 0.219, "step": 4909 }, { "epoch": 0.89, "learning_rate": 9.397469957568056e-07, "loss": 0.1562, "step": 4910 }, { "epoch": 0.89, "learning_rate": 9.366807424880081e-07, "loss": 0.2723, "step": 4911 }, { "epoch": 0.89, "learning_rate": 9.33619338529847e-07, "loss": 0.153, "step": 4912 }, { "epoch": 0.89, "learning_rate": 9.305627849379528e-07, "loss": 0.2502, "step": 4913 }, { "epoch": 0.89, "learning_rate": 9.275110827662841e-07, "loss": 0.1378, "step": 4914 }, { "epoch": 0.89, "learning_rate": 9.244642330671343e-07, "loss": 0.1946, "step": 4915 }, { "epoch": 0.89, "learning_rate": 9.214222368911112e-07, "loss": 0.1467, "step": 4916 }, { "epoch": 0.89, "learning_rate": 9.183850952871575e-07, "loss": 0.2244, "step": 4917 }, { "epoch": 0.89, "learning_rate": 9.153528093025403e-07, "loss": 0.1414, "step": 4918 }, { "epoch": 0.89, "learning_rate": 9.123253799828518e-07, "loss": 0.2351, "step": 4919 }, { "epoch": 0.89, "learning_rate": 9.093028083720068e-07, "loss": 0.1774, "step": 4920 }, { "epoch": 0.89, "learning_rate": 9.062850955122515e-07, "loss": 0.2222, "step": 4921 }, { "epoch": 0.89, "learning_rate": 9.03272242444152e-07, "loss": 0.1506, "step": 4922 }, { "epoch": 0.89, "learning_rate": 9.002642502065989e-07, "loss": 0.1597, "step": 4923 }, { "epoch": 0.89, "learning_rate": 8.972611198368058e-07, "loss": 0.174, "step": 4924 }, { "epoch": 0.89, "learning_rate": 8.942628523703161e-07, "loss": 0.1708, "step": 4925 }, { "epoch": 0.89, "learning_rate": 8.912694488409862e-07, "loss": 0.1551, "step": 4926 }, { "epoch": 0.89, "learning_rate": 8.882809102810052e-07, "loss": 0.1322, "step": 4927 }, { "epoch": 0.89, "learning_rate": 8.852972377208773e-07, "loss": 0.2333, "step": 4928 }, { "epoch": 0.89, "learning_rate": 8.823184321894345e-07, "loss": 0.1458, "step": 4929 }, { "epoch": 0.89, "learning_rate": 8.793444947138252e-07, "loss": 0.1136, "step": 4930 }, { "epoch": 0.89, "learning_rate": 8.763754263195239e-07, "loss": 0.1475, "step": 4931 }, { "epoch": 0.89, "learning_rate": 8.734112280303236e-07, "loss": 0.1969, "step": 4932 }, { "epoch": 0.89, "learning_rate": 8.704519008683381e-07, "loss": 0.1747, "step": 4933 }, { "epoch": 0.89, "learning_rate": 8.674974458540014e-07, "loss": 0.1064, "step": 4934 }, { "epoch": 0.89, "learning_rate": 8.6454786400607e-07, "loss": 0.1801, "step": 4935 }, { "epoch": 0.89, "learning_rate": 8.616031563416154e-07, "loss": 0.2267, "step": 4936 }, { "epoch": 0.9, "learning_rate": 8.586633238760322e-07, "loss": 0.1548, "step": 4937 }, { "epoch": 0.9, "learning_rate": 8.557283676230327e-07, "loss": 0.1964, "step": 4938 }, { "epoch": 0.9, "learning_rate": 8.527982885946522e-07, "loss": 0.2379, "step": 4939 }, { "epoch": 0.9, "learning_rate": 8.498730878012312e-07, "loss": 0.1529, "step": 4940 }, { "epoch": 0.9, "learning_rate": 8.469527662514425e-07, "loss": 0.1452, "step": 4941 }, { "epoch": 0.9, "learning_rate": 8.440373249522726e-07, "loss": 0.117, "step": 4942 }, { "epoch": 0.9, "learning_rate": 8.411267649090171e-07, "loss": 0.1637, "step": 4943 }, { "epoch": 0.9, "learning_rate": 8.382210871252982e-07, "loss": 0.2058, "step": 4944 }, { "epoch": 0.9, "learning_rate": 8.353202926030512e-07, "loss": 0.1216, "step": 4945 }, { "epoch": 0.9, "learning_rate": 8.324243823425292e-07, "loss": 0.1883, "step": 4946 }, { "epoch": 0.9, "learning_rate": 8.29533357342297e-07, "loss": 0.2021, "step": 4947 }, { "epoch": 0.9, "learning_rate": 8.266472185992352e-07, "loss": 0.1487, "step": 4948 }, { "epoch": 0.9, "learning_rate": 8.237659671085446e-07, "loss": 0.2461, "step": 4949 }, { "epoch": 0.9, "learning_rate": 8.20889603863737e-07, "loss": 0.2495, "step": 4950 }, { "epoch": 0.9, "learning_rate": 8.180181298566391e-07, "loss": 0.1886, "step": 4951 }, { "epoch": 0.9, "learning_rate": 8.151515460773906e-07, "loss": 0.2317, "step": 4952 }, { "epoch": 0.9, "learning_rate": 8.122898535144507e-07, "loss": 0.1613, "step": 4953 }, { "epoch": 0.9, "learning_rate": 8.094330531545818e-07, "loss": 0.129, "step": 4954 }, { "epoch": 0.9, "learning_rate": 8.065811459828692e-07, "loss": 0.1515, "step": 4955 }, { "epoch": 0.9, "learning_rate": 8.037341329827047e-07, "loss": 0.1444, "step": 4956 }, { "epoch": 0.9, "learning_rate": 8.008920151357979e-07, "loss": 0.1528, "step": 4957 }, { "epoch": 0.9, "learning_rate": 7.980547934221616e-07, "loss": 0.1234, "step": 4958 }, { "epoch": 0.9, "learning_rate": 7.952224688201315e-07, "loss": 0.1777, "step": 4959 }, { "epoch": 0.9, "learning_rate": 7.92395042306348e-07, "loss": 0.1947, "step": 4960 }, { "epoch": 0.9, "learning_rate": 7.895725148557598e-07, "loss": 0.1221, "step": 4961 }, { "epoch": 0.9, "learning_rate": 7.867548874416347e-07, "loss": 0.1862, "step": 4962 }, { "epoch": 0.9, "learning_rate": 7.839421610355457e-07, "loss": 0.1603, "step": 4963 }, { "epoch": 0.9, "learning_rate": 7.811343366073736e-07, "loss": 0.1971, "step": 4964 }, { "epoch": 0.9, "learning_rate": 7.783314151253157e-07, "loss": 0.1377, "step": 4965 }, { "epoch": 0.9, "learning_rate": 7.755333975558704e-07, "loss": 0.2161, "step": 4966 }, { "epoch": 0.9, "learning_rate": 7.727402848638543e-07, "loss": 0.1744, "step": 4967 }, { "epoch": 0.9, "learning_rate": 7.699520780123837e-07, "loss": 0.1681, "step": 4968 }, { "epoch": 0.9, "learning_rate": 7.671687779628894e-07, "loss": 0.1425, "step": 4969 }, { "epoch": 0.9, "learning_rate": 7.643903856751106e-07, "loss": 0.1788, "step": 4970 }, { "epoch": 0.9, "learning_rate": 7.616169021070874e-07, "loss": 0.1742, "step": 4971 }, { "epoch": 0.9, "learning_rate": 7.588483282151732e-07, "loss": 0.1467, "step": 4972 }, { "epoch": 0.9, "learning_rate": 7.560846649540276e-07, "loss": 0.1146, "step": 4973 }, { "epoch": 0.9, "learning_rate": 7.533259132766201e-07, "loss": 0.1902, "step": 4974 }, { "epoch": 0.9, "learning_rate": 7.505720741342165e-07, "loss": 0.144, "step": 4975 }, { "epoch": 0.9, "learning_rate": 7.47823148476397e-07, "loss": 0.1695, "step": 4976 }, { "epoch": 0.9, "learning_rate": 7.450791372510485e-07, "loss": 0.1547, "step": 4977 }, { "epoch": 0.9, "learning_rate": 7.423400414043557e-07, "loss": 0.1613, "step": 4978 }, { "epoch": 0.9, "learning_rate": 7.396058618808166e-07, "loss": 0.1377, "step": 4979 }, { "epoch": 0.9, "learning_rate": 7.368765996232301e-07, "loss": 0.2366, "step": 4980 }, { "epoch": 0.9, "learning_rate": 7.341522555726971e-07, "loss": 0.2211, "step": 4981 }, { "epoch": 0.9, "learning_rate": 7.314328306686275e-07, "loss": 0.1437, "step": 4982 }, { "epoch": 0.9, "learning_rate": 7.287183258487312e-07, "loss": 0.1337, "step": 4983 }, { "epoch": 0.9, "learning_rate": 7.26008742049028e-07, "loss": 0.1365, "step": 4984 }, { "epoch": 0.9, "learning_rate": 7.233040802038304e-07, "loss": 0.1731, "step": 4985 }, { "epoch": 0.9, "learning_rate": 7.206043412457608e-07, "loss": 0.1985, "step": 4986 }, { "epoch": 0.9, "learning_rate": 7.179095261057444e-07, "loss": 0.1814, "step": 4987 }, { "epoch": 0.9, "learning_rate": 7.152196357130031e-07, "loss": 0.1543, "step": 4988 }, { "epoch": 0.9, "learning_rate": 7.125346709950681e-07, "loss": 0.1392, "step": 4989 }, { "epoch": 0.9, "learning_rate": 7.098546328777656e-07, "loss": 0.1364, "step": 4990 }, { "epoch": 0.9, "learning_rate": 7.071795222852296e-07, "loss": 0.1496, "step": 4991 }, { "epoch": 0.91, "learning_rate": 7.045093401398872e-07, "loss": 0.1728, "step": 4992 }, { "epoch": 0.91, "learning_rate": 7.018440873624704e-07, "loss": 0.2419, "step": 4993 }, { "epoch": 0.91, "learning_rate": 6.99183764872014e-07, "loss": 0.2053, "step": 4994 }, { "epoch": 0.91, "learning_rate": 6.965283735858457e-07, "loss": 0.1386, "step": 4995 }, { "epoch": 0.91, "learning_rate": 6.938779144196017e-07, "loss": 0.1487, "step": 4996 }, { "epoch": 0.91, "learning_rate": 6.912323882872074e-07, "loss": 0.1275, "step": 4997 }, { "epoch": 0.91, "learning_rate": 6.885917961008981e-07, "loss": 0.2038, "step": 4998 }, { "epoch": 0.91, "learning_rate": 6.859561387711971e-07, "loss": 0.1572, "step": 4999 }, { "epoch": 0.91, "learning_rate": 6.833254172069342e-07, "loss": 0.1805, "step": 5000 }, { "epoch": 0.91, "learning_rate": 6.806996323152337e-07, "loss": 0.1325, "step": 5001 }, { "epoch": 0.91, "learning_rate": 6.78078785001518e-07, "loss": 0.0941, "step": 5002 }, { "epoch": 0.91, "learning_rate": 6.754628761695058e-07, "loss": 0.1927, "step": 5003 }, { "epoch": 0.91, "learning_rate": 6.728519067212158e-07, "loss": 0.192, "step": 5004 }, { "epoch": 0.91, "learning_rate": 6.702458775569625e-07, "loss": 0.1806, "step": 5005 }, { "epoch": 0.91, "learning_rate": 6.67644789575354e-07, "loss": 0.1309, "step": 5006 }, { "epoch": 0.91, "learning_rate": 6.650486436732994e-07, "loss": 0.2129, "step": 5007 }, { "epoch": 0.91, "learning_rate": 6.62457440746001e-07, "loss": 0.149, "step": 5008 }, { "epoch": 0.91, "learning_rate": 6.598711816869541e-07, "loss": 0.1114, "step": 5009 }, { "epoch": 0.91, "learning_rate": 6.572898673879535e-07, "loss": 0.1407, "step": 5010 }, { "epoch": 0.91, "learning_rate": 6.547134987390874e-07, "loss": 0.2, "step": 5011 }, { "epoch": 0.91, "learning_rate": 6.5214207662874e-07, "loss": 0.2206, "step": 5012 }, { "epoch": 0.91, "learning_rate": 6.495756019435872e-07, "loss": 0.1443, "step": 5013 }, { "epoch": 0.91, "learning_rate": 6.470140755685994e-07, "loss": 0.0644, "step": 5014 }, { "epoch": 0.91, "learning_rate": 6.44457498387045e-07, "loss": 0.2316, "step": 5015 }, { "epoch": 0.91, "learning_rate": 6.41905871280477e-07, "loss": 0.1423, "step": 5016 }, { "epoch": 0.91, "learning_rate": 6.393591951287502e-07, "loss": 0.2105, "step": 5017 }, { "epoch": 0.91, "learning_rate": 6.368174708100121e-07, "loss": 0.1644, "step": 5018 }, { "epoch": 0.91, "learning_rate": 6.342806992006916e-07, "loss": 0.1684, "step": 5019 }, { "epoch": 0.91, "learning_rate": 6.31748881175524e-07, "loss": 0.1939, "step": 5020 }, { "epoch": 0.91, "learning_rate": 6.292220176075275e-07, "loss": 0.1438, "step": 5021 }, { "epoch": 0.91, "learning_rate": 6.267001093680169e-07, "loss": 0.1686, "step": 5022 }, { "epoch": 0.91, "learning_rate": 6.241831573265949e-07, "loss": 0.1725, "step": 5023 }, { "epoch": 0.91, "learning_rate": 6.216711623511539e-07, "loss": 0.1633, "step": 5024 }, { "epoch": 0.91, "learning_rate": 6.191641253078844e-07, "loss": 0.1893, "step": 5025 }, { "epoch": 0.91, "learning_rate": 6.166620470612566e-07, "loss": 0.1982, "step": 5026 }, { "epoch": 0.91, "learning_rate": 6.141649284740402e-07, "loss": 0.2567, "step": 5027 }, { "epoch": 0.91, "learning_rate": 6.1167277040729e-07, "loss": 0.1789, "step": 5028 }, { "epoch": 0.91, "learning_rate": 6.09185573720355e-07, "loss": 0.1714, "step": 5029 }, { "epoch": 0.91, "learning_rate": 6.067033392708643e-07, "loss": 0.149, "step": 5030 }, { "epoch": 0.91, "learning_rate": 6.042260679147433e-07, "loss": 0.1221, "step": 5031 }, { "epoch": 0.91, "learning_rate": 6.017537605062068e-07, "loss": 0.1583, "step": 5032 }, { "epoch": 0.91, "learning_rate": 5.992864178977498e-07, "loss": 0.173, "step": 5033 }, { "epoch": 0.91, "learning_rate": 5.968240409401649e-07, "loss": 0.1457, "step": 5034 }, { "epoch": 0.91, "learning_rate": 5.943666304825312e-07, "loss": 0.1789, "step": 5035 }, { "epoch": 0.91, "learning_rate": 5.919141873722045e-07, "loss": 0.1371, "step": 5036 }, { "epoch": 0.91, "learning_rate": 5.894667124548414e-07, "loss": 0.1664, "step": 5037 }, { "epoch": 0.91, "learning_rate": 5.870242065743786e-07, "loss": 0.1781, "step": 5038 }, { "epoch": 0.91, "learning_rate": 5.845866705730407e-07, "loss": 0.1721, "step": 5039 }, { "epoch": 0.91, "learning_rate": 5.821541052913371e-07, "loss": 0.1949, "step": 5040 }, { "epoch": 0.91, "learning_rate": 5.79726511568065e-07, "loss": 0.1265, "step": 5041 }, { "epoch": 0.91, "learning_rate": 5.773038902403083e-07, "loss": 0.1673, "step": 5042 }, { "epoch": 0.91, "learning_rate": 5.748862421434304e-07, "loss": 0.1824, "step": 5043 }, { "epoch": 0.91, "learning_rate": 5.724735681110876e-07, "loss": 0.1866, "step": 5044 }, { "epoch": 0.91, "learning_rate": 5.700658689752181e-07, "loss": 0.1147, "step": 5045 }, { "epoch": 0.91, "learning_rate": 5.676631455660458e-07, "loss": 0.1004, "step": 5046 }, { "epoch": 0.91, "learning_rate": 5.652653987120715e-07, "loss": 0.1692, "step": 5047 }, { "epoch": 0.92, "learning_rate": 5.628726292400888e-07, "loss": 0.158, "step": 5048 }, { "epoch": 0.92, "learning_rate": 5.60484837975171e-07, "loss": 0.1681, "step": 5049 }, { "epoch": 0.92, "learning_rate": 5.581020257406793e-07, "loss": 0.2338, "step": 5050 }, { "epoch": 0.92, "learning_rate": 5.557241933582502e-07, "loss": 0.1785, "step": 5051 }, { "epoch": 0.92, "learning_rate": 5.533513416478092e-07, "loss": 0.223, "step": 5052 }, { "epoch": 0.92, "learning_rate": 5.509834714275653e-07, "loss": 0.194, "step": 5053 }, { "epoch": 0.92, "learning_rate": 5.486205835140002e-07, "loss": 0.2082, "step": 5054 }, { "epoch": 0.92, "learning_rate": 5.462626787218888e-07, "loss": 0.2067, "step": 5055 }, { "epoch": 0.92, "learning_rate": 5.439097578642827e-07, "loss": 0.1796, "step": 5056 }, { "epoch": 0.92, "learning_rate": 5.415618217525175e-07, "loss": 0.1348, "step": 5057 }, { "epoch": 0.92, "learning_rate": 5.392188711962043e-07, "loss": 0.1832, "step": 5058 }, { "epoch": 0.92, "learning_rate": 5.368809070032416e-07, "loss": 0.1646, "step": 5059 }, { "epoch": 0.92, "learning_rate": 5.345479299798046e-07, "loss": 0.1302, "step": 5060 }, { "epoch": 0.92, "learning_rate": 5.322199409303496e-07, "loss": 0.1706, "step": 5061 }, { "epoch": 0.92, "learning_rate": 5.298969406576132e-07, "loss": 0.2328, "step": 5062 }, { "epoch": 0.92, "learning_rate": 5.275789299626143e-07, "loss": 0.1414, "step": 5063 }, { "epoch": 0.92, "learning_rate": 5.252659096446455e-07, "loss": 0.219, "step": 5064 }, { "epoch": 0.92, "learning_rate": 5.229578805012819e-07, "loss": 0.1288, "step": 5065 }, { "epoch": 0.92, "learning_rate": 5.206548433283803e-07, "loss": 0.1756, "step": 5066 }, { "epoch": 0.92, "learning_rate": 5.183567989200738e-07, "loss": 0.1134, "step": 5067 }, { "epoch": 0.92, "learning_rate": 5.160637480687702e-07, "loss": 0.2064, "step": 5068 }, { "epoch": 0.92, "learning_rate": 5.137756915651604e-07, "loss": 0.1412, "step": 5069 }, { "epoch": 0.92, "learning_rate": 5.114926301982132e-07, "loss": 0.1591, "step": 5070 }, { "epoch": 0.92, "learning_rate": 5.092145647551688e-07, "loss": 0.1946, "step": 5071 }, { "epoch": 0.92, "learning_rate": 5.069414960215541e-07, "loss": 0.1506, "step": 5072 }, { "epoch": 0.92, "learning_rate": 5.046734247811669e-07, "loss": 0.1479, "step": 5073 }, { "epoch": 0.92, "learning_rate": 5.024103518160816e-07, "loss": 0.1555, "step": 5074 }, { "epoch": 0.92, "learning_rate": 5.001522779066508e-07, "loss": 0.1646, "step": 5075 }, { "epoch": 0.92, "learning_rate": 4.978992038315033e-07, "loss": 0.1827, "step": 5076 }, { "epoch": 0.92, "learning_rate": 4.95651130367546e-07, "loss": 0.1683, "step": 5077 }, { "epoch": 0.92, "learning_rate": 4.934080582899553e-07, "loss": 0.1349, "step": 5078 }, { "epoch": 0.92, "learning_rate": 4.911699883721876e-07, "loss": 0.2219, "step": 5079 }, { "epoch": 0.92, "learning_rate": 4.889369213859773e-07, "loss": 0.1859, "step": 5080 }, { "epoch": 0.92, "learning_rate": 4.867088581013251e-07, "loss": 0.1808, "step": 5081 }, { "epoch": 0.92, "learning_rate": 4.844857992865149e-07, "loss": 0.1949, "step": 5082 }, { "epoch": 0.92, "learning_rate": 4.822677457080982e-07, "loss": 0.1656, "step": 5083 }, { "epoch": 0.92, "learning_rate": 4.800546981309068e-07, "loss": 0.1782, "step": 5084 }, { "epoch": 0.92, "learning_rate": 4.778466573180418e-07, "loss": 0.1256, "step": 5085 }, { "epoch": 0.92, "learning_rate": 4.7564362403087923e-07, "loss": 0.1791, "step": 5086 }, { "epoch": 0.92, "learning_rate": 4.734455990290698e-07, "loss": 0.1445, "step": 5087 }, { "epoch": 0.92, "learning_rate": 4.7125258307053385e-07, "loss": 0.2346, "step": 5088 }, { "epoch": 0.92, "learning_rate": 4.6906457691146655e-07, "loss": 0.1967, "step": 5089 }, { "epoch": 0.92, "learning_rate": 4.66881581306341e-07, "loss": 0.188, "step": 5090 }, { "epoch": 0.92, "learning_rate": 4.6470359700789e-07, "loss": 0.0995, "step": 5091 }, { "epoch": 0.92, "learning_rate": 4.625306247671279e-07, "loss": 0.1562, "step": 5092 }, { "epoch": 0.92, "learning_rate": 4.6036266533334204e-07, "loss": 0.2056, "step": 5093 }, { "epoch": 0.92, "learning_rate": 4.5819971945408436e-07, "loss": 0.2083, "step": 5094 }, { "epoch": 0.92, "learning_rate": 4.5604178787518326e-07, "loss": 0.1902, "step": 5095 }, { "epoch": 0.92, "learning_rate": 4.5388887134073354e-07, "loss": 0.1378, "step": 5096 }, { "epoch": 0.92, "learning_rate": 4.517409705931047e-07, "loss": 0.181, "step": 5097 }, { "epoch": 0.92, "learning_rate": 4.4959808637293764e-07, "loss": 0.1494, "step": 5098 }, { "epoch": 0.92, "learning_rate": 4.4746021941913796e-07, "loss": 0.1298, "step": 5099 }, { "epoch": 0.92, "learning_rate": 4.4532737046888596e-07, "loss": 0.1838, "step": 5100 }, { "epoch": 0.92, "learning_rate": 4.4319954025762996e-07, "loss": 0.1652, "step": 5101 }, { "epoch": 0.92, "learning_rate": 4.410767295190882e-07, "loss": 0.1505, "step": 5102 }, { "epoch": 0.93, "learning_rate": 4.389589389852466e-07, "loss": 0.1474, "step": 5103 }, { "epoch": 0.93, "learning_rate": 4.3684616938636124e-07, "loss": 0.2365, "step": 5104 }, { "epoch": 0.93, "learning_rate": 4.347384214509609e-07, "loss": 0.14, "step": 5105 }, { "epoch": 0.93, "learning_rate": 4.3263569590583253e-07, "loss": 0.1536, "step": 5106 }, { "epoch": 0.93, "learning_rate": 4.3053799347604116e-07, "loss": 0.137, "step": 5107 }, { "epoch": 0.93, "learning_rate": 4.2844531488491824e-07, "loss": 0.1749, "step": 5108 }, { "epoch": 0.93, "learning_rate": 4.263576608540548e-07, "loss": 0.1825, "step": 5109 }, { "epoch": 0.93, "learning_rate": 4.242750321033201e-07, "loss": 0.1422, "step": 5110 }, { "epoch": 0.93, "learning_rate": 4.2219742935084295e-07, "loss": 0.1412, "step": 5111 }, { "epoch": 0.93, "learning_rate": 4.201248533130253e-07, "loss": 0.1578, "step": 5112 }, { "epoch": 0.93, "learning_rate": 4.180573047045272e-07, "loss": 0.1793, "step": 5113 }, { "epoch": 0.93, "learning_rate": 4.159947842382833e-07, "loss": 0.2436, "step": 5114 }, { "epoch": 0.93, "learning_rate": 4.1393729262549305e-07, "loss": 0.1147, "step": 5115 }, { "epoch": 0.93, "learning_rate": 4.1188483057561734e-07, "loss": 0.2142, "step": 5116 }, { "epoch": 0.93, "learning_rate": 4.0983739879638827e-07, "loss": 0.1546, "step": 5117 }, { "epoch": 0.93, "learning_rate": 4.077949979937995e-07, "loss": 0.1828, "step": 5118 }, { "epoch": 0.93, "learning_rate": 4.0575762887210933e-07, "loss": 0.1534, "step": 5119 }, { "epoch": 0.93, "learning_rate": 4.037252921338441e-07, "loss": 0.2157, "step": 5120 }, { "epoch": 0.93, "learning_rate": 4.01697988479795e-07, "loss": 0.1297, "step": 5121 }, { "epoch": 0.93, "learning_rate": 3.9967571860901777e-07, "loss": 0.1575, "step": 5122 }, { "epoch": 0.93, "learning_rate": 3.976584832188279e-07, "loss": 0.1883, "step": 5123 }, { "epoch": 0.93, "learning_rate": 3.9564628300480743e-07, "loss": 0.1432, "step": 5124 }, { "epoch": 0.93, "learning_rate": 3.9363911866080794e-07, "loss": 0.123, "step": 5125 }, { "epoch": 0.93, "learning_rate": 3.9163699087893244e-07, "loss": 0.1896, "step": 5126 }, { "epoch": 0.93, "learning_rate": 3.8963990034955874e-07, "loss": 0.1729, "step": 5127 }, { "epoch": 0.93, "learning_rate": 3.8764784776132267e-07, "loss": 0.2255, "step": 5128 }, { "epoch": 0.93, "learning_rate": 3.856608338011247e-07, "loss": 0.2018, "step": 5129 }, { "epoch": 0.93, "learning_rate": 3.836788591541218e-07, "loss": 0.1396, "step": 5130 }, { "epoch": 0.93, "learning_rate": 3.8170192450374233e-07, "loss": 0.1769, "step": 5131 }, { "epoch": 0.93, "learning_rate": 3.7973003053167255e-07, "loss": 0.1197, "step": 5132 }, { "epoch": 0.93, "learning_rate": 3.7776317791785533e-07, "loss": 0.1212, "step": 5133 }, { "epoch": 0.93, "learning_rate": 3.758013673405064e-07, "loss": 0.1244, "step": 5134 }, { "epoch": 0.93, "learning_rate": 3.7384459947609474e-07, "loss": 0.1397, "step": 5135 }, { "epoch": 0.93, "learning_rate": 3.718928749993522e-07, "loss": 0.1789, "step": 5136 }, { "epoch": 0.93, "learning_rate": 3.699461945832705e-07, "loss": 0.198, "step": 5137 }, { "epoch": 0.93, "learning_rate": 3.68004558899106e-07, "loss": 0.1623, "step": 5138 }, { "epoch": 0.93, "learning_rate": 3.6606796861637315e-07, "loss": 0.1307, "step": 5139 }, { "epoch": 0.93, "learning_rate": 3.641364244028461e-07, "loss": 0.1812, "step": 5140 }, { "epoch": 0.93, "learning_rate": 3.622099269245571e-07, "loss": 0.1988, "step": 5141 }, { "epoch": 0.93, "learning_rate": 3.602884768458048e-07, "loss": 0.1589, "step": 5142 }, { "epoch": 0.93, "learning_rate": 3.583720748291391e-07, "loss": 0.1956, "step": 5143 }, { "epoch": 0.93, "learning_rate": 3.5646072153537657e-07, "loss": 0.2243, "step": 5144 }, { "epoch": 0.93, "learning_rate": 3.545544176235882e-07, "loss": 0.1548, "step": 5145 }, { "epoch": 0.93, "learning_rate": 3.526531637511066e-07, "loss": 0.1727, "step": 5146 }, { "epoch": 0.93, "learning_rate": 3.5075696057351726e-07, "loss": 0.1483, "step": 5147 }, { "epoch": 0.93, "learning_rate": 3.488658087446739e-07, "loss": 0.157, "step": 5148 }, { "epoch": 0.93, "learning_rate": 3.4697970891667986e-07, "loss": 0.1722, "step": 5149 }, { "epoch": 0.93, "learning_rate": 3.4509866173990154e-07, "loss": 0.2472, "step": 5150 }, { "epoch": 0.93, "learning_rate": 3.432226678629602e-07, "loss": 0.1813, "step": 5151 }, { "epoch": 0.93, "learning_rate": 3.4135172793273493e-07, "loss": 0.1533, "step": 5152 }, { "epoch": 0.93, "learning_rate": 3.3948584259436475e-07, "loss": 0.1541, "step": 5153 }, { "epoch": 0.93, "learning_rate": 3.376250124912417e-07, "loss": 0.1715, "step": 5154 }, { "epoch": 0.93, "learning_rate": 3.35769238265019e-07, "loss": 0.1646, "step": 5155 }, { "epoch": 0.93, "learning_rate": 3.3391852055560337e-07, "loss": 0.1245, "step": 5156 }, { "epoch": 0.93, "learning_rate": 3.3207286000115745e-07, "loss": 0.1707, "step": 5157 }, { "epoch": 0.94, "learning_rate": 3.302322572381039e-07, "loss": 0.1525, "step": 5158 }, { "epoch": 0.94, "learning_rate": 3.2839671290111673e-07, "loss": 0.1992, "step": 5159 }, { "epoch": 0.94, "learning_rate": 3.2656622762313124e-07, "loss": 0.1563, "step": 5160 }, { "epoch": 0.94, "learning_rate": 3.2474080203533094e-07, "loss": 0.1772, "step": 5161 }, { "epoch": 0.94, "learning_rate": 3.22920436767159e-07, "loss": 0.1552, "step": 5162 }, { "epoch": 0.94, "learning_rate": 3.2110513244631825e-07, "loss": 0.1727, "step": 5163 }, { "epoch": 0.94, "learning_rate": 3.1929488969875633e-07, "loss": 0.1045, "step": 5164 }, { "epoch": 0.94, "learning_rate": 3.1748970914868226e-07, "loss": 0.1299, "step": 5165 }, { "epoch": 0.94, "learning_rate": 3.1568959141855814e-07, "loss": 0.1778, "step": 5166 }, { "epoch": 0.94, "learning_rate": 3.1389453712910244e-07, "loss": 0.1831, "step": 5167 }, { "epoch": 0.94, "learning_rate": 3.121045468992817e-07, "loss": 0.1915, "step": 5168 }, { "epoch": 0.94, "learning_rate": 3.1031962134632053e-07, "loss": 0.1533, "step": 5169 }, { "epoch": 0.94, "learning_rate": 3.085397610856999e-07, "loss": 0.1632, "step": 5170 }, { "epoch": 0.94, "learning_rate": 3.067649667311473e-07, "loss": 0.2199, "step": 5171 }, { "epoch": 0.94, "learning_rate": 3.0499523889464644e-07, "loss": 0.1838, "step": 5172 }, { "epoch": 0.94, "learning_rate": 3.032305781864392e-07, "loss": 0.13, "step": 5173 }, { "epoch": 0.94, "learning_rate": 3.014709852150105e-07, "loss": 0.1633, "step": 5174 }, { "epoch": 0.94, "learning_rate": 2.99716460587105e-07, "loss": 0.2169, "step": 5175 }, { "epoch": 0.94, "learning_rate": 2.979670049077171e-07, "loss": 0.1751, "step": 5176 }, { "epoch": 0.94, "learning_rate": 2.9622261878009427e-07, "loss": 0.187, "step": 5177 }, { "epoch": 0.94, "learning_rate": 2.944833028057353e-07, "loss": 0.1967, "step": 5178 }, { "epoch": 0.94, "learning_rate": 2.927490575843889e-07, "loss": 0.1885, "step": 5179 }, { "epoch": 0.94, "learning_rate": 2.910198837140615e-07, "loss": 0.1515, "step": 5180 }, { "epoch": 0.94, "learning_rate": 2.8929578179100137e-07, "loss": 0.1258, "step": 5181 }, { "epoch": 0.94, "learning_rate": 2.875767524097145e-07, "loss": 0.1513, "step": 5182 }, { "epoch": 0.94, "learning_rate": 2.858627961629567e-07, "loss": 0.2174, "step": 5183 }, { "epoch": 0.94, "learning_rate": 2.8415391364173706e-07, "loss": 0.1616, "step": 5184 }, { "epoch": 0.94, "learning_rate": 2.824501054353057e-07, "loss": 0.1811, "step": 5185 }, { "epoch": 0.94, "learning_rate": 2.807513721311744e-07, "loss": 0.1601, "step": 5186 }, { "epoch": 0.94, "learning_rate": 2.790577143150996e-07, "loss": 0.1543, "step": 5187 }, { "epoch": 0.94, "learning_rate": 2.773691325710859e-07, "loss": 0.2155, "step": 5188 }, { "epoch": 0.94, "learning_rate": 2.75685627481389e-07, "loss": 0.1599, "step": 5189 }, { "epoch": 0.94, "learning_rate": 2.740071996265198e-07, "loss": 0.2193, "step": 5190 }, { "epoch": 0.94, "learning_rate": 2.7233384958522675e-07, "loss": 0.1491, "step": 5191 }, { "epoch": 0.94, "learning_rate": 2.706655779345185e-07, "loss": 0.185, "step": 5192 }, { "epoch": 0.94, "learning_rate": 2.690023852496465e-07, "loss": 0.1661, "step": 5193 }, { "epoch": 0.94, "learning_rate": 2.673442721041153e-07, "loss": 0.1736, "step": 5194 }, { "epoch": 0.94, "learning_rate": 2.6569123906967083e-07, "loss": 0.0995, "step": 5195 }, { "epoch": 0.94, "learning_rate": 2.640432867163123e-07, "loss": 0.1612, "step": 5196 }, { "epoch": 0.94, "learning_rate": 2.6240041561228834e-07, "loss": 0.1772, "step": 5197 }, { "epoch": 0.94, "learning_rate": 2.607626263240942e-07, "loss": 0.163, "step": 5198 }, { "epoch": 0.94, "learning_rate": 2.5912991941646814e-07, "loss": 0.1251, "step": 5199 }, { "epoch": 0.94, "learning_rate": 2.5750229545240314e-07, "loss": 0.1756, "step": 5200 }, { "epoch": 0.94, "learning_rate": 2.558797549931352e-07, "loss": 0.0893, "step": 5201 }, { "epoch": 0.94, "learning_rate": 2.5426229859814685e-07, "loss": 0.1374, "step": 5202 }, { "epoch": 0.94, "learning_rate": 2.526499268251703e-07, "loss": 0.1811, "step": 5203 }, { "epoch": 0.94, "learning_rate": 2.510426402301824e-07, "loss": 0.124, "step": 5204 }, { "epoch": 0.94, "learning_rate": 2.4944043936741155e-07, "loss": 0.2333, "step": 5205 }, { "epoch": 0.94, "learning_rate": 2.478433247893225e-07, "loss": 0.2605, "step": 5206 }, { "epoch": 0.94, "learning_rate": 2.4625129704663463e-07, "loss": 0.2463, "step": 5207 }, { "epoch": 0.94, "learning_rate": 2.4466435668831056e-07, "loss": 0.2298, "step": 5208 }, { "epoch": 0.94, "learning_rate": 2.430825042615592e-07, "loss": 0.1496, "step": 5209 }, { "epoch": 0.94, "learning_rate": 2.415057403118326e-07, "loss": 0.1965, "step": 5210 }, { "epoch": 0.94, "learning_rate": 2.399340653828308e-07, "loss": 0.1703, "step": 5211 }, { "epoch": 0.94, "learning_rate": 2.38367480016502e-07, "loss": 0.1721, "step": 5212 }, { "epoch": 0.95, "learning_rate": 2.3680598475303072e-07, "loss": 0.087, "step": 5213 }, { "epoch": 0.95, "learning_rate": 2.3524958013085296e-07, "loss": 0.1588, "step": 5214 }, { "epoch": 0.95, "learning_rate": 2.336982666866494e-07, "loss": 0.1769, "step": 5215 }, { "epoch": 0.95, "learning_rate": 2.3215204495534215e-07, "loss": 0.1985, "step": 5216 }, { "epoch": 0.95, "learning_rate": 2.3061091547009972e-07, "loss": 0.1587, "step": 5217 }, { "epoch": 0.95, "learning_rate": 2.2907487876233536e-07, "loss": 0.1304, "step": 5218 }, { "epoch": 0.95, "learning_rate": 2.2754393536170204e-07, "loss": 0.181, "step": 5219 }, { "epoch": 0.95, "learning_rate": 2.2601808579609919e-07, "loss": 0.204, "step": 5220 }, { "epoch": 0.95, "learning_rate": 2.2449733059167253e-07, "loss": 0.1711, "step": 5221 }, { "epoch": 0.95, "learning_rate": 2.2298167027280769e-07, "loss": 0.1929, "step": 5222 }, { "epoch": 0.95, "learning_rate": 2.214711053621332e-07, "loss": 0.1731, "step": 5223 }, { "epoch": 0.95, "learning_rate": 2.1996563638052413e-07, "loss": 0.1416, "step": 5224 }, { "epoch": 0.95, "learning_rate": 2.1846526384709353e-07, "loss": 0.1736, "step": 5225 }, { "epoch": 0.95, "learning_rate": 2.1696998827919924e-07, "loss": 0.1857, "step": 5226 }, { "epoch": 0.95, "learning_rate": 2.154798101924438e-07, "loss": 0.1472, "step": 5227 }, { "epoch": 0.95, "learning_rate": 2.139947301006695e-07, "loss": 0.1859, "step": 5228 }, { "epoch": 0.95, "learning_rate": 2.1251474851596176e-07, "loss": 0.1794, "step": 5229 }, { "epoch": 0.95, "learning_rate": 2.1103986594864567e-07, "loss": 0.1959, "step": 5230 }, { "epoch": 0.95, "learning_rate": 2.0957008290729108e-07, "loss": 0.1752, "step": 5231 }, { "epoch": 0.95, "learning_rate": 2.0810539989870925e-07, "loss": 0.2157, "step": 5232 }, { "epoch": 0.95, "learning_rate": 2.0664581742794953e-07, "loss": 0.1837, "step": 5233 }, { "epoch": 0.95, "learning_rate": 2.0519133599830596e-07, "loss": 0.1833, "step": 5234 }, { "epoch": 0.95, "learning_rate": 2.0374195611131408e-07, "loss": 0.1281, "step": 5235 }, { "epoch": 0.95, "learning_rate": 2.0229767826674738e-07, "loss": 0.1706, "step": 5236 }, { "epoch": 0.95, "learning_rate": 2.008585029626192e-07, "loss": 0.1647, "step": 5237 }, { "epoch": 0.95, "learning_rate": 1.9942443069518924e-07, "loss": 0.1141, "step": 5238 }, { "epoch": 0.95, "learning_rate": 1.9799546195895358e-07, "loss": 0.1925, "step": 5239 }, { "epoch": 0.95, "learning_rate": 1.9657159724664643e-07, "loss": 0.1312, "step": 5240 }, { "epoch": 0.95, "learning_rate": 1.951528370492467e-07, "loss": 0.1888, "step": 5241 }, { "epoch": 0.95, "learning_rate": 1.9373918185596973e-07, "loss": 0.1945, "step": 5242 }, { "epoch": 0.95, "learning_rate": 1.9233063215427227e-07, "loss": 0.1748, "step": 5243 }, { "epoch": 0.95, "learning_rate": 1.909271884298508e-07, "loss": 0.15, "step": 5244 }, { "epoch": 0.95, "learning_rate": 1.8952885116663988e-07, "loss": 0.1775, "step": 5245 }, { "epoch": 0.95, "learning_rate": 1.8813562084681379e-07, "loss": 0.1837, "step": 5246 }, { "epoch": 0.95, "learning_rate": 1.8674749795078493e-07, "loss": 0.1534, "step": 5247 }, { "epoch": 0.95, "learning_rate": 1.853644829572071e-07, "loss": 0.1854, "step": 5248 }, { "epoch": 0.95, "learning_rate": 1.8398657634297046e-07, "loss": 0.1525, "step": 5249 }, { "epoch": 0.95, "learning_rate": 1.8261377858320493e-07, "loss": 0.1537, "step": 5250 }, { "epoch": 0.95, "learning_rate": 1.8124609015127857e-07, "loss": 0.1552, "step": 5251 }, { "epoch": 0.95, "learning_rate": 1.798835115187958e-07, "loss": 0.1727, "step": 5252 }, { "epoch": 0.95, "learning_rate": 1.785260431556024e-07, "loss": 0.1262, "step": 5253 }, { "epoch": 0.95, "learning_rate": 1.7717368552977897e-07, "loss": 0.1945, "step": 5254 }, { "epoch": 0.95, "learning_rate": 1.7582643910764585e-07, "loss": 0.1899, "step": 5255 }, { "epoch": 0.95, "learning_rate": 1.7448430435376138e-07, "loss": 0.1319, "step": 5256 }, { "epoch": 0.95, "learning_rate": 1.7314728173091875e-07, "loss": 0.1938, "step": 5257 }, { "epoch": 0.95, "learning_rate": 1.7181537170014915e-07, "loss": 0.1555, "step": 5258 }, { "epoch": 0.95, "learning_rate": 1.704885747207219e-07, "loss": 0.2722, "step": 5259 }, { "epoch": 0.95, "learning_rate": 1.6916689125014595e-07, "loss": 0.2169, "step": 5260 }, { "epoch": 0.95, "learning_rate": 1.6785032174416014e-07, "loss": 0.1544, "step": 5261 }, { "epoch": 0.95, "learning_rate": 1.66538866656743e-07, "loss": 0.1382, "step": 5262 }, { "epoch": 0.95, "learning_rate": 1.6523252644011444e-07, "loss": 0.1588, "step": 5263 }, { "epoch": 0.95, "learning_rate": 1.6393130154472413e-07, "loss": 0.1481, "step": 5264 }, { "epoch": 0.95, "learning_rate": 1.6263519241925818e-07, "loss": 0.2579, "step": 5265 }, { "epoch": 0.95, "learning_rate": 1.6134419951064406e-07, "loss": 0.1759, "step": 5266 }, { "epoch": 0.95, "learning_rate": 1.60058323264039e-07, "loss": 0.1539, "step": 5267 }, { "epoch": 0.96, "learning_rate": 1.5877756412284005e-07, "loss": 0.2261, "step": 5268 }, { "epoch": 0.96, "learning_rate": 1.575019225286789e-07, "loss": 0.164, "step": 5269 }, { "epoch": 0.96, "learning_rate": 1.5623139892142037e-07, "loss": 0.2455, "step": 5270 }, { "epoch": 0.96, "learning_rate": 1.549659937391673e-07, "loss": 0.2524, "step": 5271 }, { "epoch": 0.96, "learning_rate": 1.5370570741825408e-07, "loss": 0.1306, "step": 5272 }, { "epoch": 0.96, "learning_rate": 1.5245054039325635e-07, "loss": 0.1987, "step": 5273 }, { "epoch": 0.96, "learning_rate": 1.5120049309697793e-07, "loss": 0.1803, "step": 5274 }, { "epoch": 0.96, "learning_rate": 1.49955565960459e-07, "loss": 0.2294, "step": 5275 }, { "epoch": 0.96, "learning_rate": 1.4871575941297623e-07, "loss": 0.1861, "step": 5276 }, { "epoch": 0.96, "learning_rate": 1.4748107388204256e-07, "loss": 0.111, "step": 5277 }, { "epoch": 0.96, "learning_rate": 1.4625150979339585e-07, "loss": 0.1648, "step": 5278 }, { "epoch": 0.96, "learning_rate": 1.4502706757101858e-07, "loss": 0.1857, "step": 5279 }, { "epoch": 0.96, "learning_rate": 1.4380774763712134e-07, "loss": 0.1653, "step": 5280 }, { "epoch": 0.96, "learning_rate": 1.425935504121495e-07, "loss": 0.2333, "step": 5281 }, { "epoch": 0.96, "learning_rate": 1.4138447631478147e-07, "loss": 0.1897, "step": 5282 }, { "epoch": 0.96, "learning_rate": 1.4018052576193208e-07, "loss": 0.1658, "step": 5283 }, { "epoch": 0.96, "learning_rate": 1.3898169916874758e-07, "loss": 0.1572, "step": 5284 }, { "epoch": 0.96, "learning_rate": 1.37787996948604e-07, "loss": 0.1555, "step": 5285 }, { "epoch": 0.96, "learning_rate": 1.3659941951311372e-07, "loss": 0.1148, "step": 5286 }, { "epoch": 0.96, "learning_rate": 1.3541596727212557e-07, "loss": 0.163, "step": 5287 }, { "epoch": 0.96, "learning_rate": 1.342376406337148e-07, "loss": 0.1096, "step": 5288 }, { "epoch": 0.96, "learning_rate": 1.3306444000419137e-07, "loss": 0.1558, "step": 5289 }, { "epoch": 0.96, "learning_rate": 1.3189636578809839e-07, "loss": 0.1541, "step": 5290 }, { "epoch": 0.96, "learning_rate": 1.3073341838821028e-07, "loss": 0.1458, "step": 5291 }, { "epoch": 0.96, "learning_rate": 1.2957559820553632e-07, "loss": 0.143, "step": 5292 }, { "epoch": 0.96, "learning_rate": 1.284229056393138e-07, "loss": 0.1462, "step": 5293 }, { "epoch": 0.96, "learning_rate": 1.2727534108701644e-07, "loss": 0.1546, "step": 5294 }, { "epoch": 0.96, "learning_rate": 1.2613290494434438e-07, "loss": 0.1505, "step": 5295 }, { "epoch": 0.96, "learning_rate": 1.249955976052325e-07, "loss": 0.1964, "step": 5296 }, { "epoch": 0.96, "learning_rate": 1.2386341946184875e-07, "loss": 0.1625, "step": 5297 }, { "epoch": 0.96, "learning_rate": 1.2273637090458922e-07, "loss": 0.23, "step": 5298 }, { "epoch": 0.96, "learning_rate": 1.2161445232208303e-07, "loss": 0.1646, "step": 5299 }, { "epoch": 0.96, "learning_rate": 1.2049766410118901e-07, "loss": 0.1698, "step": 5300 }, { "epoch": 0.96, "learning_rate": 1.1938600662699918e-07, "loss": 0.1763, "step": 5301 }, { "epoch": 0.96, "learning_rate": 1.1827948028283353e-07, "loss": 0.1714, "step": 5302 }, { "epoch": 0.96, "learning_rate": 1.1717808545024522e-07, "loss": 0.1507, "step": 5303 }, { "epoch": 0.96, "learning_rate": 1.1608182250901545e-07, "loss": 0.1818, "step": 5304 }, { "epoch": 0.96, "learning_rate": 1.1499069183716182e-07, "loss": 0.2536, "step": 5305 }, { "epoch": 0.96, "learning_rate": 1.1390469381092172e-07, "loss": 0.1927, "step": 5306 }, { "epoch": 0.96, "learning_rate": 1.1282382880477393e-07, "loss": 0.2011, "step": 5307 }, { "epoch": 0.96, "learning_rate": 1.1174809719141866e-07, "loss": 0.1305, "step": 5308 }, { "epoch": 0.96, "learning_rate": 1.1067749934179083e-07, "loss": 0.1737, "step": 5309 }, { "epoch": 0.96, "learning_rate": 1.0961203562505351e-07, "loss": 0.1631, "step": 5310 }, { "epoch": 0.96, "learning_rate": 1.0855170640859947e-07, "loss": 0.204, "step": 5311 }, { "epoch": 0.96, "learning_rate": 1.0749651205805122e-07, "loss": 0.1559, "step": 5312 }, { "epoch": 0.96, "learning_rate": 1.0644645293726108e-07, "loss": 0.1781, "step": 5313 }, { "epoch": 0.96, "learning_rate": 1.0540152940830938e-07, "loss": 0.2399, "step": 5314 }, { "epoch": 0.96, "learning_rate": 1.0436174183150792e-07, "loss": 0.2003, "step": 5315 }, { "epoch": 0.96, "learning_rate": 1.0332709056539491e-07, "loss": 0.1847, "step": 5316 }, { "epoch": 0.96, "learning_rate": 1.0229757596673828e-07, "loss": 0.1458, "step": 5317 }, { "epoch": 0.96, "learning_rate": 1.0127319839053572e-07, "loss": 0.1699, "step": 5318 }, { "epoch": 0.96, "learning_rate": 1.0025395819001303e-07, "loss": 0.2444, "step": 5319 }, { "epoch": 0.96, "learning_rate": 9.923985571662408e-08, "loss": 0.1752, "step": 5320 }, { "epoch": 0.96, "learning_rate": 9.82308913200508e-08, "loss": 0.1171, "step": 5321 }, { "epoch": 0.96, "learning_rate": 9.722706534820491e-08, "loss": 0.1827, "step": 5322 }, { "epoch": 0.97, "learning_rate": 9.622837814722619e-08, "loss": 0.174, "step": 5323 }, { "epoch": 0.97, "learning_rate": 9.523483006147914e-08, "loss": 0.1808, "step": 5324 }, { "epoch": 0.97, "learning_rate": 9.424642143356144e-08, "loss": 0.1461, "step": 5325 }, { "epoch": 0.97, "learning_rate": 9.326315260429374e-08, "loss": 0.1398, "step": 5326 }, { "epoch": 0.97, "learning_rate": 9.228502391272985e-08, "loss": 0.1323, "step": 5327 }, { "epoch": 0.97, "learning_rate": 9.131203569614498e-08, "loss": 0.2028, "step": 5328 }, { "epoch": 0.97, "learning_rate": 9.034418829004409e-08, "loss": 0.1866, "step": 5329 }, { "epoch": 0.97, "learning_rate": 8.938148202816187e-08, "loss": 0.2111, "step": 5330 }, { "epoch": 0.97, "learning_rate": 8.842391724245946e-08, "loss": 0.2139, "step": 5331 }, { "epoch": 0.97, "learning_rate": 8.747149426312273e-08, "loss": 0.1382, "step": 5332 }, { "epoch": 0.97, "learning_rate": 8.652421341856565e-08, "loss": 0.2332, "step": 5333 }, { "epoch": 0.97, "learning_rate": 8.558207503542858e-08, "loss": 0.2021, "step": 5334 }, { "epoch": 0.97, "learning_rate": 8.464507943858335e-08, "loss": 0.161, "step": 5335 }, { "epoch": 0.97, "learning_rate": 8.371322695111983e-08, "loss": 0.1624, "step": 5336 }, { "epoch": 0.97, "learning_rate": 8.278651789436099e-08, "loss": 0.2006, "step": 5337 }, { "epoch": 0.97, "learning_rate": 8.18649525878562e-08, "loss": 0.1586, "step": 5338 }, { "epoch": 0.97, "learning_rate": 8.094853134937797e-08, "loss": 0.1588, "step": 5339 }, { "epoch": 0.97, "learning_rate": 8.003725449492516e-08, "loss": 0.132, "step": 5340 }, { "epoch": 0.97, "learning_rate": 7.913112233872477e-08, "loss": 0.1852, "step": 5341 }, { "epoch": 0.97, "learning_rate": 7.823013519323186e-08, "loss": 0.1625, "step": 5342 }, { "epoch": 0.97, "learning_rate": 7.73342933691229e-08, "loss": 0.1728, "step": 5343 }, { "epoch": 0.97, "learning_rate": 7.644359717529915e-08, "loss": 0.1723, "step": 5344 }, { "epoch": 0.97, "learning_rate": 7.555804691889323e-08, "loss": 0.2087, "step": 5345 }, { "epoch": 0.97, "learning_rate": 7.467764290525925e-08, "loss": 0.201, "step": 5346 }, { "epoch": 0.97, "learning_rate": 7.380238543797934e-08, "loss": 0.1808, "step": 5347 }, { "epoch": 0.97, "learning_rate": 7.293227481885712e-08, "loss": 0.1721, "step": 5348 }, { "epoch": 0.97, "learning_rate": 7.206731134792587e-08, "loss": 0.1594, "step": 5349 }, { "epoch": 0.97, "learning_rate": 7.120749532344206e-08, "loss": 0.1871, "step": 5350 }, { "epoch": 0.97, "learning_rate": 7.035282704188684e-08, "loss": 0.1399, "step": 5351 }, { "epoch": 0.97, "learning_rate": 6.950330679796612e-08, "loss": 0.1964, "step": 5352 }, { "epoch": 0.97, "learning_rate": 6.865893488461394e-08, "loss": 0.147, "step": 5353 }, { "epoch": 0.97, "learning_rate": 6.781971159298239e-08, "loss": 0.1906, "step": 5354 }, { "epoch": 0.97, "learning_rate": 6.698563721245332e-08, "loss": 0.1429, "step": 5355 }, { "epoch": 0.97, "learning_rate": 6.6156712030635e-08, "loss": 0.1264, "step": 5356 }, { "epoch": 0.97, "learning_rate": 6.533293633335547e-08, "loss": 0.1645, "step": 5357 }, { "epoch": 0.97, "learning_rate": 6.451431040466749e-08, "loss": 0.2192, "step": 5358 }, { "epoch": 0.97, "learning_rate": 6.370083452685027e-08, "loss": 0.1525, "step": 5359 }, { "epoch": 0.97, "learning_rate": 6.289250898040777e-08, "loss": 0.2331, "step": 5360 }, { "epoch": 0.97, "learning_rate": 6.208933404406536e-08, "loss": 0.1556, "step": 5361 }, { "epoch": 0.97, "learning_rate": 6.129130999477317e-08, "loss": 0.1236, "step": 5362 }, { "epoch": 0.97, "learning_rate": 6.049843710770774e-08, "loss": 0.1758, "step": 5363 }, { "epoch": 0.97, "learning_rate": 5.97107156562654e-08, "loss": 0.1607, "step": 5364 }, { "epoch": 0.97, "learning_rate": 5.892814591206719e-08, "loss": 0.1798, "step": 5365 }, { "epoch": 0.97, "learning_rate": 5.815072814496225e-08, "loss": 0.2271, "step": 5366 }, { "epoch": 0.97, "learning_rate": 5.737846262301616e-08, "loss": 0.1576, "step": 5367 }, { "epoch": 0.97, "learning_rate": 5.661134961252257e-08, "loss": 0.1596, "step": 5368 }, { "epoch": 0.97, "learning_rate": 5.5849389377998214e-08, "loss": 0.1437, "step": 5369 }, { "epoch": 0.97, "learning_rate": 5.509258218218127e-08, "loss": 0.1886, "step": 5370 }, { "epoch": 0.97, "learning_rate": 5.434092828603299e-08, "loss": 0.1651, "step": 5371 }, { "epoch": 0.97, "learning_rate": 5.359442794873937e-08, "loss": 0.2118, "step": 5372 }, { "epoch": 0.97, "learning_rate": 5.285308142770784e-08, "loss": 0.121, "step": 5373 }, { "epoch": 0.97, "learning_rate": 5.2116888978570586e-08, "loss": 0.173, "step": 5374 }, { "epoch": 0.97, "learning_rate": 5.138585085517955e-08, "loss": 0.1494, "step": 5375 }, { "epoch": 0.97, "learning_rate": 5.0659967309613086e-08, "loss": 0.1627, "step": 5376 }, { "epoch": 0.97, "learning_rate": 4.9939238592169314e-08, "loss": 0.1826, "step": 5377 }, { "epoch": 0.97, "learning_rate": 4.922366495136943e-08, "loss": 0.1252, "step": 5378 }, { "epoch": 0.98, "learning_rate": 4.851324663395773e-08, "loss": 0.2045, "step": 5379 }, { "epoch": 0.98, "learning_rate": 4.78079838849016e-08, "loss": 0.1463, "step": 5380 }, { "epoch": 0.98, "learning_rate": 4.71078769473865e-08, "loss": 0.1566, "step": 5381 }, { "epoch": 0.98, "learning_rate": 4.641292606282599e-08, "loss": 0.1711, "step": 5382 }, { "epoch": 0.98, "learning_rate": 4.572313147085172e-08, "loss": 0.1883, "step": 5383 }, { "epoch": 0.98, "learning_rate": 4.5038493409318426e-08, "loss": 0.1735, "step": 5384 }, { "epoch": 0.98, "learning_rate": 4.435901211430393e-08, "loss": 0.2134, "step": 5385 }, { "epoch": 0.98, "learning_rate": 4.3684687820104155e-08, "loss": 0.1923, "step": 5386 }, { "epoch": 0.98, "learning_rate": 4.301552075924309e-08, "loss": 0.1648, "step": 5387 }, { "epoch": 0.98, "learning_rate": 4.2351511162461165e-08, "loss": 0.2069, "step": 5388 }, { "epoch": 0.98, "learning_rate": 4.16926592587219e-08, "loss": 0.1548, "step": 5389 }, { "epoch": 0.98, "learning_rate": 4.103896527521023e-08, "loss": 0.1245, "step": 5390 }, { "epoch": 0.98, "learning_rate": 4.039042943733251e-08, "loss": 0.146, "step": 5391 }, { "epoch": 0.98, "learning_rate": 3.974705196871653e-08, "loss": 0.3042, "step": 5392 }, { "epoch": 0.98, "learning_rate": 3.910883309121316e-08, "loss": 0.1377, "step": 5393 }, { "epoch": 0.98, "learning_rate": 3.8475773024893025e-08, "loss": 0.1562, "step": 5394 }, { "epoch": 0.98, "learning_rate": 3.7847871988044846e-08, "loss": 0.1538, "step": 5395 }, { "epoch": 0.98, "learning_rate": 3.7225130197183764e-08, "loss": 0.1795, "step": 5396 }, { "epoch": 0.98, "learning_rate": 3.660754786704301e-08, "loss": 0.1954, "step": 5397 }, { "epoch": 0.98, "learning_rate": 3.5995125210578905e-08, "loss": 0.1192, "step": 5398 }, { "epoch": 0.98, "learning_rate": 3.5387862438962524e-08, "loss": 0.1334, "step": 5399 }, { "epoch": 0.98, "learning_rate": 3.478575976159304e-08, "loss": 0.1422, "step": 5400 }, { "epoch": 0.98, "learning_rate": 3.41888173860877e-08, "loss": 0.1568, "step": 5401 }, { "epoch": 0.98, "learning_rate": 3.3597035518283527e-08, "loss": 0.1507, "step": 5402 }, { "epoch": 0.98, "learning_rate": 3.301041436223728e-08, "loss": 0.1166, "step": 5403 }, { "epoch": 0.98, "learning_rate": 3.242895412022884e-08, "loss": 0.1755, "step": 5404 }, { "epoch": 0.98, "learning_rate": 3.18526549927578e-08, "loss": 0.1656, "step": 5405 }, { "epoch": 0.98, "learning_rate": 3.128151717854188e-08, "loss": 0.2096, "step": 5406 }, { "epoch": 0.98, "learning_rate": 3.071554087452022e-08, "loss": 0.1225, "step": 5407 }, { "epoch": 0.98, "learning_rate": 3.0154726275856694e-08, "loss": 0.1595, "step": 5408 }, { "epoch": 0.98, "learning_rate": 2.9599073575926615e-08, "loss": 0.197, "step": 5409 }, { "epoch": 0.98, "learning_rate": 2.9048582966331728e-08, "loss": 0.1656, "step": 5410 }, { "epoch": 0.98, "learning_rate": 2.8503254636893515e-08, "loss": 0.1659, "step": 5411 }, { "epoch": 0.98, "learning_rate": 2.7963088775649903e-08, "loss": 0.2149, "step": 5412 }, { "epoch": 0.98, "learning_rate": 2.7428085568861894e-08, "loss": 0.2125, "step": 5413 }, { "epoch": 0.98, "learning_rate": 2.6898245201008587e-08, "loss": 0.1058, "step": 5414 }, { "epoch": 0.98, "learning_rate": 2.6373567854790505e-08, "loss": 0.1315, "step": 5415 }, { "epoch": 0.98, "learning_rate": 2.5854053711124593e-08, "loss": 0.1401, "step": 5416 }, { "epoch": 0.98, "learning_rate": 2.5339702949152554e-08, "loss": 0.1369, "step": 5417 }, { "epoch": 0.98, "learning_rate": 2.4830515746230852e-08, "loss": 0.2296, "step": 5418 }, { "epoch": 0.98, "learning_rate": 2.4326492277935707e-08, "loss": 0.1497, "step": 5419 }, { "epoch": 0.98, "learning_rate": 2.3827632718068093e-08, "loss": 0.1458, "step": 5420 }, { "epoch": 0.98, "learning_rate": 2.333393723864208e-08, "loss": 0.13, "step": 5421 }, { "epoch": 0.98, "learning_rate": 2.284540600989482e-08, "loss": 0.2104, "step": 5422 }, { "epoch": 0.98, "learning_rate": 2.2362039200279906e-08, "loss": 0.2156, "step": 5423 }, { "epoch": 0.98, "learning_rate": 2.1883836976474004e-08, "loss": 0.2108, "step": 5424 }, { "epoch": 0.98, "learning_rate": 2.1410799503370215e-08, "loss": 0.1699, "step": 5425 }, { "epoch": 0.98, "learning_rate": 2.0942926944079733e-08, "loss": 0.2236, "step": 5426 }, { "epoch": 0.98, "learning_rate": 2.0480219459935167e-08, "loss": 0.1176, "step": 5427 }, { "epoch": 0.98, "learning_rate": 2.0022677210487227e-08, "loss": 0.1964, "step": 5428 }, { "epoch": 0.98, "learning_rate": 1.9570300353506377e-08, "loss": 0.2245, "step": 5429 }, { "epoch": 0.98, "learning_rate": 1.9123089044977837e-08, "loss": 0.1809, "step": 5430 }, { "epoch": 0.98, "learning_rate": 1.8681043439113256e-08, "loss": 0.1254, "step": 5431 }, { "epoch": 0.98, "learning_rate": 1.8244163688337367e-08, "loss": 0.1579, "step": 5432 }, { "epoch": 0.98, "learning_rate": 1.781244994329301e-08, "loss": 0.193, "step": 5433 }, { "epoch": 0.99, "learning_rate": 1.7385902352846096e-08, "loss": 0.1645, "step": 5434 }, { "epoch": 0.99, "learning_rate": 1.696452106407731e-08, "loss": 0.1408, "step": 5435 }, { "epoch": 0.99, "learning_rate": 1.654830622228709e-08, "loss": 0.202, "step": 5436 }, { "epoch": 0.99, "learning_rate": 1.613725797099397e-08, "loss": 0.213, "step": 5437 }, { "epoch": 0.99, "learning_rate": 1.5731376451937894e-08, "loss": 0.1343, "step": 5438 }, { "epoch": 0.99, "learning_rate": 1.5330661805073587e-08, "loss": 0.1263, "step": 5439 }, { "epoch": 0.99, "learning_rate": 1.4935114168573848e-08, "loss": 0.2016, "step": 5440 }, { "epoch": 0.99, "learning_rate": 1.4544733678832911e-08, "loss": 0.1429, "step": 5441 }, { "epoch": 0.99, "learning_rate": 1.4159520470463094e-08, "loss": 0.1722, "step": 5442 }, { "epoch": 0.99, "learning_rate": 1.3779474676291481e-08, "loss": 0.1359, "step": 5443 }, { "epoch": 0.99, "learning_rate": 1.3404596427364912e-08, "loss": 0.1414, "step": 5444 }, { "epoch": 0.99, "learning_rate": 1.3034885852949986e-08, "loss": 0.2317, "step": 5445 }, { "epoch": 0.99, "learning_rate": 1.2670343080529722e-08, "loss": 0.1538, "step": 5446 }, { "epoch": 0.99, "learning_rate": 1.2310968235805242e-08, "loss": 0.1519, "step": 5447 }, { "epoch": 0.99, "learning_rate": 1.1956761442699082e-08, "loss": 0.161, "step": 5448 }, { "epoch": 0.99, "learning_rate": 1.1607722823345212e-08, "loss": 0.1315, "step": 5449 }, { "epoch": 0.99, "learning_rate": 1.1263852498099025e-08, "loss": 0.1793, "step": 5450 }, { "epoch": 0.99, "learning_rate": 1.0925150585537336e-08, "loss": 0.159, "step": 5451 }, { "epoch": 0.99, "learning_rate": 1.0591617202446724e-08, "loss": 0.1879, "step": 5452 }, { "epoch": 0.99, "learning_rate": 1.026325246384019e-08, "loss": 0.2042, "step": 5453 }, { "epoch": 0.99, "learning_rate": 9.94005648294216e-09, "loss": 0.1994, "step": 5454 }, { "epoch": 0.99, "learning_rate": 9.622029371196828e-09, "loss": 0.203, "step": 5455 }, { "epoch": 0.99, "learning_rate": 9.309171238268132e-09, "loss": 0.1818, "step": 5456 }, { "epoch": 0.99, "learning_rate": 9.001482192033116e-09, "loss": 0.1472, "step": 5457 }, { "epoch": 0.99, "learning_rate": 8.698962338590244e-09, "loss": 0.1264, "step": 5458 }, { "epoch": 0.99, "learning_rate": 8.40161178225607e-09, "loss": 0.1855, "step": 5459 }, { "epoch": 0.99, "learning_rate": 8.109430625560243e-09, "loss": 0.2368, "step": 5460 }, { "epoch": 0.99, "learning_rate": 7.822418969253842e-09, "loss": 0.1779, "step": 5461 }, { "epoch": 0.99, "learning_rate": 7.540576912302699e-09, "loss": 0.1278, "step": 5462 }, { "epoch": 0.99, "learning_rate": 7.263904551894074e-09, "loss": 0.1205, "step": 5463 }, { "epoch": 0.99, "learning_rate": 6.9924019834299856e-09, "loss": 0.2326, "step": 5464 }, { "epoch": 0.99, "learning_rate": 6.726069300527216e-09, "loss": 0.2791, "step": 5465 }, { "epoch": 0.99, "learning_rate": 6.464906595023967e-09, "loss": 0.2037, "step": 5466 }, { "epoch": 0.99, "learning_rate": 6.208913956974871e-09, "loss": 0.1672, "step": 5467 }, { "epoch": 0.99, "learning_rate": 5.958091474650984e-09, "loss": 0.2332, "step": 5468 }, { "epoch": 0.99, "learning_rate": 5.71243923453979e-09, "loss": 0.1943, "step": 5469 }, { "epoch": 0.99, "learning_rate": 5.4719573213501964e-09, "loss": 0.2198, "step": 5470 }, { "epoch": 0.99, "learning_rate": 5.236645818000873e-09, "loss": 0.1352, "step": 5471 }, { "epoch": 0.99, "learning_rate": 5.006504805633583e-09, "loss": 0.1233, "step": 5472 }, { "epoch": 0.99, "learning_rate": 4.7815343636065105e-09, "loss": 0.1954, "step": 5473 }, { "epoch": 0.99, "learning_rate": 4.561734569494269e-09, "loss": 0.1706, "step": 5474 }, { "epoch": 0.99, "learning_rate": 4.347105499086235e-09, "loss": 0.1579, "step": 5475 }, { "epoch": 0.99, "learning_rate": 4.1376472263898736e-09, "loss": 0.195, "step": 5476 }, { "epoch": 0.99, "learning_rate": 3.933359823634075e-09, "loss": 0.1945, "step": 5477 }, { "epoch": 0.99, "learning_rate": 3.73424336125916e-09, "loss": 0.1516, "step": 5478 }, { "epoch": 0.99, "learning_rate": 3.540297907925205e-09, "loss": 0.1735, "step": 5479 }, { "epoch": 0.99, "learning_rate": 3.3515235305070504e-09, "loss": 0.2415, "step": 5480 }, { "epoch": 0.99, "learning_rate": 3.1679202940992914e-09, "loss": 0.1761, "step": 5481 }, { "epoch": 0.99, "learning_rate": 2.9894882620096207e-09, "loss": 0.1654, "step": 5482 }, { "epoch": 0.99, "learning_rate": 2.8162274957688193e-09, "loss": 0.1094, "step": 5483 }, { "epoch": 0.99, "learning_rate": 2.648138055117433e-09, "loss": 0.1776, "step": 5484 }, { "epoch": 0.99, "learning_rate": 2.4852199980174295e-09, "loss": 0.2162, "step": 5485 }, { "epoch": 0.99, "learning_rate": 2.3274733806455394e-09, "loss": 0.1454, "step": 5486 }, { "epoch": 0.99, "learning_rate": 2.1748982573949193e-09, "loss": 0.1921, "step": 5487 }, { "epoch": 0.99, "learning_rate": 2.0274946808784833e-09, "loss": 0.2012, "step": 5488 }, { "epoch": 1.0, "learning_rate": 1.8852627019239065e-09, "loss": 0.1339, "step": 5489 }, { "epoch": 1.0, "learning_rate": 1.7482023695752914e-09, "loss": 0.1626, "step": 5490 }, { "epoch": 1.0, "learning_rate": 1.616313731091501e-09, "loss": 0.1977, "step": 5491 }, { "epoch": 1.0, "learning_rate": 1.4895968319528219e-09, "loss": 0.1158, "step": 5492 }, { "epoch": 1.0, "learning_rate": 1.3680517158543015e-09, "loss": 0.1703, "step": 5493 }, { "epoch": 1.0, "learning_rate": 1.2516784247040835e-09, "loss": 0.2352, "step": 5494 }, { "epoch": 1.0, "learning_rate": 1.1404769986333997e-09, "loss": 0.1661, "step": 5495 }, { "epoch": 1.0, "learning_rate": 1.0344474759832467e-09, "loss": 0.2015, "step": 5496 }, { "epoch": 1.0, "learning_rate": 9.3358989331771e-10, "loss": 0.1017, "step": 5497 }, { "epoch": 1.0, "learning_rate": 8.379042854139706e-10, "loss": 0.1523, "step": 5498 }, { "epoch": 1.0, "learning_rate": 7.473906852639711e-10, "loss": 0.108, "step": 5499 }, { "epoch": 1.0, "learning_rate": 6.620491240810766e-10, "loss": 0.2133, "step": 5500 }, { "epoch": 1.0, "learning_rate": 5.818796312917485e-10, "loss": 0.1665, "step": 5501 }, { "epoch": 1.0, "learning_rate": 5.0688223454054e-10, "loss": 0.1258, "step": 5502 }, { "epoch": 1.0, "learning_rate": 4.370569596867657e-10, "loss": 0.1739, "step": 5503 }, { "epoch": 1.0, "learning_rate": 3.724038308094979e-10, "loss": 0.1375, "step": 5504 }, { "epoch": 1.0, "learning_rate": 3.129228702009046e-10, "loss": 0.19, "step": 5505 }, { "epoch": 1.0, "learning_rate": 2.586140983712459e-10, "loss": 0.1464, "step": 5506 }, { "epoch": 1.0, "learning_rate": 2.0947753404720882e-10, "loss": 0.1952, "step": 5507 }, { "epoch": 1.0, "learning_rate": 1.6551319417357214e-10, "loss": 0.135, "step": 5508 }, { "epoch": 1.0, "learning_rate": 1.267210939098762e-10, "loss": 0.1773, "step": 5509 }, { "epoch": 1.0, "learning_rate": 9.310124663042263e-11, "loss": 0.1206, "step": 5510 }, { "epoch": 1.0, "learning_rate": 6.465366392927053e-11, "loss": 0.1892, "step": 5511 }, { "epoch": 1.0, "learning_rate": 4.1378355615240283e-11, "loss": 0.1978, "step": 5512 }, { "epoch": 1.0, "learning_rate": 2.3275329715244375e-11, "loss": 0.1658, "step": 5513 }, { "epoch": 1.0, "learning_rate": 1.0344592470956648e-11, "loss": 0.219, "step": 5514 }, { "epoch": 1.0, "learning_rate": 2.5861483404776564e-12, "loss": 0.2264, "step": 5515 }, { "epoch": 1.0, "learning_rate": 0.0, "loss": 0.2115, "step": 5516 }, { "epoch": 1.0, "step": 5516, "total_flos": 9.45350436703961e+18, "train_loss": 0.25788842464720885, "train_runtime": 36245.7084, "train_samples_per_second": 19.476, "train_steps_per_second": 0.152 } ], "logging_steps": 1.0, "max_steps": 5516, "num_train_epochs": 1, "save_steps": 50000, "total_flos": 9.45350436703961e+18, "trial_name": null, "trial_params": null }