{ "best_metric": null, "best_model_checkpoint": null, "epoch": 2.999768572089794, "global_step": 3240, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0, "learning_rate": 2.0408163265306121e-07, "loss": 3.0106, "step": 1 }, { "epoch": 0.0, "learning_rate": 4.0816326530612243e-07, "loss": 2.9297, "step": 2 }, { "epoch": 0.0, "learning_rate": 6.122448979591837e-07, "loss": 3.0025, "step": 3 }, { "epoch": 0.0, "learning_rate": 8.163265306122449e-07, "loss": 2.9326, "step": 4 }, { "epoch": 0.0, "learning_rate": 1.0204081632653063e-06, "loss": 2.7998, "step": 5 }, { "epoch": 0.01, "learning_rate": 1.2244897959183673e-06, "loss": 2.7867, "step": 6 }, { "epoch": 0.01, "learning_rate": 1.4285714285714286e-06, "loss": 2.6389, "step": 7 }, { "epoch": 0.01, "learning_rate": 1.6326530612244897e-06, "loss": 2.5628, "step": 8 }, { "epoch": 0.01, "learning_rate": 1.8367346938775512e-06, "loss": 2.4567, "step": 9 }, { "epoch": 0.01, "learning_rate": 2.0408163265306125e-06, "loss": 2.3802, "step": 10 }, { "epoch": 0.01, "learning_rate": 2.244897959183674e-06, "loss": 2.3859, "step": 11 }, { "epoch": 0.01, "learning_rate": 2.4489795918367347e-06, "loss": 2.3559, "step": 12 }, { "epoch": 0.01, "learning_rate": 2.6530612244897964e-06, "loss": 2.3266, "step": 13 }, { "epoch": 0.01, "learning_rate": 2.8571428571428573e-06, "loss": 2.3431, "step": 14 }, { "epoch": 0.01, "learning_rate": 3.0612244897959185e-06, "loss": 2.2694, "step": 15 }, { "epoch": 0.01, "learning_rate": 3.2653061224489794e-06, "loss": 2.2728, "step": 16 }, { "epoch": 0.02, "learning_rate": 3.469387755102041e-06, "loss": 2.2339, "step": 17 }, { "epoch": 0.02, "learning_rate": 3.6734693877551024e-06, "loss": 2.1722, "step": 18 }, { "epoch": 0.02, "learning_rate": 3.877551020408164e-06, "loss": 2.1433, "step": 19 }, { "epoch": 0.02, "learning_rate": 4.081632653061225e-06, "loss": 2.2311, "step": 20 }, { "epoch": 0.02, "learning_rate": 4.2857142857142855e-06, "loss": 2.1308, "step": 21 }, { "epoch": 0.02, "learning_rate": 4.489795918367348e-06, "loss": 2.2164, "step": 22 }, { "epoch": 0.02, "learning_rate": 4.693877551020409e-06, "loss": 2.137, "step": 23 }, { "epoch": 0.02, "learning_rate": 4.897959183673469e-06, "loss": 2.1153, "step": 24 }, { "epoch": 0.02, "learning_rate": 5.1020408163265315e-06, "loss": 2.1266, "step": 25 }, { "epoch": 0.02, "learning_rate": 5.306122448979593e-06, "loss": 2.1682, "step": 26 }, { "epoch": 0.02, "learning_rate": 5.510204081632653e-06, "loss": 2.1152, "step": 27 }, { "epoch": 0.03, "learning_rate": 5.7142857142857145e-06, "loss": 2.0586, "step": 28 }, { "epoch": 0.03, "learning_rate": 5.918367346938776e-06, "loss": 2.0635, "step": 29 }, { "epoch": 0.03, "learning_rate": 6.122448979591837e-06, "loss": 2.082, "step": 30 }, { "epoch": 0.03, "learning_rate": 6.326530612244899e-06, "loss": 2.0739, "step": 31 }, { "epoch": 0.03, "learning_rate": 6.530612244897959e-06, "loss": 2.1073, "step": 32 }, { "epoch": 0.03, "learning_rate": 6.734693877551021e-06, "loss": 2.0471, "step": 33 }, { "epoch": 0.03, "learning_rate": 6.938775510204082e-06, "loss": 2.0025, "step": 34 }, { "epoch": 0.03, "learning_rate": 7.1428571428571436e-06, "loss": 2.1306, "step": 35 }, { "epoch": 0.03, "learning_rate": 7.346938775510205e-06, "loss": 1.9763, "step": 36 }, { "epoch": 0.03, "learning_rate": 7.551020408163265e-06, "loss": 2.0322, "step": 37 }, { "epoch": 0.04, "learning_rate": 7.755102040816327e-06, "loss": 2.0537, "step": 38 }, { "epoch": 0.04, "learning_rate": 7.959183673469388e-06, "loss": 2.038, "step": 39 }, { "epoch": 0.04, "learning_rate": 8.16326530612245e-06, "loss": 2.0448, "step": 40 }, { "epoch": 0.04, "learning_rate": 8.36734693877551e-06, "loss": 1.9847, "step": 41 }, { "epoch": 0.04, "learning_rate": 8.571428571428571e-06, "loss": 2.0265, "step": 42 }, { "epoch": 0.04, "learning_rate": 8.775510204081633e-06, "loss": 1.9699, "step": 43 }, { "epoch": 0.04, "learning_rate": 8.979591836734695e-06, "loss": 1.9868, "step": 44 }, { "epoch": 0.04, "learning_rate": 9.183673469387756e-06, "loss": 1.9156, "step": 45 }, { "epoch": 0.04, "learning_rate": 9.387755102040818e-06, "loss": 2.0087, "step": 46 }, { "epoch": 0.04, "learning_rate": 9.591836734693878e-06, "loss": 1.971, "step": 47 }, { "epoch": 0.04, "learning_rate": 9.795918367346939e-06, "loss": 1.9905, "step": 48 }, { "epoch": 0.05, "learning_rate": 1e-05, "loss": 1.9504, "step": 49 }, { "epoch": 0.05, "learning_rate": 1.0204081632653063e-05, "loss": 1.9596, "step": 50 }, { "epoch": 0.05, "learning_rate": 1.0408163265306123e-05, "loss": 1.8734, "step": 51 }, { "epoch": 0.05, "learning_rate": 1.0612244897959186e-05, "loss": 1.9662, "step": 52 }, { "epoch": 0.05, "learning_rate": 1.0816326530612246e-05, "loss": 2.0354, "step": 53 }, { "epoch": 0.05, "learning_rate": 1.1020408163265306e-05, "loss": 1.9645, "step": 54 }, { "epoch": 0.05, "learning_rate": 1.1224489795918367e-05, "loss": 1.9583, "step": 55 }, { "epoch": 0.05, "learning_rate": 1.1428571428571429e-05, "loss": 2.013, "step": 56 }, { "epoch": 0.05, "learning_rate": 1.1632653061224491e-05, "loss": 1.8484, "step": 57 }, { "epoch": 0.05, "learning_rate": 1.1836734693877552e-05, "loss": 1.889, "step": 58 }, { "epoch": 0.05, "learning_rate": 1.2040816326530614e-05, "loss": 1.9051, "step": 59 }, { "epoch": 0.06, "learning_rate": 1.2244897959183674e-05, "loss": 1.946, "step": 60 }, { "epoch": 0.06, "learning_rate": 1.2448979591836736e-05, "loss": 1.8871, "step": 61 }, { "epoch": 0.06, "learning_rate": 1.2653061224489798e-05, "loss": 1.9507, "step": 62 }, { "epoch": 0.06, "learning_rate": 1.2857142857142859e-05, "loss": 1.9894, "step": 63 }, { "epoch": 0.06, "learning_rate": 1.3061224489795918e-05, "loss": 1.935, "step": 64 }, { "epoch": 0.06, "learning_rate": 1.326530612244898e-05, "loss": 1.8699, "step": 65 }, { "epoch": 0.06, "learning_rate": 1.3469387755102042e-05, "loss": 1.8827, "step": 66 }, { "epoch": 0.06, "learning_rate": 1.3673469387755102e-05, "loss": 1.8581, "step": 67 }, { "epoch": 0.06, "learning_rate": 1.3877551020408165e-05, "loss": 1.845, "step": 68 }, { "epoch": 0.06, "learning_rate": 1.4081632653061225e-05, "loss": 1.8418, "step": 69 }, { "epoch": 0.06, "learning_rate": 1.4285714285714287e-05, "loss": 1.9089, "step": 70 }, { "epoch": 0.07, "learning_rate": 1.448979591836735e-05, "loss": 1.867, "step": 71 }, { "epoch": 0.07, "learning_rate": 1.469387755102041e-05, "loss": 1.9254, "step": 72 }, { "epoch": 0.07, "learning_rate": 1.4897959183673472e-05, "loss": 1.992, "step": 73 }, { "epoch": 0.07, "learning_rate": 1.510204081632653e-05, "loss": 1.8507, "step": 74 }, { "epoch": 0.07, "learning_rate": 1.530612244897959e-05, "loss": 1.8939, "step": 75 }, { "epoch": 0.07, "learning_rate": 1.5510204081632655e-05, "loss": 1.9995, "step": 76 }, { "epoch": 0.07, "learning_rate": 1.5714285714285715e-05, "loss": 1.8635, "step": 77 }, { "epoch": 0.07, "learning_rate": 1.5918367346938776e-05, "loss": 1.8076, "step": 78 }, { "epoch": 0.07, "learning_rate": 1.612244897959184e-05, "loss": 1.87, "step": 79 }, { "epoch": 0.07, "learning_rate": 1.63265306122449e-05, "loss": 1.9116, "step": 80 }, { "epoch": 0.07, "learning_rate": 1.653061224489796e-05, "loss": 1.8728, "step": 81 }, { "epoch": 0.08, "learning_rate": 1.673469387755102e-05, "loss": 1.7926, "step": 82 }, { "epoch": 0.08, "learning_rate": 1.6938775510204085e-05, "loss": 1.8428, "step": 83 }, { "epoch": 0.08, "learning_rate": 1.7142857142857142e-05, "loss": 1.8949, "step": 84 }, { "epoch": 0.08, "learning_rate": 1.7346938775510206e-05, "loss": 1.9029, "step": 85 }, { "epoch": 0.08, "learning_rate": 1.7551020408163266e-05, "loss": 1.8754, "step": 86 }, { "epoch": 0.08, "learning_rate": 1.7755102040816327e-05, "loss": 1.7577, "step": 87 }, { "epoch": 0.08, "learning_rate": 1.795918367346939e-05, "loss": 1.8494, "step": 88 }, { "epoch": 0.08, "learning_rate": 1.816326530612245e-05, "loss": 1.9175, "step": 89 }, { "epoch": 0.08, "learning_rate": 1.836734693877551e-05, "loss": 1.8344, "step": 90 }, { "epoch": 0.08, "learning_rate": 1.8571428571428575e-05, "loss": 1.8043, "step": 91 }, { "epoch": 0.09, "learning_rate": 1.8775510204081636e-05, "loss": 1.8534, "step": 92 }, { "epoch": 0.09, "learning_rate": 1.8979591836734696e-05, "loss": 1.8783, "step": 93 }, { "epoch": 0.09, "learning_rate": 1.9183673469387756e-05, "loss": 1.8328, "step": 94 }, { "epoch": 0.09, "learning_rate": 1.9387755102040817e-05, "loss": 1.8762, "step": 95 }, { "epoch": 0.09, "learning_rate": 1.9591836734693877e-05, "loss": 1.7551, "step": 96 }, { "epoch": 0.09, "learning_rate": 1.979591836734694e-05, "loss": 1.8275, "step": 97 }, { "epoch": 0.09, "learning_rate": 2e-05, "loss": 1.8482, "step": 98 }, { "epoch": 0.09, "learning_rate": 1.9999995001296792e-05, "loss": 1.7939, "step": 99 }, { "epoch": 0.09, "learning_rate": 1.9999980005192153e-05, "loss": 1.8788, "step": 100 }, { "epoch": 0.09, "learning_rate": 1.999995501170108e-05, "loss": 1.8112, "step": 101 }, { "epoch": 0.09, "learning_rate": 1.9999920020848563e-05, "loss": 1.8746, "step": 102 }, { "epoch": 0.1, "learning_rate": 1.9999875032669576e-05, "loss": 1.6869, "step": 103 }, { "epoch": 0.1, "learning_rate": 1.9999820047209103e-05, "loss": 1.8205, "step": 104 }, { "epoch": 0.1, "learning_rate": 1.9999755064522116e-05, "loss": 1.7301, "step": 105 }, { "epoch": 0.1, "learning_rate": 1.999968008467357e-05, "loss": 1.8794, "step": 106 }, { "epoch": 0.1, "learning_rate": 1.9999595107738437e-05, "loss": 1.8282, "step": 107 }, { "epoch": 0.1, "learning_rate": 1.9999500133801672e-05, "loss": 1.8117, "step": 108 }, { "epoch": 0.1, "learning_rate": 1.9999395162958212e-05, "loss": 1.871, "step": 109 }, { "epoch": 0.1, "learning_rate": 1.9999280195313013e-05, "loss": 1.799, "step": 110 }, { "epoch": 0.1, "learning_rate": 1.9999155230981006e-05, "loss": 1.797, "step": 111 }, { "epoch": 0.1, "learning_rate": 1.999902027008713e-05, "loss": 1.7641, "step": 112 }, { "epoch": 0.1, "learning_rate": 1.99988753127663e-05, "loss": 1.8579, "step": 113 }, { "epoch": 0.11, "learning_rate": 1.9998720359163447e-05, "loss": 1.9091, "step": 114 }, { "epoch": 0.11, "learning_rate": 1.999855540943348e-05, "loss": 1.7814, "step": 115 }, { "epoch": 0.11, "learning_rate": 1.99983804637413e-05, "loss": 1.8526, "step": 116 }, { "epoch": 0.11, "learning_rate": 1.9998195522261814e-05, "loss": 1.8153, "step": 117 }, { "epoch": 0.11, "learning_rate": 1.9998000585179915e-05, "loss": 1.8187, "step": 118 }, { "epoch": 0.11, "learning_rate": 1.999779565269049e-05, "loss": 1.7922, "step": 119 }, { "epoch": 0.11, "learning_rate": 1.999758072499842e-05, "loss": 1.7853, "step": 120 }, { "epoch": 0.11, "learning_rate": 1.999735580231857e-05, "loss": 1.8746, "step": 121 }, { "epoch": 0.11, "learning_rate": 1.9997120884875804e-05, "loss": 1.75, "step": 122 }, { "epoch": 0.11, "learning_rate": 1.9996875972904987e-05, "loss": 1.7985, "step": 123 }, { "epoch": 0.11, "learning_rate": 1.9996621066650964e-05, "loss": 1.7018, "step": 124 }, { "epoch": 0.12, "learning_rate": 1.9996356166368574e-05, "loss": 1.8261, "step": 125 }, { "epoch": 0.12, "learning_rate": 1.9996081272322646e-05, "loss": 1.7566, "step": 126 }, { "epoch": 0.12, "learning_rate": 1.999579638478801e-05, "loss": 1.8467, "step": 127 }, { "epoch": 0.12, "learning_rate": 1.9995501504049474e-05, "loss": 1.7588, "step": 128 }, { "epoch": 0.12, "learning_rate": 1.9995196630401845e-05, "loss": 1.8112, "step": 129 }, { "epoch": 0.12, "learning_rate": 1.9994881764149915e-05, "loss": 1.7269, "step": 130 }, { "epoch": 0.12, "learning_rate": 1.999455690560847e-05, "loss": 1.8115, "step": 131 }, { "epoch": 0.12, "learning_rate": 1.999422205510228e-05, "loss": 1.658, "step": 132 }, { "epoch": 0.12, "learning_rate": 1.9993877212966118e-05, "loss": 1.7918, "step": 133 }, { "epoch": 0.12, "learning_rate": 1.999352237954473e-05, "loss": 1.8567, "step": 134 }, { "epoch": 0.12, "learning_rate": 1.999315755519286e-05, "loss": 1.7958, "step": 135 }, { "epoch": 0.13, "learning_rate": 1.9992782740275232e-05, "loss": 1.8341, "step": 136 }, { "epoch": 0.13, "learning_rate": 1.9992397935166573e-05, "loss": 1.8179, "step": 137 }, { "epoch": 0.13, "learning_rate": 1.9992003140251582e-05, "loss": 1.8184, "step": 138 }, { "epoch": 0.13, "learning_rate": 1.9991598355924953e-05, "loss": 1.7714, "step": 139 }, { "epoch": 0.13, "learning_rate": 1.9991183582591368e-05, "loss": 1.808, "step": 140 }, { "epoch": 0.13, "learning_rate": 1.999075882066549e-05, "loss": 1.707, "step": 141 }, { "epoch": 0.13, "learning_rate": 1.9990324070571966e-05, "loss": 1.8003, "step": 142 }, { "epoch": 0.13, "learning_rate": 1.998987933274544e-05, "loss": 1.794, "step": 143 }, { "epoch": 0.13, "learning_rate": 1.9989424607630538e-05, "loss": 1.7204, "step": 144 }, { "epoch": 0.13, "learning_rate": 1.9988959895681853e-05, "loss": 1.7633, "step": 145 }, { "epoch": 0.14, "learning_rate": 1.9988485197363992e-05, "loss": 1.7979, "step": 146 }, { "epoch": 0.14, "learning_rate": 1.9988000513151525e-05, "loss": 1.8042, "step": 147 }, { "epoch": 0.14, "learning_rate": 1.9987505843529004e-05, "loss": 1.7736, "step": 148 }, { "epoch": 0.14, "learning_rate": 1.9987001188990977e-05, "loss": 1.7565, "step": 149 }, { "epoch": 0.14, "learning_rate": 1.9986486550041967e-05, "loss": 1.8011, "step": 150 }, { "epoch": 0.14, "learning_rate": 1.998596192719648e-05, "loss": 1.8643, "step": 151 }, { "epoch": 0.14, "learning_rate": 1.9985427320979e-05, "loss": 1.7254, "step": 152 }, { "epoch": 0.14, "learning_rate": 1.9984882731923992e-05, "loss": 1.7217, "step": 153 }, { "epoch": 0.14, "learning_rate": 1.9984328160575913e-05, "loss": 1.8794, "step": 154 }, { "epoch": 0.14, "learning_rate": 1.9983763607489183e-05, "loss": 1.7967, "step": 155 }, { "epoch": 0.14, "learning_rate": 1.998318907322821e-05, "loss": 1.6882, "step": 156 }, { "epoch": 0.15, "learning_rate": 1.998260455836738e-05, "loss": 1.7917, "step": 157 }, { "epoch": 0.15, "learning_rate": 1.998201006349106e-05, "loss": 1.6658, "step": 158 }, { "epoch": 0.15, "learning_rate": 1.9981405589193582e-05, "loss": 1.7443, "step": 159 }, { "epoch": 0.15, "learning_rate": 1.9980791136079274e-05, "loss": 1.7799, "step": 160 }, { "epoch": 0.15, "learning_rate": 1.998016670476242e-05, "loss": 1.7464, "step": 161 }, { "epoch": 0.15, "learning_rate": 1.9979532295867295e-05, "loss": 1.7084, "step": 162 }, { "epoch": 0.15, "learning_rate": 1.9978887910028143e-05, "loss": 1.7675, "step": 163 }, { "epoch": 0.15, "learning_rate": 1.9978233547889182e-05, "loss": 1.8198, "step": 164 }, { "epoch": 0.15, "learning_rate": 1.9977569210104603e-05, "loss": 1.7375, "step": 165 }, { "epoch": 0.15, "learning_rate": 1.9976894897338575e-05, "loss": 1.8148, "step": 166 }, { "epoch": 0.15, "learning_rate": 1.9976210610265234e-05, "loss": 1.7775, "step": 167 }, { "epoch": 0.16, "learning_rate": 1.997551634956868e-05, "loss": 1.7816, "step": 168 }, { "epoch": 0.16, "learning_rate": 1.9974812115943015e-05, "loss": 1.7231, "step": 169 }, { "epoch": 0.16, "learning_rate": 1.997409791009227e-05, "loss": 1.7435, "step": 170 }, { "epoch": 0.16, "learning_rate": 1.9973373732730476e-05, "loss": 1.709, "step": 171 }, { "epoch": 0.16, "learning_rate": 1.997263958458162e-05, "loss": 1.7678, "step": 172 }, { "epoch": 0.16, "learning_rate": 1.9971895466379662e-05, "loss": 1.7461, "step": 173 }, { "epoch": 0.16, "learning_rate": 1.997114137886852e-05, "loss": 1.7221, "step": 174 }, { "epoch": 0.16, "learning_rate": 1.9970377322802096e-05, "loss": 1.7375, "step": 175 }, { "epoch": 0.16, "learning_rate": 1.996960329894424e-05, "loss": 1.7553, "step": 176 }, { "epoch": 0.16, "learning_rate": 1.9968819308068776e-05, "loss": 1.7795, "step": 177 }, { "epoch": 0.16, "learning_rate": 1.9968025350959497e-05, "loss": 1.732, "step": 178 }, { "epoch": 0.17, "learning_rate": 1.9967221428410147e-05, "loss": 1.7815, "step": 179 }, { "epoch": 0.17, "learning_rate": 1.996640754122445e-05, "loss": 1.708, "step": 180 }, { "epoch": 0.17, "learning_rate": 1.9965583690216067e-05, "loss": 1.7438, "step": 181 }, { "epoch": 0.17, "learning_rate": 1.9964749876208648e-05, "loss": 1.7566, "step": 182 }, { "epoch": 0.17, "learning_rate": 1.9963906100035787e-05, "loss": 1.7025, "step": 183 }, { "epoch": 0.17, "learning_rate": 1.9963052362541046e-05, "loss": 1.7382, "step": 184 }, { "epoch": 0.17, "learning_rate": 1.996218866457793e-05, "loss": 1.7608, "step": 185 }, { "epoch": 0.17, "learning_rate": 1.9961315007009922e-05, "loss": 1.7736, "step": 186 }, { "epoch": 0.17, "learning_rate": 1.9960431390710445e-05, "loss": 1.7374, "step": 187 }, { "epoch": 0.17, "learning_rate": 1.9959537816562897e-05, "loss": 1.7431, "step": 188 }, { "epoch": 0.17, "learning_rate": 1.9958634285460615e-05, "loss": 1.7212, "step": 189 }, { "epoch": 0.18, "learning_rate": 1.995772079830689e-05, "loss": 1.693, "step": 190 }, { "epoch": 0.18, "learning_rate": 1.9956797356014982e-05, "loss": 1.701, "step": 191 }, { "epoch": 0.18, "learning_rate": 1.995586395950809e-05, "loss": 1.679, "step": 192 }, { "epoch": 0.18, "learning_rate": 1.9954920609719366e-05, "loss": 1.7196, "step": 193 }, { "epoch": 0.18, "learning_rate": 1.9953967307591916e-05, "loss": 1.7474, "step": 194 }, { "epoch": 0.18, "learning_rate": 1.9953004054078796e-05, "loss": 1.7105, "step": 195 }, { "epoch": 0.18, "learning_rate": 1.9952030850143012e-05, "loss": 1.7101, "step": 196 }, { "epoch": 0.18, "learning_rate": 1.9951047696757513e-05, "loss": 1.7413, "step": 197 }, { "epoch": 0.18, "learning_rate": 1.9950054594905196e-05, "loss": 1.7977, "step": 198 }, { "epoch": 0.18, "learning_rate": 1.9949051545578906e-05, "loss": 1.6627, "step": 199 }, { "epoch": 0.19, "learning_rate": 1.9948038549781436e-05, "loss": 1.7136, "step": 200 }, { "epoch": 0.19, "learning_rate": 1.9947015608525513e-05, "loss": 1.6353, "step": 201 }, { "epoch": 0.19, "learning_rate": 1.9945982722833817e-05, "loss": 1.7688, "step": 202 }, { "epoch": 0.19, "learning_rate": 1.9944939893738963e-05, "loss": 1.7152, "step": 203 }, { "epoch": 0.19, "learning_rate": 1.9943887122283512e-05, "loss": 1.7601, "step": 204 }, { "epoch": 0.19, "learning_rate": 1.9942824409519966e-05, "loss": 1.723, "step": 205 }, { "epoch": 0.19, "learning_rate": 1.9941751756510755e-05, "loss": 1.7823, "step": 206 }, { "epoch": 0.19, "learning_rate": 1.9940669164328253e-05, "loss": 1.7427, "step": 207 }, { "epoch": 0.19, "learning_rate": 1.9939576634054782e-05, "loss": 1.81, "step": 208 }, { "epoch": 0.19, "learning_rate": 1.993847416678258e-05, "loss": 1.8047, "step": 209 }, { "epoch": 0.19, "learning_rate": 1.993736176361383e-05, "loss": 1.6247, "step": 210 }, { "epoch": 0.2, "learning_rate": 1.9936239425660643e-05, "loss": 1.7159, "step": 211 }, { "epoch": 0.2, "learning_rate": 1.9935107154045075e-05, "loss": 1.7172, "step": 212 }, { "epoch": 0.2, "learning_rate": 1.99339649498991e-05, "loss": 1.6248, "step": 213 }, { "epoch": 0.2, "learning_rate": 1.993281281436462e-05, "loss": 1.7227, "step": 214 }, { "epoch": 0.2, "learning_rate": 1.993165074859348e-05, "loss": 1.7234, "step": 215 }, { "epoch": 0.2, "learning_rate": 1.9930478753747438e-05, "loss": 1.6676, "step": 216 }, { "epoch": 0.2, "learning_rate": 1.992929683099819e-05, "loss": 1.706, "step": 217 }, { "epoch": 0.2, "learning_rate": 1.992810498152735e-05, "loss": 1.7807, "step": 218 }, { "epoch": 0.2, "learning_rate": 1.992690320652646e-05, "loss": 1.7486, "step": 219 }, { "epoch": 0.2, "learning_rate": 1.992569150719698e-05, "loss": 1.6897, "step": 220 }, { "epoch": 0.2, "learning_rate": 1.992446988475029e-05, "loss": 1.7187, "step": 221 }, { "epoch": 0.21, "learning_rate": 1.992323834040771e-05, "loss": 1.7442, "step": 222 }, { "epoch": 0.21, "learning_rate": 1.992199687540046e-05, "loss": 1.6998, "step": 223 }, { "epoch": 0.21, "learning_rate": 1.992074549096967e-05, "loss": 1.6905, "step": 224 }, { "epoch": 0.21, "learning_rate": 1.991948418836642e-05, "loss": 1.6737, "step": 225 }, { "epoch": 0.21, "learning_rate": 1.9918212968851672e-05, "loss": 1.7179, "step": 226 }, { "epoch": 0.21, "learning_rate": 1.991693183369632e-05, "loss": 1.652, "step": 227 }, { "epoch": 0.21, "learning_rate": 1.9915640784181163e-05, "loss": 1.6959, "step": 228 }, { "epoch": 0.21, "learning_rate": 1.9914339821596927e-05, "loss": 1.6423, "step": 229 }, { "epoch": 0.21, "learning_rate": 1.9913028947244223e-05, "loss": 1.7294, "step": 230 }, { "epoch": 0.21, "learning_rate": 1.991170816243359e-05, "loss": 1.7326, "step": 231 }, { "epoch": 0.21, "learning_rate": 1.9910377468485475e-05, "loss": 1.7333, "step": 232 }, { "epoch": 0.22, "learning_rate": 1.9909036866730225e-05, "loss": 1.6681, "step": 233 }, { "epoch": 0.22, "learning_rate": 1.990768635850809e-05, "loss": 1.6923, "step": 234 }, { "epoch": 0.22, "learning_rate": 1.990632594516923e-05, "loss": 1.7128, "step": 235 }, { "epoch": 0.22, "learning_rate": 1.9904955628073708e-05, "loss": 1.7364, "step": 236 }, { "epoch": 0.22, "learning_rate": 1.9903575408591484e-05, "loss": 1.6989, "step": 237 }, { "epoch": 0.22, "learning_rate": 1.990218528810242e-05, "loss": 1.8233, "step": 238 }, { "epoch": 0.22, "learning_rate": 1.990078526799627e-05, "loss": 1.709, "step": 239 }, { "epoch": 0.22, "learning_rate": 1.9899375349672703e-05, "loss": 1.6963, "step": 240 }, { "epoch": 0.22, "learning_rate": 1.989795553454126e-05, "loss": 1.7382, "step": 241 }, { "epoch": 0.22, "learning_rate": 1.9896525824021397e-05, "loss": 1.6617, "step": 242 }, { "epoch": 0.22, "learning_rate": 1.989508621954245e-05, "loss": 1.7113, "step": 243 }, { "epoch": 0.23, "learning_rate": 1.9893636722543645e-05, "loss": 1.7422, "step": 244 }, { "epoch": 0.23, "learning_rate": 1.989217733447411e-05, "loss": 1.6386, "step": 245 }, { "epoch": 0.23, "learning_rate": 1.9890708056792852e-05, "loss": 1.7018, "step": 246 }, { "epoch": 0.23, "learning_rate": 1.988922889096877e-05, "loss": 1.7149, "step": 247 }, { "epoch": 0.23, "learning_rate": 1.988773983848064e-05, "loss": 1.7509, "step": 248 }, { "epoch": 0.23, "learning_rate": 1.9886240900817136e-05, "loss": 1.69, "step": 249 }, { "epoch": 0.23, "learning_rate": 1.9884732079476804e-05, "loss": 1.7509, "step": 250 }, { "epoch": 0.23, "learning_rate": 1.988321337596807e-05, "loss": 1.6935, "step": 251 }, { "epoch": 0.23, "learning_rate": 1.9881684791809248e-05, "loss": 1.7079, "step": 252 }, { "epoch": 0.23, "learning_rate": 1.9880146328528527e-05, "loss": 1.7328, "step": 253 }, { "epoch": 0.24, "learning_rate": 1.9878597987663966e-05, "loss": 1.7152, "step": 254 }, { "epoch": 0.24, "learning_rate": 1.987703977076351e-05, "loss": 1.7183, "step": 255 }, { "epoch": 0.24, "learning_rate": 1.9875471679384965e-05, "loss": 1.6761, "step": 256 }, { "epoch": 0.24, "learning_rate": 1.9873893715096023e-05, "loss": 1.7433, "step": 257 }, { "epoch": 0.24, "learning_rate": 1.9872305879474234e-05, "loss": 1.8061, "step": 258 }, { "epoch": 0.24, "learning_rate": 1.9870708174107024e-05, "loss": 1.6485, "step": 259 }, { "epoch": 0.24, "learning_rate": 1.9869100600591685e-05, "loss": 1.6579, "step": 260 }, { "epoch": 0.24, "learning_rate": 1.986748316053537e-05, "loss": 1.6586, "step": 261 }, { "epoch": 0.24, "learning_rate": 1.9865855855555104e-05, "loss": 1.7108, "step": 262 }, { "epoch": 0.24, "learning_rate": 1.9864218687277764e-05, "loss": 1.754, "step": 263 }, { "epoch": 0.24, "learning_rate": 1.98625716573401e-05, "loss": 1.6751, "step": 264 }, { "epoch": 0.25, "learning_rate": 1.986091476738871e-05, "loss": 1.6898, "step": 265 }, { "epoch": 0.25, "learning_rate": 1.9859248019080057e-05, "loss": 1.5974, "step": 266 }, { "epoch": 0.25, "learning_rate": 1.9857571414080454e-05, "loss": 1.7445, "step": 267 }, { "epoch": 0.25, "learning_rate": 1.9855884954066077e-05, "loss": 1.6499, "step": 268 }, { "epoch": 0.25, "learning_rate": 1.9854188640722946e-05, "loss": 1.7543, "step": 269 }, { "epoch": 0.25, "learning_rate": 1.9852482475746928e-05, "loss": 1.6573, "step": 270 }, { "epoch": 0.25, "learning_rate": 1.9850766460843752e-05, "loss": 1.729, "step": 271 }, { "epoch": 0.25, "learning_rate": 1.984904059772899e-05, "loss": 1.7073, "step": 272 }, { "epoch": 0.25, "learning_rate": 1.9847304888128055e-05, "loss": 1.6854, "step": 273 }, { "epoch": 0.25, "learning_rate": 1.9845559333776202e-05, "loss": 1.7604, "step": 274 }, { "epoch": 0.25, "learning_rate": 1.9843803936418538e-05, "loss": 1.7005, "step": 275 }, { "epoch": 0.26, "learning_rate": 1.9842038697810002e-05, "loss": 1.7124, "step": 276 }, { "epoch": 0.26, "learning_rate": 1.984026361971538e-05, "loss": 1.7814, "step": 277 }, { "epoch": 0.26, "learning_rate": 1.983847870390928e-05, "loss": 1.7164, "step": 278 }, { "epoch": 0.26, "learning_rate": 1.9836683952176166e-05, "loss": 1.6819, "step": 279 }, { "epoch": 0.26, "learning_rate": 1.983487936631032e-05, "loss": 1.7138, "step": 280 }, { "epoch": 0.26, "learning_rate": 1.9833064948115853e-05, "loss": 1.7394, "step": 281 }, { "epoch": 0.26, "learning_rate": 1.983124069940672e-05, "loss": 1.7212, "step": 282 }, { "epoch": 0.26, "learning_rate": 1.9829406622006698e-05, "loss": 1.7153, "step": 283 }, { "epoch": 0.26, "learning_rate": 1.982756271774938e-05, "loss": 1.7207, "step": 284 }, { "epoch": 0.26, "learning_rate": 1.9825708988478204e-05, "loss": 1.7137, "step": 285 }, { "epoch": 0.26, "learning_rate": 1.9823845436046406e-05, "loss": 1.694, "step": 286 }, { "epoch": 0.27, "learning_rate": 1.982197206231707e-05, "loss": 1.688, "step": 287 }, { "epoch": 0.27, "learning_rate": 1.9820088869163066e-05, "loss": 1.6669, "step": 288 }, { "epoch": 0.27, "learning_rate": 1.9818195858467115e-05, "loss": 1.6833, "step": 289 }, { "epoch": 0.27, "learning_rate": 1.9816293032121725e-05, "loss": 1.7326, "step": 290 }, { "epoch": 0.27, "learning_rate": 1.9814380392029237e-05, "loss": 1.7629, "step": 291 }, { "epoch": 0.27, "learning_rate": 1.981245794010179e-05, "loss": 1.6857, "step": 292 }, { "epoch": 0.27, "learning_rate": 1.9810525678261336e-05, "loss": 1.7007, "step": 293 }, { "epoch": 0.27, "learning_rate": 1.980858360843964e-05, "loss": 1.6837, "step": 294 }, { "epoch": 0.27, "learning_rate": 1.980663173257827e-05, "loss": 1.6858, "step": 295 }, { "epoch": 0.27, "learning_rate": 1.9804670052628588e-05, "loss": 1.6579, "step": 296 }, { "epoch": 0.27, "learning_rate": 1.9802698570551775e-05, "loss": 1.6462, "step": 297 }, { "epoch": 0.28, "learning_rate": 1.9800717288318792e-05, "loss": 1.6859, "step": 298 }, { "epoch": 0.28, "learning_rate": 1.9798726207910412e-05, "loss": 1.6647, "step": 299 }, { "epoch": 0.28, "learning_rate": 1.9796725331317196e-05, "loss": 1.6708, "step": 300 }, { "epoch": 0.28, "learning_rate": 1.9794714660539508e-05, "loss": 1.6591, "step": 301 }, { "epoch": 0.28, "learning_rate": 1.9792694197587496e-05, "loss": 1.7011, "step": 302 }, { "epoch": 0.28, "learning_rate": 1.979066394448109e-05, "loss": 1.7299, "step": 303 }, { "epoch": 0.28, "learning_rate": 1.9788623903250024e-05, "loss": 1.7186, "step": 304 }, { "epoch": 0.28, "learning_rate": 1.9786574075933817e-05, "loss": 1.6487, "step": 305 }, { "epoch": 0.28, "learning_rate": 1.9784514464581747e-05, "loss": 1.659, "step": 306 }, { "epoch": 0.28, "learning_rate": 1.9782445071252908e-05, "loss": 1.6985, "step": 307 }, { "epoch": 0.29, "learning_rate": 1.9780365898016147e-05, "loss": 1.6711, "step": 308 }, { "epoch": 0.29, "learning_rate": 1.97782769469501e-05, "loss": 1.6114, "step": 309 }, { "epoch": 0.29, "learning_rate": 1.9776178220143177e-05, "loss": 1.6149, "step": 310 }, { "epoch": 0.29, "learning_rate": 1.977406971969356e-05, "loss": 1.69, "step": 311 }, { "epoch": 0.29, "learning_rate": 1.9771951447709204e-05, "loss": 1.6948, "step": 312 }, { "epoch": 0.29, "learning_rate": 1.9769823406307828e-05, "loss": 1.6527, "step": 313 }, { "epoch": 0.29, "learning_rate": 1.976768559761693e-05, "loss": 1.6453, "step": 314 }, { "epoch": 0.29, "learning_rate": 1.9765538023773755e-05, "loss": 1.6442, "step": 315 }, { "epoch": 0.29, "learning_rate": 1.976338068692532e-05, "loss": 1.6006, "step": 316 }, { "epoch": 0.29, "learning_rate": 1.976121358922841e-05, "loss": 1.5816, "step": 317 }, { "epoch": 0.29, "learning_rate": 1.9759036732849552e-05, "loss": 1.6238, "step": 318 }, { "epoch": 0.3, "learning_rate": 1.9756850119965046e-05, "loss": 1.5985, "step": 319 }, { "epoch": 0.3, "learning_rate": 1.9754653752760932e-05, "loss": 1.6619, "step": 320 }, { "epoch": 0.3, "learning_rate": 1.9752447633433006e-05, "loss": 1.6115, "step": 321 }, { "epoch": 0.3, "learning_rate": 1.9750231764186824e-05, "loss": 1.6928, "step": 322 }, { "epoch": 0.3, "learning_rate": 1.974800614723767e-05, "loss": 1.6147, "step": 323 }, { "epoch": 0.3, "learning_rate": 1.9745770784810593e-05, "loss": 1.7534, "step": 324 }, { "epoch": 0.3, "learning_rate": 1.9743525679140368e-05, "loss": 1.7308, "step": 325 }, { "epoch": 0.3, "learning_rate": 1.9741270832471524e-05, "loss": 1.6289, "step": 326 }, { "epoch": 0.3, "learning_rate": 1.973900624705832e-05, "loss": 1.7031, "step": 327 }, { "epoch": 0.3, "learning_rate": 1.9736731925164755e-05, "loss": 1.5772, "step": 328 }, { "epoch": 0.3, "learning_rate": 1.973444786906456e-05, "loss": 1.6647, "step": 329 }, { "epoch": 0.31, "learning_rate": 1.9732154081041197e-05, "loss": 1.6248, "step": 330 }, { "epoch": 0.31, "learning_rate": 1.9729850563387863e-05, "loss": 1.6237, "step": 331 }, { "epoch": 0.31, "learning_rate": 1.9727537318407476e-05, "loss": 1.7545, "step": 332 }, { "epoch": 0.31, "learning_rate": 1.9725214348412685e-05, "loss": 1.6415, "step": 333 }, { "epoch": 0.31, "learning_rate": 1.9722881655725855e-05, "loss": 1.6791, "step": 334 }, { "epoch": 0.31, "learning_rate": 1.9720539242679068e-05, "loss": 1.6736, "step": 335 }, { "epoch": 0.31, "learning_rate": 1.9718187111614136e-05, "loss": 1.5559, "step": 336 }, { "epoch": 0.31, "learning_rate": 1.9715825264882578e-05, "loss": 1.7194, "step": 337 }, { "epoch": 0.31, "learning_rate": 1.971345370484563e-05, "loss": 1.6078, "step": 338 }, { "epoch": 0.31, "learning_rate": 1.9711072433874235e-05, "loss": 1.6238, "step": 339 }, { "epoch": 0.31, "learning_rate": 1.970868145434905e-05, "loss": 1.5952, "step": 340 }, { "epoch": 0.32, "learning_rate": 1.9706280768660428e-05, "loss": 1.6478, "step": 341 }, { "epoch": 0.32, "learning_rate": 1.9703870379208435e-05, "loss": 1.6577, "step": 342 }, { "epoch": 0.32, "learning_rate": 1.9701450288402837e-05, "loss": 1.716, "step": 343 }, { "epoch": 0.32, "learning_rate": 1.9699020498663096e-05, "loss": 1.7044, "step": 344 }, { "epoch": 0.32, "learning_rate": 1.9696581012418372e-05, "loss": 1.6582, "step": 345 }, { "epoch": 0.32, "learning_rate": 1.969413183210752e-05, "loss": 1.6606, "step": 346 }, { "epoch": 0.32, "learning_rate": 1.9691672960179076e-05, "loss": 1.5681, "step": 347 }, { "epoch": 0.32, "learning_rate": 1.9689204399091285e-05, "loss": 1.6137, "step": 348 }, { "epoch": 0.32, "learning_rate": 1.9686726151312065e-05, "loss": 1.6551, "step": 349 }, { "epoch": 0.32, "learning_rate": 1.968423821931902e-05, "loss": 1.6704, "step": 350 }, { "epoch": 0.32, "learning_rate": 1.9681740605599437e-05, "loss": 1.6895, "step": 351 }, { "epoch": 0.33, "learning_rate": 1.9679233312650283e-05, "loss": 1.6248, "step": 352 }, { "epoch": 0.33, "learning_rate": 1.9676716342978194e-05, "loss": 1.7029, "step": 353 }, { "epoch": 0.33, "learning_rate": 1.96741896990995e-05, "loss": 1.5891, "step": 354 }, { "epoch": 0.33, "learning_rate": 1.9671653383540176e-05, "loss": 1.7086, "step": 355 }, { "epoch": 0.33, "learning_rate": 1.966910739883589e-05, "loss": 1.6759, "step": 356 }, { "epoch": 0.33, "learning_rate": 1.9666551747531958e-05, "loss": 1.695, "step": 357 }, { "epoch": 0.33, "learning_rate": 1.9663986432183374e-05, "loss": 1.6362, "step": 358 }, { "epoch": 0.33, "learning_rate": 1.9661411455354783e-05, "loss": 1.6417, "step": 359 }, { "epoch": 0.33, "learning_rate": 1.96588268196205e-05, "loss": 1.683, "step": 360 }, { "epoch": 0.33, "learning_rate": 1.965623252756449e-05, "loss": 1.566, "step": 361 }, { "epoch": 0.34, "learning_rate": 1.965362858178037e-05, "loss": 1.5488, "step": 362 }, { "epoch": 0.34, "learning_rate": 1.9651014984871412e-05, "loss": 1.6096, "step": 363 }, { "epoch": 0.34, "learning_rate": 1.9648391739450532e-05, "loss": 1.6699, "step": 364 }, { "epoch": 0.34, "learning_rate": 1.9645758848140296e-05, "loss": 1.695, "step": 365 }, { "epoch": 0.34, "learning_rate": 1.9643116313572914e-05, "loss": 1.6836, "step": 366 }, { "epoch": 0.34, "learning_rate": 1.9640464138390233e-05, "loss": 1.627, "step": 367 }, { "epoch": 0.34, "learning_rate": 1.963780232524374e-05, "loss": 1.6236, "step": 368 }, { "epoch": 0.34, "learning_rate": 1.9635130876794564e-05, "loss": 1.6663, "step": 369 }, { "epoch": 0.34, "learning_rate": 1.9632449795713453e-05, "loss": 1.5965, "step": 370 }, { "epoch": 0.34, "learning_rate": 1.96297590846808e-05, "loss": 1.6093, "step": 371 }, { "epoch": 0.34, "learning_rate": 1.9627058746386608e-05, "loss": 1.6675, "step": 372 }, { "epoch": 0.35, "learning_rate": 1.9624348783530524e-05, "loss": 1.6005, "step": 373 }, { "epoch": 0.35, "learning_rate": 1.9621629198821802e-05, "loss": 1.6549, "step": 374 }, { "epoch": 0.35, "learning_rate": 1.9618899994979327e-05, "loss": 1.6708, "step": 375 }, { "epoch": 0.35, "learning_rate": 1.961616117473159e-05, "loss": 1.5961, "step": 376 }, { "epoch": 0.35, "learning_rate": 1.9613412740816703e-05, "loss": 1.6902, "step": 377 }, { "epoch": 0.35, "learning_rate": 1.961065469598239e-05, "loss": 1.636, "step": 378 }, { "epoch": 0.35, "learning_rate": 1.9607887042985977e-05, "loss": 1.6557, "step": 379 }, { "epoch": 0.35, "learning_rate": 1.96051097845944e-05, "loss": 1.7229, "step": 380 }, { "epoch": 0.35, "learning_rate": 1.9602322923584198e-05, "loss": 1.6529, "step": 381 }, { "epoch": 0.35, "learning_rate": 1.9599526462741508e-05, "loss": 1.5996, "step": 382 }, { "epoch": 0.35, "learning_rate": 1.9596720404862063e-05, "loss": 1.6688, "step": 383 }, { "epoch": 0.36, "learning_rate": 1.9593904752751197e-05, "loss": 1.6003, "step": 384 }, { "epoch": 0.36, "learning_rate": 1.9591079509223833e-05, "loss": 1.7359, "step": 385 }, { "epoch": 0.36, "learning_rate": 1.9588244677104477e-05, "loss": 1.594, "step": 386 }, { "epoch": 0.36, "learning_rate": 1.958540025922723e-05, "loss": 1.661, "step": 387 }, { "epoch": 0.36, "learning_rate": 1.958254625843577e-05, "loss": 1.5634, "step": 388 }, { "epoch": 0.36, "learning_rate": 1.9579682677583355e-05, "loss": 1.6698, "step": 389 }, { "epoch": 0.36, "learning_rate": 1.957680951953283e-05, "loss": 1.6361, "step": 390 }, { "epoch": 0.36, "learning_rate": 1.9573926787156602e-05, "loss": 1.6233, "step": 391 }, { "epoch": 0.36, "learning_rate": 1.957103448333666e-05, "loss": 1.6158, "step": 392 }, { "epoch": 0.36, "learning_rate": 1.9568132610964557e-05, "loss": 1.6852, "step": 393 }, { "epoch": 0.36, "learning_rate": 1.9565221172941408e-05, "loss": 1.6215, "step": 394 }, { "epoch": 0.37, "learning_rate": 1.9562300172177902e-05, "loss": 1.6139, "step": 395 }, { "epoch": 0.37, "learning_rate": 1.9559369611594278e-05, "loss": 1.6772, "step": 396 }, { "epoch": 0.37, "learning_rate": 1.9556429494120336e-05, "loss": 1.6799, "step": 397 }, { "epoch": 0.37, "learning_rate": 1.9553479822695435e-05, "loss": 1.5899, "step": 398 }, { "epoch": 0.37, "learning_rate": 1.955052060026848e-05, "loss": 1.7042, "step": 399 }, { "epoch": 0.37, "learning_rate": 1.9547551829797922e-05, "loss": 1.6338, "step": 400 }, { "epoch": 0.37, "learning_rate": 1.9544573514251766e-05, "loss": 1.6367, "step": 401 }, { "epoch": 0.37, "learning_rate": 1.954158565660756e-05, "loss": 1.6029, "step": 402 }, { "epoch": 0.37, "learning_rate": 1.9538588259852375e-05, "loss": 1.5336, "step": 403 }, { "epoch": 0.37, "learning_rate": 1.9535581326982834e-05, "loss": 1.6297, "step": 404 }, { "epoch": 0.37, "learning_rate": 1.9532564861005096e-05, "loss": 1.7, "step": 405 }, { "epoch": 0.38, "learning_rate": 1.9529538864934837e-05, "loss": 1.6746, "step": 406 }, { "epoch": 0.38, "learning_rate": 1.9526503341797274e-05, "loss": 1.6801, "step": 407 }, { "epoch": 0.38, "learning_rate": 1.952345829462714e-05, "loss": 1.5631, "step": 408 }, { "epoch": 0.38, "learning_rate": 1.9520403726468692e-05, "loss": 1.6667, "step": 409 }, { "epoch": 0.38, "learning_rate": 1.951733964037571e-05, "loss": 1.6667, "step": 410 }, { "epoch": 0.38, "learning_rate": 1.951426603941148e-05, "loss": 1.6026, "step": 411 }, { "epoch": 0.38, "learning_rate": 1.9511182926648808e-05, "loss": 1.6808, "step": 412 }, { "epoch": 0.38, "learning_rate": 1.950809030517001e-05, "loss": 1.6512, "step": 413 }, { "epoch": 0.38, "learning_rate": 1.95049881780669e-05, "loss": 1.6827, "step": 414 }, { "epoch": 0.38, "learning_rate": 1.9501876548440804e-05, "loss": 1.6254, "step": 415 }, { "epoch": 0.39, "learning_rate": 1.949875541940254e-05, "loss": 1.6428, "step": 416 }, { "epoch": 0.39, "learning_rate": 1.949562479407244e-05, "loss": 1.5806, "step": 417 }, { "epoch": 0.39, "learning_rate": 1.9492484675580302e-05, "loss": 1.6652, "step": 418 }, { "epoch": 0.39, "learning_rate": 1.948933506706544e-05, "loss": 1.5806, "step": 419 }, { "epoch": 0.39, "learning_rate": 1.948617597167664e-05, "loss": 1.6444, "step": 420 }, { "epoch": 0.39, "learning_rate": 1.948300739257218e-05, "loss": 1.6206, "step": 421 }, { "epoch": 0.39, "learning_rate": 1.947982933291982e-05, "loss": 1.5576, "step": 422 }, { "epoch": 0.39, "learning_rate": 1.9476641795896794e-05, "loss": 1.6169, "step": 423 }, { "epoch": 0.39, "learning_rate": 1.9473444784689814e-05, "loss": 1.5682, "step": 424 }, { "epoch": 0.39, "learning_rate": 1.9470238302495055e-05, "loss": 1.6124, "step": 425 }, { "epoch": 0.39, "learning_rate": 1.946702235251817e-05, "loss": 1.6994, "step": 426 }, { "epoch": 0.4, "learning_rate": 1.946379693797428e-05, "loss": 1.6763, "step": 427 }, { "epoch": 0.4, "learning_rate": 1.9460562062087963e-05, "loss": 1.6477, "step": 428 }, { "epoch": 0.4, "learning_rate": 1.945731772809325e-05, "loss": 1.6283, "step": 429 }, { "epoch": 0.4, "learning_rate": 1.9454063939233635e-05, "loss": 1.6896, "step": 430 }, { "epoch": 0.4, "learning_rate": 1.9450800698762066e-05, "loss": 1.7088, "step": 431 }, { "epoch": 0.4, "learning_rate": 1.9447528009940938e-05, "loss": 1.563, "step": 432 }, { "epoch": 0.4, "learning_rate": 1.9444245876042087e-05, "loss": 1.6244, "step": 433 }, { "epoch": 0.4, "learning_rate": 1.9440954300346797e-05, "loss": 1.6369, "step": 434 }, { "epoch": 0.4, "learning_rate": 1.9437653286145792e-05, "loss": 1.639, "step": 435 }, { "epoch": 0.4, "learning_rate": 1.9434342836739226e-05, "loss": 1.6093, "step": 436 }, { "epoch": 0.4, "learning_rate": 1.9431022955436692e-05, "loss": 1.6389, "step": 437 }, { "epoch": 0.41, "learning_rate": 1.942769364555721e-05, "loss": 1.626, "step": 438 }, { "epoch": 0.41, "learning_rate": 1.9424354910429227e-05, "loss": 1.6343, "step": 439 }, { "epoch": 0.41, "learning_rate": 1.9421006753390615e-05, "loss": 1.6992, "step": 440 }, { "epoch": 0.41, "learning_rate": 1.9417649177788657e-05, "loss": 1.5795, "step": 441 }, { "epoch": 0.41, "learning_rate": 1.9414282186980062e-05, "loss": 1.7026, "step": 442 }, { "epoch": 0.41, "learning_rate": 1.9410905784330945e-05, "loss": 1.6705, "step": 443 }, { "epoch": 0.41, "learning_rate": 1.9407519973216833e-05, "loss": 1.5981, "step": 444 }, { "epoch": 0.41, "learning_rate": 1.940412475702266e-05, "loss": 1.6598, "step": 445 }, { "epoch": 0.41, "learning_rate": 1.9400720139142763e-05, "loss": 1.6404, "step": 446 }, { "epoch": 0.41, "learning_rate": 1.9397306122980877e-05, "loss": 1.6395, "step": 447 }, { "epoch": 0.41, "learning_rate": 1.939388271195013e-05, "loss": 1.5656, "step": 448 }, { "epoch": 0.42, "learning_rate": 1.9390449909473042e-05, "loss": 1.6146, "step": 449 }, { "epoch": 0.42, "learning_rate": 1.9387007718981537e-05, "loss": 1.597, "step": 450 }, { "epoch": 0.42, "learning_rate": 1.9383556143916903e-05, "loss": 1.6264, "step": 451 }, { "epoch": 0.42, "learning_rate": 1.9380095187729817e-05, "loss": 1.5773, "step": 452 }, { "epoch": 0.42, "learning_rate": 1.9376624853880347e-05, "loss": 1.5979, "step": 453 }, { "epoch": 0.42, "learning_rate": 1.937314514583792e-05, "loss": 1.6151, "step": 454 }, { "epoch": 0.42, "learning_rate": 1.936965606708135e-05, "loss": 1.5806, "step": 455 }, { "epoch": 0.42, "learning_rate": 1.9366157621098794e-05, "loss": 1.6586, "step": 456 }, { "epoch": 0.42, "learning_rate": 1.936264981138781e-05, "loss": 1.5742, "step": 457 }, { "epoch": 0.42, "learning_rate": 1.935913264145529e-05, "loss": 1.5359, "step": 458 }, { "epoch": 0.42, "learning_rate": 1.9355606114817486e-05, "loss": 1.6565, "step": 459 }, { "epoch": 0.43, "learning_rate": 1.9352070235000016e-05, "loss": 1.5909, "step": 460 }, { "epoch": 0.43, "learning_rate": 1.9348525005537845e-05, "loss": 1.5928, "step": 461 }, { "epoch": 0.43, "learning_rate": 1.934497042997528e-05, "loss": 1.6886, "step": 462 }, { "epoch": 0.43, "learning_rate": 1.934140651186597e-05, "loss": 1.5927, "step": 463 }, { "epoch": 0.43, "learning_rate": 1.9337833254772914e-05, "loss": 1.5924, "step": 464 }, { "epoch": 0.43, "learning_rate": 1.9334250662268446e-05, "loss": 1.5341, "step": 465 }, { "epoch": 0.43, "learning_rate": 1.9330658737934222e-05, "loss": 1.6641, "step": 466 }, { "epoch": 0.43, "learning_rate": 1.9327057485361238e-05, "loss": 1.6588, "step": 467 }, { "epoch": 0.43, "learning_rate": 1.932344690814981e-05, "loss": 1.518, "step": 468 }, { "epoch": 0.43, "learning_rate": 1.9319827009909586e-05, "loss": 1.5598, "step": 469 }, { "epoch": 0.44, "learning_rate": 1.931619779425952e-05, "loss": 1.7209, "step": 470 }, { "epoch": 0.44, "learning_rate": 1.931255926482788e-05, "loss": 1.654, "step": 471 }, { "epoch": 0.44, "learning_rate": 1.9308911425252265e-05, "loss": 1.667, "step": 472 }, { "epoch": 0.44, "learning_rate": 1.9305254279179563e-05, "loss": 1.6412, "step": 473 }, { "epoch": 0.44, "learning_rate": 1.9301587830265967e-05, "loss": 1.6324, "step": 474 }, { "epoch": 0.44, "learning_rate": 1.9297912082176978e-05, "loss": 1.5957, "step": 475 }, { "epoch": 0.44, "learning_rate": 1.9294227038587392e-05, "loss": 1.6082, "step": 476 }, { "epoch": 0.44, "learning_rate": 1.92905327031813e-05, "loss": 1.7438, "step": 477 }, { "epoch": 0.44, "learning_rate": 1.928682907965207e-05, "loss": 1.5855, "step": 478 }, { "epoch": 0.44, "learning_rate": 1.9283116171702373e-05, "loss": 1.5984, "step": 479 }, { "epoch": 0.44, "learning_rate": 1.9279393983044153e-05, "loss": 1.618, "step": 480 }, { "epoch": 0.45, "learning_rate": 1.927566251739863e-05, "loss": 1.6596, "step": 481 }, { "epoch": 0.45, "learning_rate": 1.9271921778496305e-05, "loss": 1.6311, "step": 482 }, { "epoch": 0.45, "learning_rate": 1.926817177007694e-05, "loss": 1.6429, "step": 483 }, { "epoch": 0.45, "learning_rate": 1.9264412495889582e-05, "loss": 1.6304, "step": 484 }, { "epoch": 0.45, "learning_rate": 1.926064395969252e-05, "loss": 1.5909, "step": 485 }, { "epoch": 0.45, "learning_rate": 1.925686616525332e-05, "loss": 1.5835, "step": 486 }, { "epoch": 0.45, "learning_rate": 1.9253079116348794e-05, "loss": 1.6015, "step": 487 }, { "epoch": 0.45, "learning_rate": 1.924928281676501e-05, "loss": 1.6196, "step": 488 }, { "epoch": 0.45, "learning_rate": 1.9245477270297278e-05, "loss": 1.576, "step": 489 }, { "epoch": 0.45, "learning_rate": 1.924166248075016e-05, "loss": 1.6751, "step": 490 }, { "epoch": 0.45, "learning_rate": 1.923783845193746e-05, "loss": 1.5459, "step": 491 }, { "epoch": 0.46, "learning_rate": 1.923400518768221e-05, "loss": 1.5158, "step": 492 }, { "epoch": 0.46, "learning_rate": 1.9230162691816684e-05, "loss": 1.6516, "step": 493 }, { "epoch": 0.46, "learning_rate": 1.922631096818238e-05, "loss": 1.6233, "step": 494 }, { "epoch": 0.46, "learning_rate": 1.9222450020630017e-05, "loss": 1.5959, "step": 495 }, { "epoch": 0.46, "learning_rate": 1.921857985301955e-05, "loss": 1.6649, "step": 496 }, { "epoch": 0.46, "learning_rate": 1.921470046922014e-05, "loss": 1.6441, "step": 497 }, { "epoch": 0.46, "learning_rate": 1.9210811873110164e-05, "loss": 1.6859, "step": 498 }, { "epoch": 0.46, "learning_rate": 1.9206914068577206e-05, "loss": 1.4645, "step": 499 }, { "epoch": 0.46, "learning_rate": 1.9203007059518066e-05, "loss": 1.585, "step": 500 }, { "epoch": 0.46, "learning_rate": 1.9199090849838734e-05, "loss": 1.687, "step": 501 }, { "epoch": 0.46, "learning_rate": 1.919516544345441e-05, "loss": 1.642, "step": 502 }, { "epoch": 0.47, "learning_rate": 1.9191230844289474e-05, "loss": 1.6818, "step": 503 }, { "epoch": 0.47, "learning_rate": 1.9187287056277516e-05, "loss": 1.5421, "step": 504 }, { "epoch": 0.47, "learning_rate": 1.9183334083361292e-05, "loss": 1.6257, "step": 505 }, { "epoch": 0.47, "learning_rate": 1.9179371929492752e-05, "loss": 1.5963, "step": 506 }, { "epoch": 0.47, "learning_rate": 1.917540059863302e-05, "loss": 1.6713, "step": 507 }, { "epoch": 0.47, "learning_rate": 1.9171420094752404e-05, "loss": 1.4984, "step": 508 }, { "epoch": 0.47, "learning_rate": 1.9167430421830367e-05, "loss": 1.5725, "step": 509 }, { "epoch": 0.47, "learning_rate": 1.9163431583855557e-05, "loss": 1.6432, "step": 510 }, { "epoch": 0.47, "learning_rate": 1.9159423584825765e-05, "loss": 1.62, "step": 511 }, { "epoch": 0.47, "learning_rate": 1.9155406428747955e-05, "loss": 1.5279, "step": 512 }, { "epoch": 0.47, "learning_rate": 1.915138011963824e-05, "loss": 1.5881, "step": 513 }, { "epoch": 0.48, "learning_rate": 1.914734466152189e-05, "loss": 1.6046, "step": 514 }, { "epoch": 0.48, "learning_rate": 1.914330005843331e-05, "loss": 1.6863, "step": 515 }, { "epoch": 0.48, "learning_rate": 1.9139246314416056e-05, "loss": 1.5943, "step": 516 }, { "epoch": 0.48, "learning_rate": 1.913518343352282e-05, "loss": 1.6442, "step": 517 }, { "epoch": 0.48, "learning_rate": 1.9131111419815428e-05, "loss": 1.6378, "step": 518 }, { "epoch": 0.48, "learning_rate": 1.9127030277364845e-05, "loss": 1.5656, "step": 519 }, { "epoch": 0.48, "learning_rate": 1.9122940010251144e-05, "loss": 1.6154, "step": 520 }, { "epoch": 0.48, "learning_rate": 1.911884062256354e-05, "loss": 1.5862, "step": 521 }, { "epoch": 0.48, "learning_rate": 1.911473211840036e-05, "loss": 1.6484, "step": 522 }, { "epoch": 0.48, "learning_rate": 1.9110614501869028e-05, "loss": 1.5473, "step": 523 }, { "epoch": 0.49, "learning_rate": 1.9106487777086107e-05, "loss": 1.6444, "step": 524 }, { "epoch": 0.49, "learning_rate": 1.9102351948177244e-05, "loss": 1.5171, "step": 525 }, { "epoch": 0.49, "learning_rate": 1.9098207019277198e-05, "loss": 1.606, "step": 526 }, { "epoch": 0.49, "learning_rate": 1.9094052994529823e-05, "loss": 1.6596, "step": 527 }, { "epoch": 0.49, "learning_rate": 1.9089889878088063e-05, "loss": 1.7256, "step": 528 }, { "epoch": 0.49, "learning_rate": 1.908571767411396e-05, "loss": 1.6557, "step": 529 }, { "epoch": 0.49, "learning_rate": 1.908153638677863e-05, "loss": 1.5159, "step": 530 }, { "epoch": 0.49, "learning_rate": 1.9077346020262282e-05, "loss": 1.6016, "step": 531 }, { "epoch": 0.49, "learning_rate": 1.9073146578754192e-05, "loss": 1.642, "step": 532 }, { "epoch": 0.49, "learning_rate": 1.9068938066452713e-05, "loss": 1.6407, "step": 533 }, { "epoch": 0.49, "learning_rate": 1.9064720487565265e-05, "loss": 1.6202, "step": 534 }, { "epoch": 0.5, "learning_rate": 1.9060493846308333e-05, "loss": 1.531, "step": 535 }, { "epoch": 0.5, "learning_rate": 1.9056258146907467e-05, "loss": 1.6472, "step": 536 }, { "epoch": 0.5, "learning_rate": 1.9052013393597263e-05, "loss": 1.5264, "step": 537 }, { "epoch": 0.5, "learning_rate": 1.904775959062137e-05, "loss": 1.5287, "step": 538 }, { "epoch": 0.5, "learning_rate": 1.9043496742232495e-05, "loss": 1.5977, "step": 539 }, { "epoch": 0.5, "learning_rate": 1.903922485269238e-05, "loss": 1.5634, "step": 540 }, { "epoch": 0.5, "learning_rate": 1.90349439262718e-05, "loss": 1.5177, "step": 541 }, { "epoch": 0.5, "learning_rate": 1.9030653967250577e-05, "loss": 1.6364, "step": 542 }, { "epoch": 0.5, "learning_rate": 1.902635497991756e-05, "loss": 1.5643, "step": 543 }, { "epoch": 0.5, "learning_rate": 1.9022046968570612e-05, "loss": 1.7273, "step": 544 }, { "epoch": 0.5, "learning_rate": 1.9017729937516634e-05, "loss": 1.5994, "step": 545 }, { "epoch": 0.51, "learning_rate": 1.9013403891071537e-05, "loss": 1.6498, "step": 546 }, { "epoch": 0.51, "learning_rate": 1.900906883356024e-05, "loss": 1.6329, "step": 547 }, { "epoch": 0.51, "learning_rate": 1.9004724769316682e-05, "loss": 1.6364, "step": 548 }, { "epoch": 0.51, "learning_rate": 1.90003717026838e-05, "loss": 1.561, "step": 549 }, { "epoch": 0.51, "learning_rate": 1.899600963801353e-05, "loss": 1.6002, "step": 550 }, { "epoch": 0.51, "learning_rate": 1.8991638579666808e-05, "loss": 1.6603, "step": 551 }, { "epoch": 0.51, "learning_rate": 1.8987258532013553e-05, "loss": 1.6025, "step": 552 }, { "epoch": 0.51, "learning_rate": 1.8982869499432682e-05, "loss": 1.5697, "step": 553 }, { "epoch": 0.51, "learning_rate": 1.8978471486312087e-05, "loss": 1.5276, "step": 554 }, { "epoch": 0.51, "learning_rate": 1.8974064497048637e-05, "loss": 1.6086, "step": 555 }, { "epoch": 0.51, "learning_rate": 1.8969648536048192e-05, "loss": 1.6066, "step": 556 }, { "epoch": 0.52, "learning_rate": 1.896522360772555e-05, "loss": 1.6312, "step": 557 }, { "epoch": 0.52, "learning_rate": 1.8960789716504502e-05, "loss": 1.6262, "step": 558 }, { "epoch": 0.52, "learning_rate": 1.8956346866817786e-05, "loss": 1.6087, "step": 559 }, { "epoch": 0.52, "learning_rate": 1.89518950631071e-05, "loss": 1.6612, "step": 560 }, { "epoch": 0.52, "learning_rate": 1.894743430982309e-05, "loss": 1.58, "step": 561 }, { "epoch": 0.52, "learning_rate": 1.8942964611425362e-05, "loss": 1.6543, "step": 562 }, { "epoch": 0.52, "learning_rate": 1.8938485972382447e-05, "loss": 1.6058, "step": 563 }, { "epoch": 0.52, "learning_rate": 1.893399839717182e-05, "loss": 1.6507, "step": 564 }, { "epoch": 0.52, "learning_rate": 1.8929501890279903e-05, "loss": 1.5465, "step": 565 }, { "epoch": 0.52, "learning_rate": 1.8924996456202027e-05, "loss": 1.6055, "step": 566 }, { "epoch": 0.52, "learning_rate": 1.892048209944246e-05, "loss": 1.6058, "step": 567 }, { "epoch": 0.53, "learning_rate": 1.8915958824514388e-05, "loss": 1.6286, "step": 568 }, { "epoch": 0.53, "learning_rate": 1.8911426635939917e-05, "loss": 1.627, "step": 569 }, { "epoch": 0.53, "learning_rate": 1.8906885538250053e-05, "loss": 1.5665, "step": 570 }, { "epoch": 0.53, "learning_rate": 1.8902335535984725e-05, "loss": 1.5836, "step": 571 }, { "epoch": 0.53, "learning_rate": 1.8897776633692742e-05, "loss": 1.5997, "step": 572 }, { "epoch": 0.53, "learning_rate": 1.8893208835931838e-05, "loss": 1.6251, "step": 573 }, { "epoch": 0.53, "learning_rate": 1.888863214726862e-05, "loss": 1.5305, "step": 574 }, { "epoch": 0.53, "learning_rate": 1.8884046572278587e-05, "loss": 1.6064, "step": 575 }, { "epoch": 0.53, "learning_rate": 1.8879452115546126e-05, "loss": 1.5983, "step": 576 }, { "epoch": 0.53, "learning_rate": 1.8874848781664503e-05, "loss": 1.6046, "step": 577 }, { "epoch": 0.54, "learning_rate": 1.887023657523586e-05, "loss": 1.5773, "step": 578 }, { "epoch": 0.54, "learning_rate": 1.8865615500871204e-05, "loss": 1.587, "step": 579 }, { "epoch": 0.54, "learning_rate": 1.886098556319041e-05, "loss": 1.594, "step": 580 }, { "epoch": 0.54, "learning_rate": 1.8856346766822224e-05, "loss": 1.5942, "step": 581 }, { "epoch": 0.54, "learning_rate": 1.8851699116404226e-05, "loss": 1.5959, "step": 582 }, { "epoch": 0.54, "learning_rate": 1.8847042616582874e-05, "loss": 1.6041, "step": 583 }, { "epoch": 0.54, "learning_rate": 1.8842377272013448e-05, "loss": 1.4831, "step": 584 }, { "epoch": 0.54, "learning_rate": 1.883770308736009e-05, "loss": 1.5402, "step": 585 }, { "epoch": 0.54, "learning_rate": 1.883302006729577e-05, "loss": 1.5632, "step": 586 }, { "epoch": 0.54, "learning_rate": 1.8828328216502293e-05, "loss": 1.598, "step": 587 }, { "epoch": 0.54, "learning_rate": 1.8823627539670296e-05, "loss": 1.6416, "step": 588 }, { "epoch": 0.55, "learning_rate": 1.8818918041499234e-05, "loss": 1.517, "step": 589 }, { "epoch": 0.55, "learning_rate": 1.8814199726697383e-05, "loss": 1.5258, "step": 590 }, { "epoch": 0.55, "learning_rate": 1.8809472599981836e-05, "loss": 1.6408, "step": 591 }, { "epoch": 0.55, "learning_rate": 1.8804736666078495e-05, "loss": 1.5286, "step": 592 }, { "epoch": 0.55, "learning_rate": 1.879999192972206e-05, "loss": 1.6255, "step": 593 }, { "epoch": 0.55, "learning_rate": 1.8795238395656042e-05, "loss": 1.5899, "step": 594 }, { "epoch": 0.55, "learning_rate": 1.8790476068632742e-05, "loss": 1.6421, "step": 595 }, { "epoch": 0.55, "learning_rate": 1.8785704953413248e-05, "loss": 1.6383, "step": 596 }, { "epoch": 0.55, "learning_rate": 1.8780925054767443e-05, "loss": 1.5817, "step": 597 }, { "epoch": 0.55, "learning_rate": 1.877613637747398e-05, "loss": 1.5827, "step": 598 }, { "epoch": 0.55, "learning_rate": 1.8771338926320298e-05, "loss": 1.6751, "step": 599 }, { "epoch": 0.56, "learning_rate": 1.8766532706102608e-05, "loss": 1.6624, "step": 600 }, { "epoch": 0.56, "learning_rate": 1.8761717721625877e-05, "loss": 1.6471, "step": 601 }, { "epoch": 0.56, "learning_rate": 1.875689397770384e-05, "loss": 1.6933, "step": 602 }, { "epoch": 0.56, "learning_rate": 1.8752061479158992e-05, "loss": 1.697, "step": 603 }, { "epoch": 0.56, "learning_rate": 1.8747220230822584e-05, "loss": 1.6424, "step": 604 }, { "epoch": 0.56, "learning_rate": 1.87423702375346e-05, "loss": 1.6314, "step": 605 }, { "epoch": 0.56, "learning_rate": 1.873751150414378e-05, "loss": 1.5859, "step": 606 }, { "epoch": 0.56, "learning_rate": 1.8732644035507595e-05, "loss": 1.5931, "step": 607 }, { "epoch": 0.56, "learning_rate": 1.8727767836492256e-05, "loss": 1.6258, "step": 608 }, { "epoch": 0.56, "learning_rate": 1.8722882911972687e-05, "loss": 1.6088, "step": 609 }, { "epoch": 0.56, "learning_rate": 1.8717989266832562e-05, "loss": 1.4536, "step": 610 }, { "epoch": 0.57, "learning_rate": 1.871308690596424e-05, "loss": 1.6522, "step": 611 }, { "epoch": 0.57, "learning_rate": 1.870817583426882e-05, "loss": 1.553, "step": 612 }, { "epoch": 0.57, "learning_rate": 1.8703256056656096e-05, "loss": 1.5451, "step": 613 }, { "epoch": 0.57, "learning_rate": 1.8698327578044574e-05, "loss": 1.6219, "step": 614 }, { "epoch": 0.57, "learning_rate": 1.869339040336145e-05, "loss": 1.5471, "step": 615 }, { "epoch": 0.57, "learning_rate": 1.8688444537542622e-05, "loss": 1.5937, "step": 616 }, { "epoch": 0.57, "learning_rate": 1.8683489985532665e-05, "loss": 1.5829, "step": 617 }, { "epoch": 0.57, "learning_rate": 1.8678526752284857e-05, "loss": 1.625, "step": 618 }, { "epoch": 0.57, "learning_rate": 1.8673554842761138e-05, "loss": 1.6663, "step": 619 }, { "epoch": 0.57, "learning_rate": 1.866857426193213e-05, "loss": 1.4847, "step": 620 }, { "epoch": 0.57, "learning_rate": 1.8663585014777118e-05, "loss": 1.5604, "step": 621 }, { "epoch": 0.58, "learning_rate": 1.8658587106284063e-05, "loss": 1.574, "step": 622 }, { "epoch": 0.58, "learning_rate": 1.865358054144957e-05, "loss": 1.6029, "step": 623 }, { "epoch": 0.58, "learning_rate": 1.8648565325278908e-05, "loss": 1.5769, "step": 624 }, { "epoch": 0.58, "learning_rate": 1.8643541462785993e-05, "loss": 1.5474, "step": 625 }, { "epoch": 0.58, "learning_rate": 1.863850895899338e-05, "loss": 1.6314, "step": 626 }, { "epoch": 0.58, "learning_rate": 1.8633467818932274e-05, "loss": 1.4776, "step": 627 }, { "epoch": 0.58, "learning_rate": 1.8628418047642505e-05, "loss": 1.6245, "step": 628 }, { "epoch": 0.58, "learning_rate": 1.8623359650172532e-05, "loss": 1.5762, "step": 629 }, { "epoch": 0.58, "learning_rate": 1.8618292631579447e-05, "loss": 1.5791, "step": 630 }, { "epoch": 0.58, "learning_rate": 1.8613216996928946e-05, "loss": 1.6472, "step": 631 }, { "epoch": 0.59, "learning_rate": 1.8608132751295354e-05, "loss": 1.5576, "step": 632 }, { "epoch": 0.59, "learning_rate": 1.86030398997616e-05, "loss": 1.5365, "step": 633 }, { "epoch": 0.59, "learning_rate": 1.8597938447419202e-05, "loss": 1.6168, "step": 634 }, { "epoch": 0.59, "learning_rate": 1.8592828399368305e-05, "loss": 1.6423, "step": 635 }, { "epoch": 0.59, "learning_rate": 1.858770976071762e-05, "loss": 1.6539, "step": 636 }, { "epoch": 0.59, "learning_rate": 1.8582582536584467e-05, "loss": 1.5548, "step": 637 }, { "epoch": 0.59, "learning_rate": 1.8577446732094732e-05, "loss": 1.5399, "step": 638 }, { "epoch": 0.59, "learning_rate": 1.8572302352382888e-05, "loss": 1.5366, "step": 639 }, { "epoch": 0.59, "learning_rate": 1.856714940259199e-05, "loss": 1.5283, "step": 640 }, { "epoch": 0.59, "learning_rate": 1.856198788787364e-05, "loss": 1.5851, "step": 641 }, { "epoch": 0.59, "learning_rate": 1.8556817813388024e-05, "loss": 1.5848, "step": 642 }, { "epoch": 0.6, "learning_rate": 1.8551639184303867e-05, "loss": 1.5817, "step": 643 }, { "epoch": 0.6, "learning_rate": 1.8546452005798464e-05, "loss": 1.6124, "step": 644 }, { "epoch": 0.6, "learning_rate": 1.854125628305764e-05, "loss": 1.5611, "step": 645 }, { "epoch": 0.6, "learning_rate": 1.853605202127577e-05, "loss": 1.5718, "step": 646 }, { "epoch": 0.6, "learning_rate": 1.853083922565577e-05, "loss": 1.6249, "step": 647 }, { "epoch": 0.6, "learning_rate": 1.8525617901409082e-05, "loss": 1.5593, "step": 648 }, { "epoch": 0.6, "learning_rate": 1.852038805375568e-05, "loss": 1.5565, "step": 649 }, { "epoch": 0.6, "learning_rate": 1.851514968792405e-05, "loss": 1.5575, "step": 650 }, { "epoch": 0.6, "learning_rate": 1.85099028091512e-05, "loss": 1.6004, "step": 651 }, { "epoch": 0.6, "learning_rate": 1.8504647422682652e-05, "loss": 1.5995, "step": 652 }, { "epoch": 0.6, "learning_rate": 1.8499383533772426e-05, "loss": 1.5634, "step": 653 }, { "epoch": 0.61, "learning_rate": 1.8494111147683044e-05, "loss": 1.639, "step": 654 }, { "epoch": 0.61, "learning_rate": 1.848883026968553e-05, "loss": 1.5293, "step": 655 }, { "epoch": 0.61, "learning_rate": 1.848354090505939e-05, "loss": 1.5588, "step": 656 }, { "epoch": 0.61, "learning_rate": 1.8478243059092613e-05, "loss": 1.5289, "step": 657 }, { "epoch": 0.61, "learning_rate": 1.8472936737081673e-05, "loss": 1.5864, "step": 658 }, { "epoch": 0.61, "learning_rate": 1.846762194433152e-05, "loss": 1.5147, "step": 659 }, { "epoch": 0.61, "learning_rate": 1.8462298686155565e-05, "loss": 1.5092, "step": 660 }, { "epoch": 0.61, "learning_rate": 1.8456966967875685e-05, "loss": 1.6432, "step": 661 }, { "epoch": 0.61, "learning_rate": 1.8451626794822215e-05, "loss": 1.6189, "step": 662 }, { "epoch": 0.61, "learning_rate": 1.8446278172333945e-05, "loss": 1.6659, "step": 663 }, { "epoch": 0.61, "learning_rate": 1.8440921105758107e-05, "loss": 1.553, "step": 664 }, { "epoch": 0.62, "learning_rate": 1.8435555600450384e-05, "loss": 1.5059, "step": 665 }, { "epoch": 0.62, "learning_rate": 1.8430181661774885e-05, "loss": 1.6117, "step": 666 }, { "epoch": 0.62, "learning_rate": 1.8424799295104155e-05, "loss": 1.5417, "step": 667 }, { "epoch": 0.62, "learning_rate": 1.841940850581917e-05, "loss": 1.628, "step": 668 }, { "epoch": 0.62, "learning_rate": 1.8414009299309315e-05, "loss": 1.5765, "step": 669 }, { "epoch": 0.62, "learning_rate": 1.8408601680972397e-05, "loss": 1.5528, "step": 670 }, { "epoch": 0.62, "learning_rate": 1.8403185656214635e-05, "loss": 1.5291, "step": 671 }, { "epoch": 0.62, "learning_rate": 1.8397761230450647e-05, "loss": 1.5541, "step": 672 }, { "epoch": 0.62, "learning_rate": 1.839232840910345e-05, "loss": 1.5544, "step": 673 }, { "epoch": 0.62, "learning_rate": 1.8386887197604458e-05, "loss": 1.6525, "step": 674 }, { "epoch": 0.62, "learning_rate": 1.8381437601393475e-05, "loss": 1.5408, "step": 675 }, { "epoch": 0.63, "learning_rate": 1.837597962591868e-05, "loss": 1.496, "step": 676 }, { "epoch": 0.63, "learning_rate": 1.8370513276636633e-05, "loss": 1.641, "step": 677 }, { "epoch": 0.63, "learning_rate": 1.8365038559012263e-05, "loss": 1.5225, "step": 678 }, { "epoch": 0.63, "learning_rate": 1.8359555478518873e-05, "loss": 1.5415, "step": 679 }, { "epoch": 0.63, "learning_rate": 1.835406404063812e-05, "loss": 1.6244, "step": 680 }, { "epoch": 0.63, "learning_rate": 1.8348564250860016e-05, "loss": 1.5057, "step": 681 }, { "epoch": 0.63, "learning_rate": 1.834305611468292e-05, "loss": 1.598, "step": 682 }, { "epoch": 0.63, "learning_rate": 1.8337539637613553e-05, "loss": 1.5087, "step": 683 }, { "epoch": 0.63, "learning_rate": 1.833201482516695e-05, "loss": 1.4772, "step": 684 }, { "epoch": 0.63, "learning_rate": 1.832648168286649e-05, "loss": 1.5809, "step": 685 }, { "epoch": 0.64, "learning_rate": 1.8320940216243888e-05, "loss": 1.6416, "step": 686 }, { "epoch": 0.64, "learning_rate": 1.831539043083917e-05, "loss": 1.5497, "step": 687 }, { "epoch": 0.64, "learning_rate": 1.8309832332200677e-05, "loss": 1.5583, "step": 688 }, { "epoch": 0.64, "learning_rate": 1.8304265925885075e-05, "loss": 1.6452, "step": 689 }, { "epoch": 0.64, "learning_rate": 1.829869121745732e-05, "loss": 1.6232, "step": 690 }, { "epoch": 0.64, "learning_rate": 1.8293108212490674e-05, "loss": 1.6157, "step": 691 }, { "epoch": 0.64, "learning_rate": 1.82875169165667e-05, "loss": 1.5575, "step": 692 }, { "epoch": 0.64, "learning_rate": 1.8281917335275236e-05, "loss": 1.5707, "step": 693 }, { "epoch": 0.64, "learning_rate": 1.827630947421442e-05, "loss": 1.6261, "step": 694 }, { "epoch": 0.64, "learning_rate": 1.8270693338990648e-05, "loss": 1.6515, "step": 695 }, { "epoch": 0.64, "learning_rate": 1.826506893521861e-05, "loss": 1.6074, "step": 696 }, { "epoch": 0.65, "learning_rate": 1.8259436268521244e-05, "loss": 1.5217, "step": 697 }, { "epoch": 0.65, "learning_rate": 1.8253795344529758e-05, "loss": 1.5159, "step": 698 }, { "epoch": 0.65, "learning_rate": 1.824814616888361e-05, "loss": 1.6619, "step": 699 }, { "epoch": 0.65, "learning_rate": 1.8242488747230515e-05, "loss": 1.6214, "step": 700 }, { "epoch": 0.65, "learning_rate": 1.8236823085226427e-05, "loss": 1.578, "step": 701 }, { "epoch": 0.65, "learning_rate": 1.8231149188535534e-05, "loss": 1.5055, "step": 702 }, { "epoch": 0.65, "learning_rate": 1.822546706283026e-05, "loss": 1.5111, "step": 703 }, { "epoch": 0.65, "learning_rate": 1.8219776713791265e-05, "loss": 1.599, "step": 704 }, { "epoch": 0.65, "learning_rate": 1.8214078147107423e-05, "loss": 1.5688, "step": 705 }, { "epoch": 0.65, "learning_rate": 1.820837136847581e-05, "loss": 1.6265, "step": 706 }, { "epoch": 0.65, "learning_rate": 1.8202656383601737e-05, "loss": 1.5914, "step": 707 }, { "epoch": 0.66, "learning_rate": 1.81969331981987e-05, "loss": 1.6053, "step": 708 }, { "epoch": 0.66, "learning_rate": 1.8191201817988403e-05, "loss": 1.5351, "step": 709 }, { "epoch": 0.66, "learning_rate": 1.818546224870074e-05, "loss": 1.6301, "step": 710 }, { "epoch": 0.66, "learning_rate": 1.8179714496073787e-05, "loss": 1.5255, "step": 711 }, { "epoch": 0.66, "learning_rate": 1.8173958565853812e-05, "loss": 1.5438, "step": 712 }, { "epoch": 0.66, "learning_rate": 1.816819446379525e-05, "loss": 1.6011, "step": 713 }, { "epoch": 0.66, "learning_rate": 1.8162422195660702e-05, "loss": 1.587, "step": 714 }, { "epoch": 0.66, "learning_rate": 1.8156641767220953e-05, "loss": 1.6023, "step": 715 }, { "epoch": 0.66, "learning_rate": 1.815085318425492e-05, "loss": 1.5328, "step": 716 }, { "epoch": 0.66, "learning_rate": 1.814505645254969e-05, "loss": 1.5769, "step": 717 }, { "epoch": 0.66, "learning_rate": 1.813925157790049e-05, "loss": 1.6298, "step": 718 }, { "epoch": 0.67, "learning_rate": 1.813343856611069e-05, "loss": 1.5452, "step": 719 }, { "epoch": 0.67, "learning_rate": 1.8127617422991793e-05, "loss": 1.5578, "step": 720 }, { "epoch": 0.67, "learning_rate": 1.812178815436343e-05, "loss": 1.567, "step": 721 }, { "epoch": 0.67, "learning_rate": 1.811595076605336e-05, "loss": 1.5681, "step": 722 }, { "epoch": 0.67, "learning_rate": 1.8110105263897463e-05, "loss": 1.654, "step": 723 }, { "epoch": 0.67, "learning_rate": 1.8104251653739715e-05, "loss": 1.5403, "step": 724 }, { "epoch": 0.67, "learning_rate": 1.8098389941432217e-05, "loss": 1.5498, "step": 725 }, { "epoch": 0.67, "learning_rate": 1.8092520132835154e-05, "loss": 1.5481, "step": 726 }, { "epoch": 0.67, "learning_rate": 1.8086642233816817e-05, "loss": 1.5744, "step": 727 }, { "epoch": 0.67, "learning_rate": 1.8080756250253575e-05, "loss": 1.704, "step": 728 }, { "epoch": 0.67, "learning_rate": 1.807486218802989e-05, "loss": 1.5424, "step": 729 }, { "epoch": 0.68, "learning_rate": 1.80689600530383e-05, "loss": 1.5611, "step": 730 }, { "epoch": 0.68, "learning_rate": 1.8063049851179398e-05, "loss": 1.516, "step": 731 }, { "epoch": 0.68, "learning_rate": 1.8057131588361857e-05, "loss": 1.5024, "step": 732 }, { "epoch": 0.68, "learning_rate": 1.805120527050241e-05, "loss": 1.6105, "step": 733 }, { "epoch": 0.68, "learning_rate": 1.804527090352583e-05, "loss": 1.561, "step": 734 }, { "epoch": 0.68, "learning_rate": 1.803932849336495e-05, "loss": 1.5636, "step": 735 }, { "epoch": 0.68, "learning_rate": 1.803337804596064e-05, "loss": 1.5368, "step": 736 }, { "epoch": 0.68, "learning_rate": 1.8027419567261805e-05, "loss": 1.5401, "step": 737 }, { "epoch": 0.68, "learning_rate": 1.802145306322537e-05, "loss": 1.5529, "step": 738 }, { "epoch": 0.68, "learning_rate": 1.8015478539816297e-05, "loss": 1.5298, "step": 739 }, { "epoch": 0.69, "learning_rate": 1.8009496003007563e-05, "loss": 1.5772, "step": 740 }, { "epoch": 0.69, "learning_rate": 1.8003505458780148e-05, "loss": 1.5211, "step": 741 }, { "epoch": 0.69, "learning_rate": 1.7997506913123043e-05, "loss": 1.6804, "step": 742 }, { "epoch": 0.69, "learning_rate": 1.7991500372033244e-05, "loss": 1.6211, "step": 743 }, { "epoch": 0.69, "learning_rate": 1.798548584151573e-05, "loss": 1.6128, "step": 744 }, { "epoch": 0.69, "learning_rate": 1.7979463327583465e-05, "loss": 1.6158, "step": 745 }, { "epoch": 0.69, "learning_rate": 1.7973432836257408e-05, "loss": 1.5651, "step": 746 }, { "epoch": 0.69, "learning_rate": 1.796739437356649e-05, "loss": 1.6293, "step": 747 }, { "epoch": 0.69, "learning_rate": 1.79613479455476e-05, "loss": 1.6054, "step": 748 }, { "epoch": 0.69, "learning_rate": 1.7955293558245604e-05, "loss": 1.6163, "step": 749 }, { "epoch": 0.69, "learning_rate": 1.794923121771331e-05, "loss": 1.5133, "step": 750 }, { "epoch": 0.7, "learning_rate": 1.79431609300115e-05, "loss": 1.5762, "step": 751 }, { "epoch": 0.7, "learning_rate": 1.7937082701208875e-05, "loss": 1.5245, "step": 752 }, { "epoch": 0.7, "learning_rate": 1.7930996537382094e-05, "loss": 1.6385, "step": 753 }, { "epoch": 0.7, "learning_rate": 1.7924902444615736e-05, "loss": 1.5671, "step": 754 }, { "epoch": 0.7, "learning_rate": 1.791880042900232e-05, "loss": 1.5771, "step": 755 }, { "epoch": 0.7, "learning_rate": 1.7912690496642276e-05, "loss": 1.5027, "step": 756 }, { "epoch": 0.7, "learning_rate": 1.790657265364395e-05, "loss": 1.5161, "step": 757 }, { "epoch": 0.7, "learning_rate": 1.7900446906123604e-05, "loss": 1.596, "step": 758 }, { "epoch": 0.7, "learning_rate": 1.7894313260205392e-05, "loss": 1.5532, "step": 759 }, { "epoch": 0.7, "learning_rate": 1.788817172202137e-05, "loss": 1.5832, "step": 760 }, { "epoch": 0.7, "learning_rate": 1.7882022297711484e-05, "loss": 1.5515, "step": 761 }, { "epoch": 0.71, "learning_rate": 1.7875864993423562e-05, "loss": 1.5737, "step": 762 }, { "epoch": 0.71, "learning_rate": 1.786969981531331e-05, "loss": 1.5123, "step": 763 }, { "epoch": 0.71, "learning_rate": 1.7863526769544314e-05, "loss": 1.5531, "step": 764 }, { "epoch": 0.71, "learning_rate": 1.7857345862288013e-05, "loss": 1.492, "step": 765 }, { "epoch": 0.71, "learning_rate": 1.785115709972371e-05, "loss": 1.5482, "step": 766 }, { "epoch": 0.71, "learning_rate": 1.7844960488038568e-05, "loss": 1.5846, "step": 767 }, { "epoch": 0.71, "learning_rate": 1.7838756033427585e-05, "loss": 1.5745, "step": 768 }, { "epoch": 0.71, "learning_rate": 1.7832543742093615e-05, "loss": 1.4284, "step": 769 }, { "epoch": 0.71, "learning_rate": 1.782632362024733e-05, "loss": 1.5279, "step": 770 }, { "epoch": 0.71, "learning_rate": 1.7820095674107243e-05, "loss": 1.4754, "step": 771 }, { "epoch": 0.71, "learning_rate": 1.7813859909899682e-05, "loss": 1.5458, "step": 772 }, { "epoch": 0.72, "learning_rate": 1.7807616333858794e-05, "loss": 1.5869, "step": 773 }, { "epoch": 0.72, "learning_rate": 1.780136495222654e-05, "loss": 1.6008, "step": 774 }, { "epoch": 0.72, "learning_rate": 1.7795105771252676e-05, "loss": 1.4643, "step": 775 }, { "epoch": 0.72, "learning_rate": 1.778883879719476e-05, "loss": 1.6388, "step": 776 }, { "epoch": 0.72, "learning_rate": 1.7782564036318143e-05, "loss": 1.4948, "step": 777 }, { "epoch": 0.72, "learning_rate": 1.7776281494895956e-05, "loss": 1.4665, "step": 778 }, { "epoch": 0.72, "learning_rate": 1.7769991179209115e-05, "loss": 1.5453, "step": 779 }, { "epoch": 0.72, "learning_rate": 1.77636930955463e-05, "loss": 1.5746, "step": 780 }, { "epoch": 0.72, "learning_rate": 1.775738725020396e-05, "loss": 1.5208, "step": 781 }, { "epoch": 0.72, "learning_rate": 1.7751073649486307e-05, "loss": 1.458, "step": 782 }, { "epoch": 0.72, "learning_rate": 1.774475229970531e-05, "loss": 1.5178, "step": 783 }, { "epoch": 0.73, "learning_rate": 1.773842320718067e-05, "loss": 1.5934, "step": 784 }, { "epoch": 0.73, "learning_rate": 1.7732086378239843e-05, "loss": 1.5845, "step": 785 }, { "epoch": 0.73, "learning_rate": 1.7725741819218016e-05, "loss": 1.61, "step": 786 }, { "epoch": 0.73, "learning_rate": 1.7719389536458097e-05, "loss": 1.5836, "step": 787 }, { "epoch": 0.73, "learning_rate": 1.7713029536310722e-05, "loss": 1.5841, "step": 788 }, { "epoch": 0.73, "learning_rate": 1.770666182513425e-05, "loss": 1.5008, "step": 789 }, { "epoch": 0.73, "learning_rate": 1.770028640929473e-05, "loss": 1.4884, "step": 790 }, { "epoch": 0.73, "learning_rate": 1.769390329516593e-05, "loss": 1.5538, "step": 791 }, { "epoch": 0.73, "learning_rate": 1.7687512489129305e-05, "loss": 1.4963, "step": 792 }, { "epoch": 0.73, "learning_rate": 1.7681113997574007e-05, "loss": 1.5625, "step": 793 }, { "epoch": 0.74, "learning_rate": 1.7674707826896868e-05, "loss": 1.5107, "step": 794 }, { "epoch": 0.74, "learning_rate": 1.7668293983502395e-05, "loss": 1.5422, "step": 795 }, { "epoch": 0.74, "learning_rate": 1.766187247380277e-05, "loss": 1.5658, "step": 796 }, { "epoch": 0.74, "learning_rate": 1.7655443304217837e-05, "loss": 1.6031, "step": 797 }, { "epoch": 0.74, "learning_rate": 1.76490064811751e-05, "loss": 1.5149, "step": 798 }, { "epoch": 0.74, "learning_rate": 1.7642562011109707e-05, "loss": 1.5346, "step": 799 }, { "epoch": 0.74, "learning_rate": 1.763610990046446e-05, "loss": 1.5909, "step": 800 }, { "epoch": 0.74, "learning_rate": 1.7629650155689794e-05, "loss": 1.4995, "step": 801 }, { "epoch": 0.74, "learning_rate": 1.7623182783243785e-05, "loss": 1.5699, "step": 802 }, { "epoch": 0.74, "learning_rate": 1.761670778959212e-05, "loss": 1.5669, "step": 803 }, { "epoch": 0.74, "learning_rate": 1.7610225181208118e-05, "loss": 1.4459, "step": 804 }, { "epoch": 0.75, "learning_rate": 1.7603734964572706e-05, "loss": 1.5582, "step": 805 }, { "epoch": 0.75, "learning_rate": 1.7597237146174416e-05, "loss": 1.5016, "step": 806 }, { "epoch": 0.75, "learning_rate": 1.759073173250938e-05, "loss": 1.5935, "step": 807 }, { "epoch": 0.75, "learning_rate": 1.7584218730081325e-05, "loss": 1.636, "step": 808 }, { "epoch": 0.75, "learning_rate": 1.7577698145401565e-05, "loss": 1.5331, "step": 809 }, { "epoch": 0.75, "learning_rate": 1.7571169984988994e-05, "loss": 1.5691, "step": 810 }, { "epoch": 0.75, "learning_rate": 1.756463425537008e-05, "loss": 1.6268, "step": 811 }, { "epoch": 0.75, "learning_rate": 1.7558090963078856e-05, "loss": 1.5215, "step": 812 }, { "epoch": 0.75, "learning_rate": 1.7551540114656916e-05, "loss": 1.5141, "step": 813 }, { "epoch": 0.75, "learning_rate": 1.754498171665341e-05, "loss": 1.5886, "step": 814 }, { "epoch": 0.75, "learning_rate": 1.7538415775625043e-05, "loss": 1.6099, "step": 815 }, { "epoch": 0.76, "learning_rate": 1.753184229813604e-05, "loss": 1.6574, "step": 816 }, { "epoch": 0.76, "learning_rate": 1.752526129075818e-05, "loss": 1.5972, "step": 817 }, { "epoch": 0.76, "learning_rate": 1.7518672760070764e-05, "loss": 1.5171, "step": 818 }, { "epoch": 0.76, "learning_rate": 1.7512076712660612e-05, "loss": 1.4953, "step": 819 }, { "epoch": 0.76, "learning_rate": 1.7505473155122063e-05, "loss": 1.4649, "step": 820 }, { "epoch": 0.76, "learning_rate": 1.749886209405696e-05, "loss": 1.549, "step": 821 }, { "epoch": 0.76, "learning_rate": 1.7492243536074653e-05, "loss": 1.5792, "step": 822 }, { "epoch": 0.76, "learning_rate": 1.748561748779198e-05, "loss": 1.5821, "step": 823 }, { "epoch": 0.76, "learning_rate": 1.747898395583327e-05, "loss": 1.6085, "step": 824 }, { "epoch": 0.76, "learning_rate": 1.7472342946830336e-05, "loss": 1.5167, "step": 825 }, { "epoch": 0.76, "learning_rate": 1.7465694467422462e-05, "loss": 1.5856, "step": 826 }, { "epoch": 0.77, "learning_rate": 1.7459038524256407e-05, "loss": 1.467, "step": 827 }, { "epoch": 0.77, "learning_rate": 1.7452375123986386e-05, "loss": 1.5568, "step": 828 }, { "epoch": 0.77, "learning_rate": 1.7445704273274075e-05, "loss": 1.5161, "step": 829 }, { "epoch": 0.77, "learning_rate": 1.7439025978788585e-05, "loss": 1.4919, "step": 830 }, { "epoch": 0.77, "learning_rate": 1.743234024720649e-05, "loss": 1.5498, "step": 831 }, { "epoch": 0.77, "learning_rate": 1.742564708521178e-05, "loss": 1.5474, "step": 832 }, { "epoch": 0.77, "learning_rate": 1.741894649949588e-05, "loss": 1.5436, "step": 833 }, { "epoch": 0.77, "learning_rate": 1.7412238496757644e-05, "loss": 1.6003, "step": 834 }, { "epoch": 0.77, "learning_rate": 1.7405523083703333e-05, "loss": 1.5523, "step": 835 }, { "epoch": 0.77, "learning_rate": 1.7398800267046615e-05, "loss": 1.5716, "step": 836 }, { "epoch": 0.77, "learning_rate": 1.7392070053508566e-05, "loss": 1.5005, "step": 837 }, { "epoch": 0.78, "learning_rate": 1.7385332449817655e-05, "loss": 1.5101, "step": 838 }, { "epoch": 0.78, "learning_rate": 1.7378587462709736e-05, "loss": 1.5886, "step": 839 }, { "epoch": 0.78, "learning_rate": 1.7371835098928045e-05, "loss": 1.5085, "step": 840 }, { "epoch": 0.78, "learning_rate": 1.73650753652232e-05, "loss": 1.6157, "step": 841 }, { "epoch": 0.78, "learning_rate": 1.7358308268353178e-05, "loss": 1.5297, "step": 842 }, { "epoch": 0.78, "learning_rate": 1.735153381508332e-05, "loss": 1.5649, "step": 843 }, { "epoch": 0.78, "learning_rate": 1.7344752012186323e-05, "loss": 1.5673, "step": 844 }, { "epoch": 0.78, "learning_rate": 1.733796286644223e-05, "loss": 1.6685, "step": 845 }, { "epoch": 0.78, "learning_rate": 1.733116638463843e-05, "loss": 1.569, "step": 846 }, { "epoch": 0.78, "learning_rate": 1.7324362573569637e-05, "loss": 1.5322, "step": 847 }, { "epoch": 0.79, "learning_rate": 1.73175514400379e-05, "loss": 1.5995, "step": 848 }, { "epoch": 0.79, "learning_rate": 1.731073299085259e-05, "loss": 1.5486, "step": 849 }, { "epoch": 0.79, "learning_rate": 1.7303907232830378e-05, "loss": 1.5315, "step": 850 }, { "epoch": 0.79, "learning_rate": 1.729707417279526e-05, "loss": 1.5615, "step": 851 }, { "epoch": 0.79, "learning_rate": 1.7290233817578525e-05, "loss": 1.54, "step": 852 }, { "epoch": 0.79, "learning_rate": 1.7283386174018744e-05, "loss": 1.5655, "step": 853 }, { "epoch": 0.79, "learning_rate": 1.7276531248961795e-05, "loss": 1.5044, "step": 854 }, { "epoch": 0.79, "learning_rate": 1.7269669049260817e-05, "loss": 1.4971, "step": 855 }, { "epoch": 0.79, "learning_rate": 1.7262799581776236e-05, "loss": 1.5465, "step": 856 }, { "epoch": 0.79, "learning_rate": 1.725592285337574e-05, "loss": 1.5737, "step": 857 }, { "epoch": 0.79, "learning_rate": 1.7249038870934263e-05, "loss": 1.6355, "step": 858 }, { "epoch": 0.8, "learning_rate": 1.7242147641334012e-05, "loss": 1.5946, "step": 859 }, { "epoch": 0.8, "learning_rate": 1.7235249171464423e-05, "loss": 1.4966, "step": 860 }, { "epoch": 0.8, "learning_rate": 1.7228343468222185e-05, "loss": 1.5878, "step": 861 }, { "epoch": 0.8, "learning_rate": 1.72214305385112e-05, "loss": 1.4965, "step": 862 }, { "epoch": 0.8, "learning_rate": 1.721451038924261e-05, "loss": 1.5193, "step": 863 }, { "epoch": 0.8, "learning_rate": 1.720758302733477e-05, "loss": 1.5636, "step": 864 }, { "epoch": 0.8, "learning_rate": 1.7200648459713248e-05, "loss": 1.5365, "step": 865 }, { "epoch": 0.8, "learning_rate": 1.7193706693310807e-05, "loss": 1.6054, "step": 866 }, { "epoch": 0.8, "learning_rate": 1.7186757735067415e-05, "loss": 1.5394, "step": 867 }, { "epoch": 0.8, "learning_rate": 1.717980159193023e-05, "loss": 1.5858, "step": 868 }, { "epoch": 0.8, "learning_rate": 1.7172838270853588e-05, "loss": 1.6546, "step": 869 }, { "epoch": 0.81, "learning_rate": 1.7165867778799003e-05, "loss": 1.5311, "step": 870 }, { "epoch": 0.81, "learning_rate": 1.7158890122735164e-05, "loss": 1.5893, "step": 871 }, { "epoch": 0.81, "learning_rate": 1.7151905309637916e-05, "loss": 1.5724, "step": 872 }, { "epoch": 0.81, "learning_rate": 1.714491334649026e-05, "loss": 1.4964, "step": 873 }, { "epoch": 0.81, "learning_rate": 1.713791424028234e-05, "loss": 1.5525, "step": 874 }, { "epoch": 0.81, "learning_rate": 1.7130907998011462e-05, "loss": 1.5783, "step": 875 }, { "epoch": 0.81, "learning_rate": 1.7123894626682033e-05, "loss": 1.5004, "step": 876 }, { "epoch": 0.81, "learning_rate": 1.711687413330562e-05, "loss": 1.5965, "step": 877 }, { "epoch": 0.81, "learning_rate": 1.7109846524900885e-05, "loss": 1.5805, "step": 878 }, { "epoch": 0.81, "learning_rate": 1.710281180849362e-05, "loss": 1.5544, "step": 879 }, { "epoch": 0.81, "learning_rate": 1.7095769991116716e-05, "loss": 1.581, "step": 880 }, { "epoch": 0.82, "learning_rate": 1.708872107981016e-05, "loss": 1.4571, "step": 881 }, { "epoch": 0.82, "learning_rate": 1.708166508162104e-05, "loss": 1.5416, "step": 882 }, { "epoch": 0.82, "learning_rate": 1.7074602003603522e-05, "loss": 1.5174, "step": 883 }, { "epoch": 0.82, "learning_rate": 1.7067531852818853e-05, "loss": 1.5541, "step": 884 }, { "epoch": 0.82, "learning_rate": 1.706045463633535e-05, "loss": 1.5362, "step": 885 }, { "epoch": 0.82, "learning_rate": 1.705337036122839e-05, "loss": 1.5144, "step": 886 }, { "epoch": 0.82, "learning_rate": 1.7046279034580417e-05, "loss": 1.5131, "step": 887 }, { "epoch": 0.82, "learning_rate": 1.7039180663480917e-05, "loss": 1.6122, "step": 888 }, { "epoch": 0.82, "learning_rate": 1.703207525502642e-05, "loss": 1.498, "step": 889 }, { "epoch": 0.82, "learning_rate": 1.7024962816320485e-05, "loss": 1.5402, "step": 890 }, { "epoch": 0.82, "learning_rate": 1.7017843354473713e-05, "loss": 1.5276, "step": 891 }, { "epoch": 0.83, "learning_rate": 1.701071687660372e-05, "loss": 1.5268, "step": 892 }, { "epoch": 0.83, "learning_rate": 1.7003583389835135e-05, "loss": 1.5198, "step": 893 }, { "epoch": 0.83, "learning_rate": 1.699644290129959e-05, "loss": 1.592, "step": 894 }, { "epoch": 0.83, "learning_rate": 1.6989295418135724e-05, "loss": 1.5506, "step": 895 }, { "epoch": 0.83, "learning_rate": 1.6982140947489168e-05, "loss": 1.5028, "step": 896 }, { "epoch": 0.83, "learning_rate": 1.6974979496512538e-05, "loss": 1.5623, "step": 897 }, { "epoch": 0.83, "learning_rate": 1.6967811072365423e-05, "loss": 1.4827, "step": 898 }, { "epoch": 0.83, "learning_rate": 1.6960635682214394e-05, "loss": 1.5709, "step": 899 }, { "epoch": 0.83, "learning_rate": 1.695345333323297e-05, "loss": 1.5472, "step": 900 }, { "epoch": 0.83, "learning_rate": 1.694626403260165e-05, "loss": 1.4999, "step": 901 }, { "epoch": 0.83, "learning_rate": 1.6939067787507864e-05, "loss": 1.5502, "step": 902 }, { "epoch": 0.84, "learning_rate": 1.693186460514599e-05, "loss": 1.657, "step": 903 }, { "epoch": 0.84, "learning_rate": 1.692465449271734e-05, "loss": 1.5454, "step": 904 }, { "epoch": 0.84, "learning_rate": 1.6917437457430165e-05, "loss": 1.6015, "step": 905 }, { "epoch": 0.84, "learning_rate": 1.691021350649962e-05, "loss": 1.5146, "step": 906 }, { "epoch": 0.84, "learning_rate": 1.6902982647147783e-05, "loss": 1.6306, "step": 907 }, { "epoch": 0.84, "learning_rate": 1.689574488660364e-05, "loss": 1.5783, "step": 908 }, { "epoch": 0.84, "learning_rate": 1.6888500232103076e-05, "loss": 1.5676, "step": 909 }, { "epoch": 0.84, "learning_rate": 1.6881248690888866e-05, "loss": 1.5601, "step": 910 }, { "epoch": 0.84, "learning_rate": 1.687399027021067e-05, "loss": 1.5258, "step": 911 }, { "epoch": 0.84, "learning_rate": 1.6866724977325027e-05, "loss": 1.5679, "step": 912 }, { "epoch": 0.85, "learning_rate": 1.685945281949534e-05, "loss": 1.5366, "step": 913 }, { "epoch": 0.85, "learning_rate": 1.6852173803991887e-05, "loss": 1.5693, "step": 914 }, { "epoch": 0.85, "learning_rate": 1.6844887938091794e-05, "loss": 1.5746, "step": 915 }, { "epoch": 0.85, "learning_rate": 1.6837595229079037e-05, "loss": 1.5225, "step": 916 }, { "epoch": 0.85, "learning_rate": 1.683029568424443e-05, "loss": 1.5788, "step": 917 }, { "epoch": 0.85, "learning_rate": 1.682298931088563e-05, "loss": 1.6176, "step": 918 }, { "epoch": 0.85, "learning_rate": 1.6815676116307117e-05, "loss": 1.5478, "step": 919 }, { "epoch": 0.85, "learning_rate": 1.6808356107820182e-05, "loss": 1.5726, "step": 920 }, { "epoch": 0.85, "learning_rate": 1.6801029292742938e-05, "loss": 1.6279, "step": 921 }, { "epoch": 0.85, "learning_rate": 1.6793695678400307e-05, "loss": 1.5315, "step": 922 }, { "epoch": 0.85, "learning_rate": 1.6786355272123987e-05, "loss": 1.566, "step": 923 }, { "epoch": 0.86, "learning_rate": 1.6779008081252488e-05, "loss": 1.5722, "step": 924 }, { "epoch": 0.86, "learning_rate": 1.67716541131311e-05, "loss": 1.5307, "step": 925 }, { "epoch": 0.86, "learning_rate": 1.6764293375111874e-05, "loss": 1.6331, "step": 926 }, { "epoch": 0.86, "learning_rate": 1.6756925874553648e-05, "loss": 1.62, "step": 927 }, { "epoch": 0.86, "learning_rate": 1.6749551618822006e-05, "loss": 1.5439, "step": 928 }, { "epoch": 0.86, "learning_rate": 1.674217061528929e-05, "loss": 1.623, "step": 929 }, { "epoch": 0.86, "learning_rate": 1.67347828713346e-05, "loss": 1.502, "step": 930 }, { "epoch": 0.86, "learning_rate": 1.672738839434375e-05, "loss": 1.5559, "step": 931 }, { "epoch": 0.86, "learning_rate": 1.6719987191709306e-05, "loss": 1.5542, "step": 932 }, { "epoch": 0.86, "learning_rate": 1.671257927083055e-05, "loss": 1.4951, "step": 933 }, { "epoch": 0.86, "learning_rate": 1.6705164639113483e-05, "loss": 1.5792, "step": 934 }, { "epoch": 0.87, "learning_rate": 1.6697743303970813e-05, "loss": 1.5862, "step": 935 }, { "epoch": 0.87, "learning_rate": 1.669031527282195e-05, "loss": 1.4664, "step": 936 }, { "epoch": 0.87, "learning_rate": 1.6682880553092993e-05, "loss": 1.5355, "step": 937 }, { "epoch": 0.87, "learning_rate": 1.6675439152216747e-05, "loss": 1.5227, "step": 938 }, { "epoch": 0.87, "learning_rate": 1.6667991077632673e-05, "loss": 1.5762, "step": 939 }, { "epoch": 0.87, "learning_rate": 1.6660536336786916e-05, "loss": 1.5114, "step": 940 }, { "epoch": 0.87, "learning_rate": 1.665307493713228e-05, "loss": 1.5192, "step": 941 }, { "epoch": 0.87, "learning_rate": 1.6645606886128236e-05, "loss": 1.5374, "step": 942 }, { "epoch": 0.87, "learning_rate": 1.6638132191240893e-05, "loss": 1.5662, "step": 943 }, { "epoch": 0.87, "learning_rate": 1.6630650859943006e-05, "loss": 1.4704, "step": 944 }, { "epoch": 0.87, "learning_rate": 1.6623162899713975e-05, "loss": 1.5311, "step": 945 }, { "epoch": 0.88, "learning_rate": 1.661566831803981e-05, "loss": 1.5216, "step": 946 }, { "epoch": 0.88, "learning_rate": 1.6608167122413152e-05, "loss": 1.5102, "step": 947 }, { "epoch": 0.88, "learning_rate": 1.6600659320333247e-05, "loss": 1.4879, "step": 948 }, { "epoch": 0.88, "learning_rate": 1.6593144919305958e-05, "loss": 1.5873, "step": 949 }, { "epoch": 0.88, "learning_rate": 1.6585623926843724e-05, "loss": 1.4767, "step": 950 }, { "epoch": 0.88, "learning_rate": 1.6578096350465603e-05, "loss": 1.5499, "step": 951 }, { "epoch": 0.88, "learning_rate": 1.6570562197697208e-05, "loss": 1.5821, "step": 952 }, { "epoch": 0.88, "learning_rate": 1.656302147607074e-05, "loss": 1.621, "step": 953 }, { "epoch": 0.88, "learning_rate": 1.655547419312497e-05, "loss": 1.5678, "step": 954 }, { "epoch": 0.88, "learning_rate": 1.6547920356405212e-05, "loss": 1.519, "step": 955 }, { "epoch": 0.88, "learning_rate": 1.6540359973463358e-05, "loss": 1.5978, "step": 956 }, { "epoch": 0.89, "learning_rate": 1.653279305185782e-05, "loss": 1.5313, "step": 957 }, { "epoch": 0.89, "learning_rate": 1.652521959915356e-05, "loss": 1.523, "step": 958 }, { "epoch": 0.89, "learning_rate": 1.6517639622922062e-05, "loss": 1.5745, "step": 959 }, { "epoch": 0.89, "learning_rate": 1.651005313074135e-05, "loss": 1.5101, "step": 960 }, { "epoch": 0.89, "learning_rate": 1.650246013019593e-05, "loss": 1.549, "step": 961 }, { "epoch": 0.89, "learning_rate": 1.6494860628876848e-05, "loss": 1.5436, "step": 962 }, { "epoch": 0.89, "learning_rate": 1.6487254634381627e-05, "loss": 1.5566, "step": 963 }, { "epoch": 0.89, "learning_rate": 1.6479642154314294e-05, "loss": 1.5226, "step": 964 }, { "epoch": 0.89, "learning_rate": 1.6472023196285344e-05, "loss": 1.5695, "step": 965 }, { "epoch": 0.89, "learning_rate": 1.6464397767911772e-05, "loss": 1.4765, "step": 966 }, { "epoch": 0.9, "learning_rate": 1.6456765876817023e-05, "loss": 1.4872, "step": 967 }, { "epoch": 0.9, "learning_rate": 1.6449127530631005e-05, "loss": 1.5348, "step": 968 }, { "epoch": 0.9, "learning_rate": 1.6441482736990092e-05, "loss": 1.4565, "step": 969 }, { "epoch": 0.9, "learning_rate": 1.643383150353709e-05, "loss": 1.5736, "step": 970 }, { "epoch": 0.9, "learning_rate": 1.6426173837921243e-05, "loss": 1.4879, "step": 971 }, { "epoch": 0.9, "learning_rate": 1.6418509747798237e-05, "loss": 1.4985, "step": 972 }, { "epoch": 0.9, "learning_rate": 1.6410839240830175e-05, "loss": 1.4514, "step": 973 }, { "epoch": 0.9, "learning_rate": 1.640316232468557e-05, "loss": 1.458, "step": 974 }, { "epoch": 0.9, "learning_rate": 1.639547900703935e-05, "loss": 1.5118, "step": 975 }, { "epoch": 0.9, "learning_rate": 1.6387789295572837e-05, "loss": 1.456, "step": 976 }, { "epoch": 0.9, "learning_rate": 1.638009319797375e-05, "loss": 1.4947, "step": 977 }, { "epoch": 0.91, "learning_rate": 1.6372390721936198e-05, "loss": 1.6411, "step": 978 }, { "epoch": 0.91, "learning_rate": 1.6364681875160646e-05, "loss": 1.599, "step": 979 }, { "epoch": 0.91, "learning_rate": 1.635696666535395e-05, "loss": 1.6197, "step": 980 }, { "epoch": 0.91, "learning_rate": 1.634924510022932e-05, "loss": 1.5025, "step": 981 }, { "epoch": 0.91, "learning_rate": 1.6341517187506307e-05, "loss": 1.5552, "step": 982 }, { "epoch": 0.91, "learning_rate": 1.633378293491083e-05, "loss": 1.4365, "step": 983 }, { "epoch": 0.91, "learning_rate": 1.6326042350175137e-05, "loss": 1.5534, "step": 984 }, { "epoch": 0.91, "learning_rate": 1.63182954410378e-05, "loss": 1.5171, "step": 985 }, { "epoch": 0.91, "learning_rate": 1.6310542215243717e-05, "loss": 1.6073, "step": 986 }, { "epoch": 0.91, "learning_rate": 1.630278268054411e-05, "loss": 1.5706, "step": 987 }, { "epoch": 0.91, "learning_rate": 1.6295016844696494e-05, "loss": 1.5699, "step": 988 }, { "epoch": 0.92, "learning_rate": 1.6287244715464698e-05, "loss": 1.555, "step": 989 }, { "epoch": 0.92, "learning_rate": 1.6279466300618826e-05, "loss": 1.5953, "step": 990 }, { "epoch": 0.92, "learning_rate": 1.6271681607935286e-05, "loss": 1.5813, "step": 991 }, { "epoch": 0.92, "learning_rate": 1.6263890645196744e-05, "loss": 1.5141, "step": 992 }, { "epoch": 0.92, "learning_rate": 1.6256093420192146e-05, "loss": 1.5136, "step": 993 }, { "epoch": 0.92, "learning_rate": 1.624828994071669e-05, "loss": 1.5184, "step": 994 }, { "epoch": 0.92, "learning_rate": 1.624048021457184e-05, "loss": 1.6113, "step": 995 }, { "epoch": 0.92, "learning_rate": 1.6232664249565288e-05, "loss": 1.4883, "step": 996 }, { "epoch": 0.92, "learning_rate": 1.6224842053510977e-05, "loss": 1.5736, "step": 997 }, { "epoch": 0.92, "learning_rate": 1.621701363422907e-05, "loss": 1.4524, "step": 998 }, { "epoch": 0.92, "learning_rate": 1.6209178999545966e-05, "loss": 1.6126, "step": 999 }, { "epoch": 0.93, "learning_rate": 1.6201338157294256e-05, "loss": 1.5211, "step": 1000 }, { "epoch": 0.93, "learning_rate": 1.619349111531275e-05, "loss": 1.4642, "step": 1001 }, { "epoch": 0.93, "learning_rate": 1.618563788144646e-05, "loss": 1.5203, "step": 1002 }, { "epoch": 0.93, "learning_rate": 1.6177778463546583e-05, "loss": 1.6294, "step": 1003 }, { "epoch": 0.93, "learning_rate": 1.6169912869470496e-05, "loss": 1.5195, "step": 1004 }, { "epoch": 0.93, "learning_rate": 1.6162041107081754e-05, "loss": 1.4844, "step": 1005 }, { "epoch": 0.93, "learning_rate": 1.6154163184250077e-05, "loss": 1.4711, "step": 1006 }, { "epoch": 0.93, "learning_rate": 1.6146279108851344e-05, "loss": 1.5286, "step": 1007 }, { "epoch": 0.93, "learning_rate": 1.6138388888767587e-05, "loss": 1.5649, "step": 1008 }, { "epoch": 0.93, "learning_rate": 1.613049253188698e-05, "loss": 1.5218, "step": 1009 }, { "epoch": 0.93, "learning_rate": 1.6122590046103827e-05, "loss": 1.5774, "step": 1010 }, { "epoch": 0.94, "learning_rate": 1.6114681439318576e-05, "loss": 1.5451, "step": 1011 }, { "epoch": 0.94, "learning_rate": 1.610676671943777e-05, "loss": 1.5251, "step": 1012 }, { "epoch": 0.94, "learning_rate": 1.6098845894374078e-05, "loss": 1.477, "step": 1013 }, { "epoch": 0.94, "learning_rate": 1.6090918972046284e-05, "loss": 1.5532, "step": 1014 }, { "epoch": 0.94, "learning_rate": 1.6082985960379234e-05, "loss": 1.5428, "step": 1015 }, { "epoch": 0.94, "learning_rate": 1.60750468673039e-05, "loss": 1.4762, "step": 1016 }, { "epoch": 0.94, "learning_rate": 1.6067101700757303e-05, "loss": 1.5125, "step": 1017 }, { "epoch": 0.94, "learning_rate": 1.6059150468682558e-05, "loss": 1.5726, "step": 1018 }, { "epoch": 0.94, "learning_rate": 1.605119317902883e-05, "loss": 1.593, "step": 1019 }, { "epoch": 0.94, "learning_rate": 1.6043229839751346e-05, "loss": 1.532, "step": 1020 }, { "epoch": 0.95, "learning_rate": 1.603526045881138e-05, "loss": 1.4795, "step": 1021 }, { "epoch": 0.95, "learning_rate": 1.6027285044176245e-05, "loss": 1.5231, "step": 1022 }, { "epoch": 0.95, "learning_rate": 1.601930360381929e-05, "loss": 1.5579, "step": 1023 }, { "epoch": 0.95, "learning_rate": 1.6011316145719886e-05, "loss": 1.5191, "step": 1024 }, { "epoch": 0.95, "learning_rate": 1.6003322677863412e-05, "loss": 1.5018, "step": 1025 }, { "epoch": 0.95, "learning_rate": 1.599532320824127e-05, "loss": 1.5009, "step": 1026 }, { "epoch": 0.95, "learning_rate": 1.5987317744850855e-05, "loss": 1.5003, "step": 1027 }, { "epoch": 0.95, "learning_rate": 1.5979306295695547e-05, "loss": 1.611, "step": 1028 }, { "epoch": 0.95, "learning_rate": 1.5971288868784724e-05, "loss": 1.4653, "step": 1029 }, { "epoch": 0.95, "learning_rate": 1.5963265472133733e-05, "loss": 1.5037, "step": 1030 }, { "epoch": 0.95, "learning_rate": 1.595523611376389e-05, "loss": 1.6016, "step": 1031 }, { "epoch": 0.96, "learning_rate": 1.5947200801702467e-05, "loss": 1.5399, "step": 1032 }, { "epoch": 0.96, "learning_rate": 1.5939159543982695e-05, "loss": 1.5468, "step": 1033 }, { "epoch": 0.96, "learning_rate": 1.5931112348643742e-05, "loss": 1.5898, "step": 1034 }, { "epoch": 0.96, "learning_rate": 1.592305922373072e-05, "loss": 1.4492, "step": 1035 }, { "epoch": 0.96, "learning_rate": 1.5915000177294668e-05, "loss": 1.5328, "step": 1036 }, { "epoch": 0.96, "learning_rate": 1.5906935217392538e-05, "loss": 1.4894, "step": 1037 }, { "epoch": 0.96, "learning_rate": 1.58988643520872e-05, "loss": 1.571, "step": 1038 }, { "epoch": 0.96, "learning_rate": 1.5890787589447424e-05, "loss": 1.5252, "step": 1039 }, { "epoch": 0.96, "learning_rate": 1.5882704937547885e-05, "loss": 1.5107, "step": 1040 }, { "epoch": 0.96, "learning_rate": 1.5874616404469124e-05, "loss": 1.4811, "step": 1041 }, { "epoch": 0.96, "learning_rate": 1.586652199829759e-05, "loss": 1.496, "step": 1042 }, { "epoch": 0.97, "learning_rate": 1.5858421727125586e-05, "loss": 1.4955, "step": 1043 }, { "epoch": 0.97, "learning_rate": 1.585031559905128e-05, "loss": 1.5647, "step": 1044 }, { "epoch": 0.97, "learning_rate": 1.5842203622178697e-05, "loss": 1.516, "step": 1045 }, { "epoch": 0.97, "learning_rate": 1.5834085804617715e-05, "loss": 1.4734, "step": 1046 }, { "epoch": 0.97, "learning_rate": 1.5825962154484044e-05, "loss": 1.54, "step": 1047 }, { "epoch": 0.97, "learning_rate": 1.5817832679899222e-05, "loss": 1.5773, "step": 1048 }, { "epoch": 0.97, "learning_rate": 1.5809697388990622e-05, "loss": 1.5351, "step": 1049 }, { "epoch": 0.97, "learning_rate": 1.5801556289891423e-05, "loss": 1.5516, "step": 1050 }, { "epoch": 0.97, "learning_rate": 1.579340939074061e-05, "loss": 1.5347, "step": 1051 }, { "epoch": 0.97, "learning_rate": 1.5785256699682973e-05, "loss": 1.5124, "step": 1052 }, { "epoch": 0.97, "learning_rate": 1.5777098224869087e-05, "loss": 1.5338, "step": 1053 }, { "epoch": 0.98, "learning_rate": 1.5768933974455306e-05, "loss": 1.5552, "step": 1054 }, { "epoch": 0.98, "learning_rate": 1.5760763956603776e-05, "loss": 1.4599, "step": 1055 }, { "epoch": 0.98, "learning_rate": 1.575258817948238e-05, "loss": 1.5189, "step": 1056 }, { "epoch": 0.98, "learning_rate": 1.5744406651264786e-05, "loss": 1.5368, "step": 1057 }, { "epoch": 0.98, "learning_rate": 1.5736219380130395e-05, "loss": 1.5385, "step": 1058 }, { "epoch": 0.98, "learning_rate": 1.5728026374264354e-05, "loss": 1.5917, "step": 1059 }, { "epoch": 0.98, "learning_rate": 1.571982764185755e-05, "loss": 1.4671, "step": 1060 }, { "epoch": 0.98, "learning_rate": 1.5711623191106588e-05, "loss": 1.4924, "step": 1061 }, { "epoch": 0.98, "learning_rate": 1.5703413030213782e-05, "loss": 1.5243, "step": 1062 }, { "epoch": 0.98, "learning_rate": 1.569519716738717e-05, "loss": 1.4805, "step": 1063 }, { "epoch": 0.98, "learning_rate": 1.568697561084049e-05, "loss": 1.4858, "step": 1064 }, { "epoch": 0.99, "learning_rate": 1.5678748368793155e-05, "loss": 1.5524, "step": 1065 }, { "epoch": 0.99, "learning_rate": 1.5670515449470278e-05, "loss": 1.4609, "step": 1066 }, { "epoch": 0.99, "learning_rate": 1.566227686110264e-05, "loss": 1.5369, "step": 1067 }, { "epoch": 0.99, "learning_rate": 1.5654032611926698e-05, "loss": 1.5273, "step": 1068 }, { "epoch": 0.99, "learning_rate": 1.5645782710184562e-05, "loss": 1.5042, "step": 1069 }, { "epoch": 0.99, "learning_rate": 1.5637527164123995e-05, "loss": 1.4749, "step": 1070 }, { "epoch": 0.99, "learning_rate": 1.5629265981998395e-05, "loss": 1.5467, "step": 1071 }, { "epoch": 0.99, "learning_rate": 1.5620999172066807e-05, "loss": 1.5104, "step": 1072 }, { "epoch": 0.99, "learning_rate": 1.56127267425939e-05, "loss": 1.513, "step": 1073 }, { "epoch": 0.99, "learning_rate": 1.560444870184995e-05, "loss": 1.4681, "step": 1074 }, { "epoch": 1.0, "learning_rate": 1.5596165058110854e-05, "loss": 1.4405, "step": 1075 }, { "epoch": 1.0, "learning_rate": 1.5587875819658108e-05, "loss": 1.4766, "step": 1076 }, { "epoch": 1.0, "learning_rate": 1.5579580994778805e-05, "loss": 1.4857, "step": 1077 }, { "epoch": 1.0, "learning_rate": 1.5571280591765613e-05, "loss": 1.5315, "step": 1078 }, { "epoch": 1.0, "learning_rate": 1.556297461891678e-05, "loss": 1.513, "step": 1079 }, { "epoch": 1.0, "learning_rate": 1.555466308453613e-05, "loss": 1.4966, "step": 1080 }, { "epoch": 1.0, "learning_rate": 1.5546345996933042e-05, "loss": 1.6981, "step": 1081 }, { "epoch": 1.0, "learning_rate": 1.553802336442244e-05, "loss": 1.3098, "step": 1082 }, { "epoch": 1.0, "learning_rate": 1.5529695195324803e-05, "loss": 1.3445, "step": 1083 }, { "epoch": 1.0, "learning_rate": 1.5521361497966143e-05, "loss": 1.2841, "step": 1084 }, { "epoch": 1.0, "learning_rate": 1.5513022280677986e-05, "loss": 1.3858, "step": 1085 }, { "epoch": 1.01, "learning_rate": 1.55046775517974e-05, "loss": 1.3553, "step": 1086 }, { "epoch": 1.01, "learning_rate": 1.5496327319666938e-05, "loss": 1.3402, "step": 1087 }, { "epoch": 1.01, "learning_rate": 1.548797159263467e-05, "loss": 1.2538, "step": 1088 }, { "epoch": 1.01, "learning_rate": 1.5479610379054163e-05, "loss": 1.3342, "step": 1089 }, { "epoch": 1.01, "learning_rate": 1.547124368728445e-05, "loss": 1.2598, "step": 1090 }, { "epoch": 1.01, "learning_rate": 1.5462871525690058e-05, "loss": 1.2991, "step": 1091 }, { "epoch": 1.01, "learning_rate": 1.5454493902640982e-05, "loss": 1.299, "step": 1092 }, { "epoch": 1.01, "learning_rate": 1.5446110826512664e-05, "loss": 1.3691, "step": 1093 }, { "epoch": 1.01, "learning_rate": 1.5437722305686015e-05, "loss": 1.3154, "step": 1094 }, { "epoch": 1.01, "learning_rate": 1.542932834854737e-05, "loss": 1.315, "step": 1095 }, { "epoch": 1.01, "learning_rate": 1.5420928963488515e-05, "loss": 1.184, "step": 1096 }, { "epoch": 1.02, "learning_rate": 1.5412524158906658e-05, "loss": 1.2897, "step": 1097 }, { "epoch": 1.02, "learning_rate": 1.540411394320442e-05, "loss": 1.2856, "step": 1098 }, { "epoch": 1.02, "learning_rate": 1.5395698324789837e-05, "loss": 1.3676, "step": 1099 }, { "epoch": 1.02, "learning_rate": 1.5387277312076348e-05, "loss": 1.2769, "step": 1100 }, { "epoch": 1.02, "learning_rate": 1.5378850913482776e-05, "loss": 1.3397, "step": 1101 }, { "epoch": 1.02, "learning_rate": 1.5370419137433335e-05, "loss": 1.3086, "step": 1102 }, { "epoch": 1.02, "learning_rate": 1.536198199235762e-05, "loss": 1.3055, "step": 1103 }, { "epoch": 1.02, "learning_rate": 1.5353539486690577e-05, "loss": 1.3022, "step": 1104 }, { "epoch": 1.02, "learning_rate": 1.5345091628872536e-05, "loss": 1.3114, "step": 1105 }, { "epoch": 1.02, "learning_rate": 1.533663842734915e-05, "loss": 1.299, "step": 1106 }, { "epoch": 1.02, "learning_rate": 1.532817989057144e-05, "loss": 1.3441, "step": 1107 }, { "epoch": 1.03, "learning_rate": 1.531971602699574e-05, "loss": 1.3589, "step": 1108 }, { "epoch": 1.03, "learning_rate": 1.5311246845083724e-05, "loss": 1.2931, "step": 1109 }, { "epoch": 1.03, "learning_rate": 1.5302772353302374e-05, "loss": 1.3688, "step": 1110 }, { "epoch": 1.03, "learning_rate": 1.5294292560123984e-05, "loss": 1.331, "step": 1111 }, { "epoch": 1.03, "learning_rate": 1.5285807474026154e-05, "loss": 1.2597, "step": 1112 }, { "epoch": 1.03, "learning_rate": 1.527731710349176e-05, "loss": 1.3287, "step": 1113 }, { "epoch": 1.03, "learning_rate": 1.526882145700898e-05, "loss": 1.2387, "step": 1114 }, { "epoch": 1.03, "learning_rate": 1.5260320543071246e-05, "loss": 1.295, "step": 1115 }, { "epoch": 1.03, "learning_rate": 1.5251814370177276e-05, "loss": 1.2558, "step": 1116 }, { "epoch": 1.03, "learning_rate": 1.5243302946831034e-05, "loss": 1.3354, "step": 1117 }, { "epoch": 1.04, "learning_rate": 1.5234786281541736e-05, "loss": 1.3298, "step": 1118 }, { "epoch": 1.04, "learning_rate": 1.5226264382823837e-05, "loss": 1.288, "step": 1119 }, { "epoch": 1.04, "learning_rate": 1.5217737259197028e-05, "loss": 1.1961, "step": 1120 }, { "epoch": 1.04, "learning_rate": 1.5209204919186218e-05, "loss": 1.3383, "step": 1121 }, { "epoch": 1.04, "learning_rate": 1.520066737132154e-05, "loss": 1.2444, "step": 1122 }, { "epoch": 1.04, "learning_rate": 1.519212462413832e-05, "loss": 1.2569, "step": 1123 }, { "epoch": 1.04, "learning_rate": 1.5183576686177094e-05, "loss": 1.3048, "step": 1124 }, { "epoch": 1.04, "learning_rate": 1.5175023565983583e-05, "loss": 1.2866, "step": 1125 }, { "epoch": 1.04, "learning_rate": 1.5166465272108686e-05, "loss": 1.2138, "step": 1126 }, { "epoch": 1.04, "learning_rate": 1.5157901813108482e-05, "loss": 1.3512, "step": 1127 }, { "epoch": 1.04, "learning_rate": 1.5149333197544203e-05, "loss": 1.2789, "step": 1128 }, { "epoch": 1.05, "learning_rate": 1.5140759433982245e-05, "loss": 1.3196, "step": 1129 }, { "epoch": 1.05, "learning_rate": 1.5132180530994149e-05, "loss": 1.3183, "step": 1130 }, { "epoch": 1.05, "learning_rate": 1.5123596497156596e-05, "loss": 1.3057, "step": 1131 }, { "epoch": 1.05, "learning_rate": 1.5115007341051388e-05, "loss": 1.2964, "step": 1132 }, { "epoch": 1.05, "learning_rate": 1.5106413071265453e-05, "loss": 1.3339, "step": 1133 }, { "epoch": 1.05, "learning_rate": 1.5097813696390835e-05, "loss": 1.2835, "step": 1134 }, { "epoch": 1.05, "learning_rate": 1.5089209225024678e-05, "loss": 1.1978, "step": 1135 }, { "epoch": 1.05, "learning_rate": 1.508059966576922e-05, "loss": 1.2749, "step": 1136 }, { "epoch": 1.05, "learning_rate": 1.507198502723179e-05, "loss": 1.382, "step": 1137 }, { "epoch": 1.05, "learning_rate": 1.5063365318024791e-05, "loss": 1.262, "step": 1138 }, { "epoch": 1.05, "learning_rate": 1.5054740546765696e-05, "loss": 1.3027, "step": 1139 }, { "epoch": 1.06, "learning_rate": 1.5046110722077041e-05, "loss": 1.327, "step": 1140 }, { "epoch": 1.06, "learning_rate": 1.503747585258641e-05, "loss": 1.3165, "step": 1141 }, { "epoch": 1.06, "learning_rate": 1.5028835946926433e-05, "loss": 1.3098, "step": 1142 }, { "epoch": 1.06, "learning_rate": 1.5020191013734776e-05, "loss": 1.2374, "step": 1143 }, { "epoch": 1.06, "learning_rate": 1.5011541061654131e-05, "loss": 1.2966, "step": 1144 }, { "epoch": 1.06, "learning_rate": 1.5002886099332208e-05, "loss": 1.1798, "step": 1145 }, { "epoch": 1.06, "learning_rate": 1.4994226135421719e-05, "loss": 1.3565, "step": 1146 }, { "epoch": 1.06, "learning_rate": 1.4985561178580388e-05, "loss": 1.3042, "step": 1147 }, { "epoch": 1.06, "learning_rate": 1.497689123747092e-05, "loss": 1.3637, "step": 1148 }, { "epoch": 1.06, "learning_rate": 1.4968216320761012e-05, "loss": 1.4165, "step": 1149 }, { "epoch": 1.06, "learning_rate": 1.4959536437123326e-05, "loss": 1.2263, "step": 1150 }, { "epoch": 1.07, "learning_rate": 1.4950851595235494e-05, "loss": 1.3126, "step": 1151 }, { "epoch": 1.07, "learning_rate": 1.4942161803780113e-05, "loss": 1.3516, "step": 1152 }, { "epoch": 1.07, "learning_rate": 1.4933467071444713e-05, "loss": 1.3341, "step": 1153 }, { "epoch": 1.07, "learning_rate": 1.4924767406921775e-05, "loss": 1.3524, "step": 1154 }, { "epoch": 1.07, "learning_rate": 1.4916062818908703e-05, "loss": 1.2762, "step": 1155 }, { "epoch": 1.07, "learning_rate": 1.4907353316107836e-05, "loss": 1.3533, "step": 1156 }, { "epoch": 1.07, "learning_rate": 1.4898638907226407e-05, "loss": 1.2946, "step": 1157 }, { "epoch": 1.07, "learning_rate": 1.4889919600976573e-05, "loss": 1.3258, "step": 1158 }, { "epoch": 1.07, "learning_rate": 1.4881195406075374e-05, "loss": 1.1656, "step": 1159 }, { "epoch": 1.07, "learning_rate": 1.4872466331244745e-05, "loss": 1.3406, "step": 1160 }, { "epoch": 1.07, "learning_rate": 1.4863732385211498e-05, "loss": 1.3176, "step": 1161 }, { "epoch": 1.08, "learning_rate": 1.485499357670731e-05, "loss": 1.2695, "step": 1162 }, { "epoch": 1.08, "learning_rate": 1.4846249914468726e-05, "loss": 1.2524, "step": 1163 }, { "epoch": 1.08, "learning_rate": 1.4837501407237143e-05, "loss": 1.312, "step": 1164 }, { "epoch": 1.08, "learning_rate": 1.482874806375879e-05, "loss": 1.2019, "step": 1165 }, { "epoch": 1.08, "learning_rate": 1.481998989278475e-05, "loss": 1.3159, "step": 1166 }, { "epoch": 1.08, "learning_rate": 1.4811226903070917e-05, "loss": 1.1935, "step": 1167 }, { "epoch": 1.08, "learning_rate": 1.4802459103378008e-05, "loss": 1.3056, "step": 1168 }, { "epoch": 1.08, "learning_rate": 1.4793686502471549e-05, "loss": 1.3394, "step": 1169 }, { "epoch": 1.08, "learning_rate": 1.478490910912187e-05, "loss": 1.2946, "step": 1170 }, { "epoch": 1.08, "learning_rate": 1.4776126932104083e-05, "loss": 1.2775, "step": 1171 }, { "epoch": 1.09, "learning_rate": 1.476733998019809e-05, "loss": 1.3477, "step": 1172 }, { "epoch": 1.09, "learning_rate": 1.4758548262188562e-05, "loss": 1.2456, "step": 1173 }, { "epoch": 1.09, "learning_rate": 1.4749751786864936e-05, "loss": 1.3565, "step": 1174 }, { "epoch": 1.09, "learning_rate": 1.474095056302141e-05, "loss": 1.2348, "step": 1175 }, { "epoch": 1.09, "learning_rate": 1.473214459945692e-05, "loss": 1.2481, "step": 1176 }, { "epoch": 1.09, "learning_rate": 1.4723333904975153e-05, "loss": 1.3165, "step": 1177 }, { "epoch": 1.09, "learning_rate": 1.4714518488384512e-05, "loss": 1.3182, "step": 1178 }, { "epoch": 1.09, "learning_rate": 1.4705698358498128e-05, "loss": 1.2188, "step": 1179 }, { "epoch": 1.09, "learning_rate": 1.4696873524133843e-05, "loss": 1.2907, "step": 1180 }, { "epoch": 1.09, "learning_rate": 1.4688043994114208e-05, "loss": 1.3314, "step": 1181 }, { "epoch": 1.09, "learning_rate": 1.4679209777266455e-05, "loss": 1.2973, "step": 1182 }, { "epoch": 1.1, "learning_rate": 1.4670370882422515e-05, "loss": 1.3241, "step": 1183 }, { "epoch": 1.1, "learning_rate": 1.466152731841899e-05, "loss": 1.2916, "step": 1184 }, { "epoch": 1.1, "learning_rate": 1.4652679094097146e-05, "loss": 1.2064, "step": 1185 }, { "epoch": 1.1, "learning_rate": 1.464382621830292e-05, "loss": 1.3285, "step": 1186 }, { "epoch": 1.1, "learning_rate": 1.4634968699886882e-05, "loss": 1.2947, "step": 1187 }, { "epoch": 1.1, "learning_rate": 1.4626106547704262e-05, "loss": 1.2884, "step": 1188 }, { "epoch": 1.1, "learning_rate": 1.4617239770614913e-05, "loss": 1.2627, "step": 1189 }, { "epoch": 1.1, "learning_rate": 1.4608368377483306e-05, "loss": 1.2979, "step": 1190 }, { "epoch": 1.1, "learning_rate": 1.459949237717854e-05, "loss": 1.2868, "step": 1191 }, { "epoch": 1.1, "learning_rate": 1.4590611778574309e-05, "loss": 1.2401, "step": 1192 }, { "epoch": 1.1, "learning_rate": 1.4581726590548908e-05, "loss": 1.2445, "step": 1193 }, { "epoch": 1.11, "learning_rate": 1.4572836821985223e-05, "loss": 1.3312, "step": 1194 }, { "epoch": 1.11, "learning_rate": 1.4563942481770717e-05, "loss": 1.3206, "step": 1195 }, { "epoch": 1.11, "learning_rate": 1.4555043578797423e-05, "loss": 1.2865, "step": 1196 }, { "epoch": 1.11, "learning_rate": 1.4546140121961935e-05, "loss": 1.3749, "step": 1197 }, { "epoch": 1.11, "learning_rate": 1.4537232120165401e-05, "loss": 1.3569, "step": 1198 }, { "epoch": 1.11, "learning_rate": 1.4528319582313513e-05, "loss": 1.2606, "step": 1199 }, { "epoch": 1.11, "learning_rate": 1.4519402517316496e-05, "loss": 1.3709, "step": 1200 }, { "epoch": 1.11, "learning_rate": 1.4510480934089104e-05, "loss": 1.2357, "step": 1201 }, { "epoch": 1.11, "learning_rate": 1.4501554841550605e-05, "loss": 1.3323, "step": 1202 }, { "epoch": 1.11, "learning_rate": 1.4492624248624778e-05, "loss": 1.2495, "step": 1203 }, { "epoch": 1.11, "learning_rate": 1.44836891642399e-05, "loss": 1.3065, "step": 1204 }, { "epoch": 1.12, "learning_rate": 1.4474749597328735e-05, "loss": 1.267, "step": 1205 }, { "epoch": 1.12, "learning_rate": 1.4465805556828534e-05, "loss": 1.2152, "step": 1206 }, { "epoch": 1.12, "learning_rate": 1.4456857051681015e-05, "loss": 1.3462, "step": 1207 }, { "epoch": 1.12, "learning_rate": 1.4447904090832366e-05, "loss": 1.2738, "step": 1208 }, { "epoch": 1.12, "learning_rate": 1.4438946683233224e-05, "loss": 1.3737, "step": 1209 }, { "epoch": 1.12, "learning_rate": 1.4429984837838673e-05, "loss": 1.2911, "step": 1210 }, { "epoch": 1.12, "learning_rate": 1.4421018563608234e-05, "loss": 1.3239, "step": 1211 }, { "epoch": 1.12, "learning_rate": 1.4412047869505857e-05, "loss": 1.2455, "step": 1212 }, { "epoch": 1.12, "learning_rate": 1.4403072764499908e-05, "loss": 1.3003, "step": 1213 }, { "epoch": 1.12, "learning_rate": 1.4394093257563169e-05, "loss": 1.2777, "step": 1214 }, { "epoch": 1.12, "learning_rate": 1.4385109357672811e-05, "loss": 1.3196, "step": 1215 }, { "epoch": 1.13, "learning_rate": 1.4376121073810406e-05, "loss": 1.3136, "step": 1216 }, { "epoch": 1.13, "learning_rate": 1.4367128414961908e-05, "loss": 1.2274, "step": 1217 }, { "epoch": 1.13, "learning_rate": 1.4358131390117645e-05, "loss": 1.2709, "step": 1218 }, { "epoch": 1.13, "learning_rate": 1.4349130008272305e-05, "loss": 1.2691, "step": 1219 }, { "epoch": 1.13, "learning_rate": 1.4340124278424936e-05, "loss": 1.2788, "step": 1220 }, { "epoch": 1.13, "learning_rate": 1.4331114209578935e-05, "loss": 1.2619, "step": 1221 }, { "epoch": 1.13, "learning_rate": 1.4322099810742031e-05, "loss": 1.3227, "step": 1222 }, { "epoch": 1.13, "learning_rate": 1.4313081090926288e-05, "loss": 1.281, "step": 1223 }, { "epoch": 1.13, "learning_rate": 1.4304058059148083e-05, "loss": 1.3609, "step": 1224 }, { "epoch": 1.13, "learning_rate": 1.4295030724428108e-05, "loss": 1.2328, "step": 1225 }, { "epoch": 1.14, "learning_rate": 1.4285999095791358e-05, "loss": 1.253, "step": 1226 }, { "epoch": 1.14, "learning_rate": 1.4276963182267119e-05, "loss": 1.2929, "step": 1227 }, { "epoch": 1.14, "learning_rate": 1.4267922992888961e-05, "loss": 1.3169, "step": 1228 }, { "epoch": 1.14, "learning_rate": 1.4258878536694727e-05, "loss": 1.2454, "step": 1229 }, { "epoch": 1.14, "learning_rate": 1.4249829822726534e-05, "loss": 1.2117, "step": 1230 }, { "epoch": 1.14, "learning_rate": 1.4240776860030738e-05, "loss": 1.2913, "step": 1231 }, { "epoch": 1.14, "learning_rate": 1.4231719657657966e-05, "loss": 1.2594, "step": 1232 }, { "epoch": 1.14, "learning_rate": 1.4222658224663062e-05, "loss": 1.317, "step": 1233 }, { "epoch": 1.14, "learning_rate": 1.421359257010511e-05, "loss": 1.2294, "step": 1234 }, { "epoch": 1.14, "learning_rate": 1.4204522703047417e-05, "loss": 1.3758, "step": 1235 }, { "epoch": 1.14, "learning_rate": 1.4195448632557496e-05, "loss": 1.2956, "step": 1236 }, { "epoch": 1.15, "learning_rate": 1.4186370367707063e-05, "loss": 1.3522, "step": 1237 }, { "epoch": 1.15, "learning_rate": 1.4177287917572032e-05, "loss": 1.2093, "step": 1238 }, { "epoch": 1.15, "learning_rate": 1.4168201291232494e-05, "loss": 1.3177, "step": 1239 }, { "epoch": 1.15, "learning_rate": 1.4159110497772718e-05, "loss": 1.3186, "step": 1240 }, { "epoch": 1.15, "learning_rate": 1.4150015546281144e-05, "loss": 1.3254, "step": 1241 }, { "epoch": 1.15, "learning_rate": 1.4140916445850358e-05, "loss": 1.3041, "step": 1242 }, { "epoch": 1.15, "learning_rate": 1.413181320557711e-05, "loss": 1.2965, "step": 1243 }, { "epoch": 1.15, "learning_rate": 1.412270583456227e-05, "loss": 1.3482, "step": 1244 }, { "epoch": 1.15, "learning_rate": 1.4113594341910853e-05, "loss": 1.4073, "step": 1245 }, { "epoch": 1.15, "learning_rate": 1.4104478736731983e-05, "loss": 1.2679, "step": 1246 }, { "epoch": 1.15, "learning_rate": 1.4095359028138906e-05, "loss": 1.2834, "step": 1247 }, { "epoch": 1.16, "learning_rate": 1.4086235225248966e-05, "loss": 1.354, "step": 1248 }, { "epoch": 1.16, "learning_rate": 1.4077107337183593e-05, "loss": 1.2185, "step": 1249 }, { "epoch": 1.16, "learning_rate": 1.4067975373068313e-05, "loss": 1.3715, "step": 1250 }, { "epoch": 1.16, "learning_rate": 1.4058839342032719e-05, "loss": 1.2881, "step": 1251 }, { "epoch": 1.16, "learning_rate": 1.4049699253210475e-05, "loss": 1.282, "step": 1252 }, { "epoch": 1.16, "learning_rate": 1.4040555115739301e-05, "loss": 1.4027, "step": 1253 }, { "epoch": 1.16, "learning_rate": 1.4031406938760956e-05, "loss": 1.2677, "step": 1254 }, { "epoch": 1.16, "learning_rate": 1.402225473142125e-05, "loss": 1.3031, "step": 1255 }, { "epoch": 1.16, "learning_rate": 1.4013098502870018e-05, "loss": 1.3137, "step": 1256 }, { "epoch": 1.16, "learning_rate": 1.4003938262261108e-05, "loss": 1.3081, "step": 1257 }, { "epoch": 1.16, "learning_rate": 1.3994774018752389e-05, "loss": 1.2953, "step": 1258 }, { "epoch": 1.17, "learning_rate": 1.3985605781505727e-05, "loss": 1.2301, "step": 1259 }, { "epoch": 1.17, "learning_rate": 1.3976433559686977e-05, "loss": 1.3827, "step": 1260 }, { "epoch": 1.17, "learning_rate": 1.396725736246599e-05, "loss": 1.2662, "step": 1261 }, { "epoch": 1.17, "learning_rate": 1.3958077199016576e-05, "loss": 1.2239, "step": 1262 }, { "epoch": 1.17, "learning_rate": 1.3948893078516519e-05, "loss": 1.2671, "step": 1263 }, { "epoch": 1.17, "learning_rate": 1.3939705010147563e-05, "loss": 1.2303, "step": 1264 }, { "epoch": 1.17, "learning_rate": 1.393051300309539e-05, "loss": 1.3043, "step": 1265 }, { "epoch": 1.17, "learning_rate": 1.392131706654962e-05, "loss": 1.2748, "step": 1266 }, { "epoch": 1.17, "learning_rate": 1.3912117209703806e-05, "loss": 1.3768, "step": 1267 }, { "epoch": 1.17, "learning_rate": 1.3902913441755422e-05, "loss": 1.3258, "step": 1268 }, { "epoch": 1.17, "learning_rate": 1.3893705771905845e-05, "loss": 1.3366, "step": 1269 }, { "epoch": 1.18, "learning_rate": 1.3884494209360362e-05, "loss": 1.3216, "step": 1270 }, { "epoch": 1.18, "learning_rate": 1.3875278763328139e-05, "loss": 1.326, "step": 1271 }, { "epoch": 1.18, "learning_rate": 1.3866059443022237e-05, "loss": 1.2314, "step": 1272 }, { "epoch": 1.18, "learning_rate": 1.3856836257659587e-05, "loss": 1.2818, "step": 1273 }, { "epoch": 1.18, "learning_rate": 1.3847609216460976e-05, "loss": 1.3595, "step": 1274 }, { "epoch": 1.18, "learning_rate": 1.3838378328651056e-05, "loss": 1.2511, "step": 1275 }, { "epoch": 1.18, "learning_rate": 1.3829143603458319e-05, "loss": 1.3929, "step": 1276 }, { "epoch": 1.18, "learning_rate": 1.3819905050115097e-05, "loss": 1.2817, "step": 1277 }, { "epoch": 1.18, "learning_rate": 1.3810662677857548e-05, "loss": 1.2772, "step": 1278 }, { "epoch": 1.18, "learning_rate": 1.3801416495925641e-05, "loss": 1.3148, "step": 1279 }, { "epoch": 1.19, "learning_rate": 1.3792166513563167e-05, "loss": 1.3526, "step": 1280 }, { "epoch": 1.19, "learning_rate": 1.3782912740017711e-05, "loss": 1.3599, "step": 1281 }, { "epoch": 1.19, "learning_rate": 1.3773655184540637e-05, "loss": 1.3042, "step": 1282 }, { "epoch": 1.19, "learning_rate": 1.3764393856387109e-05, "loss": 1.3405, "step": 1283 }, { "epoch": 1.19, "learning_rate": 1.3755128764816048e-05, "loss": 1.3591, "step": 1284 }, { "epoch": 1.19, "learning_rate": 1.3745859919090141e-05, "loss": 1.2998, "step": 1285 }, { "epoch": 1.19, "learning_rate": 1.3736587328475832e-05, "loss": 1.2966, "step": 1286 }, { "epoch": 1.19, "learning_rate": 1.3727311002243309e-05, "loss": 1.3651, "step": 1287 }, { "epoch": 1.19, "learning_rate": 1.3718030949666488e-05, "loss": 1.1954, "step": 1288 }, { "epoch": 1.19, "learning_rate": 1.370874718002302e-05, "loss": 1.2209, "step": 1289 }, { "epoch": 1.19, "learning_rate": 1.3699459702594264e-05, "loss": 1.2716, "step": 1290 }, { "epoch": 1.2, "learning_rate": 1.3690168526665287e-05, "loss": 1.3078, "step": 1291 }, { "epoch": 1.2, "learning_rate": 1.3680873661524855e-05, "loss": 1.2666, "step": 1292 }, { "epoch": 1.2, "learning_rate": 1.3671575116465427e-05, "loss": 1.2894, "step": 1293 }, { "epoch": 1.2, "learning_rate": 1.3662272900783134e-05, "loss": 1.3134, "step": 1294 }, { "epoch": 1.2, "learning_rate": 1.3652967023777776e-05, "loss": 1.3055, "step": 1295 }, { "epoch": 1.2, "learning_rate": 1.3643657494752822e-05, "loss": 1.221, "step": 1296 }, { "epoch": 1.2, "learning_rate": 1.3634344323015382e-05, "loss": 1.275, "step": 1297 }, { "epoch": 1.2, "learning_rate": 1.3625027517876216e-05, "loss": 1.2801, "step": 1298 }, { "epoch": 1.2, "learning_rate": 1.3615707088649708e-05, "loss": 1.2212, "step": 1299 }, { "epoch": 1.2, "learning_rate": 1.3606383044653875e-05, "loss": 1.2671, "step": 1300 }, { "epoch": 1.2, "learning_rate": 1.3597055395210336e-05, "loss": 1.25, "step": 1301 }, { "epoch": 1.21, "learning_rate": 1.3587724149644328e-05, "loss": 1.2288, "step": 1302 }, { "epoch": 1.21, "learning_rate": 1.3578389317284675e-05, "loss": 1.306, "step": 1303 }, { "epoch": 1.21, "learning_rate": 1.3569050907463788e-05, "loss": 1.1833, "step": 1304 }, { "epoch": 1.21, "learning_rate": 1.3559708929517653e-05, "loss": 1.2724, "step": 1305 }, { "epoch": 1.21, "learning_rate": 1.3550363392785826e-05, "loss": 1.2884, "step": 1306 }, { "epoch": 1.21, "learning_rate": 1.354101430661142e-05, "loss": 1.3023, "step": 1307 }, { "epoch": 1.21, "learning_rate": 1.3531661680341098e-05, "loss": 1.2958, "step": 1308 }, { "epoch": 1.21, "learning_rate": 1.3522305523325057e-05, "loss": 1.3205, "step": 1309 }, { "epoch": 1.21, "learning_rate": 1.351294584491703e-05, "loss": 1.2655, "step": 1310 }, { "epoch": 1.21, "learning_rate": 1.3503582654474267e-05, "loss": 1.3765, "step": 1311 }, { "epoch": 1.21, "learning_rate": 1.3494215961357533e-05, "loss": 1.2963, "step": 1312 }, { "epoch": 1.22, "learning_rate": 1.3484845774931085e-05, "loss": 1.2332, "step": 1313 }, { "epoch": 1.22, "learning_rate": 1.347547210456269e-05, "loss": 1.277, "step": 1314 }, { "epoch": 1.22, "learning_rate": 1.3466094959623575e-05, "loss": 1.3376, "step": 1315 }, { "epoch": 1.22, "learning_rate": 1.345671434948846e-05, "loss": 1.2799, "step": 1316 }, { "epoch": 1.22, "learning_rate": 1.344733028353552e-05, "loss": 1.3087, "step": 1317 }, { "epoch": 1.22, "learning_rate": 1.3437942771146388e-05, "loss": 1.2633, "step": 1318 }, { "epoch": 1.22, "learning_rate": 1.3428551821706144e-05, "loss": 1.3491, "step": 1319 }, { "epoch": 1.22, "learning_rate": 1.3419157444603297e-05, "loss": 1.3642, "step": 1320 }, { "epoch": 1.22, "learning_rate": 1.340975964922979e-05, "loss": 1.3306, "step": 1321 }, { "epoch": 1.22, "learning_rate": 1.3400358444980986e-05, "loss": 1.2747, "step": 1322 }, { "epoch": 1.22, "learning_rate": 1.3390953841255641e-05, "loss": 1.2901, "step": 1323 }, { "epoch": 1.23, "learning_rate": 1.3381545847455925e-05, "loss": 1.2602, "step": 1324 }, { "epoch": 1.23, "learning_rate": 1.3372134472987392e-05, "loss": 1.3256, "step": 1325 }, { "epoch": 1.23, "learning_rate": 1.3362719727258975e-05, "loss": 1.2915, "step": 1326 }, { "epoch": 1.23, "learning_rate": 1.3353301619682976e-05, "loss": 1.3067, "step": 1327 }, { "epoch": 1.23, "learning_rate": 1.3343880159675062e-05, "loss": 1.2865, "step": 1328 }, { "epoch": 1.23, "learning_rate": 1.3334455356654252e-05, "loss": 1.2808, "step": 1329 }, { "epoch": 1.23, "learning_rate": 1.33250272200429e-05, "loss": 1.364, "step": 1330 }, { "epoch": 1.23, "learning_rate": 1.33155957592667e-05, "loss": 1.2248, "step": 1331 }, { "epoch": 1.23, "learning_rate": 1.3306160983754665e-05, "loss": 1.2644, "step": 1332 }, { "epoch": 1.23, "learning_rate": 1.3296722902939124e-05, "loss": 1.2433, "step": 1333 }, { "epoch": 1.24, "learning_rate": 1.3287281526255709e-05, "loss": 1.2634, "step": 1334 }, { "epoch": 1.24, "learning_rate": 1.3277836863143352e-05, "loss": 1.2934, "step": 1335 }, { "epoch": 1.24, "learning_rate": 1.3268388923044265e-05, "loss": 1.3516, "step": 1336 }, { "epoch": 1.24, "learning_rate": 1.3258937715403933e-05, "loss": 1.3034, "step": 1337 }, { "epoch": 1.24, "learning_rate": 1.3249483249671117e-05, "loss": 1.3525, "step": 1338 }, { "epoch": 1.24, "learning_rate": 1.3240025535297833e-05, "loss": 1.1765, "step": 1339 }, { "epoch": 1.24, "learning_rate": 1.3230564581739336e-05, "loss": 1.3301, "step": 1340 }, { "epoch": 1.24, "learning_rate": 1.3221100398454127e-05, "loss": 1.2909, "step": 1341 }, { "epoch": 1.24, "learning_rate": 1.321163299490394e-05, "loss": 1.2821, "step": 1342 }, { "epoch": 1.24, "learning_rate": 1.3202162380553717e-05, "loss": 1.2769, "step": 1343 }, { "epoch": 1.24, "learning_rate": 1.3192688564871621e-05, "loss": 1.2593, "step": 1344 }, { "epoch": 1.25, "learning_rate": 1.3183211557329005e-05, "loss": 1.3108, "step": 1345 }, { "epoch": 1.25, "learning_rate": 1.3173731367400426e-05, "loss": 1.2802, "step": 1346 }, { "epoch": 1.25, "learning_rate": 1.3164248004563612e-05, "loss": 1.2595, "step": 1347 }, { "epoch": 1.25, "learning_rate": 1.3154761478299461e-05, "loss": 1.2595, "step": 1348 }, { "epoch": 1.25, "learning_rate": 1.3145271798092046e-05, "loss": 1.2718, "step": 1349 }, { "epoch": 1.25, "learning_rate": 1.3135778973428583e-05, "loss": 1.2246, "step": 1350 }, { "epoch": 1.25, "learning_rate": 1.3126283013799432e-05, "loss": 1.3388, "step": 1351 }, { "epoch": 1.25, "learning_rate": 1.3116783928698095e-05, "loss": 1.295, "step": 1352 }, { "epoch": 1.25, "learning_rate": 1.310728172762119e-05, "loss": 1.2604, "step": 1353 }, { "epoch": 1.25, "learning_rate": 1.3097776420068456e-05, "loss": 1.3495, "step": 1354 }, { "epoch": 1.25, "learning_rate": 1.3088268015542732e-05, "loss": 1.2419, "step": 1355 }, { "epoch": 1.26, "learning_rate": 1.307875652354996e-05, "loss": 1.2618, "step": 1356 }, { "epoch": 1.26, "learning_rate": 1.3069241953599163e-05, "loss": 1.2797, "step": 1357 }, { "epoch": 1.26, "learning_rate": 1.3059724315202444e-05, "loss": 1.2218, "step": 1358 }, { "epoch": 1.26, "learning_rate": 1.3050203617874972e-05, "loss": 1.3398, "step": 1359 }, { "epoch": 1.26, "learning_rate": 1.3040679871134976e-05, "loss": 1.3419, "step": 1360 }, { "epoch": 1.26, "learning_rate": 1.3031153084503733e-05, "loss": 1.22, "step": 1361 }, { "epoch": 1.26, "learning_rate": 1.3021623267505558e-05, "loss": 1.2826, "step": 1362 }, { "epoch": 1.26, "learning_rate": 1.3012090429667796e-05, "loss": 1.2021, "step": 1363 }, { "epoch": 1.26, "learning_rate": 1.3002554580520816e-05, "loss": 1.2767, "step": 1364 }, { "epoch": 1.26, "learning_rate": 1.2993015729597987e-05, "loss": 1.2415, "step": 1365 }, { "epoch": 1.26, "learning_rate": 1.2983473886435692e-05, "loss": 1.2971, "step": 1366 }, { "epoch": 1.27, "learning_rate": 1.2973929060573295e-05, "loss": 1.3053, "step": 1367 }, { "epoch": 1.27, "learning_rate": 1.296438126155315e-05, "loss": 1.262, "step": 1368 }, { "epoch": 1.27, "learning_rate": 1.2954830498920577e-05, "loss": 1.2393, "step": 1369 }, { "epoch": 1.27, "learning_rate": 1.2945276782223863e-05, "loss": 1.2254, "step": 1370 }, { "epoch": 1.27, "learning_rate": 1.293572012101425e-05, "loss": 1.2753, "step": 1371 }, { "epoch": 1.27, "learning_rate": 1.2926160524845914e-05, "loss": 1.2888, "step": 1372 }, { "epoch": 1.27, "learning_rate": 1.291659800327598e-05, "loss": 1.261, "step": 1373 }, { "epoch": 1.27, "learning_rate": 1.2907032565864482e-05, "loss": 1.2696, "step": 1374 }, { "epoch": 1.27, "learning_rate": 1.289746422217438e-05, "loss": 1.3462, "step": 1375 }, { "epoch": 1.27, "learning_rate": 1.2887892981771532e-05, "loss": 1.3738, "step": 1376 }, { "epoch": 1.27, "learning_rate": 1.2878318854224703e-05, "loss": 1.2541, "step": 1377 }, { "epoch": 1.28, "learning_rate": 1.286874184910553e-05, "loss": 1.3264, "step": 1378 }, { "epoch": 1.28, "learning_rate": 1.285916197598854e-05, "loss": 1.2977, "step": 1379 }, { "epoch": 1.28, "learning_rate": 1.284957924445112e-05, "loss": 1.3095, "step": 1380 }, { "epoch": 1.28, "learning_rate": 1.2839993664073517e-05, "loss": 1.3276, "step": 1381 }, { "epoch": 1.28, "learning_rate": 1.2830405244438823e-05, "loss": 1.2943, "step": 1382 }, { "epoch": 1.28, "learning_rate": 1.282081399513297e-05, "loss": 1.2982, "step": 1383 }, { "epoch": 1.28, "learning_rate": 1.2811219925744723e-05, "loss": 1.2583, "step": 1384 }, { "epoch": 1.28, "learning_rate": 1.2801623045865661e-05, "loss": 1.2899, "step": 1385 }, { "epoch": 1.28, "learning_rate": 1.2792023365090176e-05, "loss": 1.231, "step": 1386 }, { "epoch": 1.28, "learning_rate": 1.2782420893015458e-05, "loss": 1.2762, "step": 1387 }, { "epoch": 1.29, "learning_rate": 1.2772815639241491e-05, "loss": 1.2555, "step": 1388 }, { "epoch": 1.29, "learning_rate": 1.2763207613371034e-05, "loss": 1.2819, "step": 1389 }, { "epoch": 1.29, "learning_rate": 1.2753596825009625e-05, "loss": 1.2957, "step": 1390 }, { "epoch": 1.29, "learning_rate": 1.2743983283765555e-05, "loss": 1.3242, "step": 1391 }, { "epoch": 1.29, "learning_rate": 1.2734366999249874e-05, "loss": 1.1966, "step": 1392 }, { "epoch": 1.29, "learning_rate": 1.2724747981076373e-05, "loss": 1.2592, "step": 1393 }, { "epoch": 1.29, "learning_rate": 1.2715126238861574e-05, "loss": 1.2817, "step": 1394 }, { "epoch": 1.29, "learning_rate": 1.2705501782224725e-05, "loss": 1.3397, "step": 1395 }, { "epoch": 1.29, "learning_rate": 1.269587462078779e-05, "loss": 1.2787, "step": 1396 }, { "epoch": 1.29, "learning_rate": 1.2686244764175426e-05, "loss": 1.3265, "step": 1397 }, { "epoch": 1.29, "learning_rate": 1.2676612222014999e-05, "loss": 1.2907, "step": 1398 }, { "epoch": 1.3, "learning_rate": 1.2666977003936546e-05, "loss": 1.2045, "step": 1399 }, { "epoch": 1.3, "learning_rate": 1.2657339119572793e-05, "loss": 1.2635, "step": 1400 }, { "epoch": 1.3, "learning_rate": 1.2647698578559123e-05, "loss": 1.2968, "step": 1401 }, { "epoch": 1.3, "learning_rate": 1.2638055390533572e-05, "loss": 1.364, "step": 1402 }, { "epoch": 1.3, "learning_rate": 1.2628409565136832e-05, "loss": 1.255, "step": 1403 }, { "epoch": 1.3, "learning_rate": 1.2618761112012223e-05, "loss": 1.2948, "step": 1404 }, { "epoch": 1.3, "learning_rate": 1.2609110040805699e-05, "loss": 1.3369, "step": 1405 }, { "epoch": 1.3, "learning_rate": 1.2599456361165828e-05, "loss": 1.2691, "step": 1406 }, { "epoch": 1.3, "learning_rate": 1.2589800082743783e-05, "loss": 1.3702, "step": 1407 }, { "epoch": 1.3, "learning_rate": 1.258014121519334e-05, "loss": 1.3092, "step": 1408 }, { "epoch": 1.3, "learning_rate": 1.257047976817086e-05, "loss": 1.2373, "step": 1409 }, { "epoch": 1.31, "learning_rate": 1.2560815751335286e-05, "loss": 1.2965, "step": 1410 }, { "epoch": 1.31, "learning_rate": 1.2551149174348129e-05, "loss": 1.2599, "step": 1411 }, { "epoch": 1.31, "learning_rate": 1.2541480046873457e-05, "loss": 1.2461, "step": 1412 }, { "epoch": 1.31, "learning_rate": 1.2531808378577892e-05, "loss": 1.3132, "step": 1413 }, { "epoch": 1.31, "learning_rate": 1.2522134179130592e-05, "loss": 1.3581, "step": 1414 }, { "epoch": 1.31, "learning_rate": 1.2512457458203247e-05, "loss": 1.2895, "step": 1415 }, { "epoch": 1.31, "learning_rate": 1.2502778225470069e-05, "loss": 1.3161, "step": 1416 }, { "epoch": 1.31, "learning_rate": 1.249309649060778e-05, "loss": 1.3029, "step": 1417 }, { "epoch": 1.31, "learning_rate": 1.2483412263295602e-05, "loss": 1.3417, "step": 1418 }, { "epoch": 1.31, "learning_rate": 1.2473725553215255e-05, "loss": 1.2981, "step": 1419 }, { "epoch": 1.31, "learning_rate": 1.2464036370050932e-05, "loss": 1.3284, "step": 1420 }, { "epoch": 1.32, "learning_rate": 1.2454344723489306e-05, "loss": 1.2618, "step": 1421 }, { "epoch": 1.32, "learning_rate": 1.2444650623219512e-05, "loss": 1.2913, "step": 1422 }, { "epoch": 1.32, "learning_rate": 1.2434954078933133e-05, "loss": 1.2276, "step": 1423 }, { "epoch": 1.32, "learning_rate": 1.2425255100324198e-05, "loss": 1.2418, "step": 1424 }, { "epoch": 1.32, "learning_rate": 1.241555369708917e-05, "loss": 1.3194, "step": 1425 }, { "epoch": 1.32, "learning_rate": 1.2405849878926941e-05, "loss": 1.2801, "step": 1426 }, { "epoch": 1.32, "learning_rate": 1.2396143655538806e-05, "loss": 1.3397, "step": 1427 }, { "epoch": 1.32, "learning_rate": 1.2386435036628474e-05, "loss": 1.2547, "step": 1428 }, { "epoch": 1.32, "learning_rate": 1.2376724031902046e-05, "loss": 1.2988, "step": 1429 }, { "epoch": 1.32, "learning_rate": 1.2367010651068005e-05, "loss": 1.3095, "step": 1430 }, { "epoch": 1.32, "learning_rate": 1.2357294903837221e-05, "loss": 1.1631, "step": 1431 }, { "epoch": 1.33, "learning_rate": 1.234757679992291e-05, "loss": 1.3685, "step": 1432 }, { "epoch": 1.33, "learning_rate": 1.2337856349040665e-05, "loss": 1.3094, "step": 1433 }, { "epoch": 1.33, "learning_rate": 1.232813356090841e-05, "loss": 1.3075, "step": 1434 }, { "epoch": 1.33, "learning_rate": 1.231840844524641e-05, "loss": 1.2994, "step": 1435 }, { "epoch": 1.33, "learning_rate": 1.2308681011777266e-05, "loss": 1.3048, "step": 1436 }, { "epoch": 1.33, "learning_rate": 1.2298951270225883e-05, "loss": 1.3076, "step": 1437 }, { "epoch": 1.33, "learning_rate": 1.228921923031948e-05, "loss": 1.3132, "step": 1438 }, { "epoch": 1.33, "learning_rate": 1.2279484901787576e-05, "loss": 1.2999, "step": 1439 }, { "epoch": 1.33, "learning_rate": 1.2269748294361969e-05, "loss": 1.3422, "step": 1440 }, { "epoch": 1.33, "learning_rate": 1.2260009417776745e-05, "loss": 1.2073, "step": 1441 }, { "epoch": 1.34, "learning_rate": 1.2250268281768255e-05, "loss": 1.3353, "step": 1442 }, { "epoch": 1.34, "learning_rate": 1.2240524896075106e-05, "loss": 1.341, "step": 1443 }, { "epoch": 1.34, "learning_rate": 1.2230779270438158e-05, "loss": 1.2585, "step": 1444 }, { "epoch": 1.34, "learning_rate": 1.222103141460051e-05, "loss": 1.2532, "step": 1445 }, { "epoch": 1.34, "learning_rate": 1.2211281338307488e-05, "loss": 1.297, "step": 1446 }, { "epoch": 1.34, "learning_rate": 1.220152905130664e-05, "loss": 1.2422, "step": 1447 }, { "epoch": 1.34, "learning_rate": 1.2191774563347727e-05, "loss": 1.2676, "step": 1448 }, { "epoch": 1.34, "learning_rate": 1.2182017884182702e-05, "loss": 1.2877, "step": 1449 }, { "epoch": 1.34, "learning_rate": 1.2172259023565715e-05, "loss": 1.326, "step": 1450 }, { "epoch": 1.34, "learning_rate": 1.21624979912531e-05, "loss": 1.3555, "step": 1451 }, { "epoch": 1.34, "learning_rate": 1.2152734797003349e-05, "loss": 1.3119, "step": 1452 }, { "epoch": 1.35, "learning_rate": 1.2142969450577136e-05, "loss": 1.2625, "step": 1453 }, { "epoch": 1.35, "learning_rate": 1.2133201961737262e-05, "loss": 1.2862, "step": 1454 }, { "epoch": 1.35, "learning_rate": 1.212343234024869e-05, "loss": 1.3367, "step": 1455 }, { "epoch": 1.35, "learning_rate": 1.2113660595878506e-05, "loss": 1.2831, "step": 1456 }, { "epoch": 1.35, "learning_rate": 1.2103886738395923e-05, "loss": 1.2437, "step": 1457 }, { "epoch": 1.35, "learning_rate": 1.2094110777572257e-05, "loss": 1.3273, "step": 1458 }, { "epoch": 1.35, "learning_rate": 1.2084332723180938e-05, "loss": 1.3348, "step": 1459 }, { "epoch": 1.35, "learning_rate": 1.2074552584997482e-05, "loss": 1.3299, "step": 1460 }, { "epoch": 1.35, "learning_rate": 1.2064770372799495e-05, "loss": 1.3428, "step": 1461 }, { "epoch": 1.35, "learning_rate": 1.2054986096366648e-05, "loss": 1.259, "step": 1462 }, { "epoch": 1.35, "learning_rate": 1.204519976548068e-05, "loss": 1.2191, "step": 1463 }, { "epoch": 1.36, "learning_rate": 1.2035411389925385e-05, "loss": 1.2047, "step": 1464 }, { "epoch": 1.36, "learning_rate": 1.2025620979486603e-05, "loss": 1.3142, "step": 1465 }, { "epoch": 1.36, "learning_rate": 1.2015828543952196e-05, "loss": 1.3719, "step": 1466 }, { "epoch": 1.36, "learning_rate": 1.2006034093112073e-05, "loss": 1.3393, "step": 1467 }, { "epoch": 1.36, "learning_rate": 1.1996237636758133e-05, "loss": 1.3597, "step": 1468 }, { "epoch": 1.36, "learning_rate": 1.1986439184684295e-05, "loss": 1.2805, "step": 1469 }, { "epoch": 1.36, "learning_rate": 1.197663874668647e-05, "loss": 1.2853, "step": 1470 }, { "epoch": 1.36, "learning_rate": 1.1966836332562554e-05, "loss": 1.311, "step": 1471 }, { "epoch": 1.36, "learning_rate": 1.1957031952112422e-05, "loss": 1.2589, "step": 1472 }, { "epoch": 1.36, "learning_rate": 1.194722561513791e-05, "loss": 1.3018, "step": 1473 }, { "epoch": 1.36, "learning_rate": 1.1937417331442808e-05, "loss": 1.2473, "step": 1474 }, { "epoch": 1.37, "learning_rate": 1.192760711083286e-05, "loss": 1.2343, "step": 1475 }, { "epoch": 1.37, "learning_rate": 1.1917794963115741e-05, "loss": 1.2655, "step": 1476 }, { "epoch": 1.37, "learning_rate": 1.1907980898101052e-05, "loss": 1.243, "step": 1477 }, { "epoch": 1.37, "learning_rate": 1.1898164925600316e-05, "loss": 1.3015, "step": 1478 }, { "epoch": 1.37, "learning_rate": 1.1888347055426956e-05, "loss": 1.3115, "step": 1479 }, { "epoch": 1.37, "learning_rate": 1.18785272973963e-05, "loss": 1.2991, "step": 1480 }, { "epoch": 1.37, "learning_rate": 1.1868705661325557e-05, "loss": 1.3071, "step": 1481 }, { "epoch": 1.37, "learning_rate": 1.1858882157033815e-05, "loss": 1.3176, "step": 1482 }, { "epoch": 1.37, "learning_rate": 1.184905679434203e-05, "loss": 1.2959, "step": 1483 }, { "epoch": 1.37, "learning_rate": 1.183922958307302e-05, "loss": 1.3144, "step": 1484 }, { "epoch": 1.37, "learning_rate": 1.1829400533051443e-05, "loss": 1.2431, "step": 1485 }, { "epoch": 1.38, "learning_rate": 1.1819569654103802e-05, "loss": 1.2648, "step": 1486 }, { "epoch": 1.38, "learning_rate": 1.1809736956058427e-05, "loss": 1.2038, "step": 1487 }, { "epoch": 1.38, "learning_rate": 1.1799902448745466e-05, "loss": 1.3245, "step": 1488 }, { "epoch": 1.38, "learning_rate": 1.1790066141996872e-05, "loss": 1.2174, "step": 1489 }, { "epoch": 1.38, "learning_rate": 1.1780228045646407e-05, "loss": 1.3048, "step": 1490 }, { "epoch": 1.38, "learning_rate": 1.177038816952961e-05, "loss": 1.3075, "step": 1491 }, { "epoch": 1.38, "learning_rate": 1.1760546523483806e-05, "loss": 1.2745, "step": 1492 }, { "epoch": 1.38, "learning_rate": 1.1750703117348094e-05, "loss": 1.2785, "step": 1493 }, { "epoch": 1.38, "learning_rate": 1.174085796096332e-05, "loss": 1.2656, "step": 1494 }, { "epoch": 1.38, "learning_rate": 1.1731011064172089e-05, "loss": 1.3083, "step": 1495 }, { "epoch": 1.39, "learning_rate": 1.1721162436818745e-05, "loss": 1.243, "step": 1496 }, { "epoch": 1.39, "learning_rate": 1.1711312088749363e-05, "loss": 1.2825, "step": 1497 }, { "epoch": 1.39, "learning_rate": 1.1701460029811733e-05, "loss": 1.2906, "step": 1498 }, { "epoch": 1.39, "learning_rate": 1.169160626985536e-05, "loss": 1.2746, "step": 1499 }, { "epoch": 1.39, "learning_rate": 1.1681750818731446e-05, "loss": 1.3307, "step": 1500 }, { "epoch": 1.39, "learning_rate": 1.167189368629289e-05, "loss": 1.2347, "step": 1501 }, { "epoch": 1.39, "learning_rate": 1.1662034882394266e-05, "loss": 1.2935, "step": 1502 }, { "epoch": 1.39, "learning_rate": 1.165217441689182e-05, "loss": 1.2714, "step": 1503 }, { "epoch": 1.39, "learning_rate": 1.1642312299643461e-05, "loss": 1.2829, "step": 1504 }, { "epoch": 1.39, "learning_rate": 1.1632448540508751e-05, "loss": 1.3392, "step": 1505 }, { "epoch": 1.39, "learning_rate": 1.1622583149348888e-05, "loss": 1.3426, "step": 1506 }, { "epoch": 1.4, "learning_rate": 1.1612716136026707e-05, "loss": 1.3036, "step": 1507 }, { "epoch": 1.4, "learning_rate": 1.1602847510406657e-05, "loss": 1.2699, "step": 1508 }, { "epoch": 1.4, "learning_rate": 1.1592977282354807e-05, "loss": 1.3125, "step": 1509 }, { "epoch": 1.4, "learning_rate": 1.1583105461738827e-05, "loss": 1.2507, "step": 1510 }, { "epoch": 1.4, "learning_rate": 1.1573232058427972e-05, "loss": 1.3052, "step": 1511 }, { "epoch": 1.4, "learning_rate": 1.1563357082293092e-05, "loss": 1.1345, "step": 1512 }, { "epoch": 1.4, "learning_rate": 1.1553480543206597e-05, "loss": 1.2342, "step": 1513 }, { "epoch": 1.4, "learning_rate": 1.1543602451042466e-05, "loss": 1.3377, "step": 1514 }, { "epoch": 1.4, "learning_rate": 1.1533722815676231e-05, "loss": 1.2716, "step": 1515 }, { "epoch": 1.4, "learning_rate": 1.1523841646984961e-05, "loss": 1.23, "step": 1516 }, { "epoch": 1.4, "learning_rate": 1.1513958954847264e-05, "loss": 1.2148, "step": 1517 }, { "epoch": 1.41, "learning_rate": 1.150407474914327e-05, "loss": 1.2486, "step": 1518 }, { "epoch": 1.41, "learning_rate": 1.1494189039754621e-05, "loss": 1.2744, "step": 1519 }, { "epoch": 1.41, "learning_rate": 1.1484301836564459e-05, "loss": 1.2641, "step": 1520 }, { "epoch": 1.41, "learning_rate": 1.1474413149457429e-05, "loss": 1.3341, "step": 1521 }, { "epoch": 1.41, "learning_rate": 1.1464522988319646e-05, "loss": 1.2946, "step": 1522 }, { "epoch": 1.41, "learning_rate": 1.1454631363038713e-05, "loss": 1.2887, "step": 1523 }, { "epoch": 1.41, "learning_rate": 1.1444738283503684e-05, "loss": 1.291, "step": 1524 }, { "epoch": 1.41, "learning_rate": 1.1434843759605077e-05, "loss": 1.3358, "step": 1525 }, { "epoch": 1.41, "learning_rate": 1.1424947801234846e-05, "loss": 1.3169, "step": 1526 }, { "epoch": 1.41, "learning_rate": 1.141505041828639e-05, "loss": 1.2845, "step": 1527 }, { "epoch": 1.41, "learning_rate": 1.1405151620654513e-05, "loss": 1.3657, "step": 1528 }, { "epoch": 1.42, "learning_rate": 1.1395251418235455e-05, "loss": 1.2356, "step": 1529 }, { "epoch": 1.42, "learning_rate": 1.138534982092685e-05, "loss": 1.2107, "step": 1530 }, { "epoch": 1.42, "learning_rate": 1.1375446838627725e-05, "loss": 1.2726, "step": 1531 }, { "epoch": 1.42, "learning_rate": 1.1365542481238492e-05, "loss": 1.2781, "step": 1532 }, { "epoch": 1.42, "learning_rate": 1.1355636758660945e-05, "loss": 1.3427, "step": 1533 }, { "epoch": 1.42, "learning_rate": 1.134572968079823e-05, "loss": 1.2586, "step": 1534 }, { "epoch": 1.42, "learning_rate": 1.133582125755486e-05, "loss": 1.2426, "step": 1535 }, { "epoch": 1.42, "learning_rate": 1.1325911498836688e-05, "loss": 1.3242, "step": 1536 }, { "epoch": 1.42, "learning_rate": 1.1316000414550902e-05, "loss": 1.2768, "step": 1537 }, { "epoch": 1.42, "learning_rate": 1.1306088014606018e-05, "loss": 1.3249, "step": 1538 }, { "epoch": 1.42, "learning_rate": 1.1296174308911863e-05, "loss": 1.1874, "step": 1539 }, { "epoch": 1.43, "learning_rate": 1.1286259307379573e-05, "loss": 1.225, "step": 1540 }, { "epoch": 1.43, "learning_rate": 1.1276343019921575e-05, "loss": 1.2768, "step": 1541 }, { "epoch": 1.43, "learning_rate": 1.1266425456451586e-05, "loss": 1.1774, "step": 1542 }, { "epoch": 1.43, "learning_rate": 1.1256506626884598e-05, "loss": 1.2874, "step": 1543 }, { "epoch": 1.43, "learning_rate": 1.1246586541136868e-05, "loss": 1.3524, "step": 1544 }, { "epoch": 1.43, "learning_rate": 1.123666520912591e-05, "loss": 1.2225, "step": 1545 }, { "epoch": 1.43, "learning_rate": 1.122674264077048e-05, "loss": 1.2504, "step": 1546 }, { "epoch": 1.43, "learning_rate": 1.1216818845990573e-05, "loss": 1.2338, "step": 1547 }, { "epoch": 1.43, "learning_rate": 1.1206893834707415e-05, "loss": 1.3073, "step": 1548 }, { "epoch": 1.43, "learning_rate": 1.1196967616843437e-05, "loss": 1.3262, "step": 1549 }, { "epoch": 1.44, "learning_rate": 1.1187040202322286e-05, "loss": 1.3115, "step": 1550 }, { "epoch": 1.44, "learning_rate": 1.1177111601068798e-05, "loss": 1.2863, "step": 1551 }, { "epoch": 1.44, "learning_rate": 1.1167181823009004e-05, "loss": 1.3154, "step": 1552 }, { "epoch": 1.44, "learning_rate": 1.1157250878070102e-05, "loss": 1.1969, "step": 1553 }, { "epoch": 1.44, "learning_rate": 1.1147318776180467e-05, "loss": 1.2324, "step": 1554 }, { "epoch": 1.44, "learning_rate": 1.1137385527269619e-05, "loss": 1.2913, "step": 1555 }, { "epoch": 1.44, "learning_rate": 1.1127451141268235e-05, "loss": 1.2397, "step": 1556 }, { "epoch": 1.44, "learning_rate": 1.1117515628108125e-05, "loss": 1.2212, "step": 1557 }, { "epoch": 1.44, "learning_rate": 1.110757899772222e-05, "loss": 1.349, "step": 1558 }, { "epoch": 1.44, "learning_rate": 1.1097641260044579e-05, "loss": 1.2581, "step": 1559 }, { "epoch": 1.44, "learning_rate": 1.1087702425010356e-05, "loss": 1.308, "step": 1560 }, { "epoch": 1.45, "learning_rate": 1.1077762502555813e-05, "loss": 1.3309, "step": 1561 }, { "epoch": 1.45, "learning_rate": 1.1067821502618294e-05, "loss": 1.2107, "step": 1562 }, { "epoch": 1.45, "learning_rate": 1.1057879435136218e-05, "loss": 1.2576, "step": 1563 }, { "epoch": 1.45, "learning_rate": 1.1047936310049078e-05, "loss": 1.1992, "step": 1564 }, { "epoch": 1.45, "learning_rate": 1.1037992137297422e-05, "loss": 1.2639, "step": 1565 }, { "epoch": 1.45, "learning_rate": 1.1028046926822835e-05, "loss": 1.3158, "step": 1566 }, { "epoch": 1.45, "learning_rate": 1.1018100688567954e-05, "loss": 1.3203, "step": 1567 }, { "epoch": 1.45, "learning_rate": 1.1008153432476436e-05, "loss": 1.2417, "step": 1568 }, { "epoch": 1.45, "learning_rate": 1.0998205168492957e-05, "loss": 1.2334, "step": 1569 }, { "epoch": 1.45, "learning_rate": 1.0988255906563203e-05, "loss": 1.3852, "step": 1570 }, { "epoch": 1.45, "learning_rate": 1.0978305656633856e-05, "loss": 1.348, "step": 1571 }, { "epoch": 1.46, "learning_rate": 1.0968354428652584e-05, "loss": 1.2832, "step": 1572 }, { "epoch": 1.46, "learning_rate": 1.0958402232568031e-05, "loss": 1.3101, "step": 1573 }, { "epoch": 1.46, "learning_rate": 1.0948449078329815e-05, "loss": 1.2221, "step": 1574 }, { "epoch": 1.46, "learning_rate": 1.0938494975888508e-05, "loss": 1.3094, "step": 1575 }, { "epoch": 1.46, "learning_rate": 1.092853993519563e-05, "loss": 1.2863, "step": 1576 }, { "epoch": 1.46, "learning_rate": 1.091858396620364e-05, "loss": 1.2471, "step": 1577 }, { "epoch": 1.46, "learning_rate": 1.0908627078865928e-05, "loss": 1.2904, "step": 1578 }, { "epoch": 1.46, "learning_rate": 1.0898669283136793e-05, "loss": 1.284, "step": 1579 }, { "epoch": 1.46, "learning_rate": 1.0888710588971455e-05, "loss": 1.2797, "step": 1580 }, { "epoch": 1.46, "learning_rate": 1.087875100632602e-05, "loss": 1.2426, "step": 1581 }, { "epoch": 1.46, "learning_rate": 1.0868790545157492e-05, "loss": 1.2995, "step": 1582 }, { "epoch": 1.47, "learning_rate": 1.0858829215423746e-05, "loss": 1.3319, "step": 1583 }, { "epoch": 1.47, "learning_rate": 1.0848867027083528e-05, "loss": 1.2484, "step": 1584 }, { "epoch": 1.47, "learning_rate": 1.0838903990096443e-05, "loss": 1.3637, "step": 1585 }, { "epoch": 1.47, "learning_rate": 1.0828940114422943e-05, "loss": 1.225, "step": 1586 }, { "epoch": 1.47, "learning_rate": 1.0818975410024319e-05, "loss": 1.2552, "step": 1587 }, { "epoch": 1.47, "learning_rate": 1.0809009886862695e-05, "loss": 1.352, "step": 1588 }, { "epoch": 1.47, "learning_rate": 1.0799043554901008e-05, "loss": 1.2595, "step": 1589 }, { "epoch": 1.47, "learning_rate": 1.0789076424103002e-05, "loss": 1.2471, "step": 1590 }, { "epoch": 1.47, "learning_rate": 1.0779108504433226e-05, "loss": 1.2735, "step": 1591 }, { "epoch": 1.47, "learning_rate": 1.0769139805857012e-05, "loss": 1.249, "step": 1592 }, { "epoch": 1.47, "learning_rate": 1.0759170338340477e-05, "loss": 1.3081, "step": 1593 }, { "epoch": 1.48, "learning_rate": 1.0749200111850498e-05, "loss": 1.2636, "step": 1594 }, { "epoch": 1.48, "learning_rate": 1.0739229136354716e-05, "loss": 1.2909, "step": 1595 }, { "epoch": 1.48, "learning_rate": 1.0729257421821527e-05, "loss": 1.2215, "step": 1596 }, { "epoch": 1.48, "learning_rate": 1.0719284978220052e-05, "loss": 1.2724, "step": 1597 }, { "epoch": 1.48, "learning_rate": 1.0709311815520151e-05, "loss": 1.249, "step": 1598 }, { "epoch": 1.48, "learning_rate": 1.0699337943692402e-05, "loss": 1.283, "step": 1599 }, { "epoch": 1.48, "learning_rate": 1.0689363372708087e-05, "loss": 1.1851, "step": 1600 }, { "epoch": 1.48, "learning_rate": 1.0679388112539191e-05, "loss": 1.2719, "step": 1601 }, { "epoch": 1.48, "learning_rate": 1.0669412173158384e-05, "loss": 1.2849, "step": 1602 }, { "epoch": 1.48, "learning_rate": 1.0659435564539022e-05, "loss": 1.3016, "step": 1603 }, { "epoch": 1.49, "learning_rate": 1.0649458296655126e-05, "loss": 1.2675, "step": 1604 }, { "epoch": 1.49, "learning_rate": 1.0639480379481378e-05, "loss": 1.2398, "step": 1605 }, { "epoch": 1.49, "learning_rate": 1.06295018229931e-05, "loss": 1.256, "step": 1606 }, { "epoch": 1.49, "learning_rate": 1.0619522637166271e-05, "loss": 1.2018, "step": 1607 }, { "epoch": 1.49, "learning_rate": 1.0609542831977478e-05, "loss": 1.2303, "step": 1608 }, { "epoch": 1.49, "learning_rate": 1.0599562417403944e-05, "loss": 1.3616, "step": 1609 }, { "epoch": 1.49, "learning_rate": 1.0589581403423493e-05, "loss": 1.3076, "step": 1610 }, { "epoch": 1.49, "learning_rate": 1.0579599800014553e-05, "loss": 1.2997, "step": 1611 }, { "epoch": 1.49, "learning_rate": 1.0569617617156133e-05, "loss": 1.331, "step": 1612 }, { "epoch": 1.49, "learning_rate": 1.0559634864827833e-05, "loss": 1.2544, "step": 1613 }, { "epoch": 1.49, "learning_rate": 1.0549651553009813e-05, "loss": 1.2714, "step": 1614 }, { "epoch": 1.5, "learning_rate": 1.0539667691682798e-05, "loss": 1.3484, "step": 1615 }, { "epoch": 1.5, "learning_rate": 1.0529683290828056e-05, "loss": 1.2725, "step": 1616 }, { "epoch": 1.5, "learning_rate": 1.0519698360427401e-05, "loss": 1.2888, "step": 1617 }, { "epoch": 1.5, "learning_rate": 1.0509712910463175e-05, "loss": 1.288, "step": 1618 }, { "epoch": 1.5, "learning_rate": 1.0499726950918235e-05, "loss": 1.2796, "step": 1619 }, { "epoch": 1.5, "learning_rate": 1.048974049177595e-05, "loss": 1.3699, "step": 1620 }, { "epoch": 1.5, "learning_rate": 1.0479753543020194e-05, "loss": 1.2346, "step": 1621 }, { "epoch": 1.5, "learning_rate": 1.0469766114635323e-05, "loss": 1.2916, "step": 1622 }, { "epoch": 1.5, "learning_rate": 1.0459778216606175e-05, "loss": 1.3191, "step": 1623 }, { "epoch": 1.5, "learning_rate": 1.0449789858918056e-05, "loss": 1.2704, "step": 1624 }, { "epoch": 1.5, "learning_rate": 1.0439801051556738e-05, "loss": 1.2202, "step": 1625 }, { "epoch": 1.51, "learning_rate": 1.042981180450843e-05, "loss": 1.2419, "step": 1626 }, { "epoch": 1.51, "learning_rate": 1.0419822127759794e-05, "loss": 1.2746, "step": 1627 }, { "epoch": 1.51, "learning_rate": 1.0409832031297912e-05, "loss": 1.2716, "step": 1628 }, { "epoch": 1.51, "learning_rate": 1.0399841525110295e-05, "loss": 1.2853, "step": 1629 }, { "epoch": 1.51, "learning_rate": 1.0389850619184852e-05, "loss": 1.3131, "step": 1630 }, { "epoch": 1.51, "learning_rate": 1.0379859323509902e-05, "loss": 1.2031, "step": 1631 }, { "epoch": 1.51, "learning_rate": 1.036986764807415e-05, "loss": 1.252, "step": 1632 }, { "epoch": 1.51, "learning_rate": 1.0359875602866676e-05, "loss": 1.31, "step": 1633 }, { "epoch": 1.51, "learning_rate": 1.0349883197876935e-05, "loss": 1.2852, "step": 1634 }, { "epoch": 1.51, "learning_rate": 1.033989044309474e-05, "loss": 1.2821, "step": 1635 }, { "epoch": 1.51, "learning_rate": 1.0329897348510255e-05, "loss": 1.2546, "step": 1636 }, { "epoch": 1.52, "learning_rate": 1.0319903924113988e-05, "loss": 1.3124, "step": 1637 }, { "epoch": 1.52, "learning_rate": 1.0309910179896762e-05, "loss": 1.3202, "step": 1638 }, { "epoch": 1.52, "learning_rate": 1.0299916125849736e-05, "loss": 1.3035, "step": 1639 }, { "epoch": 1.52, "learning_rate": 1.0289921771964368e-05, "loss": 1.2258, "step": 1640 }, { "epoch": 1.52, "learning_rate": 1.0279927128232423e-05, "loss": 1.2425, "step": 1641 }, { "epoch": 1.52, "learning_rate": 1.0269932204645949e-05, "loss": 1.3116, "step": 1642 }, { "epoch": 1.52, "learning_rate": 1.025993701119728e-05, "loss": 1.3108, "step": 1643 }, { "epoch": 1.52, "learning_rate": 1.0249941557879017e-05, "loss": 1.2649, "step": 1644 }, { "epoch": 1.52, "learning_rate": 1.023994585468402e-05, "loss": 1.3033, "step": 1645 }, { "epoch": 1.52, "learning_rate": 1.0229949911605399e-05, "loss": 1.336, "step": 1646 }, { "epoch": 1.52, "learning_rate": 1.0219953738636505e-05, "loss": 1.3037, "step": 1647 }, { "epoch": 1.53, "learning_rate": 1.0209957345770924e-05, "loss": 1.127, "step": 1648 }, { "epoch": 1.53, "learning_rate": 1.0199960743002446e-05, "loss": 1.2661, "step": 1649 }, { "epoch": 1.53, "learning_rate": 1.018996394032509e-05, "loss": 1.2685, "step": 1650 }, { "epoch": 1.53, "learning_rate": 1.0179966947733062e-05, "loss": 1.2944, "step": 1651 }, { "epoch": 1.53, "learning_rate": 1.016996977522076e-05, "loss": 1.277, "step": 1652 }, { "epoch": 1.53, "learning_rate": 1.0159972432782766e-05, "loss": 1.2942, "step": 1653 }, { "epoch": 1.53, "learning_rate": 1.014997493041383e-05, "loss": 1.2619, "step": 1654 }, { "epoch": 1.53, "learning_rate": 1.0139977278108858e-05, "loss": 1.2853, "step": 1655 }, { "epoch": 1.53, "learning_rate": 1.0129979485862915e-05, "loss": 1.2664, "step": 1656 }, { "epoch": 1.53, "learning_rate": 1.01199815636712e-05, "loss": 1.2242, "step": 1657 }, { "epoch": 1.54, "learning_rate": 1.0109983521529036e-05, "loss": 1.2741, "step": 1658 }, { "epoch": 1.54, "learning_rate": 1.0099985369431875e-05, "loss": 1.2377, "step": 1659 }, { "epoch": 1.54, "learning_rate": 1.0089987117375278e-05, "loss": 1.4401, "step": 1660 }, { "epoch": 1.54, "learning_rate": 1.00799887753549e-05, "loss": 1.2645, "step": 1661 }, { "epoch": 1.54, "learning_rate": 1.0069990353366497e-05, "loss": 1.2662, "step": 1662 }, { "epoch": 1.54, "learning_rate": 1.0059991861405889e-05, "loss": 1.26, "step": 1663 }, { "epoch": 1.54, "learning_rate": 1.0049993309468982e-05, "loss": 1.289, "step": 1664 }, { "epoch": 1.54, "learning_rate": 1.003999470755173e-05, "loss": 1.2315, "step": 1665 }, { "epoch": 1.54, "learning_rate": 1.0029996065650142e-05, "loss": 1.243, "step": 1666 }, { "epoch": 1.54, "learning_rate": 1.001999739376027e-05, "loss": 1.2029, "step": 1667 }, { "epoch": 1.54, "learning_rate": 1.000999870187819e-05, "loss": 1.3067, "step": 1668 }, { "epoch": 1.55, "learning_rate": 1e-05, "loss": 1.2364, "step": 1669 }, { "epoch": 1.55, "learning_rate": 9.990001298121813e-06, "loss": 1.2784, "step": 1670 }, { "epoch": 1.55, "learning_rate": 9.980002606239732e-06, "loss": 1.2752, "step": 1671 }, { "epoch": 1.55, "learning_rate": 9.97000393434986e-06, "loss": 1.2507, "step": 1672 }, { "epoch": 1.55, "learning_rate": 9.960005292448272e-06, "loss": 1.314, "step": 1673 }, { "epoch": 1.55, "learning_rate": 9.950006690531023e-06, "loss": 1.2801, "step": 1674 }, { "epoch": 1.55, "learning_rate": 9.940008138594113e-06, "loss": 1.2041, "step": 1675 }, { "epoch": 1.55, "learning_rate": 9.930009646633506e-06, "loss": 1.309, "step": 1676 }, { "epoch": 1.55, "learning_rate": 9.9200112246451e-06, "loss": 1.2682, "step": 1677 }, { "epoch": 1.55, "learning_rate": 9.910012882624726e-06, "loss": 1.2418, "step": 1678 }, { "epoch": 1.55, "learning_rate": 9.90001463056813e-06, "loss": 1.2438, "step": 1679 }, { "epoch": 1.56, "learning_rate": 9.890016478470969e-06, "loss": 1.2599, "step": 1680 }, { "epoch": 1.56, "learning_rate": 9.880018436328805e-06, "loss": 1.1783, "step": 1681 }, { "epoch": 1.56, "learning_rate": 9.870020514137088e-06, "loss": 1.2636, "step": 1682 }, { "epoch": 1.56, "learning_rate": 9.860022721891143e-06, "loss": 1.2535, "step": 1683 }, { "epoch": 1.56, "learning_rate": 9.850025069586173e-06, "loss": 1.3492, "step": 1684 }, { "epoch": 1.56, "learning_rate": 9.840027567217236e-06, "loss": 1.2842, "step": 1685 }, { "epoch": 1.56, "learning_rate": 9.830030224779242e-06, "loss": 1.3224, "step": 1686 }, { "epoch": 1.56, "learning_rate": 9.82003305226694e-06, "loss": 1.3435, "step": 1687 }, { "epoch": 1.56, "learning_rate": 9.810036059674914e-06, "loss": 1.2591, "step": 1688 }, { "epoch": 1.56, "learning_rate": 9.800039256997556e-06, "loss": 1.2868, "step": 1689 }, { "epoch": 1.56, "learning_rate": 9.79004265422908e-06, "loss": 1.2844, "step": 1690 }, { "epoch": 1.57, "learning_rate": 9.780046261363495e-06, "loss": 1.301, "step": 1691 }, { "epoch": 1.57, "learning_rate": 9.770050088394603e-06, "loss": 1.2701, "step": 1692 }, { "epoch": 1.57, "learning_rate": 9.760054145315982e-06, "loss": 1.3016, "step": 1693 }, { "epoch": 1.57, "learning_rate": 9.750058442120985e-06, "loss": 1.3122, "step": 1694 }, { "epoch": 1.57, "learning_rate": 9.740062988802725e-06, "loss": 1.2556, "step": 1695 }, { "epoch": 1.57, "learning_rate": 9.730067795354056e-06, "loss": 1.1967, "step": 1696 }, { "epoch": 1.57, "learning_rate": 9.720072871767582e-06, "loss": 1.2926, "step": 1697 }, { "epoch": 1.57, "learning_rate": 9.710078228035635e-06, "loss": 1.2471, "step": 1698 }, { "epoch": 1.57, "learning_rate": 9.700083874150267e-06, "loss": 1.3013, "step": 1699 }, { "epoch": 1.57, "learning_rate": 9.69008982010324e-06, "loss": 1.2335, "step": 1700 }, { "epoch": 1.57, "learning_rate": 9.680096075886014e-06, "loss": 1.3141, "step": 1701 }, { "epoch": 1.58, "learning_rate": 9.670102651489746e-06, "loss": 1.263, "step": 1702 }, { "epoch": 1.58, "learning_rate": 9.660109556905261e-06, "loss": 1.2087, "step": 1703 }, { "epoch": 1.58, "learning_rate": 9.650116802123069e-06, "loss": 1.2511, "step": 1704 }, { "epoch": 1.58, "learning_rate": 9.640124397133327e-06, "loss": 1.3243, "step": 1705 }, { "epoch": 1.58, "learning_rate": 9.630132351925852e-06, "loss": 1.2441, "step": 1706 }, { "epoch": 1.58, "learning_rate": 9.620140676490098e-06, "loss": 1.2497, "step": 1707 }, { "epoch": 1.58, "learning_rate": 9.610149380815148e-06, "loss": 1.2429, "step": 1708 }, { "epoch": 1.58, "learning_rate": 9.600158474889706e-06, "loss": 1.2378, "step": 1709 }, { "epoch": 1.58, "learning_rate": 9.590167968702088e-06, "loss": 1.32, "step": 1710 }, { "epoch": 1.58, "learning_rate": 9.580177872240211e-06, "loss": 1.2291, "step": 1711 }, { "epoch": 1.59, "learning_rate": 9.570188195491575e-06, "loss": 1.2744, "step": 1712 }, { "epoch": 1.59, "learning_rate": 9.560198948443267e-06, "loss": 1.2185, "step": 1713 }, { "epoch": 1.59, "learning_rate": 9.550210141081948e-06, "loss": 1.3507, "step": 1714 }, { "epoch": 1.59, "learning_rate": 9.540221783393827e-06, "loss": 1.2799, "step": 1715 }, { "epoch": 1.59, "learning_rate": 9.53023388536468e-06, "loss": 1.2487, "step": 1716 }, { "epoch": 1.59, "learning_rate": 9.520246456979808e-06, "loss": 1.2784, "step": 1717 }, { "epoch": 1.59, "learning_rate": 9.510259508224052e-06, "loss": 1.2171, "step": 1718 }, { "epoch": 1.59, "learning_rate": 9.500273049081769e-06, "loss": 1.3194, "step": 1719 }, { "epoch": 1.59, "learning_rate": 9.490287089536828e-06, "loss": 1.262, "step": 1720 }, { "epoch": 1.59, "learning_rate": 9.4803016395726e-06, "loss": 1.3121, "step": 1721 }, { "epoch": 1.59, "learning_rate": 9.470316709171945e-06, "loss": 1.29, "step": 1722 }, { "epoch": 1.6, "learning_rate": 9.460332308317205e-06, "loss": 1.3275, "step": 1723 }, { "epoch": 1.6, "learning_rate": 9.450348446990187e-06, "loss": 1.2645, "step": 1724 }, { "epoch": 1.6, "learning_rate": 9.440365135172168e-06, "loss": 1.2452, "step": 1725 }, { "epoch": 1.6, "learning_rate": 9.430382382843869e-06, "loss": 1.2195, "step": 1726 }, { "epoch": 1.6, "learning_rate": 9.420400199985452e-06, "loss": 1.2999, "step": 1727 }, { "epoch": 1.6, "learning_rate": 9.41041859657651e-06, "loss": 1.2413, "step": 1728 }, { "epoch": 1.6, "learning_rate": 9.40043758259606e-06, "loss": 1.3496, "step": 1729 }, { "epoch": 1.6, "learning_rate": 9.390457168022527e-06, "loss": 1.2893, "step": 1730 }, { "epoch": 1.6, "learning_rate": 9.380477362833735e-06, "loss": 1.3715, "step": 1731 }, { "epoch": 1.6, "learning_rate": 9.370498177006903e-06, "loss": 1.2333, "step": 1732 }, { "epoch": 1.6, "learning_rate": 9.360519620518627e-06, "loss": 1.3126, "step": 1733 }, { "epoch": 1.61, "learning_rate": 9.350541703344875e-06, "loss": 1.2605, "step": 1734 }, { "epoch": 1.61, "learning_rate": 9.34056443546098e-06, "loss": 1.1949, "step": 1735 }, { "epoch": 1.61, "learning_rate": 9.330587826841618e-06, "loss": 1.2776, "step": 1736 }, { "epoch": 1.61, "learning_rate": 9.320611887460814e-06, "loss": 1.2732, "step": 1737 }, { "epoch": 1.61, "learning_rate": 9.310636627291915e-06, "loss": 1.2624, "step": 1738 }, { "epoch": 1.61, "learning_rate": 9.3006620563076e-06, "loss": 1.2322, "step": 1739 }, { "epoch": 1.61, "learning_rate": 9.290688184479852e-06, "loss": 1.2822, "step": 1740 }, { "epoch": 1.61, "learning_rate": 9.28071502177995e-06, "loss": 1.3219, "step": 1741 }, { "epoch": 1.61, "learning_rate": 9.270742578178475e-06, "loss": 1.2298, "step": 1742 }, { "epoch": 1.61, "learning_rate": 9.260770863645283e-06, "loss": 1.3109, "step": 1743 }, { "epoch": 1.61, "learning_rate": 9.250799888149504e-06, "loss": 1.3757, "step": 1744 }, { "epoch": 1.62, "learning_rate": 9.24082966165953e-06, "loss": 1.2527, "step": 1745 }, { "epoch": 1.62, "learning_rate": 9.230860194142991e-06, "loss": 1.3497, "step": 1746 }, { "epoch": 1.62, "learning_rate": 9.220891495566779e-06, "loss": 1.2469, "step": 1747 }, { "epoch": 1.62, "learning_rate": 9.210923575897001e-06, "loss": 1.2119, "step": 1748 }, { "epoch": 1.62, "learning_rate": 9.200956445098997e-06, "loss": 1.2318, "step": 1749 }, { "epoch": 1.62, "learning_rate": 9.190990113137308e-06, "loss": 1.3164, "step": 1750 }, { "epoch": 1.62, "learning_rate": 9.181024589975683e-06, "loss": 1.2891, "step": 1751 }, { "epoch": 1.62, "learning_rate": 9.17105988557706e-06, "loss": 1.353, "step": 1752 }, { "epoch": 1.62, "learning_rate": 9.16109600990356e-06, "loss": 1.2734, "step": 1753 }, { "epoch": 1.62, "learning_rate": 9.151132972916477e-06, "loss": 1.2268, "step": 1754 }, { "epoch": 1.62, "learning_rate": 9.141170784576257e-06, "loss": 1.2914, "step": 1755 }, { "epoch": 1.63, "learning_rate": 9.13120945484251e-06, "loss": 1.2566, "step": 1756 }, { "epoch": 1.63, "learning_rate": 9.121248993673981e-06, "loss": 1.3015, "step": 1757 }, { "epoch": 1.63, "learning_rate": 9.111289411028545e-06, "loss": 1.2449, "step": 1758 }, { "epoch": 1.63, "learning_rate": 9.101330716863207e-06, "loss": 1.3788, "step": 1759 }, { "epoch": 1.63, "learning_rate": 9.091372921134074e-06, "loss": 1.3024, "step": 1760 }, { "epoch": 1.63, "learning_rate": 9.081416033796363e-06, "loss": 1.2466, "step": 1761 }, { "epoch": 1.63, "learning_rate": 9.071460064804377e-06, "loss": 1.1938, "step": 1762 }, { "epoch": 1.63, "learning_rate": 9.0615050241115e-06, "loss": 1.2232, "step": 1763 }, { "epoch": 1.63, "learning_rate": 9.05155092167019e-06, "loss": 1.322, "step": 1764 }, { "epoch": 1.63, "learning_rate": 9.041597767431972e-06, "loss": 1.2778, "step": 1765 }, { "epoch": 1.64, "learning_rate": 9.031645571347421e-06, "loss": 1.318, "step": 1766 }, { "epoch": 1.64, "learning_rate": 9.021694343366146e-06, "loss": 1.2643, "step": 1767 }, { "epoch": 1.64, "learning_rate": 9.0117440934368e-06, "loss": 1.2648, "step": 1768 }, { "epoch": 1.64, "learning_rate": 9.001794831507044e-06, "loss": 1.3095, "step": 1769 }, { "epoch": 1.64, "learning_rate": 8.991846567523567e-06, "loss": 1.1795, "step": 1770 }, { "epoch": 1.64, "learning_rate": 8.98189931143205e-06, "loss": 1.2742, "step": 1771 }, { "epoch": 1.64, "learning_rate": 8.971953073177168e-06, "loss": 1.2906, "step": 1772 }, { "epoch": 1.64, "learning_rate": 8.962007862702581e-06, "loss": 1.2508, "step": 1773 }, { "epoch": 1.64, "learning_rate": 8.952063689950922e-06, "loss": 1.398, "step": 1774 }, { "epoch": 1.64, "learning_rate": 8.94212056486378e-06, "loss": 1.2946, "step": 1775 }, { "epoch": 1.64, "learning_rate": 8.932178497381708e-06, "loss": 1.2116, "step": 1776 }, { "epoch": 1.65, "learning_rate": 8.922237497444186e-06, "loss": 1.3419, "step": 1777 }, { "epoch": 1.65, "learning_rate": 8.912297574989649e-06, "loss": 1.2927, "step": 1778 }, { "epoch": 1.65, "learning_rate": 8.902358739955428e-06, "loss": 1.2467, "step": 1779 }, { "epoch": 1.65, "learning_rate": 8.892421002277784e-06, "loss": 1.2554, "step": 1780 }, { "epoch": 1.65, "learning_rate": 8.88248437189188e-06, "loss": 1.2966, "step": 1781 }, { "epoch": 1.65, "learning_rate": 8.872548858731768e-06, "loss": 1.2717, "step": 1782 }, { "epoch": 1.65, "learning_rate": 8.862614472730384e-06, "loss": 1.3172, "step": 1783 }, { "epoch": 1.65, "learning_rate": 8.852681223819538e-06, "loss": 1.2246, "step": 1784 }, { "epoch": 1.65, "learning_rate": 8.8427491219299e-06, "loss": 1.2805, "step": 1785 }, { "epoch": 1.65, "learning_rate": 8.832818176991e-06, "loss": 1.3001, "step": 1786 }, { "epoch": 1.65, "learning_rate": 8.822888398931205e-06, "loss": 1.3572, "step": 1787 }, { "epoch": 1.66, "learning_rate": 8.812959797677718e-06, "loss": 1.359, "step": 1788 }, { "epoch": 1.66, "learning_rate": 8.803032383156566e-06, "loss": 1.3084, "step": 1789 }, { "epoch": 1.66, "learning_rate": 8.793106165292588e-06, "loss": 1.2735, "step": 1790 }, { "epoch": 1.66, "learning_rate": 8.783181154009427e-06, "loss": 1.2645, "step": 1791 }, { "epoch": 1.66, "learning_rate": 8.773257359229523e-06, "loss": 1.3487, "step": 1792 }, { "epoch": 1.66, "learning_rate": 8.763334790874093e-06, "loss": 1.31, "step": 1793 }, { "epoch": 1.66, "learning_rate": 8.753413458863132e-06, "loss": 1.2346, "step": 1794 }, { "epoch": 1.66, "learning_rate": 8.743493373115405e-06, "loss": 1.2709, "step": 1795 }, { "epoch": 1.66, "learning_rate": 8.73357454354842e-06, "loss": 1.2128, "step": 1796 }, { "epoch": 1.66, "learning_rate": 8.72365698007843e-06, "loss": 1.252, "step": 1797 }, { "epoch": 1.66, "learning_rate": 8.713740692620432e-06, "loss": 1.2757, "step": 1798 }, { "epoch": 1.67, "learning_rate": 8.70382569108814e-06, "loss": 1.1752, "step": 1799 }, { "epoch": 1.67, "learning_rate": 8.693911985393985e-06, "loss": 1.3538, "step": 1800 }, { "epoch": 1.67, "learning_rate": 8.6839995854491e-06, "loss": 1.2348, "step": 1801 }, { "epoch": 1.67, "learning_rate": 8.674088501163314e-06, "loss": 1.2323, "step": 1802 }, { "epoch": 1.67, "learning_rate": 8.664178742445143e-06, "loss": 1.2806, "step": 1803 }, { "epoch": 1.67, "learning_rate": 8.654270319201773e-06, "loss": 1.2465, "step": 1804 }, { "epoch": 1.67, "learning_rate": 8.644363241339059e-06, "loss": 1.1811, "step": 1805 }, { "epoch": 1.67, "learning_rate": 8.634457518761511e-06, "loss": 1.2619, "step": 1806 }, { "epoch": 1.67, "learning_rate": 8.624553161372277e-06, "loss": 1.3, "step": 1807 }, { "epoch": 1.67, "learning_rate": 8.614650179073152e-06, "loss": 1.2887, "step": 1808 }, { "epoch": 1.67, "learning_rate": 8.604748581764545e-06, "loss": 1.2853, "step": 1809 }, { "epoch": 1.68, "learning_rate": 8.594848379345487e-06, "loss": 1.2654, "step": 1810 }, { "epoch": 1.68, "learning_rate": 8.584949581713612e-06, "loss": 1.2671, "step": 1811 }, { "epoch": 1.68, "learning_rate": 8.575052198765157e-06, "loss": 1.2864, "step": 1812 }, { "epoch": 1.68, "learning_rate": 8.565156240394927e-06, "loss": 1.2826, "step": 1813 }, { "epoch": 1.68, "learning_rate": 8.55526171649632e-06, "loss": 1.3277, "step": 1814 }, { "epoch": 1.68, "learning_rate": 8.545368636961292e-06, "loss": 1.2429, "step": 1815 }, { "epoch": 1.68, "learning_rate": 8.535477011680356e-06, "loss": 1.2956, "step": 1816 }, { "epoch": 1.68, "learning_rate": 8.525586850542576e-06, "loss": 1.287, "step": 1817 }, { "epoch": 1.68, "learning_rate": 8.515698163435542e-06, "loss": 1.2864, "step": 1818 }, { "epoch": 1.68, "learning_rate": 8.50581096024538e-06, "loss": 1.2934, "step": 1819 }, { "epoch": 1.69, "learning_rate": 8.495925250856732e-06, "loss": 1.1979, "step": 1820 }, { "epoch": 1.69, "learning_rate": 8.486041045152738e-06, "loss": 1.3703, "step": 1821 }, { "epoch": 1.69, "learning_rate": 8.47615835301504e-06, "loss": 1.2748, "step": 1822 }, { "epoch": 1.69, "learning_rate": 8.46627718432377e-06, "loss": 1.3255, "step": 1823 }, { "epoch": 1.69, "learning_rate": 8.456397548957534e-06, "loss": 1.3208, "step": 1824 }, { "epoch": 1.69, "learning_rate": 8.446519456793405e-06, "loss": 1.2353, "step": 1825 }, { "epoch": 1.69, "learning_rate": 8.43664291770691e-06, "loss": 1.3037, "step": 1826 }, { "epoch": 1.69, "learning_rate": 8.426767941572028e-06, "loss": 1.2926, "step": 1827 }, { "epoch": 1.69, "learning_rate": 8.41689453826118e-06, "loss": 1.272, "step": 1828 }, { "epoch": 1.69, "learning_rate": 8.4070227176452e-06, "loss": 1.2217, "step": 1829 }, { "epoch": 1.69, "learning_rate": 8.397152489593348e-06, "loss": 1.2624, "step": 1830 }, { "epoch": 1.7, "learning_rate": 8.387283863973298e-06, "loss": 1.3396, "step": 1831 }, { "epoch": 1.7, "learning_rate": 8.377416850651114e-06, "loss": 1.2944, "step": 1832 }, { "epoch": 1.7, "learning_rate": 8.36755145949125e-06, "loss": 1.3154, "step": 1833 }, { "epoch": 1.7, "learning_rate": 8.357687700356542e-06, "loss": 1.3552, "step": 1834 }, { "epoch": 1.7, "learning_rate": 8.347825583108182e-06, "loss": 1.291, "step": 1835 }, { "epoch": 1.7, "learning_rate": 8.337965117605737e-06, "loss": 1.2765, "step": 1836 }, { "epoch": 1.7, "learning_rate": 8.328106313707113e-06, "loss": 1.353, "step": 1837 }, { "epoch": 1.7, "learning_rate": 8.318249181268556e-06, "loss": 1.1687, "step": 1838 }, { "epoch": 1.7, "learning_rate": 8.308393730144644e-06, "loss": 1.2932, "step": 1839 }, { "epoch": 1.7, "learning_rate": 8.29853997018827e-06, "loss": 1.2723, "step": 1840 }, { "epoch": 1.7, "learning_rate": 8.288687911250639e-06, "loss": 1.2396, "step": 1841 }, { "epoch": 1.71, "learning_rate": 8.278837563181255e-06, "loss": 1.25, "step": 1842 }, { "epoch": 1.71, "learning_rate": 8.268988935827911e-06, "loss": 1.2673, "step": 1843 }, { "epoch": 1.71, "learning_rate": 8.259142039036682e-06, "loss": 1.2739, "step": 1844 }, { "epoch": 1.71, "learning_rate": 8.24929688265191e-06, "loss": 1.2632, "step": 1845 }, { "epoch": 1.71, "learning_rate": 8.239453476516195e-06, "loss": 1.3326, "step": 1846 }, { "epoch": 1.71, "learning_rate": 8.229611830470392e-06, "loss": 1.3286, "step": 1847 }, { "epoch": 1.71, "learning_rate": 8.219771954353597e-06, "loss": 1.2731, "step": 1848 }, { "epoch": 1.71, "learning_rate": 8.209933858003131e-06, "loss": 1.2823, "step": 1849 }, { "epoch": 1.71, "learning_rate": 8.200097551254537e-06, "loss": 1.2165, "step": 1850 }, { "epoch": 1.71, "learning_rate": 8.190263043941575e-06, "loss": 1.2693, "step": 1851 }, { "epoch": 1.71, "learning_rate": 8.1804303458962e-06, "loss": 1.3105, "step": 1852 }, { "epoch": 1.72, "learning_rate": 8.17059946694856e-06, "loss": 1.2661, "step": 1853 }, { "epoch": 1.72, "learning_rate": 8.160770416926984e-06, "loss": 1.3314, "step": 1854 }, { "epoch": 1.72, "learning_rate": 8.150943205657971e-06, "loss": 1.2822, "step": 1855 }, { "epoch": 1.72, "learning_rate": 8.141117842966187e-06, "loss": 1.2135, "step": 1856 }, { "epoch": 1.72, "learning_rate": 8.131294338674445e-06, "loss": 1.2079, "step": 1857 }, { "epoch": 1.72, "learning_rate": 8.121472702603703e-06, "loss": 1.3088, "step": 1858 }, { "epoch": 1.72, "learning_rate": 8.111652944573046e-06, "loss": 1.3015, "step": 1859 }, { "epoch": 1.72, "learning_rate": 8.101835074399686e-06, "loss": 1.2141, "step": 1860 }, { "epoch": 1.72, "learning_rate": 8.09201910189895e-06, "loss": 1.2715, "step": 1861 }, { "epoch": 1.72, "learning_rate": 8.082205036884265e-06, "loss": 1.2151, "step": 1862 }, { "epoch": 1.72, "learning_rate": 8.072392889167145e-06, "loss": 1.1989, "step": 1863 }, { "epoch": 1.73, "learning_rate": 8.062582668557197e-06, "loss": 1.2852, "step": 1864 }, { "epoch": 1.73, "learning_rate": 8.052774384862094e-06, "loss": 1.3142, "step": 1865 }, { "epoch": 1.73, "learning_rate": 8.04296804788758e-06, "loss": 1.2097, "step": 1866 }, { "epoch": 1.73, "learning_rate": 8.033163667437448e-06, "loss": 1.2848, "step": 1867 }, { "epoch": 1.73, "learning_rate": 8.023361253313533e-06, "loss": 1.3048, "step": 1868 }, { "epoch": 1.73, "learning_rate": 8.013560815315708e-06, "loss": 1.3547, "step": 1869 }, { "epoch": 1.73, "learning_rate": 8.00376236324187e-06, "loss": 1.2568, "step": 1870 }, { "epoch": 1.73, "learning_rate": 7.99396590688793e-06, "loss": 1.2505, "step": 1871 }, { "epoch": 1.73, "learning_rate": 7.984171456047805e-06, "loss": 1.2928, "step": 1872 }, { "epoch": 1.73, "learning_rate": 7.9743790205134e-06, "loss": 1.284, "step": 1873 }, { "epoch": 1.74, "learning_rate": 7.964588610074615e-06, "loss": 1.2561, "step": 1874 }, { "epoch": 1.74, "learning_rate": 7.95480023451932e-06, "loss": 1.2459, "step": 1875 }, { "epoch": 1.74, "learning_rate": 7.945013903633352e-06, "loss": 1.3015, "step": 1876 }, { "epoch": 1.74, "learning_rate": 7.935229627200505e-06, "loss": 1.2131, "step": 1877 }, { "epoch": 1.74, "learning_rate": 7.925447415002523e-06, "loss": 1.3134, "step": 1878 }, { "epoch": 1.74, "learning_rate": 7.915667276819067e-06, "loss": 1.362, "step": 1879 }, { "epoch": 1.74, "learning_rate": 7.905889222427747e-06, "loss": 1.2988, "step": 1880 }, { "epoch": 1.74, "learning_rate": 7.896113261604082e-06, "loss": 1.3454, "step": 1881 }, { "epoch": 1.74, "learning_rate": 7.886339404121497e-06, "loss": 1.3119, "step": 1882 }, { "epoch": 1.74, "learning_rate": 7.876567659751313e-06, "loss": 1.2543, "step": 1883 }, { "epoch": 1.74, "learning_rate": 7.86679803826274e-06, "loss": 1.2332, "step": 1884 }, { "epoch": 1.75, "learning_rate": 7.857030549422868e-06, "loss": 1.2978, "step": 1885 }, { "epoch": 1.75, "learning_rate": 7.847265202996653e-06, "loss": 1.3081, "step": 1886 }, { "epoch": 1.75, "learning_rate": 7.837502008746904e-06, "loss": 1.3314, "step": 1887 }, { "epoch": 1.75, "learning_rate": 7.827740976434287e-06, "loss": 1.2642, "step": 1888 }, { "epoch": 1.75, "learning_rate": 7.8179821158173e-06, "loss": 1.3519, "step": 1889 }, { "epoch": 1.75, "learning_rate": 7.808225436652275e-06, "loss": 1.2811, "step": 1890 }, { "epoch": 1.75, "learning_rate": 7.79847094869336e-06, "loss": 1.2337, "step": 1891 }, { "epoch": 1.75, "learning_rate": 7.788718661692512e-06, "loss": 1.2455, "step": 1892 }, { "epoch": 1.75, "learning_rate": 7.778968585399491e-06, "loss": 1.3145, "step": 1893 }, { "epoch": 1.75, "learning_rate": 7.769220729561842e-06, "loss": 1.2607, "step": 1894 }, { "epoch": 1.75, "learning_rate": 7.759475103924899e-06, "loss": 1.2954, "step": 1895 }, { "epoch": 1.76, "learning_rate": 7.74973171823175e-06, "loss": 1.2838, "step": 1896 }, { "epoch": 1.76, "learning_rate": 7.739990582223258e-06, "loss": 1.2589, "step": 1897 }, { "epoch": 1.76, "learning_rate": 7.730251705638034e-06, "loss": 1.3064, "step": 1898 }, { "epoch": 1.76, "learning_rate": 7.720515098212428e-06, "loss": 1.1809, "step": 1899 }, { "epoch": 1.76, "learning_rate": 7.710780769680522e-06, "loss": 1.2957, "step": 1900 }, { "epoch": 1.76, "learning_rate": 7.70104872977412e-06, "loss": 1.2031, "step": 1901 }, { "epoch": 1.76, "learning_rate": 7.691318988222736e-06, "loss": 1.3119, "step": 1902 }, { "epoch": 1.76, "learning_rate": 7.681591554753592e-06, "loss": 1.2095, "step": 1903 }, { "epoch": 1.76, "learning_rate": 7.671866439091594e-06, "loss": 1.2664, "step": 1904 }, { "epoch": 1.76, "learning_rate": 7.66214365095934e-06, "loss": 1.2666, "step": 1905 }, { "epoch": 1.76, "learning_rate": 7.652423200077091e-06, "loss": 1.3458, "step": 1906 }, { "epoch": 1.77, "learning_rate": 7.642705096162782e-06, "loss": 1.3022, "step": 1907 }, { "epoch": 1.77, "learning_rate": 7.632989348931993e-06, "loss": 1.1682, "step": 1908 }, { "epoch": 1.77, "learning_rate": 7.623275968097955e-06, "loss": 1.255, "step": 1909 }, { "epoch": 1.77, "learning_rate": 7.6135649633715285e-06, "loss": 1.2848, "step": 1910 }, { "epoch": 1.77, "learning_rate": 7.603856344461196e-06, "loss": 1.2145, "step": 1911 }, { "epoch": 1.77, "learning_rate": 7.594150121073063e-06, "loss": 1.2313, "step": 1912 }, { "epoch": 1.77, "learning_rate": 7.584446302910832e-06, "loss": 1.2918, "step": 1913 }, { "epoch": 1.77, "learning_rate": 7.574744899675807e-06, "loss": 1.2247, "step": 1914 }, { "epoch": 1.77, "learning_rate": 7.565045921066871e-06, "loss": 1.2555, "step": 1915 }, { "epoch": 1.77, "learning_rate": 7.555349376780492e-06, "loss": 1.3561, "step": 1916 }, { "epoch": 1.77, "learning_rate": 7.545655276510696e-06, "loss": 1.2594, "step": 1917 }, { "epoch": 1.78, "learning_rate": 7.535963629949071e-06, "loss": 1.217, "step": 1918 }, { "epoch": 1.78, "learning_rate": 7.526274446784749e-06, "loss": 1.2877, "step": 1919 }, { "epoch": 1.78, "learning_rate": 7.516587736704402e-06, "loss": 1.2293, "step": 1920 }, { "epoch": 1.78, "learning_rate": 7.506903509392224e-06, "loss": 1.2084, "step": 1921 }, { "epoch": 1.78, "learning_rate": 7.497221774529935e-06, "loss": 1.3098, "step": 1922 }, { "epoch": 1.78, "learning_rate": 7.487542541796755e-06, "loss": 1.3083, "step": 1923 }, { "epoch": 1.78, "learning_rate": 7.4778658208694125e-06, "loss": 1.2504, "step": 1924 }, { "epoch": 1.78, "learning_rate": 7.4681916214221095e-06, "loss": 1.2637, "step": 1925 }, { "epoch": 1.78, "learning_rate": 7.458519953126544e-06, "loss": 1.32, "step": 1926 }, { "epoch": 1.78, "learning_rate": 7.448850825651871e-06, "loss": 1.2996, "step": 1927 }, { "epoch": 1.79, "learning_rate": 7.439184248664713e-06, "loss": 1.2526, "step": 1928 }, { "epoch": 1.79, "learning_rate": 7.429520231829144e-06, "loss": 1.2216, "step": 1929 }, { "epoch": 1.79, "learning_rate": 7.419858784806665e-06, "loss": 1.1862, "step": 1930 }, { "epoch": 1.79, "learning_rate": 7.410199917256223e-06, "loss": 1.2831, "step": 1931 }, { "epoch": 1.79, "learning_rate": 7.4005436388341765e-06, "loss": 1.2683, "step": 1932 }, { "epoch": 1.79, "learning_rate": 7.3908899591943055e-06, "loss": 1.2814, "step": 1933 }, { "epoch": 1.79, "learning_rate": 7.381238887987781e-06, "loss": 1.2468, "step": 1934 }, { "epoch": 1.79, "learning_rate": 7.371590434863172e-06, "loss": 1.2768, "step": 1935 }, { "epoch": 1.79, "learning_rate": 7.361944609466431e-06, "loss": 1.3264, "step": 1936 }, { "epoch": 1.79, "learning_rate": 7.3523014214408805e-06, "loss": 1.3317, "step": 1937 }, { "epoch": 1.79, "learning_rate": 7.34266088042721e-06, "loss": 1.2589, "step": 1938 }, { "epoch": 1.8, "learning_rate": 7.333022996063455e-06, "loss": 1.2759, "step": 1939 }, { "epoch": 1.8, "learning_rate": 7.3233877779850045e-06, "loss": 1.261, "step": 1940 }, { "epoch": 1.8, "learning_rate": 7.313755235824575e-06, "loss": 1.2271, "step": 1941 }, { "epoch": 1.8, "learning_rate": 7.304125379212212e-06, "loss": 1.1912, "step": 1942 }, { "epoch": 1.8, "learning_rate": 7.294498217775274e-06, "loss": 1.3018, "step": 1943 }, { "epoch": 1.8, "learning_rate": 7.284873761138427e-06, "loss": 1.2106, "step": 1944 }, { "epoch": 1.8, "learning_rate": 7.275252018923633e-06, "loss": 1.2707, "step": 1945 }, { "epoch": 1.8, "learning_rate": 7.265633000750132e-06, "loss": 1.1852, "step": 1946 }, { "epoch": 1.8, "learning_rate": 7.256016716234451e-06, "loss": 1.3029, "step": 1947 }, { "epoch": 1.8, "learning_rate": 7.246403174990379e-06, "loss": 1.2211, "step": 1948 }, { "epoch": 1.8, "learning_rate": 7.236792386628968e-06, "loss": 1.2692, "step": 1949 }, { "epoch": 1.81, "learning_rate": 7.227184360758512e-06, "loss": 1.3189, "step": 1950 }, { "epoch": 1.81, "learning_rate": 7.217579106984542e-06, "loss": 1.2952, "step": 1951 }, { "epoch": 1.81, "learning_rate": 7.207976634909827e-06, "loss": 1.2916, "step": 1952 }, { "epoch": 1.81, "learning_rate": 7.198376954134341e-06, "loss": 1.185, "step": 1953 }, { "epoch": 1.81, "learning_rate": 7.18878007425528e-06, "loss": 1.3091, "step": 1954 }, { "epoch": 1.81, "learning_rate": 7.179186004867032e-06, "loss": 1.3722, "step": 1955 }, { "epoch": 1.81, "learning_rate": 7.16959475556118e-06, "loss": 1.288, "step": 1956 }, { "epoch": 1.81, "learning_rate": 7.1600063359264845e-06, "loss": 1.2537, "step": 1957 }, { "epoch": 1.81, "learning_rate": 7.15042075554888e-06, "loss": 1.2387, "step": 1958 }, { "epoch": 1.81, "learning_rate": 7.14083802401146e-06, "loss": 1.2402, "step": 1959 }, { "epoch": 1.81, "learning_rate": 7.131258150894469e-06, "loss": 1.3122, "step": 1960 }, { "epoch": 1.82, "learning_rate": 7.121681145775301e-06, "loss": 1.2622, "step": 1961 }, { "epoch": 1.82, "learning_rate": 7.112107018228471e-06, "loss": 1.3325, "step": 1962 }, { "epoch": 1.82, "learning_rate": 7.102535777825626e-06, "loss": 1.2644, "step": 1963 }, { "epoch": 1.82, "learning_rate": 7.092967434135523e-06, "loss": 1.3298, "step": 1964 }, { "epoch": 1.82, "learning_rate": 7.083401996724023e-06, "loss": 1.2973, "step": 1965 }, { "epoch": 1.82, "learning_rate": 7.073839475154087e-06, "loss": 1.2315, "step": 1966 }, { "epoch": 1.82, "learning_rate": 7.0642798789857536e-06, "loss": 1.3058, "step": 1967 }, { "epoch": 1.82, "learning_rate": 7.054723217776138e-06, "loss": 1.2633, "step": 1968 }, { "epoch": 1.82, "learning_rate": 7.0451695010794256e-06, "loss": 1.2228, "step": 1969 }, { "epoch": 1.82, "learning_rate": 7.035618738446853e-06, "loss": 1.2154, "step": 1970 }, { "epoch": 1.82, "learning_rate": 7.026070939426708e-06, "loss": 1.1075, "step": 1971 }, { "epoch": 1.83, "learning_rate": 7.016526113564313e-06, "loss": 1.3148, "step": 1972 }, { "epoch": 1.83, "learning_rate": 7.006984270402015e-06, "loss": 1.3055, "step": 1973 }, { "epoch": 1.83, "learning_rate": 6.9974454194791875e-06, "loss": 1.2737, "step": 1974 }, { "epoch": 1.83, "learning_rate": 6.987909570332207e-06, "loss": 1.2496, "step": 1975 }, { "epoch": 1.83, "learning_rate": 6.978376732494445e-06, "loss": 1.2236, "step": 1976 }, { "epoch": 1.83, "learning_rate": 6.968846915496269e-06, "loss": 1.2269, "step": 1977 }, { "epoch": 1.83, "learning_rate": 6.959320128865026e-06, "loss": 1.1818, "step": 1978 }, { "epoch": 1.83, "learning_rate": 6.949796382125031e-06, "loss": 1.2649, "step": 1979 }, { "epoch": 1.83, "learning_rate": 6.940275684797562e-06, "loss": 1.3305, "step": 1980 }, { "epoch": 1.83, "learning_rate": 6.930758046400841e-06, "loss": 1.2589, "step": 1981 }, { "epoch": 1.83, "learning_rate": 6.9212434764500435e-06, "loss": 1.1857, "step": 1982 }, { "epoch": 1.84, "learning_rate": 6.911731984457271e-06, "loss": 1.2467, "step": 1983 }, { "epoch": 1.84, "learning_rate": 6.902223579931548e-06, "loss": 1.3075, "step": 1984 }, { "epoch": 1.84, "learning_rate": 6.892718272378813e-06, "loss": 1.1761, "step": 1985 }, { "epoch": 1.84, "learning_rate": 6.883216071301908e-06, "loss": 1.3093, "step": 1986 }, { "epoch": 1.84, "learning_rate": 6.873716986200569e-06, "loss": 1.2529, "step": 1987 }, { "epoch": 1.84, "learning_rate": 6.86422102657142e-06, "loss": 1.2687, "step": 1988 }, { "epoch": 1.84, "learning_rate": 6.854728201907958e-06, "loss": 1.3201, "step": 1989 }, { "epoch": 1.84, "learning_rate": 6.8452385217005416e-06, "loss": 1.3128, "step": 1990 }, { "epoch": 1.84, "learning_rate": 6.8357519954363925e-06, "loss": 1.2292, "step": 1991 }, { "epoch": 1.84, "learning_rate": 6.8262686325995756e-06, "loss": 1.2438, "step": 1992 }, { "epoch": 1.85, "learning_rate": 6.816788442670994e-06, "loss": 1.2793, "step": 1993 }, { "epoch": 1.85, "learning_rate": 6.807311435128381e-06, "loss": 1.2662, "step": 1994 }, { "epoch": 1.85, "learning_rate": 6.797837619446287e-06, "loss": 1.2563, "step": 1995 }, { "epoch": 1.85, "learning_rate": 6.788367005096065e-06, "loss": 1.2229, "step": 1996 }, { "epoch": 1.85, "learning_rate": 6.7788996015458765e-06, "loss": 1.2404, "step": 1997 }, { "epoch": 1.85, "learning_rate": 6.76943541826067e-06, "loss": 1.2589, "step": 1998 }, { "epoch": 1.85, "learning_rate": 6.759974464702173e-06, "loss": 1.2918, "step": 1999 }, { "epoch": 1.85, "learning_rate": 6.750516750328885e-06, "loss": 1.2332, "step": 2000 }, { "epoch": 1.85, "learning_rate": 6.7410622845960695e-06, "loss": 1.2858, "step": 2001 }, { "epoch": 1.85, "learning_rate": 6.731611076955739e-06, "loss": 1.247, "step": 2002 }, { "epoch": 1.85, "learning_rate": 6.722163136856651e-06, "loss": 1.2433, "step": 2003 }, { "epoch": 1.86, "learning_rate": 6.712718473744293e-06, "loss": 1.2484, "step": 2004 }, { "epoch": 1.86, "learning_rate": 6.703277097060879e-06, "loss": 1.2713, "step": 2005 }, { "epoch": 1.86, "learning_rate": 6.693839016245338e-06, "loss": 1.1786, "step": 2006 }, { "epoch": 1.86, "learning_rate": 6.6844042407333036e-06, "loss": 1.2733, "step": 2007 }, { "epoch": 1.86, "learning_rate": 6.6749727799571016e-06, "loss": 1.283, "step": 2008 }, { "epoch": 1.86, "learning_rate": 6.66554464334575e-06, "loss": 1.3274, "step": 2009 }, { "epoch": 1.86, "learning_rate": 6.656119840324938e-06, "loss": 1.2774, "step": 2010 }, { "epoch": 1.86, "learning_rate": 6.646698380317025e-06, "loss": 1.2662, "step": 2011 }, { "epoch": 1.86, "learning_rate": 6.6372802727410306e-06, "loss": 1.2406, "step": 2012 }, { "epoch": 1.86, "learning_rate": 6.627865527012613e-06, "loss": 1.2325, "step": 2013 }, { "epoch": 1.86, "learning_rate": 6.618454152544078e-06, "loss": 1.2161, "step": 2014 }, { "epoch": 1.87, "learning_rate": 6.609046158744363e-06, "loss": 1.296, "step": 2015 }, { "epoch": 1.87, "learning_rate": 6.599641555019019e-06, "loss": 1.2588, "step": 2016 }, { "epoch": 1.87, "learning_rate": 6.590240350770211e-06, "loss": 1.2204, "step": 2017 }, { "epoch": 1.87, "learning_rate": 6.580842555396706e-06, "loss": 1.2129, "step": 2018 }, { "epoch": 1.87, "learning_rate": 6.57144817829386e-06, "loss": 1.2954, "step": 2019 }, { "epoch": 1.87, "learning_rate": 6.562057228853613e-06, "loss": 1.3117, "step": 2020 }, { "epoch": 1.87, "learning_rate": 6.552669716464481e-06, "loss": 1.2126, "step": 2021 }, { "epoch": 1.87, "learning_rate": 6.543285650511543e-06, "loss": 1.2022, "step": 2022 }, { "epoch": 1.87, "learning_rate": 6.533905040376427e-06, "loss": 1.3235, "step": 2023 }, { "epoch": 1.87, "learning_rate": 6.524527895437314e-06, "loss": 1.2429, "step": 2024 }, { "epoch": 1.87, "learning_rate": 6.515154225068914e-06, "loss": 1.3122, "step": 2025 }, { "epoch": 1.88, "learning_rate": 6.505784038642469e-06, "loss": 1.2551, "step": 2026 }, { "epoch": 1.88, "learning_rate": 6.496417345525734e-06, "loss": 1.2376, "step": 2027 }, { "epoch": 1.88, "learning_rate": 6.487054155082973e-06, "loss": 1.2812, "step": 2028 }, { "epoch": 1.88, "learning_rate": 6.477694476674947e-06, "loss": 1.3134, "step": 2029 }, { "epoch": 1.88, "learning_rate": 6.468338319658907e-06, "loss": 1.2333, "step": 2030 }, { "epoch": 1.88, "learning_rate": 6.458985693388584e-06, "loss": 1.1568, "step": 2031 }, { "epoch": 1.88, "learning_rate": 6.449636607214178e-06, "loss": 1.281, "step": 2032 }, { "epoch": 1.88, "learning_rate": 6.440291070482352e-06, "loss": 1.3142, "step": 2033 }, { "epoch": 1.88, "learning_rate": 6.430949092536216e-06, "loss": 1.2217, "step": 2034 }, { "epoch": 1.88, "learning_rate": 6.421610682715327e-06, "loss": 1.2595, "step": 2035 }, { "epoch": 1.88, "learning_rate": 6.412275850355674e-06, "loss": 1.3355, "step": 2036 }, { "epoch": 1.89, "learning_rate": 6.402944604789666e-06, "loss": 1.1805, "step": 2037 }, { "epoch": 1.89, "learning_rate": 6.393616955346129e-06, "loss": 1.2829, "step": 2038 }, { "epoch": 1.89, "learning_rate": 6.384292911350294e-06, "loss": 1.3356, "step": 2039 }, { "epoch": 1.89, "learning_rate": 6.374972482123787e-06, "loss": 1.2896, "step": 2040 }, { "epoch": 1.89, "learning_rate": 6.365655676984621e-06, "loss": 1.3053, "step": 2041 }, { "epoch": 1.89, "learning_rate": 6.35634250524718e-06, "loss": 1.1876, "step": 2042 }, { "epoch": 1.89, "learning_rate": 6.347032976222225e-06, "loss": 1.2575, "step": 2043 }, { "epoch": 1.89, "learning_rate": 6.337727099216868e-06, "loss": 1.2545, "step": 2044 }, { "epoch": 1.89, "learning_rate": 6.328424883534574e-06, "loss": 1.2277, "step": 2045 }, { "epoch": 1.89, "learning_rate": 6.319126338475148e-06, "loss": 1.1687, "step": 2046 }, { "epoch": 1.9, "learning_rate": 6.309831473334718e-06, "loss": 1.3044, "step": 2047 }, { "epoch": 1.9, "learning_rate": 6.3005402974057415e-06, "loss": 1.3247, "step": 2048 }, { "epoch": 1.9, "learning_rate": 6.2912528199769825e-06, "loss": 1.2956, "step": 2049 }, { "epoch": 1.9, "learning_rate": 6.281969050333514e-06, "loss": 1.3551, "step": 2050 }, { "epoch": 1.9, "learning_rate": 6.272688997756694e-06, "loss": 1.2248, "step": 2051 }, { "epoch": 1.9, "learning_rate": 6.263412671524169e-06, "loss": 1.2431, "step": 2052 }, { "epoch": 1.9, "learning_rate": 6.254140080909863e-06, "loss": 1.1561, "step": 2053 }, { "epoch": 1.9, "learning_rate": 6.244871235183956e-06, "loss": 1.2495, "step": 2054 }, { "epoch": 1.9, "learning_rate": 6.235606143612895e-06, "loss": 1.206, "step": 2055 }, { "epoch": 1.9, "learning_rate": 6.226344815459364e-06, "loss": 1.2911, "step": 2056 }, { "epoch": 1.9, "learning_rate": 6.217087259982293e-06, "loss": 1.3149, "step": 2057 }, { "epoch": 1.91, "learning_rate": 6.207833486436832e-06, "loss": 1.245, "step": 2058 }, { "epoch": 1.91, "learning_rate": 6.198583504074358e-06, "loss": 1.1875, "step": 2059 }, { "epoch": 1.91, "learning_rate": 6.189337322142455e-06, "loss": 1.2239, "step": 2060 }, { "epoch": 1.91, "learning_rate": 6.1800949498849035e-06, "loss": 1.2283, "step": 2061 }, { "epoch": 1.91, "learning_rate": 6.170856396541687e-06, "loss": 1.225, "step": 2062 }, { "epoch": 1.91, "learning_rate": 6.1616216713489496e-06, "loss": 1.1982, "step": 2063 }, { "epoch": 1.91, "learning_rate": 6.15239078353903e-06, "loss": 1.2587, "step": 2064 }, { "epoch": 1.91, "learning_rate": 6.143163742340419e-06, "loss": 1.2645, "step": 2065 }, { "epoch": 1.91, "learning_rate": 6.133940556977766e-06, "loss": 1.2994, "step": 2066 }, { "epoch": 1.91, "learning_rate": 6.1247212366718635e-06, "loss": 1.1834, "step": 2067 }, { "epoch": 1.91, "learning_rate": 6.115505790639642e-06, "loss": 1.282, "step": 2068 }, { "epoch": 1.92, "learning_rate": 6.106294228094157e-06, "loss": 1.2344, "step": 2069 }, { "epoch": 1.92, "learning_rate": 6.09708655824458e-06, "loss": 1.2476, "step": 2070 }, { "epoch": 1.92, "learning_rate": 6.087882790296196e-06, "loss": 1.1844, "step": 2071 }, { "epoch": 1.92, "learning_rate": 6.078682933450384e-06, "loss": 1.2565, "step": 2072 }, { "epoch": 1.92, "learning_rate": 6.069486996904614e-06, "loss": 1.2416, "step": 2073 }, { "epoch": 1.92, "learning_rate": 6.060294989852438e-06, "loss": 1.2348, "step": 2074 }, { "epoch": 1.92, "learning_rate": 6.051106921483481e-06, "loss": 1.3068, "step": 2075 }, { "epoch": 1.92, "learning_rate": 6.041922800983427e-06, "loss": 1.2104, "step": 2076 }, { "epoch": 1.92, "learning_rate": 6.032742637534012e-06, "loss": 1.3053, "step": 2077 }, { "epoch": 1.92, "learning_rate": 6.023566440313023e-06, "loss": 1.2162, "step": 2078 }, { "epoch": 1.92, "learning_rate": 6.01439421849428e-06, "loss": 1.2338, "step": 2079 }, { "epoch": 1.93, "learning_rate": 6.0052259812476155e-06, "loss": 1.2349, "step": 2080 }, { "epoch": 1.93, "learning_rate": 5.996061737738895e-06, "loss": 1.1964, "step": 2081 }, { "epoch": 1.93, "learning_rate": 5.986901497129986e-06, "loss": 1.2706, "step": 2082 }, { "epoch": 1.93, "learning_rate": 5.977745268578752e-06, "loss": 1.3221, "step": 2083 }, { "epoch": 1.93, "learning_rate": 5.968593061239046e-06, "loss": 1.3304, "step": 2084 }, { "epoch": 1.93, "learning_rate": 5.959444884260703e-06, "loss": 1.2381, "step": 2085 }, { "epoch": 1.93, "learning_rate": 5.950300746789526e-06, "loss": 1.2426, "step": 2086 }, { "epoch": 1.93, "learning_rate": 5.941160657967282e-06, "loss": 1.3353, "step": 2087 }, { "epoch": 1.93, "learning_rate": 5.932024626931689e-06, "loss": 1.2751, "step": 2088 }, { "epoch": 1.93, "learning_rate": 5.92289266281641e-06, "loss": 1.2892, "step": 2089 }, { "epoch": 1.93, "learning_rate": 5.913764774751038e-06, "loss": 1.2616, "step": 2090 }, { "epoch": 1.94, "learning_rate": 5.904640971861095e-06, "loss": 1.3479, "step": 2091 }, { "epoch": 1.94, "learning_rate": 5.8955212632680185e-06, "loss": 1.2432, "step": 2092 }, { "epoch": 1.94, "learning_rate": 5.886405658089151e-06, "loss": 1.3434, "step": 2093 }, { "epoch": 1.94, "learning_rate": 5.877294165437733e-06, "loss": 1.2407, "step": 2094 }, { "epoch": 1.94, "learning_rate": 5.868186794422892e-06, "loss": 1.2989, "step": 2095 }, { "epoch": 1.94, "learning_rate": 5.8590835541496426e-06, "loss": 1.2889, "step": 2096 }, { "epoch": 1.94, "learning_rate": 5.849984453718861e-06, "loss": 1.3641, "step": 2097 }, { "epoch": 1.94, "learning_rate": 5.840889502227285e-06, "loss": 1.1167, "step": 2098 }, { "epoch": 1.94, "learning_rate": 5.831798708767509e-06, "loss": 1.2294, "step": 2099 }, { "epoch": 1.94, "learning_rate": 5.822712082427971e-06, "loss": 1.1957, "step": 2100 }, { "epoch": 1.95, "learning_rate": 5.813629632292937e-06, "loss": 1.1901, "step": 2101 }, { "epoch": 1.95, "learning_rate": 5.804551367442509e-06, "loss": 1.212, "step": 2102 }, { "epoch": 1.95, "learning_rate": 5.795477296952587e-06, "loss": 1.2653, "step": 2103 }, { "epoch": 1.95, "learning_rate": 5.7864074298948945e-06, "loss": 1.3337, "step": 2104 }, { "epoch": 1.95, "learning_rate": 5.777341775336944e-06, "loss": 1.2334, "step": 2105 }, { "epoch": 1.95, "learning_rate": 5.768280342342038e-06, "loss": 1.2666, "step": 2106 }, { "epoch": 1.95, "learning_rate": 5.759223139969263e-06, "loss": 1.2629, "step": 2107 }, { "epoch": 1.95, "learning_rate": 5.750170177273471e-06, "loss": 1.2478, "step": 2108 }, { "epoch": 1.95, "learning_rate": 5.741121463305273e-06, "loss": 1.4056, "step": 2109 }, { "epoch": 1.95, "learning_rate": 5.732077007111041e-06, "loss": 1.195, "step": 2110 }, { "epoch": 1.95, "learning_rate": 5.723036817732882e-06, "loss": 1.2677, "step": 2111 }, { "epoch": 1.96, "learning_rate": 5.714000904208648e-06, "loss": 1.205, "step": 2112 }, { "epoch": 1.96, "learning_rate": 5.704969275571896e-06, "loss": 1.261, "step": 2113 }, { "epoch": 1.96, "learning_rate": 5.695941940851924e-06, "loss": 1.232, "step": 2114 }, { "epoch": 1.96, "learning_rate": 5.686918909073718e-06, "loss": 1.2876, "step": 2115 }, { "epoch": 1.96, "learning_rate": 5.677900189257972e-06, "loss": 1.2608, "step": 2116 }, { "epoch": 1.96, "learning_rate": 5.668885790421068e-06, "loss": 1.2936, "step": 2117 }, { "epoch": 1.96, "learning_rate": 5.659875721575065e-06, "loss": 1.2057, "step": 2118 }, { "epoch": 1.96, "learning_rate": 5.6508699917276975e-06, "loss": 1.2406, "step": 2119 }, { "epoch": 1.96, "learning_rate": 5.641868609882357e-06, "loss": 1.2667, "step": 2120 }, { "epoch": 1.96, "learning_rate": 5.632871585038093e-06, "loss": 1.2286, "step": 2121 }, { "epoch": 1.96, "learning_rate": 5.623878926189595e-06, "loss": 1.2388, "step": 2122 }, { "epoch": 1.97, "learning_rate": 5.614890642327191e-06, "loss": 1.2963, "step": 2123 }, { "epoch": 1.97, "learning_rate": 5.605906742436833e-06, "loss": 1.2253, "step": 2124 }, { "epoch": 1.97, "learning_rate": 5.59692723550009e-06, "loss": 1.262, "step": 2125 }, { "epoch": 1.97, "learning_rate": 5.587952130494145e-06, "loss": 1.2477, "step": 2126 }, { "epoch": 1.97, "learning_rate": 5.578981436391768e-06, "loss": 1.2992, "step": 2127 }, { "epoch": 1.97, "learning_rate": 5.570015162161331e-06, "loss": 1.2717, "step": 2128 }, { "epoch": 1.97, "learning_rate": 5.561053316766779e-06, "loss": 1.2893, "step": 2129 }, { "epoch": 1.97, "learning_rate": 5.5520959091676406e-06, "loss": 1.2954, "step": 2130 }, { "epoch": 1.97, "learning_rate": 5.543142948318992e-06, "loss": 1.3015, "step": 2131 }, { "epoch": 1.97, "learning_rate": 5.534194443171472e-06, "loss": 1.2874, "step": 2132 }, { "epoch": 1.97, "learning_rate": 5.525250402671272e-06, "loss": 1.262, "step": 2133 }, { "epoch": 1.98, "learning_rate": 5.516310835760106e-06, "loss": 1.3326, "step": 2134 }, { "epoch": 1.98, "learning_rate": 5.507375751375226e-06, "loss": 1.299, "step": 2135 }, { "epoch": 1.98, "learning_rate": 5.498445158449398e-06, "loss": 1.2274, "step": 2136 }, { "epoch": 1.98, "learning_rate": 5.4895190659109e-06, "loss": 1.2705, "step": 2137 }, { "epoch": 1.98, "learning_rate": 5.480597482683507e-06, "loss": 1.3681, "step": 2138 }, { "epoch": 1.98, "learning_rate": 5.47168041768649e-06, "loss": 1.2243, "step": 2139 }, { "epoch": 1.98, "learning_rate": 5.4627678798346015e-06, "loss": 1.2534, "step": 2140 }, { "epoch": 1.98, "learning_rate": 5.453859878038068e-06, "loss": 1.1776, "step": 2141 }, { "epoch": 1.98, "learning_rate": 5.444956421202579e-06, "loss": 1.2071, "step": 2142 }, { "epoch": 1.98, "learning_rate": 5.436057518229284e-06, "loss": 1.2699, "step": 2143 }, { "epoch": 1.98, "learning_rate": 5.427163178014777e-06, "loss": 1.3283, "step": 2144 }, { "epoch": 1.99, "learning_rate": 5.418273409451092e-06, "loss": 1.2836, "step": 2145 }, { "epoch": 1.99, "learning_rate": 5.409388221425696e-06, "loss": 1.2164, "step": 2146 }, { "epoch": 1.99, "learning_rate": 5.4005076228214645e-06, "loss": 1.2283, "step": 2147 }, { "epoch": 1.99, "learning_rate": 5.391631622516697e-06, "loss": 1.2675, "step": 2148 }, { "epoch": 1.99, "learning_rate": 5.382760229385091e-06, "loss": 1.2037, "step": 2149 }, { "epoch": 1.99, "learning_rate": 5.373893452295739e-06, "loss": 1.2686, "step": 2150 }, { "epoch": 1.99, "learning_rate": 5.365031300113119e-06, "loss": 1.2867, "step": 2151 }, { "epoch": 1.99, "learning_rate": 5.356173781697083e-06, "loss": 1.2715, "step": 2152 }, { "epoch": 1.99, "learning_rate": 5.347320905902855e-06, "loss": 1.3058, "step": 2153 }, { "epoch": 1.99, "learning_rate": 5.3384726815810164e-06, "loss": 1.1333, "step": 2154 }, { "epoch": 2.0, "learning_rate": 5.32962911757749e-06, "loss": 1.2657, "step": 2155 }, { "epoch": 2.0, "learning_rate": 5.3207902227335494e-06, "loss": 1.2302, "step": 2156 }, { "epoch": 2.0, "learning_rate": 5.311956005885796e-06, "loss": 1.227, "step": 2157 }, { "epoch": 2.0, "learning_rate": 5.3031264758661595e-06, "loss": 1.1452, "step": 2158 }, { "epoch": 2.0, "learning_rate": 5.294301641501875e-06, "loss": 1.2519, "step": 2159 }, { "epoch": 2.0, "learning_rate": 5.285481511615491e-06, "loss": 1.2751, "step": 2160 }, { "epoch": 2.0, "learning_rate": 5.2766660950248496e-06, "loss": 1.335, "step": 2161 }, { "epoch": 2.0, "learning_rate": 5.267855400543079e-06, "loss": 0.9602, "step": 2162 }, { "epoch": 2.0, "learning_rate": 5.259049436978595e-06, "loss": 1.0134, "step": 2163 }, { "epoch": 2.0, "learning_rate": 5.2502482131350675e-06, "loss": 0.985, "step": 2164 }, { "epoch": 2.0, "learning_rate": 5.241451737811443e-06, "loss": 1.0215, "step": 2165 }, { "epoch": 2.01, "learning_rate": 5.232660019801914e-06, "loss": 0.9734, "step": 2166 }, { "epoch": 2.01, "learning_rate": 5.22387306789592e-06, "loss": 0.9611, "step": 2167 }, { "epoch": 2.01, "learning_rate": 5.2150908908781335e-06, "loss": 1.0326, "step": 2168 }, { "epoch": 2.01, "learning_rate": 5.206313497528453e-06, "loss": 0.9918, "step": 2169 }, { "epoch": 2.01, "learning_rate": 5.197540896621996e-06, "loss": 1.0387, "step": 2170 }, { "epoch": 2.01, "learning_rate": 5.188773096929088e-06, "loss": 1.0189, "step": 2171 }, { "epoch": 2.01, "learning_rate": 5.180010107215253e-06, "loss": 0.9866, "step": 2172 }, { "epoch": 2.01, "learning_rate": 5.1712519362412105e-06, "loss": 1.0199, "step": 2173 }, { "epoch": 2.01, "learning_rate": 5.162498592762859e-06, "loss": 1.0053, "step": 2174 }, { "epoch": 2.01, "learning_rate": 5.153750085531272e-06, "loss": 1.0209, "step": 2175 }, { "epoch": 2.01, "learning_rate": 5.145006423292688e-06, "loss": 0.9632, "step": 2176 }, { "epoch": 2.02, "learning_rate": 5.1362676147885e-06, "loss": 0.9862, "step": 2177 }, { "epoch": 2.02, "learning_rate": 5.1275336687552554e-06, "loss": 0.9579, "step": 2178 }, { "epoch": 2.02, "learning_rate": 5.118804593924626e-06, "loss": 0.9427, "step": 2179 }, { "epoch": 2.02, "learning_rate": 5.110080399023429e-06, "loss": 0.994, "step": 2180 }, { "epoch": 2.02, "learning_rate": 5.101361092773594e-06, "loss": 0.9772, "step": 2181 }, { "epoch": 2.02, "learning_rate": 5.0926466838921715e-06, "loss": 1.0274, "step": 2182 }, { "epoch": 2.02, "learning_rate": 5.083937181091301e-06, "loss": 0.9808, "step": 2183 }, { "epoch": 2.02, "learning_rate": 5.075232593078232e-06, "loss": 0.9781, "step": 2184 }, { "epoch": 2.02, "learning_rate": 5.0665329285552925e-06, "loss": 1.0795, "step": 2185 }, { "epoch": 2.02, "learning_rate": 5.057838196219892e-06, "loss": 0.9282, "step": 2186 }, { "epoch": 2.02, "learning_rate": 5.049148404764508e-06, "loss": 0.9759, "step": 2187 }, { "epoch": 2.03, "learning_rate": 5.040463562876678e-06, "loss": 0.9334, "step": 2188 }, { "epoch": 2.03, "learning_rate": 5.0317836792389925e-06, "loss": 1.0348, "step": 2189 }, { "epoch": 2.03, "learning_rate": 5.023108762529082e-06, "loss": 0.9691, "step": 2190 }, { "epoch": 2.03, "learning_rate": 5.014438821419614e-06, "loss": 0.8948, "step": 2191 }, { "epoch": 2.03, "learning_rate": 5.005773864578283e-06, "loss": 0.9966, "step": 2192 }, { "epoch": 2.03, "learning_rate": 4.997113900667794e-06, "loss": 0.9417, "step": 2193 }, { "epoch": 2.03, "learning_rate": 4.9884589383458695e-06, "loss": 1.0013, "step": 2194 }, { "epoch": 2.03, "learning_rate": 4.979808986265224e-06, "loss": 0.9818, "step": 2195 }, { "epoch": 2.03, "learning_rate": 4.9711640530735715e-06, "loss": 0.948, "step": 2196 }, { "epoch": 2.03, "learning_rate": 4.962524147413595e-06, "loss": 0.9727, "step": 2197 }, { "epoch": 2.04, "learning_rate": 4.9538892779229645e-06, "loss": 0.9633, "step": 2198 }, { "epoch": 2.04, "learning_rate": 4.945259453234308e-06, "loss": 0.8865, "step": 2199 }, { "epoch": 2.04, "learning_rate": 4.936634681975212e-06, "loss": 0.9884, "step": 2200 }, { "epoch": 2.04, "learning_rate": 4.928014972768213e-06, "loss": 1.0021, "step": 2201 }, { "epoch": 2.04, "learning_rate": 4.9194003342307805e-06, "loss": 0.9541, "step": 2202 }, { "epoch": 2.04, "learning_rate": 4.910790774975324e-06, "loss": 0.9602, "step": 2203 }, { "epoch": 2.04, "learning_rate": 4.902186303609165e-06, "loss": 0.8968, "step": 2204 }, { "epoch": 2.04, "learning_rate": 4.893586928734547e-06, "loss": 0.9633, "step": 2205 }, { "epoch": 2.04, "learning_rate": 4.884992658948618e-06, "loss": 0.9245, "step": 2206 }, { "epoch": 2.04, "learning_rate": 4.8764035028434074e-06, "loss": 0.9632, "step": 2207 }, { "epoch": 2.04, "learning_rate": 4.867819469005851e-06, "loss": 1.0193, "step": 2208 }, { "epoch": 2.05, "learning_rate": 4.859240566017756e-06, "loss": 0.9252, "step": 2209 }, { "epoch": 2.05, "learning_rate": 4.850666802455799e-06, "loss": 0.9955, "step": 2210 }, { "epoch": 2.05, "learning_rate": 4.842098186891521e-06, "loss": 0.9193, "step": 2211 }, { "epoch": 2.05, "learning_rate": 4.833534727891316e-06, "loss": 0.9789, "step": 2212 }, { "epoch": 2.05, "learning_rate": 4.8249764340164225e-06, "loss": 0.9754, "step": 2213 }, { "epoch": 2.05, "learning_rate": 4.816423313822911e-06, "loss": 0.9678, "step": 2214 }, { "epoch": 2.05, "learning_rate": 4.807875375861684e-06, "loss": 0.9716, "step": 2215 }, { "epoch": 2.05, "learning_rate": 4.799332628678465e-06, "loss": 1.0002, "step": 2216 }, { "epoch": 2.05, "learning_rate": 4.790795080813783e-06, "loss": 0.9367, "step": 2217 }, { "epoch": 2.05, "learning_rate": 4.782262740802975e-06, "loss": 0.9414, "step": 2218 }, { "epoch": 2.05, "learning_rate": 4.773735617176166e-06, "loss": 1.021, "step": 2219 }, { "epoch": 2.06, "learning_rate": 4.765213718458268e-06, "loss": 1.0161, "step": 2220 }, { "epoch": 2.06, "learning_rate": 4.7566970531689694e-06, "loss": 0.9691, "step": 2221 }, { "epoch": 2.06, "learning_rate": 4.748185629822727e-06, "loss": 0.9846, "step": 2222 }, { "epoch": 2.06, "learning_rate": 4.739679456928756e-06, "loss": 0.9428, "step": 2223 }, { "epoch": 2.06, "learning_rate": 4.731178542991024e-06, "loss": 0.9771, "step": 2224 }, { "epoch": 2.06, "learning_rate": 4.7226828965082404e-06, "loss": 0.9991, "step": 2225 }, { "epoch": 2.06, "learning_rate": 4.714192525973848e-06, "loss": 0.9876, "step": 2226 }, { "epoch": 2.06, "learning_rate": 4.705707439876014e-06, "loss": 0.9961, "step": 2227 }, { "epoch": 2.06, "learning_rate": 4.697227646697627e-06, "loss": 0.9345, "step": 2228 }, { "epoch": 2.06, "learning_rate": 4.6887531549162804e-06, "loss": 0.9052, "step": 2229 }, { "epoch": 2.06, "learning_rate": 4.680283973004262e-06, "loss": 0.9385, "step": 2230 }, { "epoch": 2.07, "learning_rate": 4.671820109428564e-06, "loss": 0.9478, "step": 2231 }, { "epoch": 2.07, "learning_rate": 4.663361572650851e-06, "loss": 1.0194, "step": 2232 }, { "epoch": 2.07, "learning_rate": 4.6549083711274675e-06, "loss": 0.9588, "step": 2233 }, { "epoch": 2.07, "learning_rate": 4.646460513309427e-06, "loss": 0.9922, "step": 2234 }, { "epoch": 2.07, "learning_rate": 4.638018007642387e-06, "loss": 0.8988, "step": 2235 }, { "epoch": 2.07, "learning_rate": 4.629580862566669e-06, "loss": 0.9035, "step": 2236 }, { "epoch": 2.07, "learning_rate": 4.621149086517229e-06, "loss": 0.946, "step": 2237 }, { "epoch": 2.07, "learning_rate": 4.612722687923658e-06, "loss": 0.964, "step": 2238 }, { "epoch": 2.07, "learning_rate": 4.604301675210165e-06, "loss": 0.9543, "step": 2239 }, { "epoch": 2.07, "learning_rate": 4.595886056795582e-06, "loss": 0.9767, "step": 2240 }, { "epoch": 2.07, "learning_rate": 4.587475841093345e-06, "loss": 0.9481, "step": 2241 }, { "epoch": 2.08, "learning_rate": 4.579071036511486e-06, "loss": 0.8985, "step": 2242 }, { "epoch": 2.08, "learning_rate": 4.5706716514526314e-06, "loss": 0.939, "step": 2243 }, { "epoch": 2.08, "learning_rate": 4.562277694313989e-06, "loss": 0.9516, "step": 2244 }, { "epoch": 2.08, "learning_rate": 4.5538891734873355e-06, "loss": 0.9607, "step": 2245 }, { "epoch": 2.08, "learning_rate": 4.545506097359023e-06, "loss": 0.9213, "step": 2246 }, { "epoch": 2.08, "learning_rate": 4.5371284743099445e-06, "loss": 0.9543, "step": 2247 }, { "epoch": 2.08, "learning_rate": 4.5287563127155545e-06, "loss": 1.0298, "step": 2248 }, { "epoch": 2.08, "learning_rate": 4.520389620945842e-06, "loss": 0.9572, "step": 2249 }, { "epoch": 2.08, "learning_rate": 4.512028407365331e-06, "loss": 0.9687, "step": 2250 }, { "epoch": 2.08, "learning_rate": 4.503672680333064e-06, "loss": 0.9492, "step": 2251 }, { "epoch": 2.09, "learning_rate": 4.495322448202603e-06, "loss": 0.9607, "step": 2252 }, { "epoch": 2.09, "learning_rate": 4.486977719322013e-06, "loss": 1.0169, "step": 2253 }, { "epoch": 2.09, "learning_rate": 4.47863850203386e-06, "loss": 0.9772, "step": 2254 }, { "epoch": 2.09, "learning_rate": 4.470304804675197e-06, "loss": 0.9505, "step": 2255 }, { "epoch": 2.09, "learning_rate": 4.46197663557756e-06, "loss": 0.9397, "step": 2256 }, { "epoch": 2.09, "learning_rate": 4.453654003066961e-06, "loss": 0.9905, "step": 2257 }, { "epoch": 2.09, "learning_rate": 4.445336915463874e-06, "loss": 0.987, "step": 2258 }, { "epoch": 2.09, "learning_rate": 4.437025381083223e-06, "loss": 0.9528, "step": 2259 }, { "epoch": 2.09, "learning_rate": 4.428719408234392e-06, "loss": 0.9753, "step": 2260 }, { "epoch": 2.09, "learning_rate": 4.420419005221198e-06, "loss": 0.9988, "step": 2261 }, { "epoch": 2.09, "learning_rate": 4.412124180341893e-06, "loss": 1.0296, "step": 2262 }, { "epoch": 2.1, "learning_rate": 4.403834941889151e-06, "loss": 0.9939, "step": 2263 }, { "epoch": 2.1, "learning_rate": 4.395551298150056e-06, "loss": 0.9725, "step": 2264 }, { "epoch": 2.1, "learning_rate": 4.387273257406108e-06, "loss": 0.947, "step": 2265 }, { "epoch": 2.1, "learning_rate": 4.379000827933196e-06, "loss": 0.8971, "step": 2266 }, { "epoch": 2.1, "learning_rate": 4.370734018001609e-06, "loss": 0.9736, "step": 2267 }, { "epoch": 2.1, "learning_rate": 4.362472835876011e-06, "loss": 1.0235, "step": 2268 }, { "epoch": 2.1, "learning_rate": 4.35421728981544e-06, "loss": 0.9385, "step": 2269 }, { "epoch": 2.1, "learning_rate": 4.345967388073303e-06, "loss": 1.0083, "step": 2270 }, { "epoch": 2.1, "learning_rate": 4.337723138897362e-06, "loss": 0.9429, "step": 2271 }, { "epoch": 2.1, "learning_rate": 4.329484550529726e-06, "loss": 0.9483, "step": 2272 }, { "epoch": 2.1, "learning_rate": 4.321251631206849e-06, "loss": 0.9194, "step": 2273 }, { "epoch": 2.11, "learning_rate": 4.3130243891595136e-06, "loss": 1.0464, "step": 2274 }, { "epoch": 2.11, "learning_rate": 4.304802832612829e-06, "loss": 0.9615, "step": 2275 }, { "epoch": 2.11, "learning_rate": 4.296586969786219e-06, "loss": 0.9112, "step": 2276 }, { "epoch": 2.11, "learning_rate": 4.288376808893414e-06, "loss": 1.0631, "step": 2277 }, { "epoch": 2.11, "learning_rate": 4.280172358142448e-06, "loss": 1.0547, "step": 2278 }, { "epoch": 2.11, "learning_rate": 4.2719736257356425e-06, "loss": 0.8878, "step": 2279 }, { "epoch": 2.11, "learning_rate": 4.263780619869607e-06, "loss": 0.9064, "step": 2280 }, { "epoch": 2.11, "learning_rate": 4.255593348735217e-06, "loss": 0.9651, "step": 2281 }, { "epoch": 2.11, "learning_rate": 4.247411820517622e-06, "loss": 0.9608, "step": 2282 }, { "epoch": 2.11, "learning_rate": 4.239236043396229e-06, "loss": 0.9496, "step": 2283 }, { "epoch": 2.11, "learning_rate": 4.231066025544695e-06, "loss": 0.9412, "step": 2284 }, { "epoch": 2.12, "learning_rate": 4.222901775130917e-06, "loss": 0.9406, "step": 2285 }, { "epoch": 2.12, "learning_rate": 4.214743300317034e-06, "loss": 0.9279, "step": 2286 }, { "epoch": 2.12, "learning_rate": 4.206590609259395e-06, "loss": 0.9207, "step": 2287 }, { "epoch": 2.12, "learning_rate": 4.198443710108583e-06, "loss": 0.9328, "step": 2288 }, { "epoch": 2.12, "learning_rate": 4.190302611009382e-06, "loss": 0.8876, "step": 2289 }, { "epoch": 2.12, "learning_rate": 4.182167320100782e-06, "loss": 0.8931, "step": 2290 }, { "epoch": 2.12, "learning_rate": 4.174037845515961e-06, "loss": 1.0141, "step": 2291 }, { "epoch": 2.12, "learning_rate": 4.165914195382288e-06, "loss": 0.9568, "step": 2292 }, { "epoch": 2.12, "learning_rate": 4.1577963778213044e-06, "loss": 0.9471, "step": 2293 }, { "epoch": 2.12, "learning_rate": 4.149684400948724e-06, "loss": 0.9183, "step": 2294 }, { "epoch": 2.12, "learning_rate": 4.141578272874417e-06, "loss": 0.9665, "step": 2295 }, { "epoch": 2.13, "learning_rate": 4.133478001702414e-06, "loss": 0.9732, "step": 2296 }, { "epoch": 2.13, "learning_rate": 4.12538359553088e-06, "loss": 0.9731, "step": 2297 }, { "epoch": 2.13, "learning_rate": 4.117295062452122e-06, "loss": 1.0443, "step": 2298 }, { "epoch": 2.13, "learning_rate": 4.1092124105525775e-06, "loss": 0.9419, "step": 2299 }, { "epoch": 2.13, "learning_rate": 4.101135647912803e-06, "loss": 0.9331, "step": 2300 }, { "epoch": 2.13, "learning_rate": 4.093064782607464e-06, "loss": 1.0174, "step": 2301 }, { "epoch": 2.13, "learning_rate": 4.084999822705335e-06, "loss": 0.9337, "step": 2302 }, { "epoch": 2.13, "learning_rate": 4.076940776269281e-06, "loss": 0.9545, "step": 2303 }, { "epoch": 2.13, "learning_rate": 4.068887651356261e-06, "loss": 0.9283, "step": 2304 }, { "epoch": 2.13, "learning_rate": 4.0608404560173095e-06, "loss": 1.0099, "step": 2305 }, { "epoch": 2.14, "learning_rate": 4.052799198297536e-06, "loss": 0.9765, "step": 2306 }, { "epoch": 2.14, "learning_rate": 4.044763886236112e-06, "loss": 0.9824, "step": 2307 }, { "epoch": 2.14, "learning_rate": 4.036734527866266e-06, "loss": 0.9475, "step": 2308 }, { "epoch": 2.14, "learning_rate": 4.028711131215278e-06, "loss": 0.9423, "step": 2309 }, { "epoch": 2.14, "learning_rate": 4.0206937043044545e-06, "loss": 0.9539, "step": 2310 }, { "epoch": 2.14, "learning_rate": 4.012682255149148e-06, "loss": 0.9158, "step": 2311 }, { "epoch": 2.14, "learning_rate": 4.004676791758732e-06, "loss": 0.9584, "step": 2312 }, { "epoch": 2.14, "learning_rate": 3.99667732213659e-06, "loss": 0.8927, "step": 2313 }, { "epoch": 2.14, "learning_rate": 3.988683854280121e-06, "loss": 1.0205, "step": 2314 }, { "epoch": 2.14, "learning_rate": 3.9806963961807135e-06, "loss": 0.9787, "step": 2315 }, { "epoch": 2.14, "learning_rate": 3.9727149558237576e-06, "loss": 0.9245, "step": 2316 }, { "epoch": 2.15, "learning_rate": 3.964739541188624e-06, "loss": 0.9428, "step": 2317 }, { "epoch": 2.15, "learning_rate": 3.956770160248658e-06, "loss": 0.893, "step": 2318 }, { "epoch": 2.15, "learning_rate": 3.948806820971174e-06, "loss": 0.9275, "step": 2319 }, { "epoch": 2.15, "learning_rate": 3.940849531317446e-06, "loss": 0.9962, "step": 2320 }, { "epoch": 2.15, "learning_rate": 3.932898299242699e-06, "loss": 0.9244, "step": 2321 }, { "epoch": 2.15, "learning_rate": 3.924953132696105e-06, "loss": 1.0175, "step": 2322 }, { "epoch": 2.15, "learning_rate": 3.9170140396207665e-06, "loss": 1.0212, "step": 2323 }, { "epoch": 2.15, "learning_rate": 3.9090810279537204e-06, "loss": 0.9389, "step": 2324 }, { "epoch": 2.15, "learning_rate": 3.901154105625921e-06, "loss": 0.9699, "step": 2325 }, { "epoch": 2.15, "learning_rate": 3.8932332805622315e-06, "loss": 0.9909, "step": 2326 }, { "epoch": 2.15, "learning_rate": 3.885318560681427e-06, "loss": 0.9606, "step": 2327 }, { "epoch": 2.16, "learning_rate": 3.8774099538961705e-06, "loss": 0.9854, "step": 2328 }, { "epoch": 2.16, "learning_rate": 3.869507468113022e-06, "loss": 1.0057, "step": 2329 }, { "epoch": 2.16, "learning_rate": 3.861611111232417e-06, "loss": 0.885, "step": 2330 }, { "epoch": 2.16, "learning_rate": 3.853720891148659e-06, "loss": 0.9271, "step": 2331 }, { "epoch": 2.16, "learning_rate": 3.845836815749927e-06, "loss": 0.9712, "step": 2332 }, { "epoch": 2.16, "learning_rate": 3.837958892918249e-06, "loss": 0.9485, "step": 2333 }, { "epoch": 2.16, "learning_rate": 3.830087130529505e-06, "loss": 1.0411, "step": 2334 }, { "epoch": 2.16, "learning_rate": 3.822221536453417e-06, "loss": 0.9776, "step": 2335 }, { "epoch": 2.16, "learning_rate": 3.8143621185535397e-06, "loss": 0.9421, "step": 2336 }, { "epoch": 2.16, "learning_rate": 3.8065088846872532e-06, "loss": 1.0431, "step": 2337 }, { "epoch": 2.16, "learning_rate": 3.79866184270575e-06, "loss": 1.0368, "step": 2338 }, { "epoch": 2.17, "learning_rate": 3.790821000454039e-06, "loss": 0.939, "step": 2339 }, { "epoch": 2.17, "learning_rate": 3.782986365770931e-06, "loss": 0.9377, "step": 2340 }, { "epoch": 2.17, "learning_rate": 3.7751579464890264e-06, "loss": 0.9976, "step": 2341 }, { "epoch": 2.17, "learning_rate": 3.767335750434714e-06, "loss": 0.8741, "step": 2342 }, { "epoch": 2.17, "learning_rate": 3.759519785428164e-06, "loss": 0.941, "step": 2343 }, { "epoch": 2.17, "learning_rate": 3.751710059283311e-06, "loss": 0.9485, "step": 2344 }, { "epoch": 2.17, "learning_rate": 3.7439065798078565e-06, "loss": 0.9787, "step": 2345 }, { "epoch": 2.17, "learning_rate": 3.736109354803261e-06, "loss": 0.966, "step": 2346 }, { "epoch": 2.17, "learning_rate": 3.7283183920647194e-06, "loss": 0.8883, "step": 2347 }, { "epoch": 2.17, "learning_rate": 3.720533699381177e-06, "loss": 0.9538, "step": 2348 }, { "epoch": 2.17, "learning_rate": 3.7127552845353077e-06, "loss": 0.9924, "step": 2349 }, { "epoch": 2.18, "learning_rate": 3.7049831553035085e-06, "loss": 0.9277, "step": 2350 }, { "epoch": 2.18, "learning_rate": 3.6972173194558937e-06, "loss": 0.9755, "step": 2351 }, { "epoch": 2.18, "learning_rate": 3.689457784756285e-06, "loss": 0.915, "step": 2352 }, { "epoch": 2.18, "learning_rate": 3.6817045589622037e-06, "loss": 0.9669, "step": 2353 }, { "epoch": 2.18, "learning_rate": 3.673957649824865e-06, "loss": 1.0198, "step": 2354 }, { "epoch": 2.18, "learning_rate": 3.6662170650891694e-06, "loss": 0.9903, "step": 2355 }, { "epoch": 2.18, "learning_rate": 3.6584828124936933e-06, "loss": 0.9968, "step": 2356 }, { "epoch": 2.18, "learning_rate": 3.650754899770684e-06, "loss": 0.9485, "step": 2357 }, { "epoch": 2.18, "learning_rate": 3.64303333464605e-06, "loss": 0.9429, "step": 2358 }, { "epoch": 2.18, "learning_rate": 3.635318124839353e-06, "loss": 0.9637, "step": 2359 }, { "epoch": 2.19, "learning_rate": 3.627609278063803e-06, "loss": 0.9592, "step": 2360 }, { "epoch": 2.19, "learning_rate": 3.6199068020262497e-06, "loss": 0.9784, "step": 2361 }, { "epoch": 2.19, "learning_rate": 3.612210704427165e-06, "loss": 1.021, "step": 2362 }, { "epoch": 2.19, "learning_rate": 3.604520992960654e-06, "loss": 0.983, "step": 2363 }, { "epoch": 2.19, "learning_rate": 3.596837675314433e-06, "loss": 1.0135, "step": 2364 }, { "epoch": 2.19, "learning_rate": 3.5891607591698317e-06, "loss": 0.9895, "step": 2365 }, { "epoch": 2.19, "learning_rate": 3.581490252201768e-06, "loss": 0.9646, "step": 2366 }, { "epoch": 2.19, "learning_rate": 3.5738261620787616e-06, "loss": 0.955, "step": 2367 }, { "epoch": 2.19, "learning_rate": 3.5661684964629174e-06, "loss": 1.0105, "step": 2368 }, { "epoch": 2.19, "learning_rate": 3.558517263009912e-06, "loss": 0.952, "step": 2369 }, { "epoch": 2.19, "learning_rate": 3.5508724693689957e-06, "loss": 1.003, "step": 2370 }, { "epoch": 2.2, "learning_rate": 3.5432341231829803e-06, "loss": 0.9798, "step": 2371 }, { "epoch": 2.2, "learning_rate": 3.53560223208823e-06, "loss": 1.0017, "step": 2372 }, { "epoch": 2.2, "learning_rate": 3.5279768037146567e-06, "loss": 0.9517, "step": 2373 }, { "epoch": 2.2, "learning_rate": 3.52035784568571e-06, "loss": 0.9938, "step": 2374 }, { "epoch": 2.2, "learning_rate": 3.5127453656183743e-06, "loss": 0.9688, "step": 2375 }, { "epoch": 2.2, "learning_rate": 3.505139371123153e-06, "loss": 0.9272, "step": 2376 }, { "epoch": 2.2, "learning_rate": 3.49753986980407e-06, "loss": 0.9581, "step": 2377 }, { "epoch": 2.2, "learning_rate": 3.4899468692586537e-06, "loss": 0.9489, "step": 2378 }, { "epoch": 2.2, "learning_rate": 3.4823603770779356e-06, "loss": 0.9617, "step": 2379 }, { "epoch": 2.2, "learning_rate": 3.474780400846445e-06, "loss": 0.8922, "step": 2380 }, { "epoch": 2.2, "learning_rate": 3.4672069481421845e-06, "loss": 0.9925, "step": 2381 }, { "epoch": 2.21, "learning_rate": 3.4596400265366458e-06, "loss": 1.0457, "step": 2382 }, { "epoch": 2.21, "learning_rate": 3.4520796435947877e-06, "loss": 0.9742, "step": 2383 }, { "epoch": 2.21, "learning_rate": 3.444525806875034e-06, "loss": 0.9785, "step": 2384 }, { "epoch": 2.21, "learning_rate": 3.4369785239292608e-06, "loss": 0.9106, "step": 2385 }, { "epoch": 2.21, "learning_rate": 3.429437802302793e-06, "loss": 0.935, "step": 2386 }, { "epoch": 2.21, "learning_rate": 3.4219036495343984e-06, "loss": 0.9725, "step": 2387 }, { "epoch": 2.21, "learning_rate": 3.414376073156275e-06, "loss": 0.9389, "step": 2388 }, { "epoch": 2.21, "learning_rate": 3.4068550806940483e-06, "loss": 0.9924, "step": 2389 }, { "epoch": 2.21, "learning_rate": 3.399340679666756e-06, "loss": 0.9628, "step": 2390 }, { "epoch": 2.21, "learning_rate": 3.3918328775868524e-06, "loss": 0.9937, "step": 2391 }, { "epoch": 2.21, "learning_rate": 3.3843316819601936e-06, "loss": 0.9298, "step": 2392 }, { "epoch": 2.22, "learning_rate": 3.3768371002860277e-06, "loss": 0.8907, "step": 2393 }, { "epoch": 2.22, "learning_rate": 3.3693491400569935e-06, "loss": 0.9546, "step": 2394 }, { "epoch": 2.22, "learning_rate": 3.3618678087591093e-06, "loss": 1.0808, "step": 2395 }, { "epoch": 2.22, "learning_rate": 3.3543931138717667e-06, "loss": 0.8927, "step": 2396 }, { "epoch": 2.22, "learning_rate": 3.346925062867724e-06, "loss": 0.993, "step": 2397 }, { "epoch": 2.22, "learning_rate": 3.3394636632130893e-06, "loss": 1.0235, "step": 2398 }, { "epoch": 2.22, "learning_rate": 3.3320089223673315e-06, "loss": 0.9652, "step": 2399 }, { "epoch": 2.22, "learning_rate": 3.324560847783256e-06, "loss": 0.9399, "step": 2400 }, { "epoch": 2.22, "learning_rate": 3.3171194469070067e-06, "loss": 0.9288, "step": 2401 }, { "epoch": 2.22, "learning_rate": 3.309684727178054e-06, "loss": 0.9522, "step": 2402 }, { "epoch": 2.22, "learning_rate": 3.30225669602919e-06, "loss": 0.9512, "step": 2403 }, { "epoch": 2.23, "learning_rate": 3.2948353608865192e-06, "loss": 0.9511, "step": 2404 }, { "epoch": 2.23, "learning_rate": 3.2874207291694517e-06, "loss": 0.9526, "step": 2405 }, { "epoch": 2.23, "learning_rate": 3.280012808290697e-06, "loss": 0.9689, "step": 2406 }, { "epoch": 2.23, "learning_rate": 3.272611605656253e-06, "loss": 1.0053, "step": 2407 }, { "epoch": 2.23, "learning_rate": 3.2652171286654032e-06, "loss": 0.8955, "step": 2408 }, { "epoch": 2.23, "learning_rate": 3.257829384710709e-06, "loss": 1.0124, "step": 2409 }, { "epoch": 2.23, "learning_rate": 3.2504483811779954e-06, "loss": 0.9366, "step": 2410 }, { "epoch": 2.23, "learning_rate": 3.2430741254463537e-06, "loss": 0.9515, "step": 2411 }, { "epoch": 2.23, "learning_rate": 3.235706624888125e-06, "loss": 0.9732, "step": 2412 }, { "epoch": 2.23, "learning_rate": 3.2283458868689045e-06, "loss": 0.9488, "step": 2413 }, { "epoch": 2.24, "learning_rate": 3.2209919187475136e-06, "loss": 1.0145, "step": 2414 }, { "epoch": 2.24, "learning_rate": 3.2136447278760165e-06, "loss": 0.9879, "step": 2415 }, { "epoch": 2.24, "learning_rate": 3.206304321599698e-06, "loss": 0.9576, "step": 2416 }, { "epoch": 2.24, "learning_rate": 3.1989707072570643e-06, "loss": 0.9857, "step": 2417 }, { "epoch": 2.24, "learning_rate": 3.191643892179821e-06, "loss": 0.9681, "step": 2418 }, { "epoch": 2.24, "learning_rate": 3.184323883692887e-06, "loss": 0.9075, "step": 2419 }, { "epoch": 2.24, "learning_rate": 3.1770106891143714e-06, "loss": 0.9455, "step": 2420 }, { "epoch": 2.24, "learning_rate": 3.1697043157555716e-06, "loss": 0.9467, "step": 2421 }, { "epoch": 2.24, "learning_rate": 3.162404770920967e-06, "loss": 0.9752, "step": 2422 }, { "epoch": 2.24, "learning_rate": 3.15511206190821e-06, "loss": 0.8592, "step": 2423 }, { "epoch": 2.24, "learning_rate": 3.1478261960081157e-06, "loss": 0.987, "step": 2424 }, { "epoch": 2.25, "learning_rate": 3.140547180504663e-06, "loss": 0.966, "step": 2425 }, { "epoch": 2.25, "learning_rate": 3.1332750226749776e-06, "loss": 0.9865, "step": 2426 }, { "epoch": 2.25, "learning_rate": 3.126009729789331e-06, "loss": 0.9203, "step": 2427 }, { "epoch": 2.25, "learning_rate": 3.1187513091111345e-06, "loss": 0.9411, "step": 2428 }, { "epoch": 2.25, "learning_rate": 3.1114997678969226e-06, "loss": 0.9573, "step": 2429 }, { "epoch": 2.25, "learning_rate": 3.104255113396363e-06, "loss": 0.9852, "step": 2430 }, { "epoch": 2.25, "learning_rate": 3.0970173528522206e-06, "loss": 0.9981, "step": 2431 }, { "epoch": 2.25, "learning_rate": 3.089786493500385e-06, "loss": 0.9276, "step": 2432 }, { "epoch": 2.25, "learning_rate": 3.082562542569839e-06, "loss": 1.0557, "step": 2433 }, { "epoch": 2.25, "learning_rate": 3.075345507282661e-06, "loss": 0.9379, "step": 2434 }, { "epoch": 2.25, "learning_rate": 3.0681353948540127e-06, "loss": 0.9863, "step": 2435 }, { "epoch": 2.26, "learning_rate": 3.060932212492138e-06, "loss": 0.9947, "step": 2436 }, { "epoch": 2.26, "learning_rate": 3.05373596739835e-06, "loss": 0.8928, "step": 2437 }, { "epoch": 2.26, "learning_rate": 3.0465466667670286e-06, "loss": 1.0045, "step": 2438 }, { "epoch": 2.26, "learning_rate": 3.039364317785609e-06, "loss": 0.9563, "step": 2439 }, { "epoch": 2.26, "learning_rate": 3.0321889276345773e-06, "loss": 0.9894, "step": 2440 }, { "epoch": 2.26, "learning_rate": 3.025020503487467e-06, "loss": 0.9919, "step": 2441 }, { "epoch": 2.26, "learning_rate": 3.017859052510834e-06, "loss": 1.0173, "step": 2442 }, { "epoch": 2.26, "learning_rate": 3.0107045818642788e-06, "loss": 0.9961, "step": 2443 }, { "epoch": 2.26, "learning_rate": 3.0035570987004136e-06, "loss": 0.9804, "step": 2444 }, { "epoch": 2.26, "learning_rate": 2.9964166101648683e-06, "loss": 0.9175, "step": 2445 }, { "epoch": 2.26, "learning_rate": 2.9892831233962803e-06, "loss": 0.9113, "step": 2446 }, { "epoch": 2.27, "learning_rate": 2.9821566455262897e-06, "loss": 0.8846, "step": 2447 }, { "epoch": 2.27, "learning_rate": 2.975037183679519e-06, "loss": 0.9605, "step": 2448 }, { "epoch": 2.27, "learning_rate": 2.9679247449735873e-06, "loss": 0.9507, "step": 2449 }, { "epoch": 2.27, "learning_rate": 2.9608193365190876e-06, "loss": 0.953, "step": 2450 }, { "epoch": 2.27, "learning_rate": 2.9537209654195853e-06, "loss": 0.9502, "step": 2451 }, { "epoch": 2.27, "learning_rate": 2.946629638771612e-06, "loss": 0.8777, "step": 2452 }, { "epoch": 2.27, "learning_rate": 2.9395453636646544e-06, "loss": 1.0699, "step": 2453 }, { "epoch": 2.27, "learning_rate": 2.93246814718115e-06, "loss": 0.9419, "step": 2454 }, { "epoch": 2.27, "learning_rate": 2.9253979963964808e-06, "loss": 0.9351, "step": 2455 }, { "epoch": 2.27, "learning_rate": 2.918334918378962e-06, "loss": 0.9265, "step": 2456 }, { "epoch": 2.27, "learning_rate": 2.911278920189842e-06, "loss": 0.9411, "step": 2457 }, { "epoch": 2.28, "learning_rate": 2.9042300088832877e-06, "loss": 0.9349, "step": 2458 }, { "epoch": 2.28, "learning_rate": 2.8971881915063817e-06, "loss": 0.9104, "step": 2459 }, { "epoch": 2.28, "learning_rate": 2.8901534750991158e-06, "loss": 0.9091, "step": 2460 }, { "epoch": 2.28, "learning_rate": 2.883125866694382e-06, "loss": 0.9489, "step": 2461 }, { "epoch": 2.28, "learning_rate": 2.8761053733179658e-06, "loss": 1.0014, "step": 2462 }, { "epoch": 2.28, "learning_rate": 2.869092001988543e-06, "loss": 0.9383, "step": 2463 }, { "epoch": 2.28, "learning_rate": 2.8620857597176586e-06, "loss": 0.9856, "step": 2464 }, { "epoch": 2.28, "learning_rate": 2.8550866535097433e-06, "loss": 0.9655, "step": 2465 }, { "epoch": 2.28, "learning_rate": 2.8480946903620866e-06, "loss": 0.9202, "step": 2466 }, { "epoch": 2.28, "learning_rate": 2.841109877264837e-06, "loss": 0.9255, "step": 2467 }, { "epoch": 2.29, "learning_rate": 2.834132221200998e-06, "loss": 0.9837, "step": 2468 }, { "epoch": 2.29, "learning_rate": 2.8271617291464183e-06, "loss": 0.9882, "step": 2469 }, { "epoch": 2.29, "learning_rate": 2.8201984080697763e-06, "loss": 0.9417, "step": 2470 }, { "epoch": 2.29, "learning_rate": 2.8132422649325896e-06, "loss": 0.9369, "step": 2471 }, { "epoch": 2.29, "learning_rate": 2.8062933066891975e-06, "loss": 0.9518, "step": 2472 }, { "epoch": 2.29, "learning_rate": 2.7993515402867556e-06, "loss": 1.0589, "step": 2473 }, { "epoch": 2.29, "learning_rate": 2.7924169726652308e-06, "loss": 0.9225, "step": 2474 }, { "epoch": 2.29, "learning_rate": 2.7854896107573902e-06, "loss": 0.9657, "step": 2475 }, { "epoch": 2.29, "learning_rate": 2.7785694614888024e-06, "loss": 0.912, "step": 2476 }, { "epoch": 2.29, "learning_rate": 2.771656531777819e-06, "loss": 0.95, "step": 2477 }, { "epoch": 2.29, "learning_rate": 2.7647508285355774e-06, "loss": 1.0114, "step": 2478 }, { "epoch": 2.3, "learning_rate": 2.75785235866599e-06, "loss": 0.9553, "step": 2479 }, { "epoch": 2.3, "learning_rate": 2.750961129065741e-06, "loss": 0.9486, "step": 2480 }, { "epoch": 2.3, "learning_rate": 2.7440771466242667e-06, "loss": 1.0155, "step": 2481 }, { "epoch": 2.3, "learning_rate": 2.7372004182237665e-06, "loss": 0.9867, "step": 2482 }, { "epoch": 2.3, "learning_rate": 2.730330950739185e-06, "loss": 1.0203, "step": 2483 }, { "epoch": 2.3, "learning_rate": 2.7234687510382098e-06, "loss": 0.9669, "step": 2484 }, { "epoch": 2.3, "learning_rate": 2.716613825981258e-06, "loss": 0.9719, "step": 2485 }, { "epoch": 2.3, "learning_rate": 2.709766182421479e-06, "loss": 0.9822, "step": 2486 }, { "epoch": 2.3, "learning_rate": 2.70292582720474e-06, "loss": 0.9329, "step": 2487 }, { "epoch": 2.3, "learning_rate": 2.6960927671696213e-06, "loss": 0.9719, "step": 2488 }, { "epoch": 2.3, "learning_rate": 2.6892670091474116e-06, "loss": 0.9226, "step": 2489 }, { "epoch": 2.31, "learning_rate": 2.6824485599620986e-06, "loss": 0.8829, "step": 2490 }, { "epoch": 2.31, "learning_rate": 2.675637426430363e-06, "loss": 0.9584, "step": 2491 }, { "epoch": 2.31, "learning_rate": 2.668833615361571e-06, "loss": 0.9483, "step": 2492 }, { "epoch": 2.31, "learning_rate": 2.662037133557772e-06, "loss": 0.985, "step": 2493 }, { "epoch": 2.31, "learning_rate": 2.6552479878136807e-06, "loss": 1.0308, "step": 2494 }, { "epoch": 2.31, "learning_rate": 2.648466184916684e-06, "loss": 0.9697, "step": 2495 }, { "epoch": 2.31, "learning_rate": 2.641691731646826e-06, "loss": 0.9127, "step": 2496 }, { "epoch": 2.31, "learning_rate": 2.634924634776805e-06, "loss": 0.9339, "step": 2497 }, { "epoch": 2.31, "learning_rate": 2.6281649010719577e-06, "loss": 0.9833, "step": 2498 }, { "epoch": 2.31, "learning_rate": 2.621412537290269e-06, "loss": 0.9535, "step": 2499 }, { "epoch": 2.31, "learning_rate": 2.6146675501823484e-06, "loss": 0.8898, "step": 2500 }, { "epoch": 2.32, "learning_rate": 2.6079299464914363e-06, "loss": 0.9991, "step": 2501 }, { "epoch": 2.32, "learning_rate": 2.601199732953387e-06, "loss": 0.8986, "step": 2502 }, { "epoch": 2.32, "learning_rate": 2.59447691629667e-06, "loss": 0.9524, "step": 2503 }, { "epoch": 2.32, "learning_rate": 2.5877615032423575e-06, "loss": 0.8741, "step": 2504 }, { "epoch": 2.32, "learning_rate": 2.5810535005041206e-06, "loss": 0.9258, "step": 2505 }, { "epoch": 2.32, "learning_rate": 2.574352914788223e-06, "loss": 0.9083, "step": 2506 }, { "epoch": 2.32, "learning_rate": 2.5676597527935133e-06, "loss": 0.9166, "step": 2507 }, { "epoch": 2.32, "learning_rate": 2.5609740212114154e-06, "loss": 1.0171, "step": 2508 }, { "epoch": 2.32, "learning_rate": 2.554295726725928e-06, "loss": 1.0113, "step": 2509 }, { "epoch": 2.32, "learning_rate": 2.5476248760136148e-06, "loss": 1.0001, "step": 2510 }, { "epoch": 2.32, "learning_rate": 2.540961475743594e-06, "loss": 0.9798, "step": 2511 }, { "epoch": 2.33, "learning_rate": 2.534305532577539e-06, "loss": 0.9692, "step": 2512 }, { "epoch": 2.33, "learning_rate": 2.52765705316967e-06, "loss": 0.984, "step": 2513 }, { "epoch": 2.33, "learning_rate": 2.5210160441667353e-06, "loss": 0.968, "step": 2514 }, { "epoch": 2.33, "learning_rate": 2.5143825122080246e-06, "loss": 0.9649, "step": 2515 }, { "epoch": 2.33, "learning_rate": 2.5077564639253493e-06, "loss": 0.9593, "step": 2516 }, { "epoch": 2.33, "learning_rate": 2.5011379059430406e-06, "loss": 1.003, "step": 2517 }, { "epoch": 2.33, "learning_rate": 2.494526844877938e-06, "loss": 0.9009, "step": 2518 }, { "epoch": 2.33, "learning_rate": 2.487923287339389e-06, "loss": 0.9331, "step": 2519 }, { "epoch": 2.33, "learning_rate": 2.4813272399292376e-06, "loss": 0.9038, "step": 2520 }, { "epoch": 2.33, "learning_rate": 2.474738709241824e-06, "loss": 0.9397, "step": 2521 }, { "epoch": 2.34, "learning_rate": 2.468157701863965e-06, "loss": 0.9937, "step": 2522 }, { "epoch": 2.34, "learning_rate": 2.461584224374963e-06, "loss": 1.0164, "step": 2523 }, { "epoch": 2.34, "learning_rate": 2.4550182833465908e-06, "loss": 0.9004, "step": 2524 }, { "epoch": 2.34, "learning_rate": 2.4484598853430865e-06, "loss": 0.9122, "step": 2525 }, { "epoch": 2.34, "learning_rate": 2.4419090369211475e-06, "loss": 0.978, "step": 2526 }, { "epoch": 2.34, "learning_rate": 2.435365744629923e-06, "loss": 0.9499, "step": 2527 }, { "epoch": 2.34, "learning_rate": 2.4288300150110077e-06, "loss": 0.9468, "step": 2528 }, { "epoch": 2.34, "learning_rate": 2.4223018545984366e-06, "loss": 0.9602, "step": 2529 }, { "epoch": 2.34, "learning_rate": 2.41578126991868e-06, "loss": 0.959, "step": 2530 }, { "epoch": 2.34, "learning_rate": 2.4092682674906253e-06, "loss": 1.0258, "step": 2531 }, { "epoch": 2.34, "learning_rate": 2.40276285382559e-06, "loss": 1.0166, "step": 2532 }, { "epoch": 2.35, "learning_rate": 2.3962650354272978e-06, "loss": 0.8914, "step": 2533 }, { "epoch": 2.35, "learning_rate": 2.3897748187918846e-06, "loss": 1.0279, "step": 2534 }, { "epoch": 2.35, "learning_rate": 2.3832922104078815e-06, "loss": 0.9365, "step": 2535 }, { "epoch": 2.35, "learning_rate": 2.3768172167562175e-06, "loss": 0.9072, "step": 2536 }, { "epoch": 2.35, "learning_rate": 2.370349844310206e-06, "loss": 0.9408, "step": 2537 }, { "epoch": 2.35, "learning_rate": 2.3638900995355418e-06, "loss": 0.9079, "step": 2538 }, { "epoch": 2.35, "learning_rate": 2.357437988890294e-06, "loss": 0.936, "step": 2539 }, { "epoch": 2.35, "learning_rate": 2.350993518824902e-06, "loss": 1.0038, "step": 2540 }, { "epoch": 2.35, "learning_rate": 2.3445566957821618e-06, "loss": 0.9566, "step": 2541 }, { "epoch": 2.35, "learning_rate": 2.338127526197228e-06, "loss": 0.9251, "step": 2542 }, { "epoch": 2.35, "learning_rate": 2.3317060164976033e-06, "loss": 1.046, "step": 2543 }, { "epoch": 2.36, "learning_rate": 2.325292173103132e-06, "loss": 0.9425, "step": 2544 }, { "epoch": 2.36, "learning_rate": 2.318886002425995e-06, "loss": 0.9583, "step": 2545 }, { "epoch": 2.36, "learning_rate": 2.312487510870698e-06, "loss": 0.9791, "step": 2546 }, { "epoch": 2.36, "learning_rate": 2.3060967048340756e-06, "loss": 1.0073, "step": 2547 }, { "epoch": 2.36, "learning_rate": 2.299713590705275e-06, "loss": 0.9986, "step": 2548 }, { "epoch": 2.36, "learning_rate": 2.293338174865758e-06, "loss": 0.9825, "step": 2549 }, { "epoch": 2.36, "learning_rate": 2.286970463689282e-06, "loss": 0.996, "step": 2550 }, { "epoch": 2.36, "learning_rate": 2.280610463541909e-06, "loss": 0.9468, "step": 2551 }, { "epoch": 2.36, "learning_rate": 2.274258180781991e-06, "loss": 0.9186, "step": 2552 }, { "epoch": 2.36, "learning_rate": 2.2679136217601605e-06, "loss": 0.9865, "step": 2553 }, { "epoch": 2.36, "learning_rate": 2.2615767928193335e-06, "loss": 0.9198, "step": 2554 }, { "epoch": 2.37, "learning_rate": 2.2552477002946936e-06, "loss": 1.0358, "step": 2555 }, { "epoch": 2.37, "learning_rate": 2.248926350513694e-06, "loss": 0.9377, "step": 2556 }, { "epoch": 2.37, "learning_rate": 2.2426127497960427e-06, "loss": 0.9069, "step": 2557 }, { "epoch": 2.37, "learning_rate": 2.2363069044537044e-06, "loss": 0.9606, "step": 2558 }, { "epoch": 2.37, "learning_rate": 2.230008820790888e-06, "loss": 0.928, "step": 2559 }, { "epoch": 2.37, "learning_rate": 2.2237185051040445e-06, "loss": 0.9274, "step": 2560 }, { "epoch": 2.37, "learning_rate": 2.2174359636818576e-06, "loss": 1.0288, "step": 2561 }, { "epoch": 2.37, "learning_rate": 2.2111612028052397e-06, "loss": 0.9529, "step": 2562 }, { "epoch": 2.37, "learning_rate": 2.204894228747325e-06, "loss": 0.9293, "step": 2563 }, { "epoch": 2.37, "learning_rate": 2.1986350477734632e-06, "loss": 0.9225, "step": 2564 }, { "epoch": 2.37, "learning_rate": 2.192383666141208e-06, "loss": 0.9072, "step": 2565 }, { "epoch": 2.38, "learning_rate": 2.1861400901003205e-06, "loss": 0.9508, "step": 2566 }, { "epoch": 2.38, "learning_rate": 2.17990432589276e-06, "loss": 0.9288, "step": 2567 }, { "epoch": 2.38, "learning_rate": 2.173676379752672e-06, "loss": 0.9317, "step": 2568 }, { "epoch": 2.38, "learning_rate": 2.1674562579063863e-06, "loss": 0.885, "step": 2569 }, { "epoch": 2.38, "learning_rate": 2.1612439665724128e-06, "loss": 0.9636, "step": 2570 }, { "epoch": 2.38, "learning_rate": 2.155039511961432e-06, "loss": 0.9685, "step": 2571 }, { "epoch": 2.38, "learning_rate": 2.148842900276288e-06, "loss": 0.9767, "step": 2572 }, { "epoch": 2.38, "learning_rate": 2.1426541377119903e-06, "loss": 0.9764, "step": 2573 }, { "epoch": 2.38, "learning_rate": 2.136473230455688e-06, "loss": 1.0315, "step": 2574 }, { "epoch": 2.38, "learning_rate": 2.13030018468669e-06, "loss": 1.0058, "step": 2575 }, { "epoch": 2.39, "learning_rate": 2.124135006576441e-06, "loss": 0.9609, "step": 2576 }, { "epoch": 2.39, "learning_rate": 2.1179777022885194e-06, "loss": 0.9348, "step": 2577 }, { "epoch": 2.39, "learning_rate": 2.1118282779786325e-06, "loss": 0.9536, "step": 2578 }, { "epoch": 2.39, "learning_rate": 2.10568673979461e-06, "loss": 0.962, "step": 2579 }, { "epoch": 2.39, "learning_rate": 2.099553093876401e-06, "loss": 0.9058, "step": 2580 }, { "epoch": 2.39, "learning_rate": 2.093427346356053e-06, "loss": 0.9379, "step": 2581 }, { "epoch": 2.39, "learning_rate": 2.0873095033577284e-06, "loss": 0.9404, "step": 2582 }, { "epoch": 2.39, "learning_rate": 2.081199570997684e-06, "loss": 1.0176, "step": 2583 }, { "epoch": 2.39, "learning_rate": 2.075097555384268e-06, "loss": 1.0066, "step": 2584 }, { "epoch": 2.39, "learning_rate": 2.0690034626179123e-06, "loss": 0.9501, "step": 2585 }, { "epoch": 2.39, "learning_rate": 2.0629172987911284e-06, "loss": 1.0146, "step": 2586 }, { "epoch": 2.4, "learning_rate": 2.0568390699885045e-06, "loss": 0.985, "step": 2587 }, { "epoch": 2.4, "learning_rate": 2.0507687822866897e-06, "loss": 0.8673, "step": 2588 }, { "epoch": 2.4, "learning_rate": 2.0447064417543994e-06, "loss": 0.945, "step": 2589 }, { "epoch": 2.4, "learning_rate": 2.0386520544524003e-06, "loss": 0.9721, "step": 2590 }, { "epoch": 2.4, "learning_rate": 2.0326056264335116e-06, "loss": 0.8842, "step": 2591 }, { "epoch": 2.4, "learning_rate": 2.026567163742591e-06, "loss": 0.8965, "step": 2592 }, { "epoch": 2.4, "learning_rate": 2.0205366724165352e-06, "loss": 0.9013, "step": 2593 }, { "epoch": 2.4, "learning_rate": 2.0145141584842744e-06, "loss": 0.9642, "step": 2594 }, { "epoch": 2.4, "learning_rate": 2.0084996279667568e-06, "loss": 0.9694, "step": 2595 }, { "epoch": 2.4, "learning_rate": 2.0024930868769578e-06, "loss": 0.9668, "step": 2596 }, { "epoch": 2.4, "learning_rate": 1.9964945412198545e-06, "loss": 0.9735, "step": 2597 }, { "epoch": 2.41, "learning_rate": 1.9905039969924388e-06, "loss": 1.0126, "step": 2598 }, { "epoch": 2.41, "learning_rate": 1.9845214601837025e-06, "loss": 0.9962, "step": 2599 }, { "epoch": 2.41, "learning_rate": 1.9785469367746313e-06, "loss": 0.9543, "step": 2600 }, { "epoch": 2.41, "learning_rate": 1.9725804327382004e-06, "loss": 0.8882, "step": 2601 }, { "epoch": 2.41, "learning_rate": 1.966621954039362e-06, "loss": 0.9938, "step": 2602 }, { "epoch": 2.41, "learning_rate": 1.9606715066350514e-06, "loss": 0.9379, "step": 2603 }, { "epoch": 2.41, "learning_rate": 1.9547290964741718e-06, "loss": 0.8861, "step": 2604 }, { "epoch": 2.41, "learning_rate": 1.948794729497594e-06, "loss": 0.9901, "step": 2605 }, { "epoch": 2.41, "learning_rate": 1.9428684116381457e-06, "loss": 1.0198, "step": 2606 }, { "epoch": 2.41, "learning_rate": 1.9369501488206066e-06, "loss": 0.9327, "step": 2607 }, { "epoch": 2.41, "learning_rate": 1.9310399469617048e-06, "loss": 1.0234, "step": 2608 }, { "epoch": 2.42, "learning_rate": 1.925137811970109e-06, "loss": 0.946, "step": 2609 }, { "epoch": 2.42, "learning_rate": 1.9192437497464257e-06, "loss": 1.0024, "step": 2610 }, { "epoch": 2.42, "learning_rate": 1.9133577661831858e-06, "loss": 0.9776, "step": 2611 }, { "epoch": 2.42, "learning_rate": 1.9074798671648477e-06, "loss": 0.9468, "step": 2612 }, { "epoch": 2.42, "learning_rate": 1.9016100585677844e-06, "loss": 0.9779, "step": 2613 }, { "epoch": 2.42, "learning_rate": 1.8957483462602866e-06, "loss": 0.8571, "step": 2614 }, { "epoch": 2.42, "learning_rate": 1.889894736102541e-06, "loss": 0.9781, "step": 2615 }, { "epoch": 2.42, "learning_rate": 1.884049233946641e-06, "loss": 0.9243, "step": 2616 }, { "epoch": 2.42, "learning_rate": 1.8782118456365727e-06, "loss": 0.9706, "step": 2617 }, { "epoch": 2.42, "learning_rate": 1.8723825770082116e-06, "loss": 0.9779, "step": 2618 }, { "epoch": 2.42, "learning_rate": 1.8665614338893135e-06, "loss": 0.9952, "step": 2619 }, { "epoch": 2.43, "learning_rate": 1.8607484220995121e-06, "loss": 0.926, "step": 2620 }, { "epoch": 2.43, "learning_rate": 1.8549435474503118e-06, "loss": 0.9046, "step": 2621 }, { "epoch": 2.43, "learning_rate": 1.849146815745081e-06, "loss": 0.9712, "step": 2622 }, { "epoch": 2.43, "learning_rate": 1.843358232779049e-06, "loss": 0.9483, "step": 2623 }, { "epoch": 2.43, "learning_rate": 1.837577804339299e-06, "loss": 0.9403, "step": 2624 }, { "epoch": 2.43, "learning_rate": 1.831805536204755e-06, "loss": 0.9972, "step": 2625 }, { "epoch": 2.43, "learning_rate": 1.8260414341461919e-06, "loss": 0.9946, "step": 2626 }, { "epoch": 2.43, "learning_rate": 1.8202855039262157e-06, "loss": 1.0067, "step": 2627 }, { "epoch": 2.43, "learning_rate": 1.8145377512992646e-06, "loss": 0.9646, "step": 2628 }, { "epoch": 2.43, "learning_rate": 1.8087981820115996e-06, "loss": 0.9745, "step": 2629 }, { "epoch": 2.44, "learning_rate": 1.8030668018013043e-06, "loss": 0.9752, "step": 2630 }, { "epoch": 2.44, "learning_rate": 1.7973436163982672e-06, "loss": 0.9723, "step": 2631 }, { "epoch": 2.44, "learning_rate": 1.7916286315241926e-06, "loss": 0.9287, "step": 2632 }, { "epoch": 2.44, "learning_rate": 1.7859218528925826e-06, "loss": 0.9111, "step": 2633 }, { "epoch": 2.44, "learning_rate": 1.7802232862087355e-06, "loss": 0.9246, "step": 2634 }, { "epoch": 2.44, "learning_rate": 1.7745329371697406e-06, "loss": 0.9281, "step": 2635 }, { "epoch": 2.44, "learning_rate": 1.7688508114644708e-06, "loss": 0.9767, "step": 2636 }, { "epoch": 2.44, "learning_rate": 1.7631769147735778e-06, "loss": 0.9344, "step": 2637 }, { "epoch": 2.44, "learning_rate": 1.7575112527694871e-06, "loss": 0.9533, "step": 2638 }, { "epoch": 2.44, "learning_rate": 1.7518538311163913e-06, "loss": 1.0033, "step": 2639 }, { "epoch": 2.44, "learning_rate": 1.746204655470245e-06, "loss": 0.9166, "step": 2640 }, { "epoch": 2.45, "learning_rate": 1.7405637314787582e-06, "loss": 0.8705, "step": 2641 }, { "epoch": 2.45, "learning_rate": 1.734931064781391e-06, "loss": 0.933, "step": 2642 }, { "epoch": 2.45, "learning_rate": 1.7293066610093513e-06, "loss": 0.9204, "step": 2643 }, { "epoch": 2.45, "learning_rate": 1.723690525785583e-06, "loss": 0.9853, "step": 2644 }, { "epoch": 2.45, "learning_rate": 1.7180826647247651e-06, "loss": 1.0041, "step": 2645 }, { "epoch": 2.45, "learning_rate": 1.7124830834333027e-06, "loss": 0.9771, "step": 2646 }, { "epoch": 2.45, "learning_rate": 1.7068917875093295e-06, "loss": 0.9502, "step": 2647 }, { "epoch": 2.45, "learning_rate": 1.7013087825426855e-06, "loss": 0.9823, "step": 2648 }, { "epoch": 2.45, "learning_rate": 1.6957340741149287e-06, "loss": 0.9829, "step": 2649 }, { "epoch": 2.45, "learning_rate": 1.6901676677993239e-06, "loss": 0.9211, "step": 2650 }, { "epoch": 2.45, "learning_rate": 1.6846095691608334e-06, "loss": 1.0011, "step": 2651 }, { "epoch": 2.46, "learning_rate": 1.679059783756115e-06, "loss": 0.9416, "step": 2652 }, { "epoch": 2.46, "learning_rate": 1.6735183171335112e-06, "loss": 0.9015, "step": 2653 }, { "epoch": 2.46, "learning_rate": 1.6679851748330545e-06, "loss": 0.9369, "step": 2654 }, { "epoch": 2.46, "learning_rate": 1.6624603623864511e-06, "loss": 1.0339, "step": 2655 }, { "epoch": 2.46, "learning_rate": 1.6569438853170794e-06, "loss": 0.933, "step": 2656 }, { "epoch": 2.46, "learning_rate": 1.651435749139988e-06, "loss": 0.9869, "step": 2657 }, { "epoch": 2.46, "learning_rate": 1.645935959361884e-06, "loss": 0.8917, "step": 2658 }, { "epoch": 2.46, "learning_rate": 1.6404445214811294e-06, "loss": 0.9523, "step": 2659 }, { "epoch": 2.46, "learning_rate": 1.634961440987739e-06, "loss": 0.9578, "step": 2660 }, { "epoch": 2.46, "learning_rate": 1.6294867233633704e-06, "loss": 0.9715, "step": 2661 }, { "epoch": 2.46, "learning_rate": 1.6240203740813232e-06, "loss": 0.9566, "step": 2662 }, { "epoch": 2.47, "learning_rate": 1.6185623986065269e-06, "loss": 0.9534, "step": 2663 }, { "epoch": 2.47, "learning_rate": 1.6131128023955444e-06, "loss": 0.9658, "step": 2664 }, { "epoch": 2.47, "learning_rate": 1.6076715908965534e-06, "loss": 0.9732, "step": 2665 }, { "epoch": 2.47, "learning_rate": 1.602238769549358e-06, "loss": 0.9783, "step": 2666 }, { "epoch": 2.47, "learning_rate": 1.5968143437853689e-06, "loss": 0.9443, "step": 2667 }, { "epoch": 2.47, "learning_rate": 1.591398319027605e-06, "loss": 1.0061, "step": 2668 }, { "epoch": 2.47, "learning_rate": 1.5859907006906883e-06, "loss": 0.9368, "step": 2669 }, { "epoch": 2.47, "learning_rate": 1.580591494180832e-06, "loss": 0.8976, "step": 2670 }, { "epoch": 2.47, "learning_rate": 1.5752007048958451e-06, "loss": 1.0237, "step": 2671 }, { "epoch": 2.47, "learning_rate": 1.5698183382251164e-06, "loss": 1.0112, "step": 2672 }, { "epoch": 2.47, "learning_rate": 1.5644443995496173e-06, "loss": 0.9424, "step": 2673 }, { "epoch": 2.48, "learning_rate": 1.5590788942418933e-06, "loss": 0.9894, "step": 2674 }, { "epoch": 2.48, "learning_rate": 1.553721827666057e-06, "loss": 0.9727, "step": 2675 }, { "epoch": 2.48, "learning_rate": 1.5483732051777888e-06, "loss": 0.89, "step": 2676 }, { "epoch": 2.48, "learning_rate": 1.5430330321243193e-06, "loss": 1.0061, "step": 2677 }, { "epoch": 2.48, "learning_rate": 1.5377013138444386e-06, "loss": 0.8903, "step": 2678 }, { "epoch": 2.48, "learning_rate": 1.5323780556684831e-06, "loss": 0.9007, "step": 2679 }, { "epoch": 2.48, "learning_rate": 1.527063262918328e-06, "loss": 1.0305, "step": 2680 }, { "epoch": 2.48, "learning_rate": 1.5217569409073917e-06, "loss": 0.9959, "step": 2681 }, { "epoch": 2.48, "learning_rate": 1.5164590949406166e-06, "loss": 1.0047, "step": 2682 }, { "epoch": 2.48, "learning_rate": 1.5111697303144734e-06, "loss": 0.9747, "step": 2683 }, { "epoch": 2.49, "learning_rate": 1.5058888523169579e-06, "loss": 1.0263, "step": 2684 }, { "epoch": 2.49, "learning_rate": 1.500616466227578e-06, "loss": 0.9759, "step": 2685 }, { "epoch": 2.49, "learning_rate": 1.4953525773173506e-06, "loss": 0.9305, "step": 2686 }, { "epoch": 2.49, "learning_rate": 1.4900971908488015e-06, "loss": 0.9602, "step": 2687 }, { "epoch": 2.49, "learning_rate": 1.4848503120759527e-06, "loss": 0.9347, "step": 2688 }, { "epoch": 2.49, "learning_rate": 1.4796119462443225e-06, "loss": 0.9204, "step": 2689 }, { "epoch": 2.49, "learning_rate": 1.4743820985909184e-06, "loss": 0.899, "step": 2690 }, { "epoch": 2.49, "learning_rate": 1.4691607743442326e-06, "loss": 0.934, "step": 2691 }, { "epoch": 2.49, "learning_rate": 1.4639479787242327e-06, "loss": 1.0102, "step": 2692 }, { "epoch": 2.49, "learning_rate": 1.4587437169423647e-06, "loss": 1.0061, "step": 2693 }, { "epoch": 2.49, "learning_rate": 1.4535479942015396e-06, "loss": 1.0141, "step": 2694 }, { "epoch": 2.5, "learning_rate": 1.4483608156961326e-06, "loss": 0.9417, "step": 2695 }, { "epoch": 2.5, "learning_rate": 1.4431821866119766e-06, "loss": 1.0026, "step": 2696 }, { "epoch": 2.5, "learning_rate": 1.43801211212636e-06, "loss": 0.975, "step": 2697 }, { "epoch": 2.5, "learning_rate": 1.432850597408012e-06, "loss": 0.946, "step": 2698 }, { "epoch": 2.5, "learning_rate": 1.4276976476171124e-06, "loss": 0.9022, "step": 2699 }, { "epoch": 2.5, "learning_rate": 1.422553267905271e-06, "loss": 0.9754, "step": 2700 }, { "epoch": 2.5, "learning_rate": 1.417417463415537e-06, "loss": 0.8944, "step": 2701 }, { "epoch": 2.5, "learning_rate": 1.4122902392823801e-06, "loss": 0.9942, "step": 2702 }, { "epoch": 2.5, "learning_rate": 1.407171600631696e-06, "loss": 0.9019, "step": 2703 }, { "epoch": 2.5, "learning_rate": 1.402061552580799e-06, "loss": 0.9249, "step": 2704 }, { "epoch": 2.5, "learning_rate": 1.3969601002384049e-06, "loss": 0.9005, "step": 2705 }, { "epoch": 2.51, "learning_rate": 1.3918672487046469e-06, "loss": 0.9447, "step": 2706 }, { "epoch": 2.51, "learning_rate": 1.386783003071055e-06, "loss": 0.9807, "step": 2707 }, { "epoch": 2.51, "learning_rate": 1.3817073684205562e-06, "loss": 0.9437, "step": 2708 }, { "epoch": 2.51, "learning_rate": 1.3766403498274695e-06, "loss": 0.9741, "step": 2709 }, { "epoch": 2.51, "learning_rate": 1.371581952357498e-06, "loss": 0.9178, "step": 2710 }, { "epoch": 2.51, "learning_rate": 1.3665321810677278e-06, "loss": 1.0183, "step": 2711 }, { "epoch": 2.51, "learning_rate": 1.3614910410066217e-06, "loss": 0.9837, "step": 2712 }, { "epoch": 2.51, "learning_rate": 1.3564585372140114e-06, "loss": 0.985, "step": 2713 }, { "epoch": 2.51, "learning_rate": 1.351434674721097e-06, "loss": 0.9293, "step": 2714 }, { "epoch": 2.51, "learning_rate": 1.3464194585504342e-06, "loss": 1.0024, "step": 2715 }, { "epoch": 2.51, "learning_rate": 1.3414128937159398e-06, "loss": 0.9532, "step": 2716 }, { "epoch": 2.52, "learning_rate": 1.3364149852228824e-06, "loss": 0.9654, "step": 2717 }, { "epoch": 2.52, "learning_rate": 1.331425738067872e-06, "loss": 0.9053, "step": 2718 }, { "epoch": 2.52, "learning_rate": 1.3264451572388626e-06, "loss": 0.9354, "step": 2719 }, { "epoch": 2.52, "learning_rate": 1.3214732477151438e-06, "loss": 0.9762, "step": 2720 }, { "epoch": 2.52, "learning_rate": 1.3165100144673349e-06, "loss": 0.9279, "step": 2721 }, { "epoch": 2.52, "learning_rate": 1.3115554624573811e-06, "loss": 0.9685, "step": 2722 }, { "epoch": 2.52, "learning_rate": 1.3066095966385517e-06, "loss": 0.9627, "step": 2723 }, { "epoch": 2.52, "learning_rate": 1.3016724219554277e-06, "loss": 0.9813, "step": 2724 }, { "epoch": 2.52, "learning_rate": 1.2967439433439043e-06, "loss": 0.9151, "step": 2725 }, { "epoch": 2.52, "learning_rate": 1.2918241657311814e-06, "loss": 0.9967, "step": 2726 }, { "epoch": 2.52, "learning_rate": 1.286913094035761e-06, "loss": 0.9537, "step": 2727 }, { "epoch": 2.53, "learning_rate": 1.2820107331674426e-06, "loss": 0.9034, "step": 2728 }, { "epoch": 2.53, "learning_rate": 1.2771170880273121e-06, "loss": 1.0117, "step": 2729 }, { "epoch": 2.53, "learning_rate": 1.2722321635077473e-06, "loss": 0.9531, "step": 2730 }, { "epoch": 2.53, "learning_rate": 1.2673559644924062e-06, "loss": 0.9894, "step": 2731 }, { "epoch": 2.53, "learning_rate": 1.2624884958562233e-06, "loss": 1.0257, "step": 2732 }, { "epoch": 2.53, "learning_rate": 1.2576297624654033e-06, "loss": 0.9866, "step": 2733 }, { "epoch": 2.53, "learning_rate": 1.2527797691774202e-06, "loss": 0.9552, "step": 2734 }, { "epoch": 2.53, "learning_rate": 1.2479385208410088e-06, "loss": 0.9551, "step": 2735 }, { "epoch": 2.53, "learning_rate": 1.2431060222961632e-06, "loss": 1.0051, "step": 2736 }, { "epoch": 2.53, "learning_rate": 1.2382822783741278e-06, "loss": 0.9965, "step": 2737 }, { "epoch": 2.54, "learning_rate": 1.2334672938973958e-06, "loss": 0.9246, "step": 2738 }, { "epoch": 2.54, "learning_rate": 1.2286610736797021e-06, "loss": 0.9217, "step": 2739 }, { "epoch": 2.54, "learning_rate": 1.2238636225260214e-06, "loss": 0.9286, "step": 2740 }, { "epoch": 2.54, "learning_rate": 1.21907494523256e-06, "loss": 0.9869, "step": 2741 }, { "epoch": 2.54, "learning_rate": 1.2142950465867543e-06, "loss": 0.9904, "step": 2742 }, { "epoch": 2.54, "learning_rate": 1.2095239313672613e-06, "loss": 0.9612, "step": 2743 }, { "epoch": 2.54, "learning_rate": 1.2047616043439593e-06, "loss": 0.9121, "step": 2744 }, { "epoch": 2.54, "learning_rate": 1.2000080702779416e-06, "loss": 0.93, "step": 2745 }, { "epoch": 2.54, "learning_rate": 1.195263333921508e-06, "loss": 0.9806, "step": 2746 }, { "epoch": 2.54, "learning_rate": 1.1905274000181665e-06, "loss": 0.9762, "step": 2747 }, { "epoch": 2.54, "learning_rate": 1.1858002733026196e-06, "loss": 0.9168, "step": 2748 }, { "epoch": 2.55, "learning_rate": 1.1810819585007693e-06, "loss": 0.9391, "step": 2749 }, { "epoch": 2.55, "learning_rate": 1.1763724603297066e-06, "loss": 0.9472, "step": 2750 }, { "epoch": 2.55, "learning_rate": 1.1716717834977087e-06, "loss": 0.937, "step": 2751 }, { "epoch": 2.55, "learning_rate": 1.166979932704232e-06, "loss": 0.9832, "step": 2752 }, { "epoch": 2.55, "learning_rate": 1.1622969126399119e-06, "loss": 0.9645, "step": 2753 }, { "epoch": 2.55, "learning_rate": 1.1576227279865538e-06, "loss": 0.9674, "step": 2754 }, { "epoch": 2.55, "learning_rate": 1.152957383417128e-06, "loss": 0.9387, "step": 2755 }, { "epoch": 2.55, "learning_rate": 1.1483008835957744e-06, "loss": 0.935, "step": 2756 }, { "epoch": 2.55, "learning_rate": 1.1436532331777793e-06, "loss": 0.9675, "step": 2757 }, { "epoch": 2.55, "learning_rate": 1.139014436809589e-06, "loss": 0.9358, "step": 2758 }, { "epoch": 2.55, "learning_rate": 1.1343844991287978e-06, "loss": 0.9766, "step": 2759 }, { "epoch": 2.56, "learning_rate": 1.129763424764142e-06, "loss": 0.9533, "step": 2760 }, { "epoch": 2.56, "learning_rate": 1.1251512183354984e-06, "loss": 0.9508, "step": 2761 }, { "epoch": 2.56, "learning_rate": 1.1205478844538764e-06, "loss": 0.9757, "step": 2762 }, { "epoch": 2.56, "learning_rate": 1.1159534277214168e-06, "loss": 0.9489, "step": 2763 }, { "epoch": 2.56, "learning_rate": 1.1113678527313853e-06, "loss": 0.9864, "step": 2764 }, { "epoch": 2.56, "learning_rate": 1.106791164068165e-06, "loss": 0.9693, "step": 2765 }, { "epoch": 2.56, "learning_rate": 1.102223366307259e-06, "loss": 0.9564, "step": 2766 }, { "epoch": 2.56, "learning_rate": 1.097664464015279e-06, "loss": 0.9691, "step": 2767 }, { "epoch": 2.56, "learning_rate": 1.093114461749948e-06, "loss": 0.9752, "step": 2768 }, { "epoch": 2.56, "learning_rate": 1.088573364060085e-06, "loss": 0.9777, "step": 2769 }, { "epoch": 2.56, "learning_rate": 1.0840411754856128e-06, "loss": 0.9593, "step": 2770 }, { "epoch": 2.57, "learning_rate": 1.0795179005575418e-06, "loss": 0.9814, "step": 2771 }, { "epoch": 2.57, "learning_rate": 1.0750035437979755e-06, "loss": 1.0002, "step": 2772 }, { "epoch": 2.57, "learning_rate": 1.0704981097200995e-06, "loss": 1.0373, "step": 2773 }, { "epoch": 2.57, "learning_rate": 1.0660016028281794e-06, "loss": 0.991, "step": 2774 }, { "epoch": 2.57, "learning_rate": 1.061514027617555e-06, "loss": 0.9695, "step": 2775 }, { "epoch": 2.57, "learning_rate": 1.0570353885746386e-06, "loss": 0.8566, "step": 2776 }, { "epoch": 2.57, "learning_rate": 1.0525656901769076e-06, "loss": 0.9976, "step": 2777 }, { "epoch": 2.57, "learning_rate": 1.048104936892902e-06, "loss": 0.9919, "step": 2778 }, { "epoch": 2.57, "learning_rate": 1.0436531331822152e-06, "loss": 0.9486, "step": 2779 }, { "epoch": 2.57, "learning_rate": 1.039210283495502e-06, "loss": 0.9619, "step": 2780 }, { "epoch": 2.57, "learning_rate": 1.0347763922744524e-06, "loss": 0.8894, "step": 2781 }, { "epoch": 2.58, "learning_rate": 1.0303514639518132e-06, "loss": 0.9936, "step": 2782 }, { "epoch": 2.58, "learning_rate": 1.0259355029513618e-06, "loss": 0.9085, "step": 2783 }, { "epoch": 2.58, "learning_rate": 1.0215285136879172e-06, "loss": 0.9707, "step": 2784 }, { "epoch": 2.58, "learning_rate": 1.0171305005673216e-06, "loss": 0.9909, "step": 2785 }, { "epoch": 2.58, "learning_rate": 1.01274146798645e-06, "loss": 0.927, "step": 2786 }, { "epoch": 2.58, "learning_rate": 1.0083614203331971e-06, "loss": 0.8858, "step": 2787 }, { "epoch": 2.58, "learning_rate": 1.003990361986472e-06, "loss": 0.9547, "step": 2788 }, { "epoch": 2.58, "learning_rate": 9.996282973162018e-07, "loss": 0.9622, "step": 2789 }, { "epoch": 2.58, "learning_rate": 9.952752306833203e-07, "loss": 0.9518, "step": 2790 }, { "epoch": 2.58, "learning_rate": 9.909311664397624e-07, "loss": 0.9586, "step": 2791 }, { "epoch": 2.59, "learning_rate": 9.86596108928467e-07, "loss": 0.8961, "step": 2792 }, { "epoch": 2.59, "learning_rate": 9.822700624833692e-07, "loss": 0.9628, "step": 2793 }, { "epoch": 2.59, "learning_rate": 9.779530314293906e-07, "loss": 1.0004, "step": 2794 }, { "epoch": 2.59, "learning_rate": 9.736450200824431e-07, "loss": 0.9962, "step": 2795 }, { "epoch": 2.59, "learning_rate": 9.69346032749423e-07, "loss": 0.9844, "step": 2796 }, { "epoch": 2.59, "learning_rate": 9.650560737282e-07, "loss": 0.956, "step": 2797 }, { "epoch": 2.59, "learning_rate": 9.607751473076232e-07, "loss": 0.9086, "step": 2798 }, { "epoch": 2.59, "learning_rate": 9.56503257767506e-07, "loss": 0.9654, "step": 2799 }, { "epoch": 2.59, "learning_rate": 9.522404093786308e-07, "loss": 0.9486, "step": 2800 }, { "epoch": 2.59, "learning_rate": 9.47986606402741e-07, "loss": 0.8432, "step": 2801 }, { "epoch": 2.59, "learning_rate": 9.437418530925346e-07, "loss": 0.9224, "step": 2802 }, { "epoch": 2.6, "learning_rate": 9.395061536916672e-07, "loss": 0.9121, "step": 2803 }, { "epoch": 2.6, "learning_rate": 9.352795124347369e-07, "loss": 1.0849, "step": 2804 }, { "epoch": 2.6, "learning_rate": 9.310619335472892e-07, "loss": 0.9397, "step": 2805 }, { "epoch": 2.6, "learning_rate": 9.268534212458102e-07, "loss": 1.0084, "step": 2806 }, { "epoch": 2.6, "learning_rate": 9.226539797377199e-07, "loss": 0.9729, "step": 2807 }, { "epoch": 2.6, "learning_rate": 9.18463613221372e-07, "loss": 0.9412, "step": 2808 }, { "epoch": 2.6, "learning_rate": 9.142823258860444e-07, "loss": 0.8721, "step": 2809 }, { "epoch": 2.6, "learning_rate": 9.101101219119402e-07, "loss": 0.9734, "step": 2810 }, { "epoch": 2.6, "learning_rate": 9.059470054701813e-07, "loss": 0.931, "step": 2811 }, { "epoch": 2.6, "learning_rate": 9.017929807228043e-07, "loss": 0.9857, "step": 2812 }, { "epoch": 2.6, "learning_rate": 8.976480518227582e-07, "loss": 0.9562, "step": 2813 }, { "epoch": 2.61, "learning_rate": 8.935122229138971e-07, "loss": 0.9368, "step": 2814 }, { "epoch": 2.61, "learning_rate": 8.893854981309746e-07, "loss": 0.9695, "step": 2815 }, { "epoch": 2.61, "learning_rate": 8.852678815996452e-07, "loss": 0.9766, "step": 2816 }, { "epoch": 2.61, "learning_rate": 8.811593774364601e-07, "loss": 0.9528, "step": 2817 }, { "epoch": 2.61, "learning_rate": 8.770599897488563e-07, "loss": 0.982, "step": 2818 }, { "epoch": 2.61, "learning_rate": 8.729697226351574e-07, "loss": 0.9334, "step": 2819 }, { "epoch": 2.61, "learning_rate": 8.688885801845725e-07, "loss": 0.9386, "step": 2820 }, { "epoch": 2.61, "learning_rate": 8.648165664771824e-07, "loss": 0.925, "step": 2821 }, { "epoch": 2.61, "learning_rate": 8.607536855839472e-07, "loss": 0.9868, "step": 2822 }, { "epoch": 2.61, "learning_rate": 8.566999415666932e-07, "loss": 0.9459, "step": 2823 }, { "epoch": 2.61, "learning_rate": 8.526553384781122e-07, "loss": 0.9716, "step": 2824 }, { "epoch": 2.62, "learning_rate": 8.486198803617595e-07, "loss": 0.9512, "step": 2825 }, { "epoch": 2.62, "learning_rate": 8.445935712520459e-07, "loss": 0.9639, "step": 2826 }, { "epoch": 2.62, "learning_rate": 8.405764151742368e-07, "loss": 1.024, "step": 2827 }, { "epoch": 2.62, "learning_rate": 8.365684161444454e-07, "loss": 0.9655, "step": 2828 }, { "epoch": 2.62, "learning_rate": 8.32569578169632e-07, "loss": 0.9503, "step": 2829 }, { "epoch": 2.62, "learning_rate": 8.285799052475973e-07, "loss": 0.9799, "step": 2830 }, { "epoch": 2.62, "learning_rate": 8.245994013669811e-07, "loss": 0.9384, "step": 2831 }, { "epoch": 2.62, "learning_rate": 8.206280705072522e-07, "loss": 0.9658, "step": 2832 }, { "epoch": 2.62, "learning_rate": 8.166659166387114e-07, "loss": 0.9176, "step": 2833 }, { "epoch": 2.62, "learning_rate": 8.127129437224867e-07, "loss": 1.0427, "step": 2834 }, { "epoch": 2.62, "learning_rate": 8.08769155710526e-07, "loss": 0.9977, "step": 2835 }, { "epoch": 2.63, "learning_rate": 8.048345565455951e-07, "loss": 0.9489, "step": 2836 }, { "epoch": 2.63, "learning_rate": 8.009091501612687e-07, "loss": 0.9979, "step": 2837 }, { "epoch": 2.63, "learning_rate": 7.969929404819377e-07, "loss": 0.9342, "step": 2838 }, { "epoch": 2.63, "learning_rate": 7.930859314227968e-07, "loss": 0.9836, "step": 2839 }, { "epoch": 2.63, "learning_rate": 7.891881268898405e-07, "loss": 0.9944, "step": 2840 }, { "epoch": 2.63, "learning_rate": 7.852995307798627e-07, "loss": 0.8996, "step": 2841 }, { "epoch": 2.63, "learning_rate": 7.814201469804506e-07, "loss": 0.9587, "step": 2842 }, { "epoch": 2.63, "learning_rate": 7.775499793699837e-07, "loss": 0.9489, "step": 2843 }, { "epoch": 2.63, "learning_rate": 7.736890318176238e-07, "loss": 0.9547, "step": 2844 }, { "epoch": 2.63, "learning_rate": 7.698373081833177e-07, "loss": 0.9763, "step": 2845 }, { "epoch": 2.64, "learning_rate": 7.659948123177908e-07, "loss": 0.975, "step": 2846 }, { "epoch": 2.64, "learning_rate": 7.62161548062541e-07, "loss": 0.877, "step": 2847 }, { "epoch": 2.64, "learning_rate": 7.583375192498422e-07, "loss": 0.9804, "step": 2848 }, { "epoch": 2.64, "learning_rate": 7.545227297027258e-07, "loss": 0.8953, "step": 2849 }, { "epoch": 2.64, "learning_rate": 7.507171832349946e-07, "loss": 0.9174, "step": 2850 }, { "epoch": 2.64, "learning_rate": 7.469208836512077e-07, "loss": 0.9555, "step": 2851 }, { "epoch": 2.64, "learning_rate": 7.431338347466799e-07, "loss": 0.938, "step": 2852 }, { "epoch": 2.64, "learning_rate": 7.393560403074796e-07, "loss": 0.9465, "step": 2853 }, { "epoch": 2.64, "learning_rate": 7.355875041104199e-07, "loss": 0.9581, "step": 2854 }, { "epoch": 2.64, "learning_rate": 7.318282299230595e-07, "loss": 0.9761, "step": 2855 }, { "epoch": 2.64, "learning_rate": 7.280782215036986e-07, "loss": 0.9434, "step": 2856 }, { "epoch": 2.65, "learning_rate": 7.243374826013716e-07, "loss": 0.9598, "step": 2857 }, { "epoch": 2.65, "learning_rate": 7.206060169558482e-07, "loss": 0.9609, "step": 2858 }, { "epoch": 2.65, "learning_rate": 7.168838282976265e-07, "loss": 0.9708, "step": 2859 }, { "epoch": 2.65, "learning_rate": 7.131709203479309e-07, "loss": 0.9327, "step": 2860 }, { "epoch": 2.65, "learning_rate": 7.094672968187033e-07, "loss": 0.9001, "step": 2861 }, { "epoch": 2.65, "learning_rate": 7.057729614126085e-07, "loss": 0.9327, "step": 2862 }, { "epoch": 2.65, "learning_rate": 7.020879178230233e-07, "loss": 0.9408, "step": 2863 }, { "epoch": 2.65, "learning_rate": 6.984121697340373e-07, "loss": 0.9824, "step": 2864 }, { "epoch": 2.65, "learning_rate": 6.947457208204422e-07, "loss": 0.9016, "step": 2865 }, { "epoch": 2.65, "learning_rate": 6.91088574747737e-07, "loss": 0.9917, "step": 2866 }, { "epoch": 2.65, "learning_rate": 6.874407351721202e-07, "loss": 0.9871, "step": 2867 }, { "epoch": 2.66, "learning_rate": 6.838022057404848e-07, "loss": 0.9891, "step": 2868 }, { "epoch": 2.66, "learning_rate": 6.801729900904163e-07, "loss": 0.9583, "step": 2869 }, { "epoch": 2.66, "learning_rate": 6.765530918501895e-07, "loss": 0.9831, "step": 2870 }, { "epoch": 2.66, "learning_rate": 6.729425146387635e-07, "loss": 0.8969, "step": 2871 }, { "epoch": 2.66, "learning_rate": 6.693412620657802e-07, "loss": 0.9534, "step": 2872 }, { "epoch": 2.66, "learning_rate": 6.657493377315561e-07, "loss": 0.9617, "step": 2873 }, { "epoch": 2.66, "learning_rate": 6.621667452270852e-07, "loss": 0.9062, "step": 2874 }, { "epoch": 2.66, "learning_rate": 6.585934881340317e-07, "loss": 0.9784, "step": 2875 }, { "epoch": 2.66, "learning_rate": 6.550295700247233e-07, "loss": 0.8905, "step": 2876 }, { "epoch": 2.66, "learning_rate": 6.514749944621568e-07, "loss": 0.8964, "step": 2877 }, { "epoch": 2.66, "learning_rate": 6.479297649999839e-07, "loss": 0.918, "step": 2878 }, { "epoch": 2.67, "learning_rate": 6.443938851825149e-07, "loss": 0.9517, "step": 2879 }, { "epoch": 2.67, "learning_rate": 6.408673585447123e-07, "loss": 0.9222, "step": 2880 }, { "epoch": 2.67, "learning_rate": 6.373501886121913e-07, "loss": 0.9875, "step": 2881 }, { "epoch": 2.67, "learning_rate": 6.338423789012049e-07, "loss": 0.9507, "step": 2882 }, { "epoch": 2.67, "learning_rate": 6.303439329186545e-07, "loss": 1.0037, "step": 2883 }, { "epoch": 2.67, "learning_rate": 6.2685485416208e-07, "loss": 0.9739, "step": 2884 }, { "epoch": 2.67, "learning_rate": 6.233751461196546e-07, "loss": 0.888, "step": 2885 }, { "epoch": 2.67, "learning_rate": 6.199048122701834e-07, "loss": 0.9543, "step": 2886 }, { "epoch": 2.67, "learning_rate": 6.16443856083101e-07, "loss": 0.968, "step": 2887 }, { "epoch": 2.67, "learning_rate": 6.129922810184663e-07, "loss": 0.9688, "step": 2888 }, { "epoch": 2.67, "learning_rate": 6.095500905269569e-07, "loss": 1.0137, "step": 2889 }, { "epoch": 2.68, "learning_rate": 6.061172880498733e-07, "loss": 0.9647, "step": 2890 }, { "epoch": 2.68, "learning_rate": 6.026938770191248e-07, "loss": 0.9302, "step": 2891 }, { "epoch": 2.68, "learning_rate": 5.99279860857237e-07, "loss": 0.9637, "step": 2892 }, { "epoch": 2.68, "learning_rate": 5.958752429773396e-07, "loss": 0.9325, "step": 2893 }, { "epoch": 2.68, "learning_rate": 5.924800267831676e-07, "loss": 0.9261, "step": 2894 }, { "epoch": 2.68, "learning_rate": 5.890942156690571e-07, "loss": 0.9296, "step": 2895 }, { "epoch": 2.68, "learning_rate": 5.857178130199403e-07, "loss": 0.9522, "step": 2896 }, { "epoch": 2.68, "learning_rate": 5.823508222113439e-07, "loss": 0.9925, "step": 2897 }, { "epoch": 2.68, "learning_rate": 5.789932466093884e-07, "loss": 0.9497, "step": 2898 }, { "epoch": 2.68, "learning_rate": 5.756450895707743e-07, "loss": 0.8942, "step": 2899 }, { "epoch": 2.69, "learning_rate": 5.723063544427921e-07, "loss": 0.9338, "step": 2900 }, { "epoch": 2.69, "learning_rate": 5.689770445633113e-07, "loss": 0.9956, "step": 2901 }, { "epoch": 2.69, "learning_rate": 5.656571632607777e-07, "loss": 0.9144, "step": 2902 }, { "epoch": 2.69, "learning_rate": 5.623467138542116e-07, "loss": 0.9723, "step": 2903 }, { "epoch": 2.69, "learning_rate": 5.590456996532045e-07, "loss": 0.8711, "step": 2904 }, { "epoch": 2.69, "learning_rate": 5.557541239579145e-07, "loss": 0.9122, "step": 2905 }, { "epoch": 2.69, "learning_rate": 5.524719900590636e-07, "loss": 0.939, "step": 2906 }, { "epoch": 2.69, "learning_rate": 5.491993012379338e-07, "loss": 0.9786, "step": 2907 }, { "epoch": 2.69, "learning_rate": 5.459360607663644e-07, "loss": 1.0548, "step": 2908 }, { "epoch": 2.69, "learning_rate": 5.426822719067515e-07, "loss": 0.913, "step": 2909 }, { "epoch": 2.69, "learning_rate": 5.394379379120384e-07, "loss": 0.9024, "step": 2910 }, { "epoch": 2.7, "learning_rate": 5.362030620257208e-07, "loss": 0.9542, "step": 2911 }, { "epoch": 2.7, "learning_rate": 5.329776474818305e-07, "loss": 0.9796, "step": 2912 }, { "epoch": 2.7, "learning_rate": 5.297616975049491e-07, "loss": 0.9735, "step": 2913 }, { "epoch": 2.7, "learning_rate": 5.265552153101905e-07, "loss": 0.9298, "step": 2914 }, { "epoch": 2.7, "learning_rate": 5.23358204103207e-07, "loss": 0.9666, "step": 2915 }, { "epoch": 2.7, "learning_rate": 5.201706670801809e-07, "loss": 1.022, "step": 2916 }, { "epoch": 2.7, "learning_rate": 5.169926074278208e-07, "loss": 0.9707, "step": 2917 }, { "epoch": 2.7, "learning_rate": 5.138240283233631e-07, "loss": 1.0052, "step": 2918 }, { "epoch": 2.7, "learning_rate": 5.106649329345636e-07, "loss": 0.9161, "step": 2919 }, { "epoch": 2.7, "learning_rate": 5.075153244197006e-07, "loss": 1.0108, "step": 2920 }, { "epoch": 2.7, "learning_rate": 5.043752059275641e-07, "loss": 0.9606, "step": 2921 }, { "epoch": 2.71, "learning_rate": 5.012445805974598e-07, "loss": 0.9533, "step": 2922 }, { "epoch": 2.71, "learning_rate": 4.981234515591993e-07, "loss": 0.93, "step": 2923 }, { "epoch": 2.71, "learning_rate": 4.950118219331024e-07, "loss": 1.0035, "step": 2924 }, { "epoch": 2.71, "learning_rate": 4.919096948299939e-07, "loss": 0.9993, "step": 2925 }, { "epoch": 2.71, "learning_rate": 4.888170733511932e-07, "loss": 0.9496, "step": 2926 }, { "epoch": 2.71, "learning_rate": 4.857339605885214e-07, "loss": 0.9403, "step": 2927 }, { "epoch": 2.71, "learning_rate": 4.826603596242918e-07, "loss": 0.9683, "step": 2928 }, { "epoch": 2.71, "learning_rate": 4.795962735313087e-07, "loss": 0.9031, "step": 2929 }, { "epoch": 2.71, "learning_rate": 4.765417053728616e-07, "loss": 0.9425, "step": 2930 }, { "epoch": 2.71, "learning_rate": 4.734966582027289e-07, "loss": 0.9755, "step": 2931 }, { "epoch": 2.71, "learning_rate": 4.7046113506516556e-07, "loss": 0.9395, "step": 2932 }, { "epoch": 2.72, "learning_rate": 4.674351389949072e-07, "loss": 0.9736, "step": 2933 }, { "epoch": 2.72, "learning_rate": 4.644186730171674e-07, "loss": 0.9258, "step": 2934 }, { "epoch": 2.72, "learning_rate": 4.614117401476292e-07, "loss": 0.9707, "step": 2935 }, { "epoch": 2.72, "learning_rate": 4.584143433924437e-07, "loss": 1.0101, "step": 2936 }, { "epoch": 2.72, "learning_rate": 4.5542648574823265e-07, "loss": 0.9426, "step": 2937 }, { "epoch": 2.72, "learning_rate": 4.5244817020207775e-07, "loss": 0.9458, "step": 2938 }, { "epoch": 2.72, "learning_rate": 4.4947939973152276e-07, "loss": 0.9753, "step": 2939 }, { "epoch": 2.72, "learning_rate": 4.4652017730456685e-07, "loss": 0.9786, "step": 2940 }, { "epoch": 2.72, "learning_rate": 4.4357050587966576e-07, "loss": 0.9931, "step": 2941 }, { "epoch": 2.72, "learning_rate": 4.4063038840572505e-07, "loss": 0.9481, "step": 2942 }, { "epoch": 2.72, "learning_rate": 4.376998278221012e-07, "loss": 0.9509, "step": 2943 }, { "epoch": 2.73, "learning_rate": 4.34778827058594e-07, "loss": 0.8806, "step": 2944 }, { "epoch": 2.73, "learning_rate": 4.3186738903544634e-07, "loss": 0.9851, "step": 2945 }, { "epoch": 2.73, "learning_rate": 4.289655166633411e-07, "loss": 0.9487, "step": 2946 }, { "epoch": 2.73, "learning_rate": 4.2607321284339754e-07, "loss": 0.9263, "step": 2947 }, { "epoch": 2.73, "learning_rate": 4.231904804671727e-07, "loss": 0.9311, "step": 2948 }, { "epoch": 2.73, "learning_rate": 4.203173224166468e-07, "loss": 1.0642, "step": 2949 }, { "epoch": 2.73, "learning_rate": 4.1745374156423436e-07, "loss": 0.9186, "step": 2950 }, { "epoch": 2.73, "learning_rate": 4.145997407727742e-07, "loss": 0.9105, "step": 2951 }, { "epoch": 2.73, "learning_rate": 4.117553228955262e-07, "loss": 1.0113, "step": 2952 }, { "epoch": 2.73, "learning_rate": 4.0892049077617125e-07, "loss": 0.9333, "step": 2953 }, { "epoch": 2.74, "learning_rate": 4.060952472488044e-07, "loss": 0.9557, "step": 2954 }, { "epoch": 2.74, "learning_rate": 4.032795951379398e-07, "loss": 1.0479, "step": 2955 }, { "epoch": 2.74, "learning_rate": 4.0047353725849556e-07, "loss": 0.9553, "step": 2956 }, { "epoch": 2.74, "learning_rate": 3.9767707641580443e-07, "loss": 0.9953, "step": 2957 }, { "epoch": 2.74, "learning_rate": 3.9489021540560114e-07, "loss": 0.9435, "step": 2958 }, { "epoch": 2.74, "learning_rate": 3.9211295701402365e-07, "loss": 1.0125, "step": 2959 }, { "epoch": 2.74, "learning_rate": 3.8934530401760986e-07, "loss": 0.9397, "step": 2960 }, { "epoch": 2.74, "learning_rate": 3.865872591832953e-07, "loss": 0.9148, "step": 2961 }, { "epoch": 2.74, "learning_rate": 3.8383882526840997e-07, "loss": 0.9661, "step": 2962 }, { "epoch": 2.74, "learning_rate": 3.811000050206759e-07, "loss": 1.003, "step": 2963 }, { "epoch": 2.74, "learning_rate": 3.7837080117819944e-07, "loss": 0.9468, "step": 2964 }, { "epoch": 2.75, "learning_rate": 3.756512164694803e-07, "loss": 0.9024, "step": 2965 }, { "epoch": 2.75, "learning_rate": 3.7294125361339473e-07, "loss": 0.9576, "step": 2966 }, { "epoch": 2.75, "learning_rate": 3.702409153192066e-07, "loss": 0.9428, "step": 2967 }, { "epoch": 2.75, "learning_rate": 3.6755020428654974e-07, "loss": 0.8942, "step": 2968 }, { "epoch": 2.75, "learning_rate": 3.648691232054391e-07, "loss": 0.9636, "step": 2969 }, { "epoch": 2.75, "learning_rate": 3.6219767475626054e-07, "loss": 0.9028, "step": 2970 }, { "epoch": 2.75, "learning_rate": 3.5953586160976994e-07, "loss": 0.9435, "step": 2971 }, { "epoch": 2.75, "learning_rate": 3.568836864270897e-07, "loss": 0.932, "step": 2972 }, { "epoch": 2.75, "learning_rate": 3.5424115185970665e-07, "loss": 0.9342, "step": 2973 }, { "epoch": 2.75, "learning_rate": 3.5160826054947085e-07, "loss": 0.9104, "step": 2974 }, { "epoch": 2.75, "learning_rate": 3.4898501512859006e-07, "loss": 0.9436, "step": 2975 }, { "epoch": 2.76, "learning_rate": 3.463714182196298e-07, "loss": 0.9111, "step": 2976 }, { "epoch": 2.76, "learning_rate": 3.437674724355089e-07, "loss": 0.9706, "step": 2977 }, { "epoch": 2.76, "learning_rate": 3.411731803794982e-07, "loss": 0.9283, "step": 2978 }, { "epoch": 2.76, "learning_rate": 3.385885446452164e-07, "loss": 1.0218, "step": 2979 }, { "epoch": 2.76, "learning_rate": 3.360135678166287e-07, "loss": 1.0042, "step": 2980 }, { "epoch": 2.76, "learning_rate": 3.33448252468046e-07, "loss": 0.9083, "step": 2981 }, { "epoch": 2.76, "learning_rate": 3.308926011641156e-07, "loss": 0.9893, "step": 2982 }, { "epoch": 2.76, "learning_rate": 3.2834661645982703e-07, "loss": 0.9297, "step": 2983 }, { "epoch": 2.76, "learning_rate": 3.2581030090050424e-07, "loss": 0.933, "step": 2984 }, { "epoch": 2.76, "learning_rate": 3.2328365702180676e-07, "loss": 0.9335, "step": 2985 }, { "epoch": 2.76, "learning_rate": 3.207666873497217e-07, "loss": 1.0193, "step": 2986 }, { "epoch": 2.77, "learning_rate": 3.182593944005652e-07, "loss": 0.9354, "step": 2987 }, { "epoch": 2.77, "learning_rate": 3.1576178068098205e-07, "loss": 0.9566, "step": 2988 }, { "epoch": 2.77, "learning_rate": 3.132738486879361e-07, "loss": 0.9347, "step": 2989 }, { "epoch": 2.77, "learning_rate": 3.107956009087154e-07, "loss": 0.9606, "step": 2990 }, { "epoch": 2.77, "learning_rate": 3.083270398209259e-07, "loss": 0.902, "step": 2991 }, { "epoch": 2.77, "learning_rate": 3.0586816789248573e-07, "loss": 1.0166, "step": 2992 }, { "epoch": 2.77, "learning_rate": 3.0341898758163066e-07, "loss": 0.9534, "step": 2993 }, { "epoch": 2.77, "learning_rate": 3.009795013369055e-07, "loss": 1.0113, "step": 2994 }, { "epoch": 2.77, "learning_rate": 2.9854971159716473e-07, "loss": 0.9653, "step": 2995 }, { "epoch": 2.77, "learning_rate": 2.9612962079156647e-07, "loss": 0.9215, "step": 2996 }, { "epoch": 2.77, "learning_rate": 2.9371923133957427e-07, "loss": 1.0007, "step": 2997 }, { "epoch": 2.78, "learning_rate": 2.913185456509537e-07, "loss": 0.9234, "step": 2998 }, { "epoch": 2.78, "learning_rate": 2.8892756612576623e-07, "loss": 0.9486, "step": 2999 }, { "epoch": 2.78, "learning_rate": 2.8654629515437074e-07, "loss": 0.9892, "step": 3000 }, { "epoch": 2.78, "learning_rate": 2.8417473511742286e-07, "loss": 0.9008, "step": 3001 }, { "epoch": 2.78, "learning_rate": 2.818128883858662e-07, "loss": 0.9483, "step": 3002 }, { "epoch": 2.78, "learning_rate": 2.7946075732093404e-07, "loss": 0.9315, "step": 3003 }, { "epoch": 2.78, "learning_rate": 2.7711834427414985e-07, "loss": 0.9609, "step": 3004 }, { "epoch": 2.78, "learning_rate": 2.7478565158731707e-07, "loss": 0.9364, "step": 3005 }, { "epoch": 2.78, "learning_rate": 2.7246268159252356e-07, "loss": 0.8998, "step": 3006 }, { "epoch": 2.78, "learning_rate": 2.701494366121382e-07, "loss": 0.8979, "step": 3007 }, { "epoch": 2.79, "learning_rate": 2.678459189588045e-07, "loss": 0.9313, "step": 3008 }, { "epoch": 2.79, "learning_rate": 2.6555213093544364e-07, "loss": 0.896, "step": 3009 }, { "epoch": 2.79, "learning_rate": 2.6326807483524786e-07, "loss": 0.9725, "step": 3010 }, { "epoch": 2.79, "learning_rate": 2.6099375294168175e-07, "loss": 0.9898, "step": 3011 }, { "epoch": 2.79, "learning_rate": 2.5872916752847755e-07, "loss": 0.99, "step": 3012 }, { "epoch": 2.79, "learning_rate": 2.564743208596321e-07, "loss": 0.9815, "step": 3013 }, { "epoch": 2.79, "learning_rate": 2.542292151894088e-07, "loss": 0.9494, "step": 3014 }, { "epoch": 2.79, "learning_rate": 2.5199385276233004e-07, "loss": 1.0311, "step": 3015 }, { "epoch": 2.79, "learning_rate": 2.497682358131781e-07, "loss": 0.9817, "step": 3016 }, { "epoch": 2.79, "learning_rate": 2.4755236656699324e-07, "loss": 0.9605, "step": 3017 }, { "epoch": 2.79, "learning_rate": 2.453462472390711e-07, "loss": 0.8703, "step": 3018 }, { "epoch": 2.8, "learning_rate": 2.431498800349574e-07, "loss": 0.9501, "step": 3019 }, { "epoch": 2.8, "learning_rate": 2.4096326715044915e-07, "loss": 0.9247, "step": 3020 }, { "epoch": 2.8, "learning_rate": 2.3878641077159316e-07, "loss": 1.0184, "step": 3021 }, { "epoch": 2.8, "learning_rate": 2.366193130746819e-07, "loss": 0.9496, "step": 3022 }, { "epoch": 2.8, "learning_rate": 2.34461976226249e-07, "loss": 0.9477, "step": 3023 }, { "epoch": 2.8, "learning_rate": 2.3231440238307367e-07, "loss": 0.9727, "step": 3024 }, { "epoch": 2.8, "learning_rate": 2.3017659369217184e-07, "loss": 1.0122, "step": 3025 }, { "epoch": 2.8, "learning_rate": 2.2804855229079826e-07, "loss": 1.0024, "step": 3026 }, { "epoch": 2.8, "learning_rate": 2.2593028030644116e-07, "loss": 0.9322, "step": 3027 }, { "epoch": 2.8, "learning_rate": 2.238217798568254e-07, "loss": 1.0009, "step": 3028 }, { "epoch": 2.8, "learning_rate": 2.2172305304990262e-07, "loss": 0.9569, "step": 3029 }, { "epoch": 2.81, "learning_rate": 2.196341019838555e-07, "loss": 0.896, "step": 3030 }, { "epoch": 2.81, "learning_rate": 2.175549287470946e-07, "loss": 0.9605, "step": 3031 }, { "epoch": 2.81, "learning_rate": 2.1548553541825278e-07, "loss": 0.9406, "step": 3032 }, { "epoch": 2.81, "learning_rate": 2.1342592406618735e-07, "loss": 0.9064, "step": 3033 }, { "epoch": 2.81, "learning_rate": 2.113760967499745e-07, "loss": 0.9312, "step": 3034 }, { "epoch": 2.81, "learning_rate": 2.0933605551891057e-07, "loss": 0.9547, "step": 3035 }, { "epoch": 2.81, "learning_rate": 2.0730580241250741e-07, "loss": 0.9686, "step": 3036 }, { "epoch": 2.81, "learning_rate": 2.0528533946049257e-07, "loss": 0.9821, "step": 3037 }, { "epoch": 2.81, "learning_rate": 2.0327466868280354e-07, "loss": 0.9504, "step": 3038 }, { "epoch": 2.81, "learning_rate": 2.012737920895902e-07, "loss": 0.9168, "step": 3039 }, { "epoch": 2.81, "learning_rate": 1.9928271168121016e-07, "loss": 0.9348, "step": 3040 }, { "epoch": 2.82, "learning_rate": 1.9730142944822783e-07, "loss": 0.8925, "step": 3041 }, { "epoch": 2.82, "learning_rate": 1.953299473714121e-07, "loss": 0.9246, "step": 3042 }, { "epoch": 2.82, "learning_rate": 1.9336826742173186e-07, "loss": 1.0369, "step": 3043 }, { "epoch": 2.82, "learning_rate": 1.914163915603595e-07, "loss": 0.9533, "step": 3044 }, { "epoch": 2.82, "learning_rate": 1.894743217386652e-07, "loss": 0.9615, "step": 3045 }, { "epoch": 2.82, "learning_rate": 1.8754205989821362e-07, "loss": 0.9883, "step": 3046 }, { "epoch": 2.82, "learning_rate": 1.8561960797076616e-07, "loss": 0.924, "step": 3047 }, { "epoch": 2.82, "learning_rate": 1.8370696787827658e-07, "loss": 0.9705, "step": 3048 }, { "epoch": 2.82, "learning_rate": 1.818041415328886e-07, "loss": 0.943, "step": 3049 }, { "epoch": 2.82, "learning_rate": 1.7991113083693501e-07, "loss": 0.934, "step": 3050 }, { "epoch": 2.82, "learning_rate": 1.7802793768293413e-07, "loss": 0.96, "step": 3051 }, { "epoch": 2.83, "learning_rate": 1.7615456395359333e-07, "loss": 0.9394, "step": 3052 }, { "epoch": 2.83, "learning_rate": 1.7429101152179884e-07, "loss": 0.9643, "step": 3053 }, { "epoch": 2.83, "learning_rate": 1.724372822506204e-07, "loss": 0.8857, "step": 3054 }, { "epoch": 2.83, "learning_rate": 1.7059337799330555e-07, "loss": 0.9302, "step": 3055 }, { "epoch": 2.83, "learning_rate": 1.6875930059328193e-07, "loss": 0.8794, "step": 3056 }, { "epoch": 2.83, "learning_rate": 1.6693505188414949e-07, "loss": 0.9605, "step": 3057 }, { "epoch": 2.83, "learning_rate": 1.6512063368968488e-07, "loss": 0.9398, "step": 3058 }, { "epoch": 2.83, "learning_rate": 1.6331604782383604e-07, "loss": 1.0154, "step": 3059 }, { "epoch": 2.83, "learning_rate": 1.6152129609071977e-07, "loss": 0.9874, "step": 3060 }, { "epoch": 2.83, "learning_rate": 1.5973638028462303e-07, "loss": 0.9709, "step": 3061 }, { "epoch": 2.83, "learning_rate": 1.5796130218999728e-07, "loss": 0.9252, "step": 3062 }, { "epoch": 2.84, "learning_rate": 1.56196063581463e-07, "loss": 0.8411, "step": 3063 }, { "epoch": 2.84, "learning_rate": 1.544406662237985e-07, "loss": 0.9947, "step": 3064 }, { "epoch": 2.84, "learning_rate": 1.5269511187194774e-07, "loss": 0.9767, "step": 3065 }, { "epoch": 2.84, "learning_rate": 1.5095940227101036e-07, "loss": 0.9664, "step": 3066 }, { "epoch": 2.84, "learning_rate": 1.4923353915624716e-07, "loss": 0.8821, "step": 3067 }, { "epoch": 2.84, "learning_rate": 1.4751752425307243e-07, "loss": 0.9747, "step": 3068 }, { "epoch": 2.84, "learning_rate": 1.4581135927705824e-07, "loss": 0.9866, "step": 3069 }, { "epoch": 2.84, "learning_rate": 1.4411504593392468e-07, "loss": 0.9507, "step": 3070 }, { "epoch": 2.84, "learning_rate": 1.4242858591954735e-07, "loss": 0.9782, "step": 3071 }, { "epoch": 2.84, "learning_rate": 1.4075198091994647e-07, "loss": 0.9552, "step": 3072 }, { "epoch": 2.85, "learning_rate": 1.3908523261129349e-07, "loss": 0.9379, "step": 3073 }, { "epoch": 2.85, "learning_rate": 1.3742834265990434e-07, "loss": 0.9961, "step": 3074 }, { "epoch": 2.85, "learning_rate": 1.3578131272223848e-07, "loss": 0.9766, "step": 3075 }, { "epoch": 2.85, "learning_rate": 1.34144144444901e-07, "loss": 0.9451, "step": 3076 }, { "epoch": 2.85, "learning_rate": 1.3251683946463256e-07, "loss": 0.9068, "step": 3077 }, { "epoch": 2.85, "learning_rate": 1.3089939940831854e-07, "loss": 0.9673, "step": 3078 }, { "epoch": 2.85, "learning_rate": 1.2929182589297762e-07, "loss": 0.9813, "step": 3079 }, { "epoch": 2.85, "learning_rate": 1.2769412052576758e-07, "loss": 0.9753, "step": 3080 }, { "epoch": 2.85, "learning_rate": 1.2610628490397847e-07, "loss": 0.9521, "step": 3081 }, { "epoch": 2.85, "learning_rate": 1.2452832061503606e-07, "loss": 0.9599, "step": 3082 }, { "epoch": 2.85, "learning_rate": 1.2296022923649286e-07, "loss": 0.9918, "step": 3083 }, { "epoch": 2.86, "learning_rate": 1.2140201233603598e-07, "loss": 1.0239, "step": 3084 }, { "epoch": 2.86, "learning_rate": 1.1985367147147488e-07, "loss": 0.8823, "step": 3085 }, { "epoch": 2.86, "learning_rate": 1.1831520819075237e-07, "loss": 0.8451, "step": 3086 }, { "epoch": 2.86, "learning_rate": 1.1678662403193041e-07, "loss": 0.9516, "step": 3087 }, { "epoch": 2.86, "learning_rate": 1.1526792052319769e-07, "loss": 0.9873, "step": 3088 }, { "epoch": 2.86, "learning_rate": 1.137590991828641e-07, "loss": 0.9248, "step": 3089 }, { "epoch": 2.86, "learning_rate": 1.122601615193597e-07, "loss": 0.9368, "step": 3090 }, { "epoch": 2.86, "learning_rate": 1.1077110903123134e-07, "loss": 0.9337, "step": 3091 }, { "epoch": 2.86, "learning_rate": 1.092919432071482e-07, "loss": 0.9339, "step": 3092 }, { "epoch": 2.86, "learning_rate": 1.0782266552589072e-07, "loss": 0.9531, "step": 3093 }, { "epoch": 2.86, "learning_rate": 1.06363277456355e-07, "loss": 0.9486, "step": 3094 }, { "epoch": 2.87, "learning_rate": 1.0491378045755395e-07, "loss": 0.9261, "step": 3095 }, { "epoch": 2.87, "learning_rate": 1.0347417597860398e-07, "loss": 0.9355, "step": 3096 }, { "epoch": 2.87, "learning_rate": 1.0204446545873936e-07, "loss": 0.9573, "step": 3097 }, { "epoch": 2.87, "learning_rate": 1.0062465032729896e-07, "loss": 0.9859, "step": 3098 }, { "epoch": 2.87, "learning_rate": 9.921473200372955e-08, "loss": 1.0317, "step": 3099 }, { "epoch": 2.87, "learning_rate": 9.781471189758363e-08, "loss": 1.0064, "step": 3100 }, { "epoch": 2.87, "learning_rate": 9.642459140851824e-08, "loss": 0.9811, "step": 3101 }, { "epoch": 2.87, "learning_rate": 9.504437192629389e-08, "loss": 0.8798, "step": 3102 }, { "epoch": 2.87, "learning_rate": 9.36740548307713e-08, "loss": 0.9012, "step": 3103 }, { "epoch": 2.87, "learning_rate": 9.231364149191236e-08, "loss": 0.9923, "step": 3104 }, { "epoch": 2.87, "learning_rate": 9.096313326977802e-08, "loss": 0.9527, "step": 3105 }, { "epoch": 2.88, "learning_rate": 8.962253151452605e-08, "loss": 0.9321, "step": 3106 }, { "epoch": 2.88, "learning_rate": 8.829183756640991e-08, "loss": 0.9812, "step": 3107 }, { "epoch": 2.88, "learning_rate": 8.697105275577989e-08, "loss": 0.9674, "step": 3108 }, { "epoch": 2.88, "learning_rate": 8.566017840307639e-08, "loss": 0.9116, "step": 3109 }, { "epoch": 2.88, "learning_rate": 8.435921581883554e-08, "loss": 0.969, "step": 3110 }, { "epoch": 2.88, "learning_rate": 8.306816630368142e-08, "loss": 0.9448, "step": 3111 }, { "epoch": 2.88, "learning_rate": 8.178703114832931e-08, "loss": 0.8982, "step": 3112 }, { "epoch": 2.88, "learning_rate": 8.051581163358135e-08, "loss": 1.0028, "step": 3113 }, { "epoch": 2.88, "learning_rate": 7.925450903032761e-08, "loss": 0.9611, "step": 3114 }, { "epoch": 2.88, "learning_rate": 7.800312459954384e-08, "loss": 0.8984, "step": 3115 }, { "epoch": 2.88, "learning_rate": 7.676165959228931e-08, "loss": 1.023, "step": 3116 }, { "epoch": 2.89, "learning_rate": 7.553011524970788e-08, "loss": 0.8898, "step": 3117 }, { "epoch": 2.89, "learning_rate": 7.430849280302465e-08, "loss": 0.9482, "step": 3118 }, { "epoch": 2.89, "learning_rate": 7.309679347354381e-08, "loss": 0.9195, "step": 3119 }, { "epoch": 2.89, "learning_rate": 7.18950184726519e-08, "loss": 0.97, "step": 3120 }, { "epoch": 2.89, "learning_rate": 7.070316900181118e-08, "loss": 1.0107, "step": 3121 }, { "epoch": 2.89, "learning_rate": 6.952124625256296e-08, "loss": 0.9862, "step": 3122 }, { "epoch": 2.89, "learning_rate": 6.834925140652315e-08, "loss": 0.9782, "step": 3123 }, { "epoch": 2.89, "learning_rate": 6.718718563538119e-08, "loss": 0.9722, "step": 3124 }, { "epoch": 2.89, "learning_rate": 6.603505010090328e-08, "loss": 0.9312, "step": 3125 }, { "epoch": 2.89, "learning_rate": 6.489284595492584e-08, "loss": 0.9441, "step": 3126 }, { "epoch": 2.9, "learning_rate": 6.376057433935656e-08, "loss": 0.9575, "step": 3127 }, { "epoch": 2.9, "learning_rate": 6.263823638617328e-08, "loss": 0.9313, "step": 3128 }, { "epoch": 2.9, "learning_rate": 6.15258332174229e-08, "loss": 1.0111, "step": 3129 }, { "epoch": 2.9, "learning_rate": 6.042336594521914e-08, "loss": 0.8867, "step": 3130 }, { "epoch": 2.9, "learning_rate": 5.933083567174591e-08, "loss": 0.8618, "step": 3131 }, { "epoch": 2.9, "learning_rate": 5.824824348924729e-08, "loss": 0.9322, "step": 3132 }, { "epoch": 2.9, "learning_rate": 5.7175590480036405e-08, "loss": 1.0462, "step": 3133 }, { "epoch": 2.9, "learning_rate": 5.611287771648766e-08, "loss": 1.0127, "step": 3134 }, { "epoch": 2.9, "learning_rate": 5.506010626103786e-08, "loss": 0.9781, "step": 3135 }, { "epoch": 2.9, "learning_rate": 5.401727716618621e-08, "loss": 0.9725, "step": 3136 }, { "epoch": 2.9, "learning_rate": 5.298439147448986e-08, "loss": 0.9602, "step": 3137 }, { "epoch": 2.91, "learning_rate": 5.196145021856836e-08, "loss": 0.9536, "step": 3138 }, { "epoch": 2.91, "learning_rate": 5.094845442109586e-08, "loss": 0.9912, "step": 3139 }, { "epoch": 2.91, "learning_rate": 4.994540509480672e-08, "loss": 0.9616, "step": 3140 }, { "epoch": 2.91, "learning_rate": 4.89523032424899e-08, "loss": 0.9564, "step": 3141 }, { "epoch": 2.91, "learning_rate": 4.796914985699008e-08, "loss": 0.9769, "step": 3142 }, { "epoch": 2.91, "learning_rate": 4.699594592120438e-08, "loss": 0.9787, "step": 3143 }, { "epoch": 2.91, "learning_rate": 4.603269240808561e-08, "loss": 0.9516, "step": 3144 }, { "epoch": 2.91, "learning_rate": 4.5079390280636794e-08, "loss": 0.9636, "step": 3145 }, { "epoch": 2.91, "learning_rate": 4.413604049191334e-08, "loss": 0.9526, "step": 3146 }, { "epoch": 2.91, "learning_rate": 4.320264398501972e-08, "loss": 0.9384, "step": 3147 }, { "epoch": 2.91, "learning_rate": 4.2279201693110615e-08, "loss": 0.9609, "step": 3148 }, { "epoch": 2.92, "learning_rate": 4.1365714539388645e-08, "loss": 0.9589, "step": 3149 }, { "epoch": 2.92, "learning_rate": 4.0462183437104395e-08, "loss": 1.0237, "step": 3150 }, { "epoch": 2.92, "learning_rate": 3.95686092895553e-08, "loss": 0.9273, "step": 3151 }, { "epoch": 2.92, "learning_rate": 3.868499299008233e-08, "loss": 0.9479, "step": 3152 }, { "epoch": 2.92, "learning_rate": 3.781133542207327e-08, "loss": 0.9699, "step": 3153 }, { "epoch": 2.92, "learning_rate": 3.694763745895835e-08, "loss": 0.9207, "step": 3154 }, { "epoch": 2.92, "learning_rate": 3.609389996421242e-08, "loss": 1.0396, "step": 3155 }, { "epoch": 2.92, "learning_rate": 3.525012379135162e-08, "loss": 0.946, "step": 3156 }, { "epoch": 2.92, "learning_rate": 3.44163097839334e-08, "loss": 0.8931, "step": 3157 }, { "epoch": 2.92, "learning_rate": 3.359245877555428e-08, "loss": 0.9312, "step": 3158 }, { "epoch": 2.92, "learning_rate": 3.27785715898532e-08, "loss": 0.9789, "step": 3159 }, { "epoch": 2.93, "learning_rate": 3.197464904050485e-08, "loss": 0.9758, "step": 3160 }, { "epoch": 2.93, "learning_rate": 3.118069193122408e-08, "loss": 0.9073, "step": 3161 }, { "epoch": 2.93, "learning_rate": 3.0396701055762645e-08, "loss": 0.9647, "step": 3162 }, { "epoch": 2.93, "learning_rate": 2.9622677197906902e-08, "loss": 0.9338, "step": 3163 }, { "epoch": 2.93, "learning_rate": 2.885862113148008e-08, "loss": 0.9091, "step": 3164 }, { "epoch": 2.93, "learning_rate": 2.8104533620341158e-08, "loss": 1.0114, "step": 3165 }, { "epoch": 2.93, "learning_rate": 2.7360415418380415e-08, "loss": 0.935, "step": 3166 }, { "epoch": 2.93, "learning_rate": 2.6626267269524996e-08, "loss": 0.9139, "step": 3167 }, { "epoch": 2.93, "learning_rate": 2.5902089907730022e-08, "loss": 0.917, "step": 3168 }, { "epoch": 2.93, "learning_rate": 2.5187884056988575e-08, "loss": 1.0166, "step": 3169 }, { "epoch": 2.93, "learning_rate": 2.4483650431318395e-08, "loss": 0.9612, "step": 3170 }, { "epoch": 2.94, "learning_rate": 2.378938973477074e-08, "loss": 0.9504, "step": 3171 }, { "epoch": 2.94, "learning_rate": 2.310510266142818e-08, "loss": 1.0659, "step": 3172 }, { "epoch": 2.94, "learning_rate": 2.243078989539793e-08, "loss": 1.016, "step": 3173 }, { "epoch": 2.94, "learning_rate": 2.1766452110819626e-08, "loss": 0.9472, "step": 3174 }, { "epoch": 2.94, "learning_rate": 2.1112089971857542e-08, "loss": 1.0765, "step": 3175 }, { "epoch": 2.94, "learning_rate": 2.0467704132705046e-08, "loss": 0.9349, "step": 3176 }, { "epoch": 2.94, "learning_rate": 1.983329523758126e-08, "loss": 0.9449, "step": 3177 }, { "epoch": 2.94, "learning_rate": 1.920886392072885e-08, "loss": 0.9301, "step": 3178 }, { "epoch": 2.94, "learning_rate": 1.8594410806418438e-08, "loss": 0.9324, "step": 3179 }, { "epoch": 2.94, "learning_rate": 1.7989936508943095e-08, "loss": 0.939, "step": 3180 }, { "epoch": 2.95, "learning_rate": 1.7395441632620526e-08, "loss": 0.9251, "step": 3181 }, { "epoch": 2.95, "learning_rate": 1.681092677179197e-08, "loss": 0.9967, "step": 3182 }, { "epoch": 2.95, "learning_rate": 1.6236392510819987e-08, "loss": 0.9908, "step": 3183 }, { "epoch": 2.95, "learning_rate": 1.5671839424090674e-08, "loss": 0.9128, "step": 3184 }, { "epoch": 2.95, "learning_rate": 1.5117268076009218e-08, "loss": 0.9656, "step": 3185 }, { "epoch": 2.95, "learning_rate": 1.4572679021004344e-08, "loss": 0.9466, "step": 3186 }, { "epoch": 2.95, "learning_rate": 1.4038072803523872e-08, "loss": 0.92, "step": 3187 }, { "epoch": 2.95, "learning_rate": 1.3513449958034718e-08, "loss": 0.9218, "step": 3188 }, { "epoch": 2.95, "learning_rate": 1.2998811009024003e-08, "loss": 0.9358, "step": 3189 }, { "epoch": 2.95, "learning_rate": 1.2494156470997942e-08, "loss": 0.9574, "step": 3190 }, { "epoch": 2.95, "learning_rate": 1.1999486848478515e-08, "loss": 0.9248, "step": 3191 }, { "epoch": 2.96, "learning_rate": 1.1514802636009014e-08, "loss": 0.9893, "step": 3192 }, { "epoch": 2.96, "learning_rate": 1.1040104318146283e-08, "loss": 0.9089, "step": 3193 }, { "epoch": 2.96, "learning_rate": 1.0575392369466254e-08, "loss": 1.0016, "step": 3194 }, { "epoch": 2.96, "learning_rate": 1.0120667254559513e-08, "loss": 0.9455, "step": 3195 }, { "epoch": 2.96, "learning_rate": 9.675929428035746e-09, "loss": 0.93, "step": 3196 }, { "epoch": 2.96, "learning_rate": 9.24117933451374e-09, "loss": 0.9996, "step": 3197 }, { "epoch": 2.96, "learning_rate": 8.816417408634704e-09, "loss": 0.8902, "step": 3198 }, { "epoch": 2.96, "learning_rate": 8.401644075046732e-09, "loss": 0.9288, "step": 3199 }, { "epoch": 2.96, "learning_rate": 7.996859748418129e-09, "loss": 1.0003, "step": 3200 }, { "epoch": 2.96, "learning_rate": 7.602064833427403e-09, "loss": 0.9736, "step": 3201 }, { "epoch": 2.96, "learning_rate": 7.217259724766612e-09, "loss": 1.0192, "step": 3202 }, { "epoch": 2.97, "learning_rate": 6.842444807142468e-09, "loss": 0.9383, "step": 3203 }, { "epoch": 2.97, "learning_rate": 6.477620455270783e-09, "loss": 0.9921, "step": 3204 }, { "epoch": 2.97, "learning_rate": 6.122787033883137e-09, "loss": 1.0055, "step": 3205 }, { "epoch": 2.97, "learning_rate": 5.777944897719101e-09, "loss": 0.9491, "step": 3206 }, { "epoch": 2.97, "learning_rate": 5.4430943915329e-09, "loss": 0.9549, "step": 3207 }, { "epoch": 2.97, "learning_rate": 5.11823585008675e-09, "loss": 0.8997, "step": 3208 }, { "epoch": 2.97, "learning_rate": 4.803369598157526e-09, "loss": 0.9787, "step": 3209 }, { "epoch": 2.97, "learning_rate": 4.49849595052676e-09, "loss": 0.9213, "step": 3210 }, { "epoch": 2.97, "learning_rate": 4.203615211990641e-09, "loss": 1.0346, "step": 3211 }, { "epoch": 2.97, "learning_rate": 3.918727677353351e-09, "loss": 0.9206, "step": 3212 }, { "epoch": 2.97, "learning_rate": 3.643833631428173e-09, "loss": 0.9666, "step": 3213 }, { "epoch": 2.98, "learning_rate": 3.378933349037494e-09, "loss": 0.92, "step": 3214 }, { "epoch": 2.98, "learning_rate": 3.1240270950139152e-09, "loss": 0.9154, "step": 3215 }, { "epoch": 2.98, "learning_rate": 2.8791151241969184e-09, "loss": 0.9858, "step": 3216 }, { "epoch": 2.98, "learning_rate": 2.64419768143509e-09, "loss": 0.9756, "step": 3217 }, { "epoch": 2.98, "learning_rate": 2.4192750015850088e-09, "loss": 0.9554, "step": 3218 }, { "epoch": 2.98, "learning_rate": 2.2043473095112454e-09, "loss": 0.9818, "step": 3219 }, { "epoch": 2.98, "learning_rate": 1.999414820085255e-09, "loss": 0.9723, "step": 3220 }, { "epoch": 2.98, "learning_rate": 1.8044777381875933e-09, "loss": 0.9594, "step": 3221 }, { "epoch": 2.98, "learning_rate": 1.61953625870237e-09, "loss": 0.9631, "step": 3222 }, { "epoch": 2.98, "learning_rate": 1.444590566525017e-09, "loss": 0.9329, "step": 3223 }, { "epoch": 2.98, "learning_rate": 1.279640836555629e-09, "loss": 0.95, "step": 3224 }, { "epoch": 2.99, "learning_rate": 1.1246872337000724e-09, "loss": 0.9387, "step": 3225 }, { "epoch": 2.99, "learning_rate": 9.797299128733172e-10, "loss": 0.9131, "step": 3226 }, { "epoch": 2.99, "learning_rate": 8.447690189938851e-10, "loss": 0.8874, "step": 3227 }, { "epoch": 2.99, "learning_rate": 7.198046869882902e-10, "loss": 0.997, "step": 3228 }, { "epoch": 2.99, "learning_rate": 6.048370417888194e-10, "loss": 0.9822, "step": 3229 }, { "epoch": 2.99, "learning_rate": 4.998661983324215e-10, "loss": 0.9299, "step": 3230 }, { "epoch": 2.99, "learning_rate": 4.0489226156292806e-10, "loss": 0.9908, "step": 3231 }, { "epoch": 2.99, "learning_rate": 3.199153264299426e-10, "loss": 0.9449, "step": 3232 }, { "epoch": 2.99, "learning_rate": 2.449354778877311e-10, "loss": 0.9116, "step": 3233 }, { "epoch": 2.99, "learning_rate": 1.7995279089744188e-10, "loss": 1.0125, "step": 3234 }, { "epoch": 3.0, "learning_rate": 1.2496733042488551e-10, "loss": 0.9764, "step": 3235 }, { "epoch": 3.0, "learning_rate": 7.997915144053459e-11, "loss": 0.9261, "step": 3236 }, { "epoch": 3.0, "learning_rate": 4.498829892174428e-11, "loss": 0.9535, "step": 3237 }, { "epoch": 3.0, "learning_rate": 1.999480784942165e-11, "loss": 0.9591, "step": 3238 }, { "epoch": 3.0, "learning_rate": 4.998703211356315e-12, "loss": 0.9292, "step": 3239 }, { "epoch": 3.0, "learning_rate": 0.0, "loss": 0.9844, "step": 3240 }, { "epoch": 3.0, "step": 3240, "total_flos": 1.6370718981331354e+18, "train_loss": 1.29761775776192, "train_runtime": 56604.187, "train_samples_per_second": 10.992, "train_steps_per_second": 0.057 } ], "max_steps": 3240, "num_train_epochs": 3, "total_flos": 1.6370718981331354e+18, "trial_name": null, "trial_params": null }