{ "best_metric": null, "best_model_checkpoint": null, "epoch": 1.0, "global_step": 6981, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0, "learning_rate": 1.4285714285714287e-07, "loss": 5.1295, "step": 1 }, { "epoch": 0.0, "learning_rate": 2.8571428571428575e-07, "loss": 4.7679, "step": 2 }, { "epoch": 0.0, "learning_rate": 4.2857142857142857e-07, "loss": 4.3996, "step": 3 }, { "epoch": 0.0, "learning_rate": 5.714285714285715e-07, "loss": 4.9955, "step": 4 }, { "epoch": 0.0, "learning_rate": 7.142857142857143e-07, "loss": 5.6138, "step": 5 }, { "epoch": 0.0, "learning_rate": 8.571428571428571e-07, "loss": 4.7768, "step": 6 }, { "epoch": 0.0, "learning_rate": 1e-06, "loss": 4.154, "step": 7 }, { "epoch": 0.0, "learning_rate": 1.142857142857143e-06, "loss": 4.6607, "step": 8 }, { "epoch": 0.0, "learning_rate": 1.2857142857142858e-06, "loss": 4.567, "step": 9 }, { "epoch": 0.0, "learning_rate": 1.4285714285714286e-06, "loss": 4.0781, "step": 10 }, { "epoch": 0.0, "learning_rate": 1.5714285714285714e-06, "loss": 3.4531, "step": 11 }, { "epoch": 0.0, "learning_rate": 1.7142857142857143e-06, "loss": 3.3415, "step": 12 }, { "epoch": 0.0, "learning_rate": 1.8571428571428573e-06, "loss": 3.1763, "step": 13 }, { "epoch": 0.0, "learning_rate": 2e-06, "loss": 3.3817, "step": 14 }, { "epoch": 0.0, "learning_rate": 2.1428571428571427e-06, "loss": 3.0313, "step": 15 }, { "epoch": 0.0, "learning_rate": 2.285714285714286e-06, "loss": 2.6607, "step": 16 }, { "epoch": 0.0, "learning_rate": 2.428571428571429e-06, "loss": 2.6451, "step": 17 }, { "epoch": 0.0, "learning_rate": 2.5714285714285716e-06, "loss": 2.5201, "step": 18 }, { "epoch": 0.0, "learning_rate": 2.7142857142857144e-06, "loss": 2.2768, "step": 19 }, { "epoch": 0.0, "learning_rate": 2.8571428571428573e-06, "loss": 1.9431, "step": 20 }, { "epoch": 0.0, "learning_rate": 3e-06, "loss": 2.1808, "step": 21 }, { "epoch": 0.0, "learning_rate": 3.142857142857143e-06, "loss": 2.0312, "step": 22 }, { "epoch": 0.0, "learning_rate": 3.2857142857142857e-06, "loss": 1.9955, "step": 23 }, { "epoch": 0.0, "learning_rate": 3.4285714285714285e-06, "loss": 2.1239, "step": 24 }, { "epoch": 0.0, "learning_rate": 3.5714285714285714e-06, "loss": 1.7433, "step": 25 }, { "epoch": 0.0, "learning_rate": 3.7142857142857146e-06, "loss": 2.2946, "step": 26 }, { "epoch": 0.0, "learning_rate": 3.857142857142857e-06, "loss": 1.7857, "step": 27 }, { "epoch": 0.0, "learning_rate": 4e-06, "loss": 1.8984, "step": 28 }, { "epoch": 0.0, "learning_rate": 4.1428571428571435e-06, "loss": 1.654, "step": 29 }, { "epoch": 0.0, "learning_rate": 4.2857142857142855e-06, "loss": 1.6987, "step": 30 }, { "epoch": 0.0, "learning_rate": 4.428571428571429e-06, "loss": 1.567, "step": 31 }, { "epoch": 0.0, "learning_rate": 4.571428571428572e-06, "loss": 1.4051, "step": 32 }, { "epoch": 0.0, "learning_rate": 4.714285714285714e-06, "loss": 1.5971, "step": 33 }, { "epoch": 0.0, "learning_rate": 4.857142857142858e-06, "loss": 1.7176, "step": 34 }, { "epoch": 0.01, "learning_rate": 4.9999999999999996e-06, "loss": 1.6496, "step": 35 }, { "epoch": 0.01, "learning_rate": 5.142857142857143e-06, "loss": 1.9665, "step": 36 }, { "epoch": 0.01, "learning_rate": 5.285714285714286e-06, "loss": 1.5826, "step": 37 }, { "epoch": 0.01, "learning_rate": 5.428571428571429e-06, "loss": 1.3504, "step": 38 }, { "epoch": 0.01, "learning_rate": 5.571428571428572e-06, "loss": 1.4933, "step": 39 }, { "epoch": 0.01, "learning_rate": 5.7142857142857145e-06, "loss": 1.4431, "step": 40 }, { "epoch": 0.01, "learning_rate": 5.857142857142857e-06, "loss": 1.4576, "step": 41 }, { "epoch": 0.01, "learning_rate": 6e-06, "loss": 1.4263, "step": 42 }, { "epoch": 0.01, "learning_rate": 6.142857142857143e-06, "loss": 1.3895, "step": 43 }, { "epoch": 0.01, "learning_rate": 6.285714285714286e-06, "loss": 1.2545, "step": 44 }, { "epoch": 0.01, "learning_rate": 6.428571428571429e-06, "loss": 1.3917, "step": 45 }, { "epoch": 0.01, "learning_rate": 6.5714285714285714e-06, "loss": 1.0893, "step": 46 }, { "epoch": 0.01, "learning_rate": 6.714285714285715e-06, "loss": 1.4989, "step": 47 }, { "epoch": 0.01, "learning_rate": 6.857142857142857e-06, "loss": 1.3471, "step": 48 }, { "epoch": 0.01, "learning_rate": 7e-06, "loss": 1.2355, "step": 49 }, { "epoch": 0.01, "learning_rate": 7.142857142857143e-06, "loss": 1.4275, "step": 50 }, { "epoch": 0.01, "learning_rate": 7.2857142857142855e-06, "loss": 1.1412, "step": 51 }, { "epoch": 0.01, "learning_rate": 7.428571428571429e-06, "loss": 1.1133, "step": 52 }, { "epoch": 0.01, "learning_rate": 7.571428571428571e-06, "loss": 1.3092, "step": 53 }, { "epoch": 0.01, "learning_rate": 7.714285714285714e-06, "loss": 1.2483, "step": 54 }, { "epoch": 0.01, "learning_rate": 7.857142857142858e-06, "loss": 1.3962, "step": 55 }, { "epoch": 0.01, "learning_rate": 8e-06, "loss": 1.2751, "step": 56 }, { "epoch": 0.01, "learning_rate": 8.142857142857142e-06, "loss": 1.0061, "step": 57 }, { "epoch": 0.01, "learning_rate": 8.285714285714287e-06, "loss": 1.3348, "step": 58 }, { "epoch": 0.01, "learning_rate": 8.428571428571429e-06, "loss": 1.1401, "step": 59 }, { "epoch": 0.01, "learning_rate": 8.571428571428571e-06, "loss": 1.2411, "step": 60 }, { "epoch": 0.01, "learning_rate": 8.714285714285715e-06, "loss": 1.0636, "step": 61 }, { "epoch": 0.01, "learning_rate": 8.857142857142858e-06, "loss": 0.9319, "step": 62 }, { "epoch": 0.01, "learning_rate": 9e-06, "loss": 1.2042, "step": 63 }, { "epoch": 0.01, "learning_rate": 9.142857142857144e-06, "loss": 1.3605, "step": 64 }, { "epoch": 0.01, "learning_rate": 9.285714285714286e-06, "loss": 1.2366, "step": 65 }, { "epoch": 0.01, "learning_rate": 9.428571428571428e-06, "loss": 1.2494, "step": 66 }, { "epoch": 0.01, "learning_rate": 9.571428571428572e-06, "loss": 1.2455, "step": 67 }, { "epoch": 0.01, "learning_rate": 9.714285714285715e-06, "loss": 1.2472, "step": 68 }, { "epoch": 0.01, "learning_rate": 9.857142857142857e-06, "loss": 1.0608, "step": 69 }, { "epoch": 0.01, "learning_rate": 9.999999999999999e-06, "loss": 1.1713, "step": 70 }, { "epoch": 0.01, "learning_rate": 1.0142857142857144e-05, "loss": 1.3209, "step": 71 }, { "epoch": 0.01, "learning_rate": 1.0285714285714286e-05, "loss": 0.9113, "step": 72 }, { "epoch": 0.01, "learning_rate": 1.0428571428571428e-05, "loss": 1.1663, "step": 73 }, { "epoch": 0.01, "learning_rate": 1.0571428571428572e-05, "loss": 1.0871, "step": 74 }, { "epoch": 0.01, "learning_rate": 1.0714285714285714e-05, "loss": 1.1451, "step": 75 }, { "epoch": 0.01, "learning_rate": 1.0857142857142858e-05, "loss": 1.3198, "step": 76 }, { "epoch": 0.01, "learning_rate": 1.1e-05, "loss": 1.0893, "step": 77 }, { "epoch": 0.01, "learning_rate": 1.1142857142857143e-05, "loss": 1.3231, "step": 78 }, { "epoch": 0.01, "learning_rate": 1.1285714285714285e-05, "loss": 1.2612, "step": 79 }, { "epoch": 0.01, "learning_rate": 1.1428571428571429e-05, "loss": 1.0273, "step": 80 }, { "epoch": 0.01, "learning_rate": 1.1571428571428573e-05, "loss": 1.2087, "step": 81 }, { "epoch": 0.01, "learning_rate": 1.1714285714285715e-05, "loss": 0.7905, "step": 82 }, { "epoch": 0.01, "learning_rate": 1.1857142857142857e-05, "loss": 1.1992, "step": 83 }, { "epoch": 0.01, "learning_rate": 1.2e-05, "loss": 1.0859, "step": 84 }, { "epoch": 0.01, "learning_rate": 1.2142857142857144e-05, "loss": 1.2115, "step": 85 }, { "epoch": 0.01, "learning_rate": 1.2285714285714286e-05, "loss": 0.9688, "step": 86 }, { "epoch": 0.01, "learning_rate": 1.242857142857143e-05, "loss": 1.048, "step": 87 }, { "epoch": 0.01, "learning_rate": 1.2571428571428572e-05, "loss": 0.8945, "step": 88 }, { "epoch": 0.01, "learning_rate": 1.2714285714285714e-05, "loss": 1.1574, "step": 89 }, { "epoch": 0.01, "learning_rate": 1.2857142857142857e-05, "loss": 1.0575, "step": 90 }, { "epoch": 0.01, "learning_rate": 1.3000000000000001e-05, "loss": 1.26, "step": 91 }, { "epoch": 0.01, "learning_rate": 1.3142857142857143e-05, "loss": 1.1568, "step": 92 }, { "epoch": 0.01, "learning_rate": 1.3285714285714285e-05, "loss": 1.1267, "step": 93 }, { "epoch": 0.01, "learning_rate": 1.342857142857143e-05, "loss": 1.4531, "step": 94 }, { "epoch": 0.01, "learning_rate": 1.3571428571428572e-05, "loss": 1.096, "step": 95 }, { "epoch": 0.01, "learning_rate": 1.3714285714285714e-05, "loss": 1.0368, "step": 96 }, { "epoch": 0.01, "learning_rate": 1.3857142857142858e-05, "loss": 1.0151, "step": 97 }, { "epoch": 0.01, "learning_rate": 1.4e-05, "loss": 1.1071, "step": 98 }, { "epoch": 0.01, "learning_rate": 1.4142857142857143e-05, "loss": 0.9626, "step": 99 }, { "epoch": 0.01, "learning_rate": 1.4285714285714285e-05, "loss": 1.207, "step": 100 }, { "epoch": 0.01, "learning_rate": 1.4428571428571429e-05, "loss": 1.0279, "step": 101 }, { "epoch": 0.01, "learning_rate": 1.4571428571428571e-05, "loss": 0.9199, "step": 102 }, { "epoch": 0.01, "learning_rate": 1.4714285714285715e-05, "loss": 0.9621, "step": 103 }, { "epoch": 0.01, "learning_rate": 1.4857142857142858e-05, "loss": 1.2249, "step": 104 }, { "epoch": 0.02, "learning_rate": 1.5e-05, "loss": 0.8856, "step": 105 }, { "epoch": 0.02, "learning_rate": 1.5142857142857142e-05, "loss": 0.9994, "step": 106 }, { "epoch": 0.02, "learning_rate": 1.5285714285714286e-05, "loss": 0.9855, "step": 107 }, { "epoch": 0.02, "learning_rate": 1.5428571428571428e-05, "loss": 1.0346, "step": 108 }, { "epoch": 0.02, "learning_rate": 1.5571428571428573e-05, "loss": 1.0485, "step": 109 }, { "epoch": 0.02, "learning_rate": 1.5714285714285715e-05, "loss": 1.0435, "step": 110 }, { "epoch": 0.02, "learning_rate": 1.5857142857142857e-05, "loss": 0.9559, "step": 111 }, { "epoch": 0.02, "learning_rate": 1.6e-05, "loss": 1.1758, "step": 112 }, { "epoch": 0.02, "learning_rate": 1.614285714285714e-05, "loss": 0.9431, "step": 113 }, { "epoch": 0.02, "learning_rate": 1.6285714285714283e-05, "loss": 0.9676, "step": 114 }, { "epoch": 0.02, "learning_rate": 1.6428571428571432e-05, "loss": 1.0011, "step": 115 }, { "epoch": 0.02, "learning_rate": 1.6571428571428574e-05, "loss": 0.9721, "step": 116 }, { "epoch": 0.02, "learning_rate": 1.6714285714285716e-05, "loss": 1.0137, "step": 117 }, { "epoch": 0.02, "learning_rate": 1.6857142857142858e-05, "loss": 0.8521, "step": 118 }, { "epoch": 0.02, "learning_rate": 1.7e-05, "loss": 0.9085, "step": 119 }, { "epoch": 0.02, "learning_rate": 1.7142857142857142e-05, "loss": 1.0921, "step": 120 }, { "epoch": 0.02, "learning_rate": 1.7285714285714284e-05, "loss": 1.0614, "step": 121 }, { "epoch": 0.02, "learning_rate": 1.742857142857143e-05, "loss": 0.9009, "step": 122 }, { "epoch": 0.02, "learning_rate": 1.757142857142857e-05, "loss": 1.1267, "step": 123 }, { "epoch": 0.02, "learning_rate": 1.7714285714285717e-05, "loss": 1.0837, "step": 124 }, { "epoch": 0.02, "learning_rate": 1.785714285714286e-05, "loss": 1.0698, "step": 125 }, { "epoch": 0.02, "learning_rate": 1.8e-05, "loss": 0.8242, "step": 126 }, { "epoch": 0.02, "learning_rate": 1.8142857142857142e-05, "loss": 0.9933, "step": 127 }, { "epoch": 0.02, "learning_rate": 1.8285714285714288e-05, "loss": 0.8917, "step": 128 }, { "epoch": 0.02, "learning_rate": 1.842857142857143e-05, "loss": 1.1239, "step": 129 }, { "epoch": 0.02, "learning_rate": 1.8571428571428572e-05, "loss": 0.8666, "step": 130 }, { "epoch": 0.02, "learning_rate": 1.8714285714285714e-05, "loss": 1.0982, "step": 131 }, { "epoch": 0.02, "learning_rate": 1.8857142857142856e-05, "loss": 0.9872, "step": 132 }, { "epoch": 0.02, "learning_rate": 1.9e-05, "loss": 0.9933, "step": 133 }, { "epoch": 0.02, "learning_rate": 1.9142857142857143e-05, "loss": 0.9955, "step": 134 }, { "epoch": 0.02, "learning_rate": 1.928571428571429e-05, "loss": 0.8376, "step": 135 }, { "epoch": 0.02, "learning_rate": 1.942857142857143e-05, "loss": 1.1256, "step": 136 }, { "epoch": 0.02, "learning_rate": 1.9571428571428572e-05, "loss": 0.9196, "step": 137 }, { "epoch": 0.02, "learning_rate": 1.9714285714285714e-05, "loss": 0.9994, "step": 138 }, { "epoch": 0.02, "learning_rate": 1.9857142857142856e-05, "loss": 1.072, "step": 139 }, { "epoch": 0.02, "learning_rate": 1.9999999999999998e-05, "loss": 1.1702, "step": 140 }, { "epoch": 0.02, "learning_rate": 2.014285714285714e-05, "loss": 0.957, "step": 141 }, { "epoch": 0.02, "learning_rate": 2.028571428571429e-05, "loss": 1.2093, "step": 142 }, { "epoch": 0.02, "learning_rate": 2.042857142857143e-05, "loss": 0.9978, "step": 143 }, { "epoch": 0.02, "learning_rate": 2.0571428571428573e-05, "loss": 1.2746, "step": 144 }, { "epoch": 0.02, "learning_rate": 2.0714285714285715e-05, "loss": 1.0831, "step": 145 }, { "epoch": 0.02, "learning_rate": 2.0857142857142857e-05, "loss": 0.8845, "step": 146 }, { "epoch": 0.02, "learning_rate": 2.1e-05, "loss": 1.0625, "step": 147 }, { "epoch": 0.02, "learning_rate": 2.1142857142857144e-05, "loss": 1.0558, "step": 148 }, { "epoch": 0.02, "learning_rate": 2.1285714285714286e-05, "loss": 0.976, "step": 149 }, { "epoch": 0.02, "learning_rate": 2.1428571428571428e-05, "loss": 1.1775, "step": 150 }, { "epoch": 0.02, "learning_rate": 2.1571428571428574e-05, "loss": 1.1752, "step": 151 }, { "epoch": 0.02, "learning_rate": 2.1714285714285715e-05, "loss": 0.9788, "step": 152 }, { "epoch": 0.02, "learning_rate": 2.1857142857142857e-05, "loss": 0.9939, "step": 153 }, { "epoch": 0.02, "learning_rate": 2.2e-05, "loss": 0.9688, "step": 154 }, { "epoch": 0.02, "learning_rate": 2.2142857142857145e-05, "loss": 1.2126, "step": 155 }, { "epoch": 0.02, "learning_rate": 2.2285714285714287e-05, "loss": 1.1496, "step": 156 }, { "epoch": 0.02, "learning_rate": 2.242857142857143e-05, "loss": 1.0999, "step": 157 }, { "epoch": 0.02, "learning_rate": 2.257142857142857e-05, "loss": 0.793, "step": 158 }, { "epoch": 0.02, "learning_rate": 2.2714285714285713e-05, "loss": 0.9481, "step": 159 }, { "epoch": 0.02, "learning_rate": 2.2857142857142858e-05, "loss": 1.0982, "step": 160 }, { "epoch": 0.02, "learning_rate": 2.3000000000000003e-05, "loss": 0.9157, "step": 161 }, { "epoch": 0.02, "learning_rate": 2.3142857142857145e-05, "loss": 1.0206, "step": 162 }, { "epoch": 0.02, "learning_rate": 2.3285714285714287e-05, "loss": 0.9068, "step": 163 }, { "epoch": 0.02, "learning_rate": 2.342857142857143e-05, "loss": 0.873, "step": 164 }, { "epoch": 0.02, "learning_rate": 2.357142857142857e-05, "loss": 0.9794, "step": 165 }, { "epoch": 0.02, "learning_rate": 2.3714285714285713e-05, "loss": 0.9676, "step": 166 }, { "epoch": 0.02, "learning_rate": 2.3857142857142855e-05, "loss": 0.8951, "step": 167 }, { "epoch": 0.02, "learning_rate": 2.4e-05, "loss": 1.2048, "step": 168 }, { "epoch": 0.02, "learning_rate": 2.4142857142857146e-05, "loss": 1.1685, "step": 169 }, { "epoch": 0.02, "learning_rate": 2.4285714285714288e-05, "loss": 1.1864, "step": 170 }, { "epoch": 0.02, "learning_rate": 2.442857142857143e-05, "loss": 0.9598, "step": 171 }, { "epoch": 0.02, "learning_rate": 2.4571428571428572e-05, "loss": 0.9805, "step": 172 }, { "epoch": 0.02, "learning_rate": 2.4714285714285714e-05, "loss": 0.8387, "step": 173 }, { "epoch": 0.02, "learning_rate": 2.485714285714286e-05, "loss": 0.9771, "step": 174 }, { "epoch": 0.03, "learning_rate": 2.5e-05, "loss": 0.9989, "step": 175 }, { "epoch": 0.03, "learning_rate": 2.5142857142857143e-05, "loss": 1.3103, "step": 176 }, { "epoch": 0.03, "learning_rate": 2.5285714285714285e-05, "loss": 0.9654, "step": 177 }, { "epoch": 0.03, "learning_rate": 2.5428571428571427e-05, "loss": 1.1267, "step": 178 }, { "epoch": 0.03, "learning_rate": 2.5571428571428572e-05, "loss": 1.0748, "step": 179 }, { "epoch": 0.03, "learning_rate": 2.5714285714285714e-05, "loss": 0.8856, "step": 180 }, { "epoch": 0.03, "learning_rate": 2.585714285714286e-05, "loss": 1.1239, "step": 181 }, { "epoch": 0.03, "learning_rate": 2.6000000000000002e-05, "loss": 1.0631, "step": 182 }, { "epoch": 0.03, "learning_rate": 2.6142857142857144e-05, "loss": 1.1652, "step": 183 }, { "epoch": 0.03, "learning_rate": 2.6285714285714286e-05, "loss": 0.9838, "step": 184 }, { "epoch": 0.03, "learning_rate": 2.6428571428571428e-05, "loss": 1.014, "step": 185 }, { "epoch": 0.03, "learning_rate": 2.657142857142857e-05, "loss": 1.0441, "step": 186 }, { "epoch": 0.03, "learning_rate": 2.671428571428571e-05, "loss": 0.9102, "step": 187 }, { "epoch": 0.03, "learning_rate": 2.685714285714286e-05, "loss": 0.9927, "step": 188 }, { "epoch": 0.03, "learning_rate": 2.7000000000000002e-05, "loss": 0.9682, "step": 189 }, { "epoch": 0.03, "learning_rate": 2.7142857142857144e-05, "loss": 0.8454, "step": 190 }, { "epoch": 0.03, "learning_rate": 2.7285714285714286e-05, "loss": 0.9113, "step": 191 }, { "epoch": 0.03, "learning_rate": 2.7428571428571428e-05, "loss": 1.0452, "step": 192 }, { "epoch": 0.03, "learning_rate": 2.757142857142857e-05, "loss": 0.9503, "step": 193 }, { "epoch": 0.03, "learning_rate": 2.7714285714285716e-05, "loss": 0.9319, "step": 194 }, { "epoch": 0.03, "learning_rate": 2.7857142857142858e-05, "loss": 1.0647, "step": 195 }, { "epoch": 0.03, "learning_rate": 2.8e-05, "loss": 1.1272, "step": 196 }, { "epoch": 0.03, "learning_rate": 2.8142857142857145e-05, "loss": 1.0592, "step": 197 }, { "epoch": 0.03, "learning_rate": 2.8285714285714287e-05, "loss": 1.0218, "step": 198 }, { "epoch": 0.03, "learning_rate": 2.842857142857143e-05, "loss": 0.9001, "step": 199 }, { "epoch": 0.03, "learning_rate": 2.857142857142857e-05, "loss": 0.9967, "step": 200 }, { "epoch": 0.03, "learning_rate": 2.8714285714285716e-05, "loss": 1.082, "step": 201 }, { "epoch": 0.03, "learning_rate": 2.8857142857142858e-05, "loss": 1.0614, "step": 202 }, { "epoch": 0.03, "learning_rate": 2.9e-05, "loss": 0.9269, "step": 203 }, { "epoch": 0.03, "learning_rate": 2.9142857142857142e-05, "loss": 1.0993, "step": 204 }, { "epoch": 0.03, "learning_rate": 2.9285714285714284e-05, "loss": 0.8912, "step": 205 }, { "epoch": 0.03, "learning_rate": 2.942857142857143e-05, "loss": 1.3181, "step": 206 }, { "epoch": 0.03, "learning_rate": 2.9571428571428575e-05, "loss": 0.8795, "step": 207 }, { "epoch": 0.03, "learning_rate": 2.9714285714285717e-05, "loss": 1.1306, "step": 208 }, { "epoch": 0.03, "learning_rate": 2.985714285714286e-05, "loss": 0.9944, "step": 209 }, { "epoch": 0.03, "learning_rate": 3e-05, "loss": 0.9788, "step": 210 }, { "epoch": 0.03, "learning_rate": 2.99999983854356e-05, "loss": 0.8672, "step": 211 }, { "epoch": 0.03, "learning_rate": 2.9999993541742732e-05, "loss": 0.9347, "step": 212 }, { "epoch": 0.03, "learning_rate": 2.9999985468922454e-05, "loss": 0.957, "step": 213 }, { "epoch": 0.03, "learning_rate": 2.999997416697649e-05, "loss": 1.0167, "step": 214 }, { "epoch": 0.03, "learning_rate": 2.9999959635907287e-05, "loss": 1.1579, "step": 215 }, { "epoch": 0.03, "learning_rate": 2.999994187571796e-05, "loss": 0.9381, "step": 216 }, { "epoch": 0.03, "learning_rate": 2.999992088641234e-05, "loss": 0.9475, "step": 217 }, { "epoch": 0.03, "learning_rate": 2.9999896667994942e-05, "loss": 1.1864, "step": 218 }, { "epoch": 0.03, "learning_rate": 2.9999869220470983e-05, "loss": 0.942, "step": 219 }, { "epoch": 0.03, "learning_rate": 2.9999838543846368e-05, "loss": 0.8638, "step": 220 }, { "epoch": 0.03, "learning_rate": 2.9999804638127705e-05, "loss": 0.8398, "step": 221 }, { "epoch": 0.03, "learning_rate": 2.999976750332229e-05, "loss": 0.9425, "step": 222 }, { "epoch": 0.03, "learning_rate": 2.999972713943812e-05, "loss": 1.0112, "step": 223 }, { "epoch": 0.03, "learning_rate": 2.9999683546483882e-05, "loss": 0.9927, "step": 224 }, { "epoch": 0.03, "learning_rate": 2.999963672446896e-05, "loss": 1.0273, "step": 225 }, { "epoch": 0.03, "learning_rate": 2.9999586673403433e-05, "loss": 0.9032, "step": 226 }, { "epoch": 0.03, "learning_rate": 2.999953339329808e-05, "loss": 0.6519, "step": 227 }, { "epoch": 0.03, "learning_rate": 2.9999476884164365e-05, "loss": 1.0675, "step": 228 }, { "epoch": 0.03, "learning_rate": 2.999941714601446e-05, "loss": 1.1562, "step": 229 }, { "epoch": 0.03, "learning_rate": 2.999935417886122e-05, "loss": 1.1222, "step": 230 }, { "epoch": 0.03, "learning_rate": 2.99992879827182e-05, "loss": 1.0396, "step": 231 }, { "epoch": 0.03, "learning_rate": 2.9999218557599653e-05, "loss": 1.0028, "step": 232 }, { "epoch": 0.03, "learning_rate": 2.9999145903520526e-05, "loss": 0.8518, "step": 233 }, { "epoch": 0.03, "learning_rate": 2.9999070020496456e-05, "loss": 1.0753, "step": 234 }, { "epoch": 0.03, "learning_rate": 2.999899090854378e-05, "loss": 0.9554, "step": 235 }, { "epoch": 0.03, "learning_rate": 2.9998908567679527e-05, "loss": 0.8984, "step": 236 }, { "epoch": 0.03, "learning_rate": 2.9998822997921426e-05, "loss": 0.9129, "step": 237 }, { "epoch": 0.03, "learning_rate": 2.9998734199287893e-05, "loss": 0.9766, "step": 238 }, { "epoch": 0.03, "learning_rate": 2.999864217179805e-05, "loss": 1.0407, "step": 239 }, { "epoch": 0.03, "learning_rate": 2.9998546915471712e-05, "loss": 0.8756, "step": 240 }, { "epoch": 0.03, "learning_rate": 2.999844843032938e-05, "loss": 1.0419, "step": 241 }, { "epoch": 0.03, "learning_rate": 2.9998346716392243e-05, "loss": 0.8823, "step": 242 }, { "epoch": 0.03, "learning_rate": 2.999824177368222e-05, "loss": 1.1607, "step": 243 }, { "epoch": 0.03, "learning_rate": 2.9998133602221886e-05, "loss": 1.0698, "step": 244 }, { "epoch": 0.04, "learning_rate": 2.999802220203454e-05, "loss": 1.019, "step": 245 }, { "epoch": 0.04, "learning_rate": 2.999790757314415e-05, "loss": 1.0251, "step": 246 }, { "epoch": 0.04, "learning_rate": 2.9997789715575405e-05, "loss": 0.9302, "step": 247 }, { "epoch": 0.04, "learning_rate": 2.999766862935367e-05, "loss": 0.9146, "step": 248 }, { "epoch": 0.04, "learning_rate": 2.9997544314505016e-05, "loss": 1.0006, "step": 249 }, { "epoch": 0.04, "learning_rate": 2.99974167710562e-05, "loss": 0.8426, "step": 250 }, { "epoch": 0.04, "learning_rate": 2.9997285999034684e-05, "loss": 0.8086, "step": 251 }, { "epoch": 0.04, "learning_rate": 2.9997151998468612e-05, "loss": 1.0385, "step": 252 }, { "epoch": 0.04, "learning_rate": 2.9997014769386842e-05, "loss": 1.1596, "step": 253 }, { "epoch": 0.04, "learning_rate": 2.9996874311818913e-05, "loss": 1.058, "step": 254 }, { "epoch": 0.04, "learning_rate": 2.9996730625795052e-05, "loss": 0.9741, "step": 255 }, { "epoch": 0.04, "learning_rate": 2.9996583711346204e-05, "loss": 0.9629, "step": 256 }, { "epoch": 0.04, "learning_rate": 2.999643356850399e-05, "loss": 0.7877, "step": 257 }, { "epoch": 0.04, "learning_rate": 2.9996280197300734e-05, "loss": 0.9035, "step": 258 }, { "epoch": 0.04, "learning_rate": 2.9996123597769445e-05, "loss": 0.9927, "step": 259 }, { "epoch": 0.04, "learning_rate": 2.9995963769943853e-05, "loss": 1.1194, "step": 260 }, { "epoch": 0.04, "learning_rate": 2.9995800713858346e-05, "loss": 1.048, "step": 261 }, { "epoch": 0.04, "learning_rate": 2.9995634429548037e-05, "loss": 0.9911, "step": 262 }, { "epoch": 0.04, "learning_rate": 2.9995464917048717e-05, "loss": 1.014, "step": 263 }, { "epoch": 0.04, "learning_rate": 2.9995292176396883e-05, "loss": 0.9459, "step": 264 }, { "epoch": 0.04, "learning_rate": 2.9995116207629718e-05, "loss": 1.1451, "step": 265 }, { "epoch": 0.04, "learning_rate": 2.9994937010785105e-05, "loss": 1.0441, "step": 266 }, { "epoch": 0.04, "learning_rate": 2.9994754585901618e-05, "loss": 0.8552, "step": 267 }, { "epoch": 0.04, "learning_rate": 2.999456893301854e-05, "loss": 0.976, "step": 268 }, { "epoch": 0.04, "learning_rate": 2.9994380052175817e-05, "loss": 1.1004, "step": 269 }, { "epoch": 0.04, "learning_rate": 2.999418794341413e-05, "loss": 1.1942, "step": 270 }, { "epoch": 0.04, "learning_rate": 2.9993992606774826e-05, "loss": 0.851, "step": 271 }, { "epoch": 0.04, "learning_rate": 2.9993794042299952e-05, "loss": 1.1217, "step": 272 }, { "epoch": 0.04, "learning_rate": 2.999359225003227e-05, "loss": 1.0, "step": 273 }, { "epoch": 0.04, "learning_rate": 2.9993387230015203e-05, "loss": 1.1244, "step": 274 }, { "epoch": 0.04, "learning_rate": 2.9993178982292893e-05, "loss": 0.9766, "step": 275 }, { "epoch": 0.04, "learning_rate": 2.9992967506910176e-05, "loss": 1.0, "step": 276 }, { "epoch": 0.04, "learning_rate": 2.999275280391257e-05, "loss": 0.9883, "step": 277 }, { "epoch": 0.04, "learning_rate": 2.99925348733463e-05, "loss": 1.168, "step": 278 }, { "epoch": 0.04, "learning_rate": 2.999231371525828e-05, "loss": 0.8984, "step": 279 }, { "epoch": 0.04, "learning_rate": 2.999208932969612e-05, "loss": 1.0525, "step": 280 }, { "epoch": 0.04, "learning_rate": 2.9991861716708122e-05, "loss": 1.0117, "step": 281 }, { "epoch": 0.04, "learning_rate": 2.9991630876343287e-05, "loss": 0.9855, "step": 282 }, { "epoch": 0.04, "learning_rate": 2.999139680865131e-05, "loss": 0.8984, "step": 283 }, { "epoch": 0.04, "learning_rate": 2.9991159513682578e-05, "loss": 0.8594, "step": 284 }, { "epoch": 0.04, "learning_rate": 2.999091899148818e-05, "loss": 1.1049, "step": 285 }, { "epoch": 0.04, "learning_rate": 2.9990675242119888e-05, "loss": 1.1378, "step": 286 }, { "epoch": 0.04, "learning_rate": 2.999042826563018e-05, "loss": 0.9872, "step": 287 }, { "epoch": 0.04, "learning_rate": 2.9990178062072224e-05, "loss": 0.9448, "step": 288 }, { "epoch": 0.04, "learning_rate": 2.9989924631499876e-05, "loss": 0.9944, "step": 289 }, { "epoch": 0.04, "learning_rate": 2.9989667973967697e-05, "loss": 0.9397, "step": 290 }, { "epoch": 0.04, "learning_rate": 2.9989408089530942e-05, "loss": 1.1027, "step": 291 }, { "epoch": 0.04, "learning_rate": 2.9989144978245558e-05, "loss": 0.9308, "step": 292 }, { "epoch": 0.04, "learning_rate": 2.998887864016818e-05, "loss": 0.8744, "step": 293 }, { "epoch": 0.04, "learning_rate": 2.9988609075356148e-05, "loss": 1.0575, "step": 294 }, { "epoch": 0.04, "learning_rate": 2.9988336283867494e-05, "loss": 0.9704, "step": 295 }, { "epoch": 0.04, "learning_rate": 2.9988060265760943e-05, "loss": 1.0497, "step": 296 }, { "epoch": 0.04, "learning_rate": 2.9987781021095914e-05, "loss": 0.9425, "step": 297 }, { "epoch": 0.04, "learning_rate": 2.998749854993252e-05, "loss": 0.9219, "step": 298 }, { "epoch": 0.04, "learning_rate": 2.9987212852331568e-05, "loss": 0.976, "step": 299 }, { "epoch": 0.04, "learning_rate": 2.9986923928354573e-05, "loss": 1.1523, "step": 300 }, { "epoch": 0.04, "learning_rate": 2.9986631778063716e-05, "loss": 1.034, "step": 301 }, { "epoch": 0.04, "learning_rate": 2.9986336401521904e-05, "loss": 1.0597, "step": 302 }, { "epoch": 0.04, "learning_rate": 2.998603779879272e-05, "loss": 0.9967, "step": 303 }, { "epoch": 0.04, "learning_rate": 2.998573596994044e-05, "loss": 0.8747, "step": 304 }, { "epoch": 0.04, "learning_rate": 2.9985430915030046e-05, "loss": 0.9794, "step": 305 }, { "epoch": 0.04, "learning_rate": 2.998512263412721e-05, "loss": 1.1077, "step": 306 }, { "epoch": 0.04, "learning_rate": 2.9984811127298296e-05, "loss": 0.8895, "step": 307 }, { "epoch": 0.04, "learning_rate": 2.998449639461036e-05, "loss": 0.9023, "step": 308 }, { "epoch": 0.04, "learning_rate": 2.998417843613116e-05, "loss": 0.7768, "step": 309 }, { "epoch": 0.04, "learning_rate": 2.9983857251929145e-05, "loss": 0.8242, "step": 310 }, { "epoch": 0.04, "learning_rate": 2.9983532842073453e-05, "loss": 0.9107, "step": 311 }, { "epoch": 0.04, "learning_rate": 2.998320520663393e-05, "loss": 0.856, "step": 312 }, { "epoch": 0.04, "learning_rate": 2.99828743456811e-05, "loss": 0.9693, "step": 313 }, { "epoch": 0.04, "learning_rate": 2.998254025928619e-05, "loss": 0.8705, "step": 314 }, { "epoch": 0.05, "learning_rate": 2.9982202947521126e-05, "loss": 1.0419, "step": 315 }, { "epoch": 0.05, "learning_rate": 2.9981862410458517e-05, "loss": 0.9637, "step": 316 }, { "epoch": 0.05, "learning_rate": 2.998151864817167e-05, "loss": 0.9699, "step": 317 }, { "epoch": 0.05, "learning_rate": 2.9981171660734604e-05, "loss": 0.9948, "step": 318 }, { "epoch": 0.05, "learning_rate": 2.9980821448222e-05, "loss": 1.1373, "step": 319 }, { "epoch": 0.05, "learning_rate": 2.9980468010709256e-05, "loss": 0.8728, "step": 320 }, { "epoch": 0.05, "learning_rate": 2.9980111348272458e-05, "loss": 1.0552, "step": 321 }, { "epoch": 0.05, "learning_rate": 2.9979751460988388e-05, "loss": 0.8683, "step": 322 }, { "epoch": 0.05, "learning_rate": 2.997938834893452e-05, "loss": 0.9688, "step": 323 }, { "epoch": 0.05, "learning_rate": 2.997902201218902e-05, "loss": 0.6579, "step": 324 }, { "epoch": 0.05, "learning_rate": 2.997865245083076e-05, "loss": 1.0703, "step": 325 }, { "epoch": 0.05, "learning_rate": 2.9978279664939285e-05, "loss": 1.0061, "step": 326 }, { "epoch": 0.05, "learning_rate": 2.9977903654594856e-05, "loss": 1.029, "step": 327 }, { "epoch": 0.05, "learning_rate": 2.9977524419878414e-05, "loss": 1.0787, "step": 328 }, { "epoch": 0.05, "learning_rate": 2.99771419608716e-05, "loss": 0.8633, "step": 329 }, { "epoch": 0.05, "learning_rate": 2.9976756277656753e-05, "loss": 1.0915, "step": 330 }, { "epoch": 0.05, "learning_rate": 2.9976367370316895e-05, "loss": 1.0318, "step": 331 }, { "epoch": 0.05, "learning_rate": 2.9975975238935747e-05, "loss": 0.8214, "step": 332 }, { "epoch": 0.05, "learning_rate": 2.997557988359773e-05, "loss": 0.9069, "step": 333 }, { "epoch": 0.05, "learning_rate": 2.9975181304387953e-05, "loss": 1.048, "step": 334 }, { "epoch": 0.05, "learning_rate": 2.9974779501392218e-05, "loss": 1.0876, "step": 335 }, { "epoch": 0.05, "learning_rate": 2.997437447469702e-05, "loss": 0.8881, "step": 336 }, { "epoch": 0.05, "learning_rate": 2.9973966224389563e-05, "loss": 0.9344, "step": 337 }, { "epoch": 0.05, "learning_rate": 2.9973554750557725e-05, "loss": 0.9872, "step": 338 }, { "epoch": 0.05, "learning_rate": 2.9973140053290084e-05, "loss": 0.7324, "step": 339 }, { "epoch": 0.05, "learning_rate": 2.997272213267592e-05, "loss": 0.9425, "step": 340 }, { "epoch": 0.05, "learning_rate": 2.9972300988805196e-05, "loss": 0.986, "step": 341 }, { "epoch": 0.05, "learning_rate": 2.9971876621768578e-05, "loss": 0.8563, "step": 342 }, { "epoch": 0.05, "learning_rate": 2.9971449031657422e-05, "loss": 0.8175, "step": 343 }, { "epoch": 0.05, "learning_rate": 2.9971018218563768e-05, "loss": 0.9263, "step": 344 }, { "epoch": 0.05, "learning_rate": 2.9970584182580375e-05, "loss": 0.9314, "step": 345 }, { "epoch": 0.05, "learning_rate": 2.9970146923800663e-05, "loss": 0.9074, "step": 346 }, { "epoch": 0.05, "learning_rate": 2.996970644231878e-05, "loss": 0.9305, "step": 347 }, { "epoch": 0.05, "learning_rate": 2.9969262738229536e-05, "loss": 0.9057, "step": 348 }, { "epoch": 0.05, "learning_rate": 2.9968815811628462e-05, "loss": 0.9768, "step": 349 }, { "epoch": 0.05, "learning_rate": 2.996836566261176e-05, "loss": 0.7443, "step": 350 }, { "epoch": 0.05, "learning_rate": 2.9967912291276342e-05, "loss": 0.9448, "step": 351 }, { "epoch": 0.05, "learning_rate": 2.996745569771981e-05, "loss": 0.9475, "step": 352 }, { "epoch": 0.05, "learning_rate": 2.996699588204045e-05, "loss": 0.8591, "step": 353 }, { "epoch": 0.05, "learning_rate": 2.996653284433725e-05, "loss": 0.9688, "step": 354 }, { "epoch": 0.05, "learning_rate": 2.9966066584709897e-05, "loss": 1.0943, "step": 355 }, { "epoch": 0.05, "learning_rate": 2.9965597103258756e-05, "loss": 0.9018, "step": 356 }, { "epoch": 0.05, "learning_rate": 2.9965124400084904e-05, "loss": 1.0061, "step": 357 }, { "epoch": 0.05, "learning_rate": 2.9964648475290098e-05, "loss": 1.0904, "step": 358 }, { "epoch": 0.05, "learning_rate": 2.9964169328976787e-05, "loss": 0.9032, "step": 359 }, { "epoch": 0.05, "learning_rate": 2.996368696124813e-05, "loss": 0.9537, "step": 360 }, { "epoch": 0.05, "learning_rate": 2.9963201372207965e-05, "loss": 0.9367, "step": 361 }, { "epoch": 0.05, "learning_rate": 2.9962712561960824e-05, "loss": 1.0374, "step": 362 }, { "epoch": 0.05, "learning_rate": 2.996222053061194e-05, "loss": 1.0039, "step": 363 }, { "epoch": 0.05, "learning_rate": 2.996172527826723e-05, "loss": 1.01, "step": 364 }, { "epoch": 0.05, "learning_rate": 2.9961226805033315e-05, "loss": 1.0882, "step": 365 }, { "epoch": 0.05, "learning_rate": 2.9960725111017503e-05, "loss": 0.7891, "step": 366 }, { "epoch": 0.05, "learning_rate": 2.9960220196327793e-05, "loss": 0.779, "step": 367 }, { "epoch": 0.05, "learning_rate": 2.9959712061072885e-05, "loss": 0.8845, "step": 368 }, { "epoch": 0.05, "learning_rate": 2.9959200705362163e-05, "loss": 1.1211, "step": 369 }, { "epoch": 0.05, "learning_rate": 2.9958686129305712e-05, "loss": 1.1177, "step": 370 }, { "epoch": 0.05, "learning_rate": 2.995816833301431e-05, "loss": 0.8103, "step": 371 }, { "epoch": 0.05, "learning_rate": 2.9957647316599423e-05, "loss": 0.8209, "step": 372 }, { "epoch": 0.05, "learning_rate": 2.9957123080173208e-05, "loss": 1.0352, "step": 373 }, { "epoch": 0.05, "learning_rate": 2.9956595623848527e-05, "loss": 0.9068, "step": 374 }, { "epoch": 0.05, "learning_rate": 2.995606494773893e-05, "loss": 0.9224, "step": 375 }, { "epoch": 0.05, "learning_rate": 2.9955531051958654e-05, "loss": 0.7732, "step": 376 }, { "epoch": 0.05, "learning_rate": 2.995499393662263e-05, "loss": 0.9821, "step": 377 }, { "epoch": 0.05, "learning_rate": 2.9954453601846495e-05, "loss": 1.0151, "step": 378 }, { "epoch": 0.05, "learning_rate": 2.9953910047746568e-05, "loss": 0.9961, "step": 379 }, { "epoch": 0.05, "learning_rate": 2.9953363274439853e-05, "loss": 0.9972, "step": 380 }, { "epoch": 0.05, "learning_rate": 2.9952813282044063e-05, "loss": 0.8097, "step": 381 }, { "epoch": 0.05, "learning_rate": 2.99522600706776e-05, "loss": 1.0234, "step": 382 }, { "epoch": 0.05, "learning_rate": 2.9951703640459556e-05, "loss": 1.0246, "step": 383 }, { "epoch": 0.06, "learning_rate": 2.9951143991509717e-05, "loss": 0.8973, "step": 384 }, { "epoch": 0.06, "learning_rate": 2.9950581123948556e-05, "loss": 0.8772, "step": 385 }, { "epoch": 0.06, "learning_rate": 2.9950015037897247e-05, "loss": 0.7271, "step": 386 }, { "epoch": 0.06, "learning_rate": 2.9949445733477662e-05, "loss": 0.9526, "step": 387 }, { "epoch": 0.06, "learning_rate": 2.9948873210812345e-05, "loss": 0.8627, "step": 388 }, { "epoch": 0.06, "learning_rate": 2.994829747002456e-05, "loss": 1.0999, "step": 389 }, { "epoch": 0.06, "learning_rate": 2.9947718511238235e-05, "loss": 0.9124, "step": 390 }, { "epoch": 0.06, "learning_rate": 2.994713633457802e-05, "loss": 0.9381, "step": 391 }, { "epoch": 0.06, "learning_rate": 2.9946550940169233e-05, "loss": 1.0463, "step": 392 }, { "epoch": 0.06, "learning_rate": 2.9945962328137898e-05, "loss": 1.0329, "step": 393 }, { "epoch": 0.06, "learning_rate": 2.9945370498610733e-05, "loss": 1.0826, "step": 394 }, { "epoch": 0.06, "learning_rate": 2.9944775451715136e-05, "loss": 1.0681, "step": 395 }, { "epoch": 0.06, "learning_rate": 2.9944177187579208e-05, "loss": 1.111, "step": 396 }, { "epoch": 0.06, "learning_rate": 2.9943575706331746e-05, "loss": 0.9096, "step": 397 }, { "epoch": 0.06, "learning_rate": 2.994297100810223e-05, "loss": 1.0184, "step": 398 }, { "epoch": 0.06, "learning_rate": 2.994236309302084e-05, "loss": 1.0424, "step": 399 }, { "epoch": 0.06, "learning_rate": 2.9941751961218438e-05, "loss": 0.8705, "step": 400 }, { "epoch": 0.06, "learning_rate": 2.9941137612826587e-05, "loss": 0.9263, "step": 401 }, { "epoch": 0.06, "learning_rate": 2.994052004797755e-05, "loss": 0.9286, "step": 402 }, { "epoch": 0.06, "learning_rate": 2.9939899266804264e-05, "loss": 0.9721, "step": 403 }, { "epoch": 0.06, "learning_rate": 2.993927526944037e-05, "loss": 1.0343, "step": 404 }, { "epoch": 0.06, "learning_rate": 2.99386480560202e-05, "loss": 1.1708, "step": 405 }, { "epoch": 0.06, "learning_rate": 2.9938017626678783e-05, "loss": 0.8192, "step": 406 }, { "epoch": 0.06, "learning_rate": 2.9937383981551824e-05, "loss": 1.0915, "step": 407 }, { "epoch": 0.06, "learning_rate": 2.993674712077574e-05, "loss": 1.01, "step": 408 }, { "epoch": 0.06, "learning_rate": 2.993610704448763e-05, "loss": 0.8415, "step": 409 }, { "epoch": 0.06, "learning_rate": 2.9935463752825276e-05, "loss": 1.0993, "step": 410 }, { "epoch": 0.06, "learning_rate": 2.993481724592718e-05, "loss": 0.93, "step": 411 }, { "epoch": 0.06, "learning_rate": 2.9934167523932506e-05, "loss": 0.8862, "step": 412 }, { "epoch": 0.06, "learning_rate": 2.993351458698113e-05, "loss": 0.7754, "step": 413 }, { "epoch": 0.06, "learning_rate": 2.9932858435213607e-05, "loss": 1.1345, "step": 414 }, { "epoch": 0.06, "learning_rate": 2.9932199068771196e-05, "loss": 0.8778, "step": 415 }, { "epoch": 0.06, "learning_rate": 2.9931536487795837e-05, "loss": 0.8309, "step": 416 }, { "epoch": 0.06, "learning_rate": 2.9930870692430176e-05, "loss": 1.0061, "step": 417 }, { "epoch": 0.06, "learning_rate": 2.993020168281753e-05, "loss": 0.7271, "step": 418 }, { "epoch": 0.06, "learning_rate": 2.9929529459101928e-05, "loss": 0.9844, "step": 419 }, { "epoch": 0.06, "learning_rate": 2.992885402142809e-05, "loss": 0.9029, "step": 420 }, { "epoch": 0.06, "learning_rate": 2.9928175369941403e-05, "loss": 0.8599, "step": 421 }, { "epoch": 0.06, "learning_rate": 2.992749350478798e-05, "loss": 0.9422, "step": 422 }, { "epoch": 0.06, "learning_rate": 2.99268084261146e-05, "loss": 0.952, "step": 423 }, { "epoch": 0.06, "learning_rate": 2.9926120134068747e-05, "loss": 1.0106, "step": 424 }, { "epoch": 0.06, "learning_rate": 2.9925428628798598e-05, "loss": 0.9375, "step": 425 }, { "epoch": 0.06, "learning_rate": 2.992473391045301e-05, "loss": 0.976, "step": 426 }, { "epoch": 0.06, "learning_rate": 2.992403597918154e-05, "loss": 1.1864, "step": 427 }, { "epoch": 0.06, "learning_rate": 2.9923334835134437e-05, "loss": 0.9554, "step": 428 }, { "epoch": 0.06, "learning_rate": 2.9922630478462636e-05, "loss": 1.0742, "step": 429 }, { "epoch": 0.06, "learning_rate": 2.9921922909317776e-05, "loss": 1.0485, "step": 430 }, { "epoch": 0.06, "learning_rate": 2.992121212785217e-05, "loss": 0.9749, "step": 431 }, { "epoch": 0.06, "learning_rate": 2.9920498134218835e-05, "loss": 0.966, "step": 432 }, { "epoch": 0.06, "learning_rate": 2.991978092857148e-05, "loss": 0.9275, "step": 433 }, { "epoch": 0.06, "learning_rate": 2.9919060511064495e-05, "loss": 1.0357, "step": 434 }, { "epoch": 0.06, "learning_rate": 2.9918336881852977e-05, "loss": 0.7807, "step": 435 }, { "epoch": 0.06, "learning_rate": 2.9917610041092692e-05, "loss": 1.0698, "step": 436 }, { "epoch": 0.06, "learning_rate": 2.9916879988940125e-05, "loss": 1.0084, "step": 437 }, { "epoch": 0.06, "learning_rate": 2.991614672555243e-05, "loss": 0.9216, "step": 438 }, { "epoch": 0.06, "learning_rate": 2.991541025108746e-05, "loss": 0.9503, "step": 439 }, { "epoch": 0.06, "learning_rate": 2.9914670565703766e-05, "loss": 0.9715, "step": 440 }, { "epoch": 0.06, "learning_rate": 2.9913927669560577e-05, "loss": 0.918, "step": 441 }, { "epoch": 0.06, "learning_rate": 2.9913181562817823e-05, "loss": 0.8895, "step": 442 }, { "epoch": 0.06, "learning_rate": 2.9912432245636122e-05, "loss": 0.8898, "step": 443 }, { "epoch": 0.06, "learning_rate": 2.991167971817679e-05, "loss": 0.8231, "step": 444 }, { "epoch": 0.06, "learning_rate": 2.9910923980601815e-05, "loss": 0.8638, "step": 445 }, { "epoch": 0.06, "learning_rate": 2.9910165033073897e-05, "loss": 0.9492, "step": 446 }, { "epoch": 0.06, "learning_rate": 2.9909402875756415e-05, "loss": 0.8507, "step": 447 }, { "epoch": 0.06, "learning_rate": 2.9908637508813445e-05, "loss": 0.952, "step": 448 }, { "epoch": 0.06, "learning_rate": 2.990786893240975e-05, "loss": 0.8817, "step": 449 }, { "epoch": 0.06, "learning_rate": 2.990709714671079e-05, "loss": 1.0279, "step": 450 }, { "epoch": 0.06, "learning_rate": 2.9906322151882706e-05, "loss": 0.9559, "step": 451 }, { "epoch": 0.06, "learning_rate": 2.9905543948092338e-05, "loss": 1.0435, "step": 452 }, { "epoch": 0.06, "learning_rate": 2.9904762535507207e-05, "loss": 1.0569, "step": 453 }, { "epoch": 0.07, "learning_rate": 2.9903977914295546e-05, "loss": 0.9403, "step": 454 }, { "epoch": 0.07, "learning_rate": 2.990319008462625e-05, "loss": 0.798, "step": 455 }, { "epoch": 0.07, "learning_rate": 2.990239904666893e-05, "loss": 1.0859, "step": 456 }, { "epoch": 0.07, "learning_rate": 2.990160480059387e-05, "loss": 0.8627, "step": 457 }, { "epoch": 0.07, "learning_rate": 2.9900807346572054e-05, "loss": 0.9565, "step": 458 }, { "epoch": 0.07, "learning_rate": 2.990000668477515e-05, "loss": 0.7759, "step": 459 }, { "epoch": 0.07, "learning_rate": 2.9899202815375533e-05, "loss": 0.8371, "step": 460 }, { "epoch": 0.07, "learning_rate": 2.9898395738546246e-05, "loss": 0.9375, "step": 461 }, { "epoch": 0.07, "learning_rate": 2.989758545446103e-05, "loss": 0.8616, "step": 462 }, { "epoch": 0.07, "learning_rate": 2.9896771963294327e-05, "loss": 0.8245, "step": 463 }, { "epoch": 0.07, "learning_rate": 2.989595526522126e-05, "loss": 1.0095, "step": 464 }, { "epoch": 0.07, "learning_rate": 2.9895135360417632e-05, "loss": 0.9526, "step": 465 }, { "epoch": 0.07, "learning_rate": 2.9894312249059967e-05, "loss": 1.0095, "step": 466 }, { "epoch": 0.07, "learning_rate": 2.989348593132545e-05, "loss": 0.9738, "step": 467 }, { "epoch": 0.07, "learning_rate": 2.989265640739197e-05, "loss": 0.8677, "step": 468 }, { "epoch": 0.07, "learning_rate": 2.98918236774381e-05, "loss": 0.9682, "step": 469 }, { "epoch": 0.07, "learning_rate": 2.989098774164311e-05, "loss": 0.8217, "step": 470 }, { "epoch": 0.07, "learning_rate": 2.989014860018695e-05, "loss": 0.9286, "step": 471 }, { "epoch": 0.07, "learning_rate": 2.9889306253250273e-05, "loss": 0.9063, "step": 472 }, { "epoch": 0.07, "learning_rate": 2.9888460701014412e-05, "loss": 1.0391, "step": 473 }, { "epoch": 0.07, "learning_rate": 2.9887611943661397e-05, "loss": 0.8976, "step": 474 }, { "epoch": 0.07, "learning_rate": 2.988675998137394e-05, "loss": 0.9615, "step": 475 }, { "epoch": 0.07, "learning_rate": 2.988590481433544e-05, "loss": 0.9911, "step": 476 }, { "epoch": 0.07, "learning_rate": 2.9885046442730015e-05, "loss": 1.0441, "step": 477 }, { "epoch": 0.07, "learning_rate": 2.9884184866742435e-05, "loss": 1.0167, "step": 478 }, { "epoch": 0.07, "learning_rate": 2.988332008655818e-05, "loss": 1.0156, "step": 479 }, { "epoch": 0.07, "learning_rate": 2.9882452102363413e-05, "loss": 0.942, "step": 480 }, { "epoch": 0.07, "learning_rate": 2.9881580914344996e-05, "loss": 0.9517, "step": 481 }, { "epoch": 0.07, "learning_rate": 2.9880706522690465e-05, "loss": 0.9489, "step": 482 }, { "epoch": 0.07, "learning_rate": 2.9879828927588062e-05, "loss": 0.7905, "step": 483 }, { "epoch": 0.07, "learning_rate": 2.987894812922671e-05, "loss": 1.0597, "step": 484 }, { "epoch": 0.07, "learning_rate": 2.987806412779602e-05, "loss": 1.0162, "step": 485 }, { "epoch": 0.07, "learning_rate": 2.9877176923486304e-05, "loss": 0.9743, "step": 486 }, { "epoch": 0.07, "learning_rate": 2.9876286516488544e-05, "loss": 0.9291, "step": 487 }, { "epoch": 0.07, "learning_rate": 2.9875392906994427e-05, "loss": 1.1055, "step": 488 }, { "epoch": 0.07, "learning_rate": 2.987449609519633e-05, "loss": 1.0681, "step": 489 }, { "epoch": 0.07, "learning_rate": 2.987359608128731e-05, "loss": 0.9978, "step": 490 }, { "epoch": 0.07, "learning_rate": 2.9872692865461116e-05, "loss": 0.8943, "step": 491 }, { "epoch": 0.07, "learning_rate": 2.987178644791219e-05, "loss": 0.9838, "step": 492 }, { "epoch": 0.07, "learning_rate": 2.9870876828835664e-05, "loss": 1.0039, "step": 493 }, { "epoch": 0.07, "learning_rate": 2.9869964008427353e-05, "loss": 0.894, "step": 494 }, { "epoch": 0.07, "learning_rate": 2.9869047986883762e-05, "loss": 1.0257, "step": 495 }, { "epoch": 0.07, "learning_rate": 2.9868128764402098e-05, "loss": 0.9004, "step": 496 }, { "epoch": 0.07, "learning_rate": 2.986720634118023e-05, "loss": 0.8839, "step": 497 }, { "epoch": 0.07, "learning_rate": 2.9866280717416755e-05, "loss": 0.9986, "step": 498 }, { "epoch": 0.07, "learning_rate": 2.9865351893310917e-05, "loss": 0.7087, "step": 499 }, { "epoch": 0.07, "learning_rate": 2.986441986906268e-05, "loss": 0.8675, "step": 500 }, { "epoch": 0.07, "learning_rate": 2.986348464487268e-05, "loss": 1.0056, "step": 501 }, { "epoch": 0.07, "learning_rate": 2.986254622094225e-05, "loss": 1.0145, "step": 502 }, { "epoch": 0.07, "learning_rate": 2.986160459747341e-05, "loss": 1.0513, "step": 503 }, { "epoch": 0.07, "learning_rate": 2.9860659774668867e-05, "loss": 0.9537, "step": 504 }, { "epoch": 0.07, "learning_rate": 2.9859711752732022e-05, "loss": 0.9199, "step": 505 }, { "epoch": 0.07, "learning_rate": 2.9858760531866956e-05, "loss": 0.995, "step": 506 }, { "epoch": 0.07, "learning_rate": 2.9857806112278446e-05, "loss": 0.9598, "step": 507 }, { "epoch": 0.07, "learning_rate": 2.985684849417195e-05, "loss": 0.8912, "step": 508 }, { "epoch": 0.07, "learning_rate": 2.9855887677753627e-05, "loss": 1.0357, "step": 509 }, { "epoch": 0.07, "learning_rate": 2.9854923663230312e-05, "loss": 0.8061, "step": 510 }, { "epoch": 0.07, "learning_rate": 2.985395645080953e-05, "loss": 0.7768, "step": 511 }, { "epoch": 0.07, "learning_rate": 2.9852986040699508e-05, "loss": 0.8811, "step": 512 }, { "epoch": 0.07, "learning_rate": 2.9852012433109146e-05, "loss": 1.0765, "step": 513 }, { "epoch": 0.07, "learning_rate": 2.9851035628248032e-05, "loss": 0.8778, "step": 514 }, { "epoch": 0.07, "learning_rate": 2.9850055626326455e-05, "loss": 1.0075, "step": 515 }, { "epoch": 0.07, "learning_rate": 2.984907242755539e-05, "loss": 0.8923, "step": 516 }, { "epoch": 0.07, "learning_rate": 2.9848086032146484e-05, "loss": 0.9358, "step": 517 }, { "epoch": 0.07, "learning_rate": 2.9847096440312087e-05, "loss": 0.9353, "step": 518 }, { "epoch": 0.07, "learning_rate": 2.9846103652265236e-05, "loss": 0.9001, "step": 519 }, { "epoch": 0.07, "learning_rate": 2.984510766821965e-05, "loss": 0.8527, "step": 520 }, { "epoch": 0.07, "learning_rate": 2.984410848838975e-05, "loss": 0.976, "step": 521 }, { "epoch": 0.07, "learning_rate": 2.984310611299062e-05, "loss": 1.0631, "step": 522 }, { "epoch": 0.07, "learning_rate": 2.984210054223806e-05, "loss": 0.8677, "step": 523 }, { "epoch": 0.08, "learning_rate": 2.9841091776348532e-05, "loss": 0.9369, "step": 524 }, { "epoch": 0.08, "learning_rate": 2.984007981553921e-05, "loss": 0.9342, "step": 525 }, { "epoch": 0.08, "learning_rate": 2.9839064660027932e-05, "loss": 0.9046, "step": 526 }, { "epoch": 0.08, "learning_rate": 2.9838046310033246e-05, "loss": 0.8504, "step": 527 }, { "epoch": 0.08, "learning_rate": 2.9837024765774378e-05, "loss": 0.9944, "step": 528 }, { "epoch": 0.08, "learning_rate": 2.983600002747123e-05, "loss": 0.9894, "step": 529 }, { "epoch": 0.08, "learning_rate": 2.9834972095344415e-05, "loss": 0.7905, "step": 530 }, { "epoch": 0.08, "learning_rate": 2.9833940969615215e-05, "loss": 0.8895, "step": 531 }, { "epoch": 0.08, "learning_rate": 2.983290665050561e-05, "loss": 1.0915, "step": 532 }, { "epoch": 0.08, "learning_rate": 2.9831869138238257e-05, "loss": 0.9107, "step": 533 }, { "epoch": 0.08, "learning_rate": 2.983082843303651e-05, "loss": 0.8521, "step": 534 }, { "epoch": 0.08, "learning_rate": 2.982978453512441e-05, "loss": 0.8739, "step": 535 }, { "epoch": 0.08, "learning_rate": 2.9828737444726676e-05, "loss": 0.9213, "step": 536 }, { "epoch": 0.08, "learning_rate": 2.982768716206873e-05, "loss": 0.9051, "step": 537 }, { "epoch": 0.08, "learning_rate": 2.9826633687376663e-05, "loss": 0.9152, "step": 538 }, { "epoch": 0.08, "learning_rate": 2.9825577020877267e-05, "loss": 0.9805, "step": 539 }, { "epoch": 0.08, "learning_rate": 2.9824517162798013e-05, "loss": 0.8538, "step": 540 }, { "epoch": 0.08, "learning_rate": 2.9823454113367063e-05, "loss": 1.0109, "step": 541 }, { "epoch": 0.08, "learning_rate": 2.9822387872813268e-05, "loss": 0.885, "step": 542 }, { "epoch": 0.08, "learning_rate": 2.9821318441366156e-05, "loss": 0.957, "step": 543 }, { "epoch": 0.08, "learning_rate": 2.9820245819255958e-05, "loss": 0.9185, "step": 544 }, { "epoch": 0.08, "learning_rate": 2.9819170006713582e-05, "loss": 0.8451, "step": 545 }, { "epoch": 0.08, "learning_rate": 2.981809100397062e-05, "loss": 1.0385, "step": 546 }, { "epoch": 0.08, "learning_rate": 2.981700881125935e-05, "loss": 1.0753, "step": 547 }, { "epoch": 0.08, "learning_rate": 2.9815923428812754e-05, "loss": 0.8884, "step": 548 }, { "epoch": 0.08, "learning_rate": 2.9814834856864477e-05, "loss": 0.9559, "step": 549 }, { "epoch": 0.08, "learning_rate": 2.9813743095648868e-05, "loss": 0.7341, "step": 550 }, { "epoch": 0.08, "learning_rate": 2.9812648145400952e-05, "loss": 0.8485, "step": 551 }, { "epoch": 0.08, "learning_rate": 2.9811550006356448e-05, "loss": 0.9732, "step": 552 }, { "epoch": 0.08, "learning_rate": 2.9810448678751753e-05, "loss": 0.8929, "step": 553 }, { "epoch": 0.08, "learning_rate": 2.980934416282396e-05, "loss": 0.8119, "step": 554 }, { "epoch": 0.08, "learning_rate": 2.9808236458810848e-05, "loss": 1.0569, "step": 555 }, { "epoch": 0.08, "learning_rate": 2.9807125566950868e-05, "loss": 0.8248, "step": 556 }, { "epoch": 0.08, "learning_rate": 2.9806011487483174e-05, "loss": 1.0246, "step": 557 }, { "epoch": 0.08, "learning_rate": 2.98048942206476e-05, "loss": 1.0318, "step": 558 }, { "epoch": 0.08, "learning_rate": 2.980377376668467e-05, "loss": 0.8658, "step": 559 }, { "epoch": 0.08, "learning_rate": 2.9802650125835575e-05, "loss": 0.9453, "step": 560 }, { "epoch": 0.08, "learning_rate": 2.980152329834222e-05, "loss": 1.0954, "step": 561 }, { "epoch": 0.08, "learning_rate": 2.9800393284447177e-05, "loss": 1.0352, "step": 562 }, { "epoch": 0.08, "learning_rate": 2.979926008439372e-05, "loss": 1.0413, "step": 563 }, { "epoch": 0.08, "learning_rate": 2.9798123698425785e-05, "loss": 1.1842, "step": 564 }, { "epoch": 0.08, "learning_rate": 2.9796984126788017e-05, "loss": 0.9766, "step": 565 }, { "epoch": 0.08, "learning_rate": 2.9795841369725733e-05, "loss": 0.9408, "step": 566 }, { "epoch": 0.08, "learning_rate": 2.979469542748495e-05, "loss": 0.9855, "step": 567 }, { "epoch": 0.08, "learning_rate": 2.9793546300312344e-05, "loss": 0.9512, "step": 568 }, { "epoch": 0.08, "learning_rate": 2.9792393988455303e-05, "loss": 0.8195, "step": 569 }, { "epoch": 0.08, "learning_rate": 2.9791238492161895e-05, "loss": 1.0212, "step": 570 }, { "epoch": 0.08, "learning_rate": 2.979007981168087e-05, "loss": 0.8122, "step": 571 }, { "epoch": 0.08, "learning_rate": 2.9788917947261653e-05, "loss": 0.7528, "step": 572 }, { "epoch": 0.08, "learning_rate": 2.9787752899154373e-05, "loss": 0.8019, "step": 573 }, { "epoch": 0.08, "learning_rate": 2.9786584667609834e-05, "loss": 0.981, "step": 574 }, { "epoch": 0.08, "learning_rate": 2.9785413252879528e-05, "loss": 0.7656, "step": 575 }, { "epoch": 0.08, "learning_rate": 2.9784238655215627e-05, "loss": 0.6869, "step": 576 }, { "epoch": 0.08, "learning_rate": 2.9783060874871004e-05, "loss": 0.6988, "step": 577 }, { "epoch": 0.08, "learning_rate": 2.978187991209919e-05, "loss": 1.0329, "step": 578 }, { "epoch": 0.08, "learning_rate": 2.9780695767154427e-05, "loss": 0.6847, "step": 579 }, { "epoch": 0.08, "learning_rate": 2.9779508440291633e-05, "loss": 1.0569, "step": 580 }, { "epoch": 0.08, "learning_rate": 2.9778317931766412e-05, "loss": 0.9704, "step": 581 }, { "epoch": 0.08, "learning_rate": 2.977712424183504e-05, "loss": 0.8331, "step": 582 }, { "epoch": 0.08, "learning_rate": 2.97759273707545e-05, "loss": 0.9453, "step": 583 }, { "epoch": 0.08, "learning_rate": 2.9774727318782444e-05, "loss": 1.0547, "step": 584 }, { "epoch": 0.08, "learning_rate": 2.9773524086177215e-05, "loss": 0.8164, "step": 585 }, { "epoch": 0.08, "learning_rate": 2.9772317673197832e-05, "loss": 0.9827, "step": 586 }, { "epoch": 0.08, "learning_rate": 2.977110808010402e-05, "loss": 0.7938, "step": 587 }, { "epoch": 0.08, "learning_rate": 2.9769895307156163e-05, "loss": 0.9194, "step": 588 }, { "epoch": 0.08, "learning_rate": 2.9768679354615342e-05, "loss": 1.0519, "step": 589 }, { "epoch": 0.08, "learning_rate": 2.976746022274333e-05, "loss": 1.0569, "step": 590 }, { "epoch": 0.08, "learning_rate": 2.9766237911802566e-05, "loss": 1.0061, "step": 591 }, { "epoch": 0.08, "learning_rate": 2.9765012422056185e-05, "loss": 0.8733, "step": 592 }, { "epoch": 0.08, "learning_rate": 2.976378375376801e-05, "loss": 0.9369, "step": 593 }, { "epoch": 0.09, "learning_rate": 2.976255190720254e-05, "loss": 0.8828, "step": 594 }, { "epoch": 0.09, "learning_rate": 2.976131688262496e-05, "loss": 0.8984, "step": 595 }, { "epoch": 0.09, "learning_rate": 2.9760078680301142e-05, "loss": 1.034, "step": 596 }, { "epoch": 0.09, "learning_rate": 2.9758837300497635e-05, "loss": 0.8566, "step": 597 }, { "epoch": 0.09, "learning_rate": 2.9757592743481688e-05, "loss": 1.0787, "step": 598 }, { "epoch": 0.09, "learning_rate": 2.9756345009521218e-05, "loss": 0.9102, "step": 599 }, { "epoch": 0.09, "learning_rate": 2.9755094098884825e-05, "loss": 0.9163, "step": 600 }, { "epoch": 0.09, "learning_rate": 2.9753840011841808e-05, "loss": 0.851, "step": 601 }, { "epoch": 0.09, "learning_rate": 2.9752582748662133e-05, "loss": 1.1484, "step": 602 }, { "epoch": 0.09, "learning_rate": 2.9751322309616466e-05, "loss": 0.7891, "step": 603 }, { "epoch": 0.09, "learning_rate": 2.975005869497614e-05, "loss": 0.8432, "step": 604 }, { "epoch": 0.09, "learning_rate": 2.9748791905013187e-05, "loss": 0.7229, "step": 605 }, { "epoch": 0.09, "learning_rate": 2.9747521940000316e-05, "loss": 0.8683, "step": 606 }, { "epoch": 0.09, "learning_rate": 2.974624880021091e-05, "loss": 0.9844, "step": 607 }, { "epoch": 0.09, "learning_rate": 2.9744972485919055e-05, "loss": 0.8739, "step": 608 }, { "epoch": 0.09, "learning_rate": 2.9743692997399502e-05, "loss": 0.928, "step": 609 }, { "epoch": 0.09, "learning_rate": 2.97424103349277e-05, "loss": 0.9648, "step": 610 }, { "epoch": 0.09, "learning_rate": 2.9741124498779764e-05, "loss": 1.0312, "step": 611 }, { "epoch": 0.09, "learning_rate": 2.9739835489232514e-05, "loss": 0.9916, "step": 612 }, { "epoch": 0.09, "learning_rate": 2.9738543306563437e-05, "loss": 0.9289, "step": 613 }, { "epoch": 0.09, "learning_rate": 2.9737247951050706e-05, "loss": 0.9448, "step": 614 }, { "epoch": 0.09, "learning_rate": 2.973594942297319e-05, "loss": 0.8354, "step": 615 }, { "epoch": 0.09, "learning_rate": 2.9734647722610414e-05, "loss": 0.9174, "step": 616 }, { "epoch": 0.09, "learning_rate": 2.9733342850242613e-05, "loss": 0.8622, "step": 617 }, { "epoch": 0.09, "learning_rate": 2.9732034806150684e-05, "loss": 0.8901, "step": 618 }, { "epoch": 0.09, "learning_rate": 2.973072359061623e-05, "loss": 0.9604, "step": 619 }, { "epoch": 0.09, "learning_rate": 2.972940920392151e-05, "loss": 0.942, "step": 620 }, { "epoch": 0.09, "learning_rate": 2.9728091646349486e-05, "loss": 1.0725, "step": 621 }, { "epoch": 0.09, "learning_rate": 2.9726770918183795e-05, "loss": 0.803, "step": 622 }, { "epoch": 0.09, "learning_rate": 2.9725447019708756e-05, "loss": 0.882, "step": 623 }, { "epoch": 0.09, "learning_rate": 2.9724119951209374e-05, "loss": 0.8923, "step": 624 }, { "epoch": 0.09, "learning_rate": 2.9722789712971328e-05, "loss": 1.1685, "step": 625 }, { "epoch": 0.09, "learning_rate": 2.972145630528099e-05, "loss": 0.8638, "step": 626 }, { "epoch": 0.09, "learning_rate": 2.9720119728425413e-05, "loss": 0.9704, "step": 627 }, { "epoch": 0.09, "learning_rate": 2.971877998269232e-05, "loss": 0.9141, "step": 628 }, { "epoch": 0.09, "learning_rate": 2.9717437068370134e-05, "loss": 0.9392, "step": 629 }, { "epoch": 0.09, "learning_rate": 2.971609098574795e-05, "loss": 0.8817, "step": 630 }, { "epoch": 0.09, "learning_rate": 2.9714741735115535e-05, "loss": 0.8136, "step": 631 }, { "epoch": 0.09, "learning_rate": 2.971338931676336e-05, "loss": 1.0262, "step": 632 }, { "epoch": 0.09, "learning_rate": 2.9712033730982565e-05, "loss": 0.8555, "step": 633 }, { "epoch": 0.09, "learning_rate": 2.971067497806498e-05, "loss": 1.0784, "step": 634 }, { "epoch": 0.09, "learning_rate": 2.97093130583031e-05, "loss": 1.1724, "step": 635 }, { "epoch": 0.09, "learning_rate": 2.9707947971990117e-05, "loss": 0.9537, "step": 636 }, { "epoch": 0.09, "learning_rate": 2.9706579719419902e-05, "loss": 0.8973, "step": 637 }, { "epoch": 0.09, "learning_rate": 2.9705208300887004e-05, "loss": 0.887, "step": 638 }, { "epoch": 0.09, "learning_rate": 2.9703833716686654e-05, "loss": 0.9141, "step": 639 }, { "epoch": 0.09, "learning_rate": 2.970245596711477e-05, "loss": 0.9035, "step": 640 }, { "epoch": 0.09, "learning_rate": 2.9701075052467952e-05, "loss": 0.7662, "step": 641 }, { "epoch": 0.09, "learning_rate": 2.9699690973043463e-05, "loss": 0.8666, "step": 642 }, { "epoch": 0.09, "learning_rate": 2.969830372913927e-05, "loss": 0.8622, "step": 643 }, { "epoch": 0.09, "learning_rate": 2.9696913321054014e-05, "loss": 1.0212, "step": 644 }, { "epoch": 0.09, "learning_rate": 2.9695519749087006e-05, "loss": 0.8984, "step": 645 }, { "epoch": 0.09, "learning_rate": 2.9694123013538257e-05, "loss": 0.9018, "step": 646 }, { "epoch": 0.09, "learning_rate": 2.9692723114708445e-05, "loss": 0.9397, "step": 647 }, { "epoch": 0.09, "learning_rate": 2.969132005289894e-05, "loss": 0.9609, "step": 648 }, { "epoch": 0.09, "learning_rate": 2.9689913828411776e-05, "loss": 1.0474, "step": 649 }, { "epoch": 0.09, "learning_rate": 2.9688504441549683e-05, "loss": 0.9922, "step": 650 }, { "epoch": 0.09, "learning_rate": 2.968709189261607e-05, "loss": 0.947, "step": 651 }, { "epoch": 0.09, "learning_rate": 2.9685676181915024e-05, "loss": 1.1032, "step": 652 }, { "epoch": 0.09, "learning_rate": 2.9684257309751304e-05, "loss": 0.8951, "step": 653 }, { "epoch": 0.09, "learning_rate": 2.9682835276430367e-05, "loss": 0.7704, "step": 654 }, { "epoch": 0.09, "learning_rate": 2.9681410082258342e-05, "loss": 1.0006, "step": 655 }, { "epoch": 0.09, "learning_rate": 2.9679981727542028e-05, "loss": 0.8789, "step": 656 }, { "epoch": 0.09, "learning_rate": 2.9678550212588926e-05, "loss": 0.9358, "step": 657 }, { "epoch": 0.09, "learning_rate": 2.9677115537707203e-05, "loss": 1.0312, "step": 658 }, { "epoch": 0.09, "learning_rate": 2.9675677703205704e-05, "loss": 1.0357, "step": 659 }, { "epoch": 0.09, "learning_rate": 2.9674236709393962e-05, "loss": 0.8867, "step": 660 }, { "epoch": 0.09, "learning_rate": 2.9672792556582188e-05, "loss": 0.8917, "step": 661 }, { "epoch": 0.09, "learning_rate": 2.967134524508127e-05, "loss": 0.909, "step": 662 }, { "epoch": 0.09, "learning_rate": 2.966989477520278e-05, "loss": 0.8203, "step": 663 }, { "epoch": 0.1, "learning_rate": 2.9668441147258973e-05, "loss": 0.8677, "step": 664 }, { "epoch": 0.1, "learning_rate": 2.966698436156277e-05, "loss": 0.88, "step": 665 }, { "epoch": 0.1, "learning_rate": 2.9665524418427786e-05, "loss": 0.9777, "step": 666 }, { "epoch": 0.1, "learning_rate": 2.9664061318168316e-05, "loss": 0.7852, "step": 667 }, { "epoch": 0.1, "learning_rate": 2.9662595061099318e-05, "loss": 0.9291, "step": 668 }, { "epoch": 0.1, "learning_rate": 2.9661125647536445e-05, "loss": 0.8878, "step": 669 }, { "epoch": 0.1, "learning_rate": 2.9659653077796027e-05, "loss": 0.8906, "step": 670 }, { "epoch": 0.1, "learning_rate": 2.9658177352195073e-05, "loss": 0.9325, "step": 671 }, { "epoch": 0.1, "learning_rate": 2.965669847105127e-05, "loss": 0.9777, "step": 672 }, { "epoch": 0.1, "learning_rate": 2.965521643468298e-05, "loss": 1.139, "step": 673 }, { "epoch": 0.1, "learning_rate": 2.9653731243409258e-05, "loss": 0.7391, "step": 674 }, { "epoch": 0.1, "learning_rate": 2.9652242897549815e-05, "loss": 0.9275, "step": 675 }, { "epoch": 0.1, "learning_rate": 2.9650751397425067e-05, "loss": 1.0106, "step": 676 }, { "epoch": 0.1, "learning_rate": 2.9649256743356095e-05, "loss": 0.8158, "step": 677 }, { "epoch": 0.1, "learning_rate": 2.964775893566465e-05, "loss": 0.8323, "step": 678 }, { "epoch": 0.1, "learning_rate": 2.9646257974673193e-05, "loss": 0.9358, "step": 679 }, { "epoch": 0.1, "learning_rate": 2.9644753860704827e-05, "loss": 1.0374, "step": 680 }, { "epoch": 0.1, "learning_rate": 2.964324659408336e-05, "loss": 1.1669, "step": 681 }, { "epoch": 0.1, "learning_rate": 2.964173617513326e-05, "loss": 0.9029, "step": 682 }, { "epoch": 0.1, "learning_rate": 2.964022260417969e-05, "loss": 0.8326, "step": 683 }, { "epoch": 0.1, "learning_rate": 2.9638705881548484e-05, "loss": 0.8086, "step": 684 }, { "epoch": 0.1, "learning_rate": 2.9637186007566152e-05, "loss": 0.8566, "step": 685 }, { "epoch": 0.1, "learning_rate": 2.9635662982559887e-05, "loss": 0.9512, "step": 686 }, { "epoch": 0.1, "learning_rate": 2.963413680685756e-05, "loss": 1.0307, "step": 687 }, { "epoch": 0.1, "learning_rate": 2.9632607480787716e-05, "loss": 0.8758, "step": 688 }, { "epoch": 0.1, "learning_rate": 2.9631075004679582e-05, "loss": 0.8521, "step": 689 }, { "epoch": 0.1, "learning_rate": 2.9629539378863063e-05, "loss": 0.8131, "step": 690 }, { "epoch": 0.1, "learning_rate": 2.9628000603668738e-05, "loss": 0.9286, "step": 691 }, { "epoch": 0.1, "learning_rate": 2.9626458679427875e-05, "loss": 0.9057, "step": 692 }, { "epoch": 0.1, "learning_rate": 2.9624913606472402e-05, "loss": 0.8504, "step": 693 }, { "epoch": 0.1, "learning_rate": 2.962336538513494e-05, "loss": 0.9425, "step": 694 }, { "epoch": 0.1, "learning_rate": 2.9621814015748785e-05, "loss": 1.0435, "step": 695 }, { "epoch": 0.1, "learning_rate": 2.9620259498647906e-05, "loss": 0.8694, "step": 696 }, { "epoch": 0.1, "learning_rate": 2.961870183416695e-05, "loss": 0.7838, "step": 697 }, { "epoch": 0.1, "learning_rate": 2.961714102264125e-05, "loss": 1.1881, "step": 698 }, { "epoch": 0.1, "learning_rate": 2.96155770644068e-05, "loss": 1.0089, "step": 699 }, { "epoch": 0.1, "learning_rate": 2.9614009959800296e-05, "loss": 0.9135, "step": 700 }, { "epoch": 0.1, "learning_rate": 2.961243970915908e-05, "loss": 0.7941, "step": 701 }, { "epoch": 0.1, "learning_rate": 2.96108663128212e-05, "loss": 0.887, "step": 702 }, { "epoch": 0.1, "learning_rate": 2.9609289771125366e-05, "loss": 0.7388, "step": 703 }, { "epoch": 0.1, "learning_rate": 2.960771008441097e-05, "loss": 0.9955, "step": 704 }, { "epoch": 0.1, "learning_rate": 2.960612725301807e-05, "loss": 1.2489, "step": 705 }, { "epoch": 0.1, "learning_rate": 2.9604541277287423e-05, "loss": 1.0329, "step": 706 }, { "epoch": 0.1, "learning_rate": 2.9602952157560447e-05, "loss": 0.9794, "step": 707 }, { "epoch": 0.1, "learning_rate": 2.9601359894179237e-05, "loss": 0.9989, "step": 708 }, { "epoch": 0.1, "learning_rate": 2.9599764487486566e-05, "loss": 1.0028, "step": 709 }, { "epoch": 0.1, "learning_rate": 2.9598165937825892e-05, "loss": 0.9704, "step": 710 }, { "epoch": 0.1, "learning_rate": 2.959656424554134e-05, "loss": 0.9732, "step": 711 }, { "epoch": 0.1, "learning_rate": 2.9594959410977714e-05, "loss": 0.9554, "step": 712 }, { "epoch": 0.1, "learning_rate": 2.9593351434480495e-05, "loss": 0.885, "step": 713 }, { "epoch": 0.1, "learning_rate": 2.9591740316395845e-05, "loss": 0.8114, "step": 714 }, { "epoch": 0.1, "learning_rate": 2.9590126057070594e-05, "loss": 1.0525, "step": 715 }, { "epoch": 0.1, "learning_rate": 2.958850865685225e-05, "loss": 0.865, "step": 716 }, { "epoch": 0.1, "learning_rate": 2.9586888116089003e-05, "loss": 0.99, "step": 717 }, { "epoch": 0.1, "learning_rate": 2.9585264435129713e-05, "loss": 0.9777, "step": 718 }, { "epoch": 0.1, "learning_rate": 2.958363761432392e-05, "loss": 0.9023, "step": 719 }, { "epoch": 0.1, "learning_rate": 2.9582007654021838e-05, "loss": 1.0307, "step": 720 }, { "epoch": 0.1, "learning_rate": 2.9580374554574356e-05, "loss": 0.8778, "step": 721 }, { "epoch": 0.1, "learning_rate": 2.9578738316333042e-05, "loss": 0.9721, "step": 722 }, { "epoch": 0.1, "learning_rate": 2.9577098939650135e-05, "loss": 0.6554, "step": 723 }, { "epoch": 0.1, "learning_rate": 2.9575456424878555e-05, "loss": 1.0502, "step": 724 }, { "epoch": 0.1, "learning_rate": 2.957381077237189e-05, "loss": 0.9615, "step": 725 }, { "epoch": 0.1, "learning_rate": 2.9572161982484413e-05, "loss": 0.8142, "step": 726 }, { "epoch": 0.1, "learning_rate": 2.957051005557107e-05, "loss": 0.8627, "step": 727 }, { "epoch": 0.1, "learning_rate": 2.956885499198747e-05, "loss": 0.7866, "step": 728 }, { "epoch": 0.1, "learning_rate": 2.9567196792089917e-05, "loss": 0.7759, "step": 729 }, { "epoch": 0.1, "learning_rate": 2.956553545623538e-05, "loss": 0.9174, "step": 730 }, { "epoch": 0.1, "learning_rate": 2.9563870984781495e-05, "loss": 1.0061, "step": 731 }, { "epoch": 0.1, "learning_rate": 2.9562203378086588e-05, "loss": 0.9269, "step": 732 }, { "epoch": 0.1, "learning_rate": 2.9560532636509654e-05, "loss": 0.9185, "step": 733 }, { "epoch": 0.11, "learning_rate": 2.9558858760410358e-05, "loss": 1.1908, "step": 734 }, { "epoch": 0.11, "learning_rate": 2.9557181750149048e-05, "loss": 0.6978, "step": 735 }, { "epoch": 0.11, "learning_rate": 2.9555501606086742e-05, "loss": 0.8103, "step": 736 }, { "epoch": 0.11, "learning_rate": 2.955381832858513e-05, "loss": 0.9922, "step": 737 }, { "epoch": 0.11, "learning_rate": 2.955213191800659e-05, "loss": 0.9127, "step": 738 }, { "epoch": 0.11, "learning_rate": 2.955044237471415e-05, "loss": 0.8337, "step": 739 }, { "epoch": 0.11, "learning_rate": 2.9548749699071536e-05, "loss": 0.9933, "step": 740 }, { "epoch": 0.11, "learning_rate": 2.9547053891443137e-05, "loss": 0.784, "step": 741 }, { "epoch": 0.11, "learning_rate": 2.954535495219402e-05, "loss": 1.0268, "step": 742 }, { "epoch": 0.11, "learning_rate": 2.954365288168993e-05, "loss": 1.0011, "step": 743 }, { "epoch": 0.11, "learning_rate": 2.9541947680297264e-05, "loss": 0.9542, "step": 744 }, { "epoch": 0.11, "learning_rate": 2.9540239348383125e-05, "loss": 0.8856, "step": 745 }, { "epoch": 0.11, "learning_rate": 2.9538527886315265e-05, "loss": 0.8454, "step": 746 }, { "epoch": 0.11, "learning_rate": 2.9536813294462128e-05, "loss": 0.8186, "step": 747 }, { "epoch": 0.11, "learning_rate": 2.9535095573192817e-05, "loss": 0.9531, "step": 748 }, { "epoch": 0.11, "learning_rate": 2.953337472287712e-05, "loss": 0.8867, "step": 749 }, { "epoch": 0.11, "learning_rate": 2.953165074388549e-05, "loss": 1.0921, "step": 750 }, { "epoch": 0.11, "learning_rate": 2.952992363658905e-05, "loss": 0.7263, "step": 751 }, { "epoch": 0.11, "learning_rate": 2.952819340135962e-05, "loss": 1.0717, "step": 752 }, { "epoch": 0.11, "learning_rate": 2.9526460038569665e-05, "loss": 0.8697, "step": 753 }, { "epoch": 0.11, "learning_rate": 2.9524723548592335e-05, "loss": 0.863, "step": 754 }, { "epoch": 0.11, "learning_rate": 2.9522983931801455e-05, "loss": 1.077, "step": 755 }, { "epoch": 0.11, "learning_rate": 2.952124118857153e-05, "loss": 0.8359, "step": 756 }, { "epoch": 0.11, "learning_rate": 2.9519495319277713e-05, "loss": 0.9046, "step": 757 }, { "epoch": 0.11, "learning_rate": 2.951774632429586e-05, "loss": 0.7751, "step": 758 }, { "epoch": 0.11, "learning_rate": 2.9515994204002485e-05, "loss": 0.8337, "step": 759 }, { "epoch": 0.11, "learning_rate": 2.9514238958774767e-05, "loss": 0.8956, "step": 760 }, { "epoch": 0.11, "learning_rate": 2.9512480588990578e-05, "loss": 0.8583, "step": 761 }, { "epoch": 0.11, "learning_rate": 2.951071909502844e-05, "loss": 0.9118, "step": 762 }, { "epoch": 0.11, "learning_rate": 2.950895447726757e-05, "loss": 0.9838, "step": 763 }, { "epoch": 0.11, "learning_rate": 2.9507186736087843e-05, "loss": 1.13, "step": 764 }, { "epoch": 0.11, "learning_rate": 2.9505415871869808e-05, "loss": 0.9559, "step": 765 }, { "epoch": 0.11, "learning_rate": 2.9503641884994686e-05, "loss": 0.9849, "step": 766 }, { "epoch": 0.11, "learning_rate": 2.9501864775844376e-05, "loss": 0.9358, "step": 767 }, { "epoch": 0.11, "learning_rate": 2.950008454480145e-05, "loss": 0.9314, "step": 768 }, { "epoch": 0.11, "learning_rate": 2.9498301192249138e-05, "loss": 0.815, "step": 769 }, { "epoch": 0.11, "learning_rate": 2.9496514718571357e-05, "loss": 0.9118, "step": 770 }, { "epoch": 0.11, "learning_rate": 2.949472512415269e-05, "loss": 1.034, "step": 771 }, { "epoch": 0.11, "learning_rate": 2.9492932409378397e-05, "loss": 1.0095, "step": 772 }, { "epoch": 0.11, "learning_rate": 2.94911365746344e-05, "loss": 0.9029, "step": 773 }, { "epoch": 0.11, "learning_rate": 2.9489337620307296e-05, "loss": 1.0251, "step": 774 }, { "epoch": 0.11, "learning_rate": 2.9487535546784356e-05, "loss": 0.8499, "step": 775 }, { "epoch": 0.11, "learning_rate": 2.9485730354453527e-05, "loss": 0.7617, "step": 776 }, { "epoch": 0.11, "learning_rate": 2.9483922043703415e-05, "loss": 0.899, "step": 777 }, { "epoch": 0.11, "learning_rate": 2.9482110614923308e-05, "loss": 0.7559, "step": 778 }, { "epoch": 0.11, "learning_rate": 2.9480296068503165e-05, "loss": 0.8867, "step": 779 }, { "epoch": 0.11, "learning_rate": 2.9478478404833612e-05, "loss": 1.2188, "step": 780 }, { "epoch": 0.11, "learning_rate": 2.9476657624305943e-05, "loss": 0.8454, "step": 781 }, { "epoch": 0.11, "learning_rate": 2.9474833727312126e-05, "loss": 0.9587, "step": 782 }, { "epoch": 0.11, "learning_rate": 2.9473006714244812e-05, "loss": 0.8175, "step": 783 }, { "epoch": 0.11, "learning_rate": 2.9471176585497296e-05, "loss": 0.9342, "step": 784 }, { "epoch": 0.11, "learning_rate": 2.9469343341463572e-05, "loss": 0.9291, "step": 785 }, { "epoch": 0.11, "learning_rate": 2.9467506982538285e-05, "loss": 1.0653, "step": 786 }, { "epoch": 0.11, "learning_rate": 2.946566750911676e-05, "loss": 0.9448, "step": 787 }, { "epoch": 0.11, "learning_rate": 2.946382492159499e-05, "loss": 0.9129, "step": 788 }, { "epoch": 0.11, "learning_rate": 2.9461979220369637e-05, "loss": 1.0792, "step": 789 }, { "epoch": 0.11, "learning_rate": 2.946013040583804e-05, "loss": 0.9208, "step": 790 }, { "epoch": 0.11, "learning_rate": 2.9458278478398197e-05, "loss": 0.9375, "step": 791 }, { "epoch": 0.11, "learning_rate": 2.9456423438448785e-05, "loss": 0.9263, "step": 792 }, { "epoch": 0.11, "learning_rate": 2.9454565286389146e-05, "loss": 1.005, "step": 793 }, { "epoch": 0.11, "learning_rate": 2.9452704022619298e-05, "loss": 0.9085, "step": 794 }, { "epoch": 0.11, "learning_rate": 2.9450839647539923e-05, "loss": 1.082, "step": 795 }, { "epoch": 0.11, "learning_rate": 2.9448972161552374e-05, "loss": 0.8895, "step": 796 }, { "epoch": 0.11, "learning_rate": 2.944710156505867e-05, "loss": 0.9152, "step": 797 }, { "epoch": 0.11, "learning_rate": 2.9445227858461517e-05, "loss": 0.9012, "step": 798 }, { "epoch": 0.11, "learning_rate": 2.9443351042164266e-05, "loss": 0.9174, "step": 799 }, { "epoch": 0.11, "learning_rate": 2.9441471116570958e-05, "loss": 1.005, "step": 800 }, { "epoch": 0.11, "learning_rate": 2.9439588082086284e-05, "loss": 1.1275, "step": 801 }, { "epoch": 0.11, "learning_rate": 2.943770193911562e-05, "loss": 0.8343, "step": 802 }, { "epoch": 0.12, "learning_rate": 2.943581268806501e-05, "loss": 0.8348, "step": 803 }, { "epoch": 0.12, "learning_rate": 2.943392032934116e-05, "loss": 0.9685, "step": 804 }, { "epoch": 0.12, "learning_rate": 2.943202486335144e-05, "loss": 0.8862, "step": 805 }, { "epoch": 0.12, "learning_rate": 2.943012629050391e-05, "loss": 0.8092, "step": 806 }, { "epoch": 0.12, "learning_rate": 2.942822461120728e-05, "loss": 0.9883, "step": 807 }, { "epoch": 0.12, "learning_rate": 2.9426319825870932e-05, "loss": 0.8945, "step": 808 }, { "epoch": 0.12, "learning_rate": 2.942441193490492e-05, "loss": 0.87, "step": 809 }, { "epoch": 0.12, "learning_rate": 2.942250093871997e-05, "loss": 0.8103, "step": 810 }, { "epoch": 0.12, "learning_rate": 2.942058683772747e-05, "loss": 0.9358, "step": 811 }, { "epoch": 0.12, "learning_rate": 2.9418669632339476e-05, "loss": 1.077, "step": 812 }, { "epoch": 0.12, "learning_rate": 2.941674932296872e-05, "loss": 0.9496, "step": 813 }, { "epoch": 0.12, "learning_rate": 2.9414825910028587e-05, "loss": 0.7924, "step": 814 }, { "epoch": 0.12, "learning_rate": 2.9412899393933155e-05, "loss": 0.9888, "step": 815 }, { "epoch": 0.12, "learning_rate": 2.941096977509714e-05, "loss": 0.9347, "step": 816 }, { "epoch": 0.12, "learning_rate": 2.9409037053935955e-05, "loss": 0.9135, "step": 817 }, { "epoch": 0.12, "learning_rate": 2.9407101230865655e-05, "loss": 0.7695, "step": 818 }, { "epoch": 0.12, "learning_rate": 2.9405162306302987e-05, "loss": 0.8259, "step": 819 }, { "epoch": 0.12, "learning_rate": 2.940322028066534e-05, "loss": 0.8209, "step": 820 }, { "epoch": 0.12, "learning_rate": 2.9401275154370797e-05, "loss": 0.8435, "step": 821 }, { "epoch": 0.12, "learning_rate": 2.9399326927838087e-05, "loss": 0.9174, "step": 822 }, { "epoch": 0.12, "learning_rate": 2.9397375601486616e-05, "loss": 0.9135, "step": 823 }, { "epoch": 0.12, "learning_rate": 2.939542117573646e-05, "loss": 0.9029, "step": 824 }, { "epoch": 0.12, "learning_rate": 2.9393463651008355e-05, "loss": 0.9213, "step": 825 }, { "epoch": 0.12, "learning_rate": 2.939150302772371e-05, "loss": 1.0229, "step": 826 }, { "epoch": 0.12, "learning_rate": 2.9389539306304597e-05, "loss": 0.8281, "step": 827 }, { "epoch": 0.12, "learning_rate": 2.9387572487173754e-05, "loss": 1.2093, "step": 828 }, { "epoch": 0.12, "learning_rate": 2.9385602570754596e-05, "loss": 1.0954, "step": 829 }, { "epoch": 0.12, "learning_rate": 2.938362955747119e-05, "loss": 1.1808, "step": 830 }, { "epoch": 0.12, "learning_rate": 2.938165344774828e-05, "loss": 0.7542, "step": 831 }, { "epoch": 0.12, "learning_rate": 2.9379674242011277e-05, "loss": 1.0123, "step": 832 }, { "epoch": 0.12, "learning_rate": 2.937769194068625e-05, "loss": 0.793, "step": 833 }, { "epoch": 0.12, "learning_rate": 2.9375706544199936e-05, "loss": 0.9749, "step": 834 }, { "epoch": 0.12, "learning_rate": 2.937371805297975e-05, "loss": 0.9911, "step": 835 }, { "epoch": 0.12, "learning_rate": 2.9371726467453763e-05, "loss": 1.0061, "step": 836 }, { "epoch": 0.12, "learning_rate": 2.9369731788050712e-05, "loss": 0.957, "step": 837 }, { "epoch": 0.12, "learning_rate": 2.9367734015200006e-05, "loss": 0.8493, "step": 838 }, { "epoch": 0.12, "learning_rate": 2.9365733149331705e-05, "loss": 0.8862, "step": 839 }, { "epoch": 0.12, "learning_rate": 2.9363729190876558e-05, "loss": 0.9146, "step": 840 }, { "epoch": 0.12, "learning_rate": 2.9361722140265963e-05, "loss": 0.9933, "step": 841 }, { "epoch": 0.12, "learning_rate": 2.9359711997931992e-05, "loss": 0.8728, "step": 842 }, { "epoch": 0.12, "learning_rate": 2.935769876430737e-05, "loss": 1.0039, "step": 843 }, { "epoch": 0.12, "learning_rate": 2.9355682439825502e-05, "loss": 0.8276, "step": 844 }, { "epoch": 0.12, "learning_rate": 2.9353663024920453e-05, "loss": 0.7305, "step": 845 }, { "epoch": 0.12, "learning_rate": 2.9351640520026955e-05, "loss": 0.9654, "step": 846 }, { "epoch": 0.12, "learning_rate": 2.9349614925580402e-05, "loss": 0.8359, "step": 847 }, { "epoch": 0.12, "learning_rate": 2.934758624201685e-05, "loss": 0.9314, "step": 848 }, { "epoch": 0.12, "learning_rate": 2.9345554469773027e-05, "loss": 0.8354, "step": 849 }, { "epoch": 0.12, "learning_rate": 2.934351960928633e-05, "loss": 0.9414, "step": 850 }, { "epoch": 0.12, "learning_rate": 2.9341481660994803e-05, "loss": 1.0128, "step": 851 }, { "epoch": 0.12, "learning_rate": 2.933944062533717e-05, "loss": 0.7793, "step": 852 }, { "epoch": 0.12, "learning_rate": 2.933739650275282e-05, "loss": 0.9807, "step": 853 }, { "epoch": 0.12, "learning_rate": 2.93353492936818e-05, "loss": 0.8627, "step": 854 }, { "epoch": 0.12, "learning_rate": 2.9333298998564818e-05, "loss": 0.9414, "step": 855 }, { "epoch": 0.12, "learning_rate": 2.9331245617843258e-05, "loss": 0.8418, "step": 856 }, { "epoch": 0.12, "learning_rate": 2.9329189151959158e-05, "loss": 0.865, "step": 857 }, { "epoch": 0.12, "learning_rate": 2.9327129601355227e-05, "loss": 0.7793, "step": 858 }, { "epoch": 0.12, "learning_rate": 2.9325066966474834e-05, "loss": 0.7773, "step": 859 }, { "epoch": 0.12, "learning_rate": 2.9323001247762012e-05, "loss": 0.7896, "step": 860 }, { "epoch": 0.12, "learning_rate": 2.9320932445661462e-05, "loss": 0.7667, "step": 861 }, { "epoch": 0.12, "learning_rate": 2.9318860560618543e-05, "loss": 0.877, "step": 862 }, { "epoch": 0.12, "learning_rate": 2.9316785593079286e-05, "loss": 0.8181, "step": 863 }, { "epoch": 0.12, "learning_rate": 2.9314707543490374e-05, "loss": 0.8873, "step": 864 }, { "epoch": 0.12, "learning_rate": 2.931262641229916e-05, "loss": 1.0195, "step": 865 }, { "epoch": 0.12, "learning_rate": 2.9310542199953665e-05, "loss": 0.8951, "step": 866 }, { "epoch": 0.12, "learning_rate": 2.9308454906902564e-05, "loss": 0.832, "step": 867 }, { "epoch": 0.12, "learning_rate": 2.93063645335952e-05, "loss": 0.8345, "step": 868 }, { "epoch": 0.12, "learning_rate": 2.9304271080481578e-05, "loss": 0.9076, "step": 869 }, { "epoch": 0.12, "learning_rate": 2.930217454801237e-05, "loss": 0.909, "step": 870 }, { "epoch": 0.12, "learning_rate": 2.930007493663891e-05, "loss": 0.7734, "step": 871 }, { "epoch": 0.12, "learning_rate": 2.9297972246813183e-05, "loss": 0.8203, "step": 872 }, { "epoch": 0.13, "learning_rate": 2.929586647898785e-05, "loss": 0.8482, "step": 873 }, { "epoch": 0.13, "learning_rate": 2.9293757633616237e-05, "loss": 0.9581, "step": 874 }, { "epoch": 0.13, "learning_rate": 2.9291645711152317e-05, "loss": 0.7129, "step": 875 }, { "epoch": 0.13, "learning_rate": 2.928953071205074e-05, "loss": 0.933, "step": 876 }, { "epoch": 0.13, "learning_rate": 2.928741263676681e-05, "loss": 0.8471, "step": 877 }, { "epoch": 0.13, "learning_rate": 2.9285291485756496e-05, "loss": 0.8923, "step": 878 }, { "epoch": 0.13, "learning_rate": 2.928316725947644e-05, "loss": 0.9155, "step": 879 }, { "epoch": 0.13, "learning_rate": 2.9281039958383922e-05, "loss": 1.1417, "step": 880 }, { "epoch": 0.13, "learning_rate": 2.9278909582936897e-05, "loss": 0.9113, "step": 881 }, { "epoch": 0.13, "learning_rate": 2.9276776133593994e-05, "loss": 1.005, "step": 882 }, { "epoch": 0.13, "learning_rate": 2.927463961081448e-05, "loss": 0.8119, "step": 883 }, { "epoch": 0.13, "learning_rate": 2.9272500015058302e-05, "loss": 0.7188, "step": 884 }, { "epoch": 0.13, "learning_rate": 2.927035734678606e-05, "loss": 0.8979, "step": 885 }, { "epoch": 0.13, "learning_rate": 2.9268211606459018e-05, "loss": 0.9068, "step": 886 }, { "epoch": 0.13, "learning_rate": 2.92660627945391e-05, "loss": 0.9302, "step": 887 }, { "epoch": 0.13, "learning_rate": 2.9263910911488895e-05, "loss": 0.8142, "step": 888 }, { "epoch": 0.13, "learning_rate": 2.9261755957771645e-05, "loss": 1.0647, "step": 889 }, { "epoch": 0.13, "learning_rate": 2.925959793385126e-05, "loss": 0.9838, "step": 890 }, { "epoch": 0.13, "learning_rate": 2.9257436840192317e-05, "loss": 0.8479, "step": 891 }, { "epoch": 0.13, "learning_rate": 2.9255272677260027e-05, "loss": 1.0106, "step": 892 }, { "epoch": 0.13, "learning_rate": 2.9253105445520306e-05, "loss": 0.7467, "step": 893 }, { "epoch": 0.13, "learning_rate": 2.925093514543968e-05, "loss": 0.9146, "step": 894 }, { "epoch": 0.13, "learning_rate": 2.9248761777485383e-05, "loss": 0.7288, "step": 895 }, { "epoch": 0.13, "learning_rate": 2.9246585342125275e-05, "loss": 0.8795, "step": 896 }, { "epoch": 0.13, "learning_rate": 2.924440583982789e-05, "loss": 1.0619, "step": 897 }, { "epoch": 0.13, "learning_rate": 2.924222327106242e-05, "loss": 0.8624, "step": 898 }, { "epoch": 0.13, "learning_rate": 2.924003763629872e-05, "loss": 0.9035, "step": 899 }, { "epoch": 0.13, "learning_rate": 2.9237848936007302e-05, "loss": 0.8739, "step": 900 }, { "epoch": 0.13, "learning_rate": 2.9235657170659345e-05, "loss": 0.8906, "step": 901 }, { "epoch": 0.13, "learning_rate": 2.9233462340726677e-05, "loss": 0.8331, "step": 902 }, { "epoch": 0.13, "learning_rate": 2.9231264446681785e-05, "loss": 1.0558, "step": 903 }, { "epoch": 0.13, "learning_rate": 2.9229063488997833e-05, "loss": 0.904, "step": 904 }, { "epoch": 0.13, "learning_rate": 2.9226859468148623e-05, "loss": 0.9163, "step": 905 }, { "epoch": 0.13, "learning_rate": 2.922465238460863e-05, "loss": 0.8253, "step": 906 }, { "epoch": 0.13, "learning_rate": 2.9222442238852986e-05, "loss": 0.9308, "step": 907 }, { "epoch": 0.13, "learning_rate": 2.9220229031357476e-05, "loss": 0.9208, "step": 908 }, { "epoch": 0.13, "learning_rate": 2.921801276259855e-05, "loss": 1.0374, "step": 909 }, { "epoch": 0.13, "learning_rate": 2.9215793433053322e-05, "loss": 1.005, "step": 910 }, { "epoch": 0.13, "learning_rate": 2.9213571043199553e-05, "loss": 1.1825, "step": 911 }, { "epoch": 0.13, "learning_rate": 2.9211345593515667e-05, "loss": 0.822, "step": 912 }, { "epoch": 0.13, "learning_rate": 2.9209117084480753e-05, "loss": 0.8248, "step": 913 }, { "epoch": 0.13, "learning_rate": 2.920688551657455e-05, "loss": 1.058, "step": 914 }, { "epoch": 0.13, "learning_rate": 2.9204650890277464e-05, "loss": 0.8284, "step": 915 }, { "epoch": 0.13, "learning_rate": 2.9202413206070552e-05, "loss": 1.0329, "step": 916 }, { "epoch": 0.13, "learning_rate": 2.920017246443553e-05, "loss": 0.9916, "step": 917 }, { "epoch": 0.13, "learning_rate": 2.919792866585478e-05, "loss": 0.9157, "step": 918 }, { "epoch": 0.13, "learning_rate": 2.9195681810811325e-05, "loss": 0.8292, "step": 919 }, { "epoch": 0.13, "learning_rate": 2.919343189978887e-05, "loss": 0.9397, "step": 920 }, { "epoch": 0.13, "learning_rate": 2.9191178933271756e-05, "loss": 0.9146, "step": 921 }, { "epoch": 0.13, "learning_rate": 2.9188922911745e-05, "loss": 0.8683, "step": 922 }, { "epoch": 0.13, "learning_rate": 2.9186663835694258e-05, "loss": 0.6749, "step": 923 }, { "epoch": 0.13, "learning_rate": 2.9184401705605863e-05, "loss": 1.01, "step": 924 }, { "epoch": 0.13, "learning_rate": 2.9182136521966783e-05, "loss": 0.986, "step": 925 }, { "epoch": 0.13, "learning_rate": 2.9179868285264665e-05, "loss": 0.8387, "step": 926 }, { "epoch": 0.13, "learning_rate": 2.917759699598781e-05, "loss": 0.9344, "step": 927 }, { "epoch": 0.13, "learning_rate": 2.9175322654625156e-05, "loss": 0.9408, "step": 928 }, { "epoch": 0.13, "learning_rate": 2.917304526166632e-05, "loss": 0.9375, "step": 929 }, { "epoch": 0.13, "learning_rate": 2.9170764817601565e-05, "loss": 0.9135, "step": 930 }, { "epoch": 0.13, "learning_rate": 2.9168481322921823e-05, "loss": 0.8823, "step": 931 }, { "epoch": 0.13, "learning_rate": 2.9166194778118664e-05, "loss": 0.9414, "step": 932 }, { "epoch": 0.13, "learning_rate": 2.9163905183684327e-05, "loss": 1.0463, "step": 933 }, { "epoch": 0.13, "learning_rate": 2.9161612540111706e-05, "loss": 0.9219, "step": 934 }, { "epoch": 0.13, "learning_rate": 2.9159316847894354e-05, "loss": 1.0452, "step": 935 }, { "epoch": 0.13, "learning_rate": 2.9157018107526474e-05, "loss": 0.9911, "step": 936 }, { "epoch": 0.13, "learning_rate": 2.9154716319502923e-05, "loss": 0.8398, "step": 937 }, { "epoch": 0.13, "learning_rate": 2.9152411484319225e-05, "loss": 0.9369, "step": 938 }, { "epoch": 0.13, "learning_rate": 2.9150103602471553e-05, "loss": 0.76, "step": 939 }, { "epoch": 0.13, "learning_rate": 2.9147792674456734e-05, "loss": 0.9191, "step": 940 }, { "epoch": 0.13, "learning_rate": 2.9145478700772255e-05, "loss": 1.0402, "step": 941 }, { "epoch": 0.13, "learning_rate": 2.9143161681916264e-05, "loss": 1.067, "step": 942 }, { "epoch": 0.14, "learning_rate": 2.9140841618387546e-05, "loss": 0.8862, "step": 943 }, { "epoch": 0.14, "learning_rate": 2.9138518510685564e-05, "loss": 0.8987, "step": 944 }, { "epoch": 0.14, "learning_rate": 2.9136192359310416e-05, "loss": 0.9888, "step": 945 }, { "epoch": 0.14, "learning_rate": 2.913386316476287e-05, "loss": 0.9325, "step": 946 }, { "epoch": 0.14, "learning_rate": 2.9131530927544346e-05, "loss": 0.9813, "step": 947 }, { "epoch": 0.14, "learning_rate": 2.912919564815691e-05, "loss": 0.9682, "step": 948 }, { "epoch": 0.14, "learning_rate": 2.91268573271033e-05, "loss": 0.9358, "step": 949 }, { "epoch": 0.14, "learning_rate": 2.912451596488689e-05, "loss": 0.9927, "step": 950 }, { "epoch": 0.14, "learning_rate": 2.912217156201172e-05, "loss": 1.0597, "step": 951 }, { "epoch": 0.14, "learning_rate": 2.9119824118982482e-05, "loss": 0.9621, "step": 952 }, { "epoch": 0.14, "learning_rate": 2.9117473636304526e-05, "loss": 0.8817, "step": 953 }, { "epoch": 0.14, "learning_rate": 2.9115120114483845e-05, "loss": 0.8555, "step": 954 }, { "epoch": 0.14, "learning_rate": 2.91127635540271e-05, "loss": 0.9961, "step": 955 }, { "epoch": 0.14, "learning_rate": 2.9110403955441597e-05, "loss": 1.1233, "step": 956 }, { "epoch": 0.14, "learning_rate": 2.9108041319235302e-05, "loss": 0.8683, "step": 957 }, { "epoch": 0.14, "learning_rate": 2.9105675645916832e-05, "loss": 0.6722, "step": 958 }, { "epoch": 0.14, "learning_rate": 2.9103306935995452e-05, "loss": 0.8348, "step": 959 }, { "epoch": 0.14, "learning_rate": 2.9100935189981098e-05, "loss": 0.8418, "step": 960 }, { "epoch": 0.14, "learning_rate": 2.909856040838434e-05, "loss": 0.8069, "step": 961 }, { "epoch": 0.14, "learning_rate": 2.9096182591716405e-05, "loss": 0.9615, "step": 962 }, { "epoch": 0.14, "learning_rate": 2.909380174048919e-05, "loss": 1.0156, "step": 963 }, { "epoch": 0.14, "learning_rate": 2.909141785521523e-05, "loss": 1.0162, "step": 964 }, { "epoch": 0.14, "learning_rate": 2.908903093640771e-05, "loss": 0.9916, "step": 965 }, { "epoch": 0.14, "learning_rate": 2.9086640984580483e-05, "loss": 0.8214, "step": 966 }, { "epoch": 0.14, "learning_rate": 2.9084248000248035e-05, "loss": 0.8544, "step": 967 }, { "epoch": 0.14, "learning_rate": 2.908185198392553e-05, "loss": 0.9364, "step": 968 }, { "epoch": 0.14, "learning_rate": 2.907945293612876e-05, "loss": 0.9464, "step": 969 }, { "epoch": 0.14, "learning_rate": 2.907705085737419e-05, "loss": 0.7829, "step": 970 }, { "epoch": 0.14, "learning_rate": 2.9074645748178918e-05, "loss": 1.005, "step": 971 }, { "epoch": 0.14, "learning_rate": 2.9072237609060715e-05, "loss": 0.8041, "step": 972 }, { "epoch": 0.14, "learning_rate": 2.9069826440537985e-05, "loss": 0.8443, "step": 973 }, { "epoch": 0.14, "learning_rate": 2.9067412243129798e-05, "loss": 0.9375, "step": 974 }, { "epoch": 0.14, "learning_rate": 2.9064995017355868e-05, "loss": 0.9163, "step": 975 }, { "epoch": 0.14, "learning_rate": 2.9062574763736568e-05, "loss": 0.8666, "step": 976 }, { "epoch": 0.14, "learning_rate": 2.9060151482792913e-05, "loss": 0.8733, "step": 977 }, { "epoch": 0.14, "learning_rate": 2.905772517504658e-05, "loss": 1.0307, "step": 978 }, { "epoch": 0.14, "learning_rate": 2.9055295841019893e-05, "loss": 0.9688, "step": 979 }, { "epoch": 0.14, "learning_rate": 2.9052863481235826e-05, "loss": 1.125, "step": 980 }, { "epoch": 0.14, "learning_rate": 2.9050428096218002e-05, "loss": 1.0056, "step": 981 }, { "epoch": 0.14, "learning_rate": 2.9047989686490707e-05, "loss": 0.9743, "step": 982 }, { "epoch": 0.14, "learning_rate": 2.904554825257886e-05, "loss": 0.9358, "step": 983 }, { "epoch": 0.14, "learning_rate": 2.9043103795008056e-05, "loss": 1.014, "step": 984 }, { "epoch": 0.14, "learning_rate": 2.9040656314304512e-05, "loss": 0.858, "step": 985 }, { "epoch": 0.14, "learning_rate": 2.903820581099512e-05, "loss": 0.8962, "step": 986 }, { "epoch": 0.14, "learning_rate": 2.9035752285607407e-05, "loss": 0.8011, "step": 987 }, { "epoch": 0.14, "learning_rate": 2.9033295738669554e-05, "loss": 0.784, "step": 988 }, { "epoch": 0.14, "learning_rate": 2.9030836170710402e-05, "loss": 0.8025, "step": 989 }, { "epoch": 0.14, "learning_rate": 2.9028373582259434e-05, "loss": 0.7785, "step": 990 }, { "epoch": 0.14, "learning_rate": 2.9025907973846778e-05, "loss": 0.7528, "step": 991 }, { "epoch": 0.14, "learning_rate": 2.9023439346003226e-05, "loss": 0.8382, "step": 992 }, { "epoch": 0.14, "learning_rate": 2.9020967699260205e-05, "loss": 1.0547, "step": 993 }, { "epoch": 0.14, "learning_rate": 2.9018493034149808e-05, "loss": 0.9205, "step": 994 }, { "epoch": 0.14, "learning_rate": 2.9016015351204762e-05, "loss": 0.8672, "step": 995 }, { "epoch": 0.14, "learning_rate": 2.9013534650958456e-05, "loss": 1.0965, "step": 996 }, { "epoch": 0.14, "learning_rate": 2.901105093394492e-05, "loss": 0.9632, "step": 997 }, { "epoch": 0.14, "learning_rate": 2.900856420069883e-05, "loss": 0.7341, "step": 998 }, { "epoch": 0.14, "learning_rate": 2.9006074451755535e-05, "loss": 0.9805, "step": 999 }, { "epoch": 0.14, "learning_rate": 2.9003581687651006e-05, "loss": 0.9018, "step": 1000 }, { "epoch": 0.14, "learning_rate": 2.900108590892187e-05, "loss": 0.9375, "step": 1001 }, { "epoch": 0.14, "learning_rate": 2.899858711610541e-05, "loss": 0.861, "step": 1002 }, { "epoch": 0.14, "learning_rate": 2.899608530973956e-05, "loss": 0.865, "step": 1003 }, { "epoch": 0.14, "learning_rate": 2.899358049036289e-05, "loss": 0.7257, "step": 1004 }, { "epoch": 0.14, "learning_rate": 2.899107265851463e-05, "loss": 0.8929, "step": 1005 }, { "epoch": 0.14, "learning_rate": 2.898856181473465e-05, "loss": 0.8711, "step": 1006 }, { "epoch": 0.14, "learning_rate": 2.8986047959563472e-05, "loss": 1.0519, "step": 1007 }, { "epoch": 0.14, "learning_rate": 2.8983531093542276e-05, "loss": 0.9448, "step": 1008 }, { "epoch": 0.14, "learning_rate": 2.898101121721287e-05, "loss": 0.8817, "step": 1009 }, { "epoch": 0.14, "learning_rate": 2.8978488331117723e-05, "loss": 0.8689, "step": 1010 }, { "epoch": 0.14, "learning_rate": 2.8975962435799957e-05, "loss": 1.0162, "step": 1011 }, { "epoch": 0.14, "learning_rate": 2.8973433531803327e-05, "loss": 0.8318, "step": 1012 }, { "epoch": 0.15, "learning_rate": 2.8970901619672245e-05, "loss": 0.9314, "step": 1013 }, { "epoch": 0.15, "learning_rate": 2.8968366699951774e-05, "loss": 1.0251, "step": 1014 }, { "epoch": 0.15, "learning_rate": 2.896582877318761e-05, "loss": 1.1378, "step": 1015 }, { "epoch": 0.15, "learning_rate": 2.8963287839926123e-05, "loss": 0.9972, "step": 1016 }, { "epoch": 0.15, "learning_rate": 2.896074390071429e-05, "loss": 1.0084, "step": 1017 }, { "epoch": 0.15, "learning_rate": 2.895819695609978e-05, "loss": 0.8136, "step": 1018 }, { "epoch": 0.15, "learning_rate": 2.8955647006630868e-05, "loss": 0.9141, "step": 1019 }, { "epoch": 0.15, "learning_rate": 2.895309405285651e-05, "loss": 0.7118, "step": 1020 }, { "epoch": 0.15, "learning_rate": 2.8950538095326288e-05, "loss": 0.9615, "step": 1021 }, { "epoch": 0.15, "learning_rate": 2.894797913459043e-05, "loss": 0.8086, "step": 1022 }, { "epoch": 0.15, "learning_rate": 2.8945417171199828e-05, "loss": 0.8158, "step": 1023 }, { "epoch": 0.15, "learning_rate": 2.8942852205706006e-05, "loss": 0.9994, "step": 1024 }, { "epoch": 0.15, "learning_rate": 2.8940284238661132e-05, "loss": 0.8778, "step": 1025 }, { "epoch": 0.15, "learning_rate": 2.8937713270618033e-05, "loss": 1.0977, "step": 1026 }, { "epoch": 0.15, "learning_rate": 2.8935139302130168e-05, "loss": 0.9453, "step": 1027 }, { "epoch": 0.15, "learning_rate": 2.8932562333751656e-05, "loss": 1.2277, "step": 1028 }, { "epoch": 0.15, "learning_rate": 2.892998236603725e-05, "loss": 0.8789, "step": 1029 }, { "epoch": 0.15, "learning_rate": 2.892739939954236e-05, "loss": 0.9922, "step": 1030 }, { "epoch": 0.15, "learning_rate": 2.892481343482302e-05, "loss": 0.9157, "step": 1031 }, { "epoch": 0.15, "learning_rate": 2.8922224472435934e-05, "loss": 0.88, "step": 1032 }, { "epoch": 0.15, "learning_rate": 2.8919632512938444e-05, "loss": 0.8502, "step": 1033 }, { "epoch": 0.15, "learning_rate": 2.8917037556888532e-05, "loss": 1.1222, "step": 1034 }, { "epoch": 0.15, "learning_rate": 2.8914439604844824e-05, "loss": 1.0938, "step": 1035 }, { "epoch": 0.15, "learning_rate": 2.89118386573666e-05, "loss": 0.9141, "step": 1036 }, { "epoch": 0.15, "learning_rate": 2.8909234715013776e-05, "loss": 0.9408, "step": 1037 }, { "epoch": 0.15, "learning_rate": 2.8906627778346923e-05, "loss": 0.7377, "step": 1038 }, { "epoch": 0.15, "learning_rate": 2.8904017847927237e-05, "loss": 0.9933, "step": 1039 }, { "epoch": 0.15, "learning_rate": 2.8901404924316584e-05, "loss": 0.9381, "step": 1040 }, { "epoch": 0.15, "learning_rate": 2.8898789008077454e-05, "loss": 0.8531, "step": 1041 }, { "epoch": 0.15, "learning_rate": 2.8896170099772998e-05, "loss": 1.1328, "step": 1042 }, { "epoch": 0.15, "learning_rate": 2.889354819996699e-05, "loss": 1.0167, "step": 1043 }, { "epoch": 0.15, "learning_rate": 2.8890923309223873e-05, "loss": 0.8044, "step": 1044 }, { "epoch": 0.15, "learning_rate": 2.8888295428108706e-05, "loss": 1.0335, "step": 1045 }, { "epoch": 0.15, "learning_rate": 2.8885664557187223e-05, "loss": 0.7291, "step": 1046 }, { "epoch": 0.15, "learning_rate": 2.8883030697025774e-05, "loss": 0.8069, "step": 1047 }, { "epoch": 0.15, "learning_rate": 2.888039384819137e-05, "loss": 0.7849, "step": 1048 }, { "epoch": 0.15, "learning_rate": 2.8877754011251654e-05, "loss": 0.8482, "step": 1049 }, { "epoch": 0.15, "learning_rate": 2.8875111186774926e-05, "loss": 0.8253, "step": 1050 }, { "epoch": 0.15, "learning_rate": 2.887246537533011e-05, "loss": 0.8544, "step": 1051 }, { "epoch": 0.15, "learning_rate": 2.886981657748679e-05, "loss": 0.7483, "step": 1052 }, { "epoch": 0.15, "learning_rate": 2.8867164793815184e-05, "loss": 0.8493, "step": 1053 }, { "epoch": 0.15, "learning_rate": 2.886451002488616e-05, "loss": 0.9079, "step": 1054 }, { "epoch": 0.15, "learning_rate": 2.8861852271271216e-05, "loss": 0.8295, "step": 1055 }, { "epoch": 0.15, "learning_rate": 2.885919153354251e-05, "loss": 1.0145, "step": 1056 }, { "epoch": 0.15, "learning_rate": 2.8856527812272827e-05, "loss": 0.8027, "step": 1057 }, { "epoch": 0.15, "learning_rate": 2.8853861108035605e-05, "loss": 0.7919, "step": 1058 }, { "epoch": 0.15, "learning_rate": 2.8851191421404913e-05, "loss": 1.0753, "step": 1059 }, { "epoch": 0.15, "learning_rate": 2.8848518752955474e-05, "loss": 1.072, "step": 1060 }, { "epoch": 0.15, "learning_rate": 2.8845843103262642e-05, "loss": 1.014, "step": 1061 }, { "epoch": 0.15, "learning_rate": 2.884316447290242e-05, "loss": 0.8008, "step": 1062 }, { "epoch": 0.15, "learning_rate": 2.8840482862451458e-05, "loss": 0.7162, "step": 1063 }, { "epoch": 0.15, "learning_rate": 2.883779827248703e-05, "loss": 0.9085, "step": 1064 }, { "epoch": 0.15, "learning_rate": 2.8835110703587066e-05, "loss": 0.8276, "step": 1065 }, { "epoch": 0.15, "learning_rate": 2.8832420156330135e-05, "loss": 0.9682, "step": 1066 }, { "epoch": 0.15, "learning_rate": 2.8829726631295445e-05, "loss": 0.7581, "step": 1067 }, { "epoch": 0.15, "learning_rate": 2.8827030129062846e-05, "loss": 0.8895, "step": 1068 }, { "epoch": 0.15, "learning_rate": 2.882433065021282e-05, "loss": 0.7801, "step": 1069 }, { "epoch": 0.15, "learning_rate": 2.8821628195326508e-05, "loss": 0.8108, "step": 1070 }, { "epoch": 0.15, "learning_rate": 2.8818922764985673e-05, "loss": 0.8479, "step": 1071 }, { "epoch": 0.15, "learning_rate": 2.8816214359772737e-05, "loss": 0.9883, "step": 1072 }, { "epoch": 0.15, "learning_rate": 2.8813502980270745e-05, "loss": 1.0234, "step": 1073 }, { "epoch": 0.15, "learning_rate": 2.88107886270634e-05, "loss": 0.9788, "step": 1074 }, { "epoch": 0.15, "learning_rate": 2.8808071300735016e-05, "loss": 0.9386, "step": 1075 }, { "epoch": 0.15, "learning_rate": 2.8805351001870584e-05, "loss": 1.0446, "step": 1076 }, { "epoch": 0.15, "learning_rate": 2.8802627731055707e-05, "loss": 0.918, "step": 1077 }, { "epoch": 0.15, "learning_rate": 2.8799901488876647e-05, "loss": 0.9297, "step": 1078 }, { "epoch": 0.15, "learning_rate": 2.8797172275920287e-05, "loss": 0.9213, "step": 1079 }, { "epoch": 0.15, "learning_rate": 2.8794440092774163e-05, "loss": 0.8103, "step": 1080 }, { "epoch": 0.15, "learning_rate": 2.8791704940026447e-05, "loss": 0.9565, "step": 1081 }, { "epoch": 0.15, "learning_rate": 2.8788966818265952e-05, "loss": 0.7907, "step": 1082 }, { "epoch": 0.16, "learning_rate": 2.878622572808212e-05, "loss": 0.8384, "step": 1083 }, { "epoch": 0.16, "learning_rate": 2.878348167006505e-05, "loss": 0.9648, "step": 1084 }, { "epoch": 0.16, "learning_rate": 2.8780734644805457e-05, "loss": 0.8474, "step": 1085 }, { "epoch": 0.16, "learning_rate": 2.877798465289472e-05, "loss": 0.7665, "step": 1086 }, { "epoch": 0.16, "learning_rate": 2.877523169492484e-05, "loss": 0.9353, "step": 1087 }, { "epoch": 0.16, "learning_rate": 2.877247577148846e-05, "loss": 0.9794, "step": 1088 }, { "epoch": 0.16, "learning_rate": 2.8769716883178862e-05, "loss": 0.8772, "step": 1089 }, { "epoch": 0.16, "learning_rate": 2.876695503058997e-05, "loss": 0.7885, "step": 1090 }, { "epoch": 0.16, "learning_rate": 2.8764190214316335e-05, "loss": 0.9453, "step": 1091 }, { "epoch": 0.16, "learning_rate": 2.876142243495316e-05, "loss": 0.8125, "step": 1092 }, { "epoch": 0.16, "learning_rate": 2.875865169309627e-05, "loss": 0.875, "step": 1093 }, { "epoch": 0.16, "learning_rate": 2.8755877989342154e-05, "loss": 0.8544, "step": 1094 }, { "epoch": 0.16, "learning_rate": 2.8753101324287906e-05, "loss": 0.9224, "step": 1095 }, { "epoch": 0.16, "learning_rate": 2.8750321698531282e-05, "loss": 0.9679, "step": 1096 }, { "epoch": 0.16, "learning_rate": 2.8747539112670665e-05, "loss": 0.9833, "step": 1097 }, { "epoch": 0.16, "learning_rate": 2.874475356730507e-05, "loss": 1.0307, "step": 1098 }, { "epoch": 0.16, "learning_rate": 2.8741965063034167e-05, "loss": 0.6554, "step": 1099 }, { "epoch": 0.16, "learning_rate": 2.8739173600458245e-05, "loss": 0.9185, "step": 1100 }, { "epoch": 0.16, "learning_rate": 2.873637918017824e-05, "loss": 0.8309, "step": 1101 }, { "epoch": 0.16, "learning_rate": 2.873358180279572e-05, "loss": 0.9191, "step": 1102 }, { "epoch": 0.16, "learning_rate": 2.8730781468912887e-05, "loss": 0.8923, "step": 1103 }, { "epoch": 0.16, "learning_rate": 2.8727978179132594e-05, "loss": 0.8089, "step": 1104 }, { "epoch": 0.16, "learning_rate": 2.8725171934058313e-05, "loss": 0.8192, "step": 1105 }, { "epoch": 0.16, "learning_rate": 2.8722362734294157e-05, "loss": 0.9196, "step": 1106 }, { "epoch": 0.16, "learning_rate": 2.8719550580444877e-05, "loss": 0.8929, "step": 1107 }, { "epoch": 0.16, "learning_rate": 2.8716735473115868e-05, "loss": 1.0508, "step": 1108 }, { "epoch": 0.16, "learning_rate": 2.8713917412913145e-05, "loss": 0.9325, "step": 1109 }, { "epoch": 0.16, "learning_rate": 2.8711096400443373e-05, "loss": 0.894, "step": 1110 }, { "epoch": 0.16, "learning_rate": 2.8708272436313845e-05, "loss": 0.8039, "step": 1111 }, { "epoch": 0.16, "learning_rate": 2.8705445521132482e-05, "loss": 0.7282, "step": 1112 }, { "epoch": 0.16, "learning_rate": 2.870261565550786e-05, "loss": 0.8931, "step": 1113 }, { "epoch": 0.16, "learning_rate": 2.8699782840049174e-05, "loss": 0.6688, "step": 1114 }, { "epoch": 0.16, "learning_rate": 2.869694707536626e-05, "loss": 0.8323, "step": 1115 }, { "epoch": 0.16, "learning_rate": 2.8694108362069585e-05, "loss": 0.851, "step": 1116 }, { "epoch": 0.16, "learning_rate": 2.869126670077026e-05, "loss": 0.9224, "step": 1117 }, { "epoch": 0.16, "learning_rate": 2.8688422092080018e-05, "loss": 0.8329, "step": 1118 }, { "epoch": 0.16, "learning_rate": 2.8685574536611233e-05, "loss": 0.8809, "step": 1119 }, { "epoch": 0.16, "learning_rate": 2.868272403497692e-05, "loss": 0.9888, "step": 1120 }, { "epoch": 0.16, "learning_rate": 2.8679870587790715e-05, "loss": 0.9805, "step": 1121 }, { "epoch": 0.16, "learning_rate": 2.8677014195666896e-05, "loss": 1.014, "step": 1122 }, { "epoch": 0.16, "learning_rate": 2.8674154859220374e-05, "loss": 0.9855, "step": 1123 }, { "epoch": 0.16, "learning_rate": 2.8671292579066694e-05, "loss": 0.6897, "step": 1124 }, { "epoch": 0.16, "learning_rate": 2.8668427355822036e-05, "loss": 0.8583, "step": 1125 }, { "epoch": 0.16, "learning_rate": 2.866555919010321e-05, "loss": 0.909, "step": 1126 }, { "epoch": 0.16, "learning_rate": 2.8662688082527656e-05, "loss": 0.8535, "step": 1127 }, { "epoch": 0.16, "learning_rate": 2.8659814033713458e-05, "loss": 0.894, "step": 1128 }, { "epoch": 0.16, "learning_rate": 2.865693704427933e-05, "loss": 0.9146, "step": 1129 }, { "epoch": 0.16, "learning_rate": 2.865405711484461e-05, "loss": 0.8379, "step": 1130 }, { "epoch": 0.16, "learning_rate": 2.865117424602928e-05, "loss": 1.1741, "step": 1131 }, { "epoch": 0.16, "learning_rate": 2.864828843845395e-05, "loss": 0.8488, "step": 1132 }, { "epoch": 0.16, "learning_rate": 2.8645399692739854e-05, "loss": 0.8622, "step": 1133 }, { "epoch": 0.16, "learning_rate": 2.8642508009508884e-05, "loss": 0.8956, "step": 1134 }, { "epoch": 0.16, "learning_rate": 2.8639613389383538e-05, "loss": 0.8047, "step": 1135 }, { "epoch": 0.16, "learning_rate": 2.8636715832986956e-05, "loss": 0.8318, "step": 1136 }, { "epoch": 0.16, "learning_rate": 2.8633815340942912e-05, "loss": 0.9375, "step": 1137 }, { "epoch": 0.16, "learning_rate": 2.863091191387581e-05, "loss": 0.9397, "step": 1138 }, { "epoch": 0.16, "learning_rate": 2.8628005552410685e-05, "loss": 0.7641, "step": 1139 }, { "epoch": 0.16, "learning_rate": 2.8625096257173203e-05, "loss": 0.9386, "step": 1140 }, { "epoch": 0.16, "learning_rate": 2.862218402878967e-05, "loss": 0.7874, "step": 1141 }, { "epoch": 0.16, "learning_rate": 2.8619268867887008e-05, "loss": 0.9023, "step": 1142 }, { "epoch": 0.16, "learning_rate": 2.8616350775092785e-05, "loss": 0.8092, "step": 1143 }, { "epoch": 0.16, "learning_rate": 2.8613429751035192e-05, "loss": 0.8733, "step": 1144 }, { "epoch": 0.16, "learning_rate": 2.8610505796343057e-05, "loss": 0.7207, "step": 1145 }, { "epoch": 0.16, "learning_rate": 2.860757891164583e-05, "loss": 0.8856, "step": 1146 }, { "epoch": 0.16, "learning_rate": 2.86046490975736e-05, "loss": 0.8929, "step": 1147 }, { "epoch": 0.16, "learning_rate": 2.8601716354757075e-05, "loss": 0.8209, "step": 1148 }, { "epoch": 0.16, "learning_rate": 2.8598780683827615e-05, "loss": 0.7388, "step": 1149 }, { "epoch": 0.16, "learning_rate": 2.859584208541719e-05, "loss": 1.0368, "step": 1150 }, { "epoch": 0.16, "learning_rate": 2.859290056015841e-05, "loss": 0.9877, "step": 1151 }, { "epoch": 0.17, "learning_rate": 2.8589956108684514e-05, "loss": 1.024, "step": 1152 }, { "epoch": 0.17, "learning_rate": 2.858700873162936e-05, "loss": 0.8265, "step": 1153 }, { "epoch": 0.17, "learning_rate": 2.858405842962746e-05, "loss": 1.0134, "step": 1154 }, { "epoch": 0.17, "learning_rate": 2.858110520331393e-05, "loss": 0.9381, "step": 1155 }, { "epoch": 0.17, "learning_rate": 2.8578149053324526e-05, "loss": 0.9637, "step": 1156 }, { "epoch": 0.17, "learning_rate": 2.857518998029564e-05, "loss": 0.8228, "step": 1157 }, { "epoch": 0.17, "learning_rate": 2.8572227984864284e-05, "loss": 0.9643, "step": 1158 }, { "epoch": 0.17, "learning_rate": 2.8569263067668107e-05, "loss": 0.9208, "step": 1159 }, { "epoch": 0.17, "learning_rate": 2.856629522934538e-05, "loss": 0.8929, "step": 1160 }, { "epoch": 0.17, "learning_rate": 2.8563324470535e-05, "loss": 0.9682, "step": 1161 }, { "epoch": 0.17, "learning_rate": 2.85603507918765e-05, "loss": 0.9721, "step": 1162 }, { "epoch": 0.17, "learning_rate": 2.8557374194010048e-05, "loss": 1.0753, "step": 1163 }, { "epoch": 0.17, "learning_rate": 2.855439467757642e-05, "loss": 0.8538, "step": 1164 }, { "epoch": 0.17, "learning_rate": 2.8551412243217036e-05, "loss": 1.0474, "step": 1165 }, { "epoch": 0.17, "learning_rate": 2.8548426891573945e-05, "loss": 0.87, "step": 1166 }, { "epoch": 0.17, "learning_rate": 2.8545438623289818e-05, "loss": 0.9939, "step": 1167 }, { "epoch": 0.17, "learning_rate": 2.8542447439007946e-05, "loss": 1.0006, "step": 1168 }, { "epoch": 0.17, "learning_rate": 2.8539453339372266e-05, "loss": 0.8431, "step": 1169 }, { "epoch": 0.17, "learning_rate": 2.8536456325027335e-05, "loss": 0.9531, "step": 1170 }, { "epoch": 0.17, "learning_rate": 2.8533456396618326e-05, "loss": 0.7503, "step": 1171 }, { "epoch": 0.17, "learning_rate": 2.853045355479106e-05, "loss": 0.8836, "step": 1172 }, { "epoch": 0.17, "learning_rate": 2.852744780019197e-05, "loss": 0.7991, "step": 1173 }, { "epoch": 0.17, "learning_rate": 2.8524439133468124e-05, "loss": 0.9442, "step": 1174 }, { "epoch": 0.17, "learning_rate": 2.852142755526721e-05, "loss": 0.9682, "step": 1175 }, { "epoch": 0.17, "learning_rate": 2.8518413066237543e-05, "loss": 0.8426, "step": 1176 }, { "epoch": 0.17, "learning_rate": 2.8515395667028073e-05, "loss": 1.0714, "step": 1177 }, { "epoch": 0.17, "learning_rate": 2.851237535828837e-05, "loss": 0.9202, "step": 1178 }, { "epoch": 0.17, "learning_rate": 2.850935214066863e-05, "loss": 0.8047, "step": 1179 }, { "epoch": 0.17, "learning_rate": 2.8506326014819678e-05, "loss": 1.0117, "step": 1180 }, { "epoch": 0.17, "learning_rate": 2.850329698139297e-05, "loss": 0.971, "step": 1181 }, { "epoch": 0.17, "learning_rate": 2.8500265041040576e-05, "loss": 0.87, "step": 1182 }, { "epoch": 0.17, "learning_rate": 2.8497230194415194e-05, "loss": 0.9023, "step": 1183 }, { "epoch": 0.17, "learning_rate": 2.849419244217016e-05, "loss": 0.9079, "step": 1184 }, { "epoch": 0.17, "learning_rate": 2.8491151784959414e-05, "loss": 0.9955, "step": 1185 }, { "epoch": 0.17, "learning_rate": 2.8488108223437552e-05, "loss": 0.9392, "step": 1186 }, { "epoch": 0.17, "learning_rate": 2.8485061758259767e-05, "loss": 0.9403, "step": 1187 }, { "epoch": 0.17, "learning_rate": 2.848201239008189e-05, "loss": 0.9559, "step": 1188 }, { "epoch": 0.17, "learning_rate": 2.8478960119560368e-05, "loss": 0.8516, "step": 1189 }, { "epoch": 0.17, "learning_rate": 2.8475904947352288e-05, "loss": 0.7252, "step": 1190 }, { "epoch": 0.17, "learning_rate": 2.847284687411535e-05, "loss": 1.0575, "step": 1191 }, { "epoch": 0.17, "learning_rate": 2.846978590050788e-05, "loss": 0.9012, "step": 1192 }, { "epoch": 0.17, "learning_rate": 2.8466722027188833e-05, "loss": 1.0117, "step": 1193 }, { "epoch": 0.17, "learning_rate": 2.8463655254817784e-05, "loss": 0.7821, "step": 1194 }, { "epoch": 0.17, "learning_rate": 2.8460585584054925e-05, "loss": 0.8181, "step": 1195 }, { "epoch": 0.17, "learning_rate": 2.8457513015561095e-05, "loss": 0.9481, "step": 1196 }, { "epoch": 0.17, "learning_rate": 2.845443754999773e-05, "loss": 0.827, "step": 1197 }, { "epoch": 0.17, "learning_rate": 2.845135918802691e-05, "loss": 0.8825, "step": 1198 }, { "epoch": 0.17, "learning_rate": 2.8448277930311323e-05, "loss": 0.781, "step": 1199 }, { "epoch": 0.17, "learning_rate": 2.8445193777514294e-05, "loss": 1.0419, "step": 1200 }, { "epoch": 0.17, "learning_rate": 2.8442106730299762e-05, "loss": 0.7031, "step": 1201 }, { "epoch": 0.17, "learning_rate": 2.8439016789332287e-05, "loss": 1.0229, "step": 1202 }, { "epoch": 0.17, "learning_rate": 2.8435923955277066e-05, "loss": 1.0625, "step": 1203 }, { "epoch": 0.17, "learning_rate": 2.8432828228799903e-05, "loss": 0.8761, "step": 1204 }, { "epoch": 0.17, "learning_rate": 2.842972961056723e-05, "loss": 0.9269, "step": 1205 }, { "epoch": 0.17, "learning_rate": 2.842662810124611e-05, "loss": 1.0982, "step": 1206 }, { "epoch": 0.17, "learning_rate": 2.8423523701504216e-05, "loss": 1.0067, "step": 1207 }, { "epoch": 0.17, "learning_rate": 2.842041641200985e-05, "loss": 0.8711, "step": 1208 }, { "epoch": 0.17, "learning_rate": 2.8417306233431936e-05, "loss": 1.0285, "step": 1209 }, { "epoch": 0.17, "learning_rate": 2.8414193166440013e-05, "loss": 0.8546, "step": 1210 }, { "epoch": 0.17, "learning_rate": 2.8411077211704254e-05, "loss": 1.0926, "step": 1211 }, { "epoch": 0.17, "learning_rate": 2.8407958369895444e-05, "loss": 0.9227, "step": 1212 }, { "epoch": 0.17, "learning_rate": 2.840483664168499e-05, "loss": 0.899, "step": 1213 }, { "epoch": 0.17, "learning_rate": 2.840171202774493e-05, "loss": 0.9046, "step": 1214 }, { "epoch": 0.17, "learning_rate": 2.8398584528747902e-05, "loss": 0.7771, "step": 1215 }, { "epoch": 0.17, "learning_rate": 2.83954541453672e-05, "loss": 0.9339, "step": 1216 }, { "epoch": 0.17, "learning_rate": 2.8392320878276697e-05, "loss": 0.875, "step": 1217 }, { "epoch": 0.17, "learning_rate": 2.838918472815092e-05, "loss": 0.8538, "step": 1218 }, { "epoch": 0.17, "learning_rate": 2.8386045695665004e-05, "loss": 0.8968, "step": 1219 }, { "epoch": 0.17, "learning_rate": 2.8382903781494702e-05, "loss": 0.9961, "step": 1220 }, { "epoch": 0.17, "learning_rate": 2.8379758986316393e-05, "loss": 0.9118, "step": 1221 }, { "epoch": 0.18, "learning_rate": 2.8376611310807067e-05, "loss": 0.8365, "step": 1222 }, { "epoch": 0.18, "learning_rate": 2.8373460755644353e-05, "loss": 0.8036, "step": 1223 }, { "epoch": 0.18, "learning_rate": 2.8370307321506477e-05, "loss": 1.0541, "step": 1224 }, { "epoch": 0.18, "learning_rate": 2.8367151009072293e-05, "loss": 0.8968, "step": 1225 }, { "epoch": 0.18, "learning_rate": 2.8363991819021287e-05, "loss": 0.9397, "step": 1226 }, { "epoch": 0.18, "learning_rate": 2.836082975203355e-05, "loss": 1.0815, "step": 1227 }, { "epoch": 0.18, "learning_rate": 2.83576648087898e-05, "loss": 0.6928, "step": 1228 }, { "epoch": 0.18, "learning_rate": 2.835449698997136e-05, "loss": 0.7093, "step": 1229 }, { "epoch": 0.18, "learning_rate": 2.8351326296260194e-05, "loss": 0.7656, "step": 1230 }, { "epoch": 0.18, "learning_rate": 2.834815272833887e-05, "loss": 0.9157, "step": 1231 }, { "epoch": 0.18, "learning_rate": 2.834497628689058e-05, "loss": 1.0664, "step": 1232 }, { "epoch": 0.18, "learning_rate": 2.8341796972599133e-05, "loss": 0.9369, "step": 1233 }, { "epoch": 0.18, "learning_rate": 2.8338614786148953e-05, "loss": 0.8142, "step": 1234 }, { "epoch": 0.18, "learning_rate": 2.8335429728225093e-05, "loss": 0.7249, "step": 1235 }, { "epoch": 0.18, "learning_rate": 2.833224179951321e-05, "loss": 0.714, "step": 1236 }, { "epoch": 0.18, "learning_rate": 2.8329051000699587e-05, "loss": 0.9626, "step": 1237 }, { "epoch": 0.18, "learning_rate": 2.8325857332471133e-05, "loss": 0.743, "step": 1238 }, { "epoch": 0.18, "learning_rate": 2.832266079551536e-05, "loss": 0.827, "step": 1239 }, { "epoch": 0.18, "learning_rate": 2.8319461390520395e-05, "loss": 0.7684, "step": 1240 }, { "epoch": 0.18, "learning_rate": 2.8316259118175004e-05, "loss": 0.971, "step": 1241 }, { "epoch": 0.18, "learning_rate": 2.831305397916855e-05, "loss": 0.9805, "step": 1242 }, { "epoch": 0.18, "learning_rate": 2.8309845974191015e-05, "loss": 0.9258, "step": 1243 }, { "epoch": 0.18, "learning_rate": 2.8306635103933016e-05, "loss": 0.8008, "step": 1244 }, { "epoch": 0.18, "learning_rate": 2.8303421369085765e-05, "loss": 0.827, "step": 1245 }, { "epoch": 0.18, "learning_rate": 2.8300204770341102e-05, "loss": 0.8036, "step": 1246 }, { "epoch": 0.18, "learning_rate": 2.8296985308391478e-05, "loss": 0.8956, "step": 1247 }, { "epoch": 0.18, "learning_rate": 2.829376298392997e-05, "loss": 1.072, "step": 1248 }, { "epoch": 0.18, "learning_rate": 2.8290537797650263e-05, "loss": 0.9096, "step": 1249 }, { "epoch": 0.18, "learning_rate": 2.8287309750246654e-05, "loss": 0.8633, "step": 1250 }, { "epoch": 0.18, "learning_rate": 2.828407884241407e-05, "loss": 1.0014, "step": 1251 }, { "epoch": 0.18, "learning_rate": 2.8280845074848038e-05, "loss": 0.8912, "step": 1252 }, { "epoch": 0.18, "learning_rate": 2.8277608448244713e-05, "loss": 1.0017, "step": 1253 }, { "epoch": 0.18, "learning_rate": 2.8274368963300858e-05, "loss": 0.9576, "step": 1254 }, { "epoch": 0.18, "learning_rate": 2.827112662071385e-05, "loss": 0.9581, "step": 1255 }, { "epoch": 0.18, "learning_rate": 2.8267881421181694e-05, "loss": 1.0273, "step": 1256 }, { "epoch": 0.18, "learning_rate": 2.8264633365402997e-05, "loss": 0.8538, "step": 1257 }, { "epoch": 0.18, "learning_rate": 2.8261382454076986e-05, "loss": 0.8613, "step": 1258 }, { "epoch": 0.18, "learning_rate": 2.82581286879035e-05, "loss": 0.8906, "step": 1259 }, { "epoch": 0.18, "learning_rate": 2.8254872067582997e-05, "loss": 0.8016, "step": 1260 }, { "epoch": 0.18, "learning_rate": 2.825161259381654e-05, "loss": 0.659, "step": 1261 }, { "epoch": 0.18, "learning_rate": 2.8248350267305823e-05, "loss": 0.9035, "step": 1262 }, { "epoch": 0.18, "learning_rate": 2.8245085088753136e-05, "loss": 0.8803, "step": 1263 }, { "epoch": 0.18, "learning_rate": 2.82418170588614e-05, "loss": 0.8583, "step": 1264 }, { "epoch": 0.18, "learning_rate": 2.823854617833413e-05, "loss": 0.889, "step": 1265 }, { "epoch": 0.18, "learning_rate": 2.8235272447875475e-05, "loss": 0.9263, "step": 1266 }, { "epoch": 0.18, "learning_rate": 2.823199586819018e-05, "loss": 0.7623, "step": 1267 }, { "epoch": 0.18, "learning_rate": 2.8228716439983614e-05, "loss": 0.9715, "step": 1268 }, { "epoch": 0.18, "learning_rate": 2.8225434163961766e-05, "loss": 0.9163, "step": 1269 }, { "epoch": 0.18, "learning_rate": 2.8222149040831216e-05, "loss": 0.9498, "step": 1270 }, { "epoch": 0.18, "learning_rate": 2.8218861071299173e-05, "loss": 1.0508, "step": 1271 }, { "epoch": 0.18, "learning_rate": 2.821557025607346e-05, "loss": 0.9905, "step": 1272 }, { "epoch": 0.18, "learning_rate": 2.821227659586251e-05, "loss": 0.7612, "step": 1273 }, { "epoch": 0.18, "learning_rate": 2.8208980091375355e-05, "loss": 0.8901, "step": 1274 }, { "epoch": 0.18, "learning_rate": 2.8205680743321663e-05, "loss": 0.8387, "step": 1275 }, { "epoch": 0.18, "learning_rate": 2.820237855241169e-05, "loss": 0.9531, "step": 1276 }, { "epoch": 0.18, "learning_rate": 2.819907351935633e-05, "loss": 1.0826, "step": 1277 }, { "epoch": 0.18, "learning_rate": 2.8195765644867063e-05, "loss": 0.918, "step": 1278 }, { "epoch": 0.18, "learning_rate": 2.8192454929656e-05, "loss": 0.8761, "step": 1279 }, { "epoch": 0.18, "learning_rate": 2.818914137443585e-05, "loss": 0.8934, "step": 1280 }, { "epoch": 0.18, "learning_rate": 2.8185824979919944e-05, "loss": 0.8675, "step": 1281 }, { "epoch": 0.18, "learning_rate": 2.818250574682222e-05, "loss": 0.8675, "step": 1282 }, { "epoch": 0.18, "learning_rate": 2.8179183675857224e-05, "loss": 0.9157, "step": 1283 }, { "epoch": 0.18, "learning_rate": 2.8175858767740117e-05, "loss": 0.9325, "step": 1284 }, { "epoch": 0.18, "learning_rate": 2.8172531023186673e-05, "loss": 0.9849, "step": 1285 }, { "epoch": 0.18, "learning_rate": 2.8169200442913262e-05, "loss": 0.8415, "step": 1286 }, { "epoch": 0.18, "learning_rate": 2.8165867027636887e-05, "loss": 0.9637, "step": 1287 }, { "epoch": 0.18, "learning_rate": 2.8162530778075143e-05, "loss": 0.8571, "step": 1288 }, { "epoch": 0.18, "learning_rate": 2.8159191694946245e-05, "loss": 0.9849, "step": 1289 }, { "epoch": 0.18, "learning_rate": 2.8155849778969015e-05, "loss": 1.0335, "step": 1290 }, { "epoch": 0.18, "learning_rate": 2.8152505030862888e-05, "loss": 1.0536, "step": 1291 }, { "epoch": 0.19, "learning_rate": 2.8149157451347896e-05, "loss": 0.8449, "step": 1292 }, { "epoch": 0.19, "learning_rate": 2.81458070411447e-05, "loss": 0.9342, "step": 1293 }, { "epoch": 0.19, "learning_rate": 2.8142453800974557e-05, "loss": 1.0357, "step": 1294 }, { "epoch": 0.19, "learning_rate": 2.8139097731559332e-05, "loss": 0.8771, "step": 1295 }, { "epoch": 0.19, "learning_rate": 2.8135738833621508e-05, "loss": 0.6599, "step": 1296 }, { "epoch": 0.19, "learning_rate": 2.8132377107884173e-05, "loss": 0.9085, "step": 1297 }, { "epoch": 0.19, "learning_rate": 2.812901255507102e-05, "loss": 0.9654, "step": 1298 }, { "epoch": 0.19, "learning_rate": 2.8125645175906368e-05, "loss": 0.9559, "step": 1299 }, { "epoch": 0.19, "learning_rate": 2.8122274971115104e-05, "loss": 0.928, "step": 1300 }, { "epoch": 0.19, "learning_rate": 2.8118901941422777e-05, "loss": 0.9325, "step": 1301 }, { "epoch": 0.19, "learning_rate": 2.8115526087555495e-05, "loss": 0.8931, "step": 1302 }, { "epoch": 0.19, "learning_rate": 2.811214741024001e-05, "loss": 0.9548, "step": 1303 }, { "epoch": 0.19, "learning_rate": 2.8108765910203663e-05, "loss": 0.8677, "step": 1304 }, { "epoch": 0.19, "learning_rate": 2.81053815881744e-05, "loss": 0.9738, "step": 1305 }, { "epoch": 0.19, "learning_rate": 2.81019944448808e-05, "loss": 0.7168, "step": 1306 }, { "epoch": 0.19, "learning_rate": 2.8098604481052013e-05, "loss": 0.8968, "step": 1307 }, { "epoch": 0.19, "learning_rate": 2.8095211697417823e-05, "loss": 0.9754, "step": 1308 }, { "epoch": 0.19, "learning_rate": 2.809181609470861e-05, "loss": 0.6504, "step": 1309 }, { "epoch": 0.19, "learning_rate": 2.8088417673655366e-05, "loss": 0.9749, "step": 1310 }, { "epoch": 0.19, "learning_rate": 2.8085016434989684e-05, "loss": 0.9766, "step": 1311 }, { "epoch": 0.19, "learning_rate": 2.8081612379443766e-05, "loss": 0.9676, "step": 1312 }, { "epoch": 0.19, "learning_rate": 2.8078205507750423e-05, "loss": 1.0804, "step": 1313 }, { "epoch": 0.19, "learning_rate": 2.807479582064307e-05, "loss": 0.7924, "step": 1314 }, { "epoch": 0.19, "learning_rate": 2.807138331885573e-05, "loss": 0.8471, "step": 1315 }, { "epoch": 0.19, "learning_rate": 2.8067968003123026e-05, "loss": 0.8025, "step": 1316 }, { "epoch": 0.19, "learning_rate": 2.806454987418019e-05, "loss": 0.9004, "step": 1317 }, { "epoch": 0.19, "learning_rate": 2.8061128932763067e-05, "loss": 0.7162, "step": 1318 }, { "epoch": 0.19, "learning_rate": 2.8057705179608096e-05, "loss": 0.9955, "step": 1319 }, { "epoch": 0.19, "learning_rate": 2.805427861545233e-05, "loss": 0.9615, "step": 1320 }, { "epoch": 0.19, "learning_rate": 2.805084924103342e-05, "loss": 0.8839, "step": 1321 }, { "epoch": 0.19, "learning_rate": 2.8047417057089626e-05, "loss": 0.7673, "step": 1322 }, { "epoch": 0.19, "learning_rate": 2.804398206435981e-05, "loss": 0.9573, "step": 1323 }, { "epoch": 0.19, "learning_rate": 2.804054426358345e-05, "loss": 0.8108, "step": 1324 }, { "epoch": 0.19, "learning_rate": 2.8037103655500615e-05, "loss": 0.8267, "step": 1325 }, { "epoch": 0.19, "learning_rate": 2.8033660240851975e-05, "loss": 1.0301, "step": 1326 }, { "epoch": 0.19, "learning_rate": 2.803021402037882e-05, "loss": 0.798, "step": 1327 }, { "epoch": 0.19, "learning_rate": 2.8026764994823035e-05, "loss": 0.9944, "step": 1328 }, { "epoch": 0.19, "learning_rate": 2.802331316492711e-05, "loss": 0.9269, "step": 1329 }, { "epoch": 0.19, "learning_rate": 2.8019858531434134e-05, "loss": 0.9763, "step": 1330 }, { "epoch": 0.19, "learning_rate": 2.8016401095087808e-05, "loss": 0.8996, "step": 1331 }, { "epoch": 0.19, "learning_rate": 2.8012940856632436e-05, "loss": 0.7999, "step": 1332 }, { "epoch": 0.19, "learning_rate": 2.8009477816812916e-05, "loss": 0.8728, "step": 1333 }, { "epoch": 0.19, "learning_rate": 2.8006011976374756e-05, "loss": 0.899, "step": 1334 }, { "epoch": 0.19, "learning_rate": 2.800254333606407e-05, "loss": 0.9509, "step": 1335 }, { "epoch": 0.19, "learning_rate": 2.7999071896627558e-05, "loss": 0.8823, "step": 1336 }, { "epoch": 0.19, "learning_rate": 2.7995597658812552e-05, "loss": 0.9782, "step": 1337 }, { "epoch": 0.19, "learning_rate": 2.7992120623366956e-05, "loss": 0.7656, "step": 1338 }, { "epoch": 0.19, "learning_rate": 2.79886407910393e-05, "loss": 0.8968, "step": 1339 }, { "epoch": 0.19, "learning_rate": 2.7985158162578697e-05, "loss": 0.8276, "step": 1340 }, { "epoch": 0.19, "learning_rate": 2.7981672738734876e-05, "loss": 0.9442, "step": 1341 }, { "epoch": 0.19, "learning_rate": 2.797818452025816e-05, "loss": 0.942, "step": 1342 }, { "epoch": 0.19, "learning_rate": 2.7974693507899477e-05, "loss": 0.9322, "step": 1343 }, { "epoch": 0.19, "learning_rate": 2.7971199702410354e-05, "loss": 0.9102, "step": 1344 }, { "epoch": 0.19, "learning_rate": 2.7967703104542925e-05, "loss": 0.9269, "step": 1345 }, { "epoch": 0.19, "learning_rate": 2.7964203715049917e-05, "loss": 1.031, "step": 1346 }, { "epoch": 0.19, "learning_rate": 2.7960701534684663e-05, "loss": 0.8259, "step": 1347 }, { "epoch": 0.19, "learning_rate": 2.7957196564201097e-05, "loss": 0.8666, "step": 1348 }, { "epoch": 0.19, "learning_rate": 2.7953688804353756e-05, "loss": 1.0335, "step": 1349 }, { "epoch": 0.19, "learning_rate": 2.7950178255897763e-05, "loss": 0.928, "step": 1350 }, { "epoch": 0.19, "learning_rate": 2.7946664919588864e-05, "loss": 0.9593, "step": 1351 }, { "epoch": 0.19, "learning_rate": 2.7943148796183382e-05, "loss": 0.8396, "step": 1352 }, { "epoch": 0.19, "learning_rate": 2.793962988643826e-05, "loss": 0.8809, "step": 1353 }, { "epoch": 0.19, "learning_rate": 2.793610819111103e-05, "loss": 1.0011, "step": 1354 }, { "epoch": 0.19, "learning_rate": 2.7932583710959823e-05, "loss": 0.9358, "step": 1355 }, { "epoch": 0.19, "learning_rate": 2.7929056446743377e-05, "loss": 1.0709, "step": 1356 }, { "epoch": 0.19, "learning_rate": 2.792552639922102e-05, "loss": 0.757, "step": 1357 }, { "epoch": 0.19, "learning_rate": 2.792199356915269e-05, "loss": 0.7835, "step": 1358 }, { "epoch": 0.19, "learning_rate": 2.7918457957298913e-05, "loss": 0.9275, "step": 1359 }, { "epoch": 0.19, "learning_rate": 2.791491956442082e-05, "loss": 1.0485, "step": 1360 }, { "epoch": 0.19, "learning_rate": 2.791137839128014e-05, "loss": 0.8287, "step": 1361 }, { "epoch": 0.2, "learning_rate": 2.79078344386392e-05, "loss": 1.0273, "step": 1362 }, { "epoch": 0.2, "learning_rate": 2.7904287707260924e-05, "loss": 0.8493, "step": 1363 }, { "epoch": 0.2, "learning_rate": 2.7900738197908837e-05, "loss": 0.897, "step": 1364 }, { "epoch": 0.2, "learning_rate": 2.7897185911347055e-05, "loss": 1.0061, "step": 1365 }, { "epoch": 0.2, "learning_rate": 2.7893630848340304e-05, "loss": 1.0742, "step": 1366 }, { "epoch": 0.2, "learning_rate": 2.7890073009653904e-05, "loss": 0.7863, "step": 1367 }, { "epoch": 0.2, "learning_rate": 2.788651239605376e-05, "loss": 0.9035, "step": 1368 }, { "epoch": 0.2, "learning_rate": 2.7882949008306392e-05, "loss": 0.9855, "step": 1369 }, { "epoch": 0.2, "learning_rate": 2.7879382847178908e-05, "loss": 0.8069, "step": 1370 }, { "epoch": 0.2, "learning_rate": 2.7875813913439006e-05, "loss": 0.8114, "step": 1371 }, { "epoch": 0.2, "learning_rate": 2.7872242207855e-05, "loss": 0.8186, "step": 1372 }, { "epoch": 0.2, "learning_rate": 2.7868667731195777e-05, "loss": 0.8069, "step": 1373 }, { "epoch": 0.2, "learning_rate": 2.786509048423085e-05, "loss": 0.8192, "step": 1374 }, { "epoch": 0.2, "learning_rate": 2.78615104677303e-05, "loss": 0.9258, "step": 1375 }, { "epoch": 0.2, "learning_rate": 2.7857927682464824e-05, "loss": 0.9671, "step": 1376 }, { "epoch": 0.2, "learning_rate": 2.7854342129205698e-05, "loss": 1.0201, "step": 1377 }, { "epoch": 0.2, "learning_rate": 2.7850753808724804e-05, "loss": 0.87, "step": 1378 }, { "epoch": 0.2, "learning_rate": 2.7847162721794626e-05, "loss": 0.8772, "step": 1379 }, { "epoch": 0.2, "learning_rate": 2.7843568869188234e-05, "loss": 0.9314, "step": 1380 }, { "epoch": 0.2, "learning_rate": 2.7839972251679287e-05, "loss": 0.7972, "step": 1381 }, { "epoch": 0.2, "learning_rate": 2.783637287004206e-05, "loss": 0.8164, "step": 1382 }, { "epoch": 0.2, "learning_rate": 2.78327707250514e-05, "loss": 0.9018, "step": 1383 }, { "epoch": 0.2, "learning_rate": 2.7829165817482772e-05, "loss": 0.8739, "step": 1384 }, { "epoch": 0.2, "learning_rate": 2.782555814811221e-05, "loss": 1.0033, "step": 1385 }, { "epoch": 0.2, "learning_rate": 2.7821947717716363e-05, "loss": 0.9883, "step": 1386 }, { "epoch": 0.2, "learning_rate": 2.7818334527072466e-05, "loss": 1.0011, "step": 1387 }, { "epoch": 0.2, "learning_rate": 2.781471857695835e-05, "loss": 0.9554, "step": 1388 }, { "epoch": 0.2, "learning_rate": 2.7811099868152443e-05, "loss": 1.0681, "step": 1389 }, { "epoch": 0.2, "learning_rate": 2.7807478401433756e-05, "loss": 0.9063, "step": 1390 }, { "epoch": 0.2, "learning_rate": 2.7803854177581905e-05, "loss": 0.7408, "step": 1391 }, { "epoch": 0.2, "learning_rate": 2.7800227197377097e-05, "loss": 0.7999, "step": 1392 }, { "epoch": 0.2, "learning_rate": 2.779659746160013e-05, "loss": 0.7773, "step": 1393 }, { "epoch": 0.2, "learning_rate": 2.77929649710324e-05, "loss": 0.8482, "step": 1394 }, { "epoch": 0.2, "learning_rate": 2.7789329726455884e-05, "loss": 0.9074, "step": 1395 }, { "epoch": 0.2, "learning_rate": 2.7785691728653163e-05, "loss": 0.9261, "step": 1396 }, { "epoch": 0.2, "learning_rate": 2.778205097840741e-05, "loss": 0.7712, "step": 1397 }, { "epoch": 0.2, "learning_rate": 2.7778407476502393e-05, "loss": 0.9777, "step": 1398 }, { "epoch": 0.2, "learning_rate": 2.7774761223722456e-05, "loss": 0.9576, "step": 1399 }, { "epoch": 0.2, "learning_rate": 2.7771112220852557e-05, "loss": 0.8638, "step": 1400 }, { "epoch": 0.2, "learning_rate": 2.7767460468678233e-05, "loss": 0.8689, "step": 1401 }, { "epoch": 0.2, "learning_rate": 2.7763805967985617e-05, "loss": 1.0251, "step": 1402 }, { "epoch": 0.2, "learning_rate": 2.776014871956143e-05, "loss": 1.0195, "step": 1403 }, { "epoch": 0.2, "learning_rate": 2.7756488724192988e-05, "loss": 0.8984, "step": 1404 }, { "epoch": 0.2, "learning_rate": 2.77528259826682e-05, "loss": 0.9252, "step": 1405 }, { "epoch": 0.2, "learning_rate": 2.774916049577556e-05, "loss": 0.8705, "step": 1406 }, { "epoch": 0.2, "learning_rate": 2.7745492264304155e-05, "loss": 0.9308, "step": 1407 }, { "epoch": 0.2, "learning_rate": 2.774182128904367e-05, "loss": 0.8237, "step": 1408 }, { "epoch": 0.2, "learning_rate": 2.7738147570784373e-05, "loss": 0.8057, "step": 1409 }, { "epoch": 0.2, "learning_rate": 2.773447111031712e-05, "loss": 0.9325, "step": 1410 }, { "epoch": 0.2, "learning_rate": 2.7730791908433374e-05, "loss": 0.9632, "step": 1411 }, { "epoch": 0.2, "learning_rate": 2.7727109965925164e-05, "loss": 0.8202, "step": 1412 }, { "epoch": 0.2, "learning_rate": 2.7723425283585128e-05, "loss": 1.0106, "step": 1413 }, { "epoch": 0.2, "learning_rate": 2.771973786220648e-05, "loss": 0.8984, "step": 1414 }, { "epoch": 0.2, "learning_rate": 2.7716047702583037e-05, "loss": 1.0536, "step": 1415 }, { "epoch": 0.2, "learning_rate": 2.7712354805509193e-05, "loss": 0.9855, "step": 1416 }, { "epoch": 0.2, "learning_rate": 2.7708659171779948e-05, "loss": 1.0352, "step": 1417 }, { "epoch": 0.2, "learning_rate": 2.7704960802190868e-05, "loss": 0.9018, "step": 1418 }, { "epoch": 0.2, "learning_rate": 2.7701259697538127e-05, "loss": 0.9648, "step": 1419 }, { "epoch": 0.2, "learning_rate": 2.7697555858618485e-05, "loss": 0.8379, "step": 1420 }, { "epoch": 0.2, "learning_rate": 2.7693849286229282e-05, "loss": 0.8058, "step": 1421 }, { "epoch": 0.2, "learning_rate": 2.7690139981168452e-05, "loss": 0.8929, "step": 1422 }, { "epoch": 0.2, "learning_rate": 2.768642794423451e-05, "loss": 0.9805, "step": 1423 }, { "epoch": 0.2, "learning_rate": 2.7682713176226578e-05, "loss": 0.6317, "step": 1424 }, { "epoch": 0.2, "learning_rate": 2.7678995677944344e-05, "loss": 0.87, "step": 1425 }, { "epoch": 0.2, "learning_rate": 2.76752754501881e-05, "loss": 0.8577, "step": 1426 }, { "epoch": 0.2, "learning_rate": 2.7671552493758713e-05, "loss": 0.9157, "step": 1427 }, { "epoch": 0.2, "learning_rate": 2.766782680945765e-05, "loss": 0.9308, "step": 1428 }, { "epoch": 0.2, "learning_rate": 2.766409839808695e-05, "loss": 1.0151, "step": 1429 }, { "epoch": 0.2, "learning_rate": 2.766036726044926e-05, "loss": 0.9185, "step": 1430 }, { "epoch": 0.2, "learning_rate": 2.7656633397347783e-05, "loss": 0.8588, "step": 1431 }, { "epoch": 0.21, "learning_rate": 2.765289680958635e-05, "loss": 1.0229, "step": 1432 }, { "epoch": 0.21, "learning_rate": 2.764915749796934e-05, "loss": 0.8256, "step": 1433 }, { "epoch": 0.21, "learning_rate": 2.7645415463301735e-05, "loss": 0.8641, "step": 1434 }, { "epoch": 0.21, "learning_rate": 2.7641670706389108e-05, "loss": 0.9816, "step": 1435 }, { "epoch": 0.21, "learning_rate": 2.7637923228037615e-05, "loss": 0.8507, "step": 1436 }, { "epoch": 0.21, "learning_rate": 2.7634173029053986e-05, "loss": 0.957, "step": 1437 }, { "epoch": 0.21, "learning_rate": 2.7630420110245557e-05, "loss": 0.8873, "step": 1438 }, { "epoch": 0.21, "learning_rate": 2.7626664472420228e-05, "loss": 0.9431, "step": 1439 }, { "epoch": 0.21, "learning_rate": 2.76229061163865e-05, "loss": 0.9163, "step": 1440 }, { "epoch": 0.21, "learning_rate": 2.7619145042953455e-05, "loss": 0.8956, "step": 1441 }, { "epoch": 0.21, "learning_rate": 2.7615381252930755e-05, "loss": 0.7148, "step": 1442 }, { "epoch": 0.21, "learning_rate": 2.7611614747128657e-05, "loss": 0.9208, "step": 1443 }, { "epoch": 0.21, "learning_rate": 2.7607845526357993e-05, "loss": 0.9347, "step": 1444 }, { "epoch": 0.21, "learning_rate": 2.760407359143018e-05, "loss": 0.851, "step": 1445 }, { "epoch": 0.21, "learning_rate": 2.7600298943157226e-05, "loss": 0.8211, "step": 1446 }, { "epoch": 0.21, "learning_rate": 2.759652158235172e-05, "loss": 0.8103, "step": 1447 }, { "epoch": 0.21, "learning_rate": 2.759274150982683e-05, "loss": 1.0614, "step": 1448 }, { "epoch": 0.21, "learning_rate": 2.7588958726396317e-05, "loss": 0.7796, "step": 1449 }, { "epoch": 0.21, "learning_rate": 2.758517323287452e-05, "loss": 0.8873, "step": 1450 }, { "epoch": 0.21, "learning_rate": 2.7581385030076357e-05, "loss": 0.8633, "step": 1451 }, { "epoch": 0.21, "learning_rate": 2.757759411881734e-05, "loss": 0.8019, "step": 1452 }, { "epoch": 0.21, "learning_rate": 2.7573800499913557e-05, "loss": 0.8923, "step": 1453 }, { "epoch": 0.21, "learning_rate": 2.7570004174181676e-05, "loss": 0.9007, "step": 1454 }, { "epoch": 0.21, "learning_rate": 2.756620514243896e-05, "loss": 0.9972, "step": 1455 }, { "epoch": 0.21, "learning_rate": 2.7562403405503238e-05, "loss": 1.029, "step": 1456 }, { "epoch": 0.21, "learning_rate": 2.755859896419294e-05, "loss": 0.899, "step": 1457 }, { "epoch": 0.21, "learning_rate": 2.7554791819327056e-05, "loss": 0.8792, "step": 1458 }, { "epoch": 0.21, "learning_rate": 2.7550981971725178e-05, "loss": 0.9643, "step": 1459 }, { "epoch": 0.21, "learning_rate": 2.754716942220747e-05, "loss": 0.8504, "step": 1460 }, { "epoch": 0.21, "learning_rate": 2.7543354171594684e-05, "loss": 0.8387, "step": 1461 }, { "epoch": 0.21, "learning_rate": 2.753953622070814e-05, "loss": 0.9888, "step": 1462 }, { "epoch": 0.21, "learning_rate": 2.7535715570369753e-05, "loss": 0.81, "step": 1463 }, { "epoch": 0.21, "learning_rate": 2.7531892221402016e-05, "loss": 0.9241, "step": 1464 }, { "epoch": 0.21, "learning_rate": 2.7528066174628e-05, "loss": 0.9275, "step": 1465 }, { "epoch": 0.21, "learning_rate": 2.7524237430871357e-05, "loss": 0.9894, "step": 1466 }, { "epoch": 0.21, "learning_rate": 2.7520405990956322e-05, "loss": 0.8287, "step": 1467 }, { "epoch": 0.21, "learning_rate": 2.7516571855707707e-05, "loss": 0.9079, "step": 1468 }, { "epoch": 0.21, "learning_rate": 2.7512735025950912e-05, "loss": 0.9358, "step": 1469 }, { "epoch": 0.21, "learning_rate": 2.7508895502511908e-05, "loss": 0.9531, "step": 1470 }, { "epoch": 0.21, "learning_rate": 2.7505053286217247e-05, "loss": 0.8482, "step": 1471 }, { "epoch": 0.21, "learning_rate": 2.7501208377894068e-05, "loss": 0.861, "step": 1472 }, { "epoch": 0.21, "learning_rate": 2.749736077837008e-05, "loss": 1.0497, "step": 1473 }, { "epoch": 0.21, "learning_rate": 2.7493510488473585e-05, "loss": 0.8909, "step": 1474 }, { "epoch": 0.21, "learning_rate": 2.748965750903344e-05, "loss": 0.9916, "step": 1475 }, { "epoch": 0.21, "learning_rate": 2.7485801840879107e-05, "loss": 0.8912, "step": 1476 }, { "epoch": 0.21, "learning_rate": 2.748194348484061e-05, "loss": 0.8733, "step": 1477 }, { "epoch": 0.21, "learning_rate": 2.7478082441748568e-05, "loss": 0.7112, "step": 1478 }, { "epoch": 0.21, "learning_rate": 2.7474218712434155e-05, "loss": 0.8234, "step": 1479 }, { "epoch": 0.21, "learning_rate": 2.7470352297729146e-05, "loss": 0.7514, "step": 1480 }, { "epoch": 0.21, "learning_rate": 2.746648319846588e-05, "loss": 0.7148, "step": 1481 }, { "epoch": 0.21, "learning_rate": 2.7462611415477274e-05, "loss": 0.9381, "step": 1482 }, { "epoch": 0.21, "learning_rate": 2.7458736949596833e-05, "loss": 0.9074, "step": 1483 }, { "epoch": 0.21, "learning_rate": 2.7454859801658637e-05, "loss": 0.8092, "step": 1484 }, { "epoch": 0.21, "learning_rate": 2.7450979972497335e-05, "loss": 0.9079, "step": 1485 }, { "epoch": 0.21, "learning_rate": 2.7447097462948156e-05, "loss": 0.9358, "step": 1486 }, { "epoch": 0.21, "learning_rate": 2.7443212273846915e-05, "loss": 0.9927, "step": 1487 }, { "epoch": 0.21, "learning_rate": 2.743932440602999e-05, "loss": 0.8694, "step": 1488 }, { "epoch": 0.21, "learning_rate": 2.7435433860334348e-05, "loss": 0.9665, "step": 1489 }, { "epoch": 0.21, "learning_rate": 2.7431540637597525e-05, "loss": 0.87, "step": 1490 }, { "epoch": 0.21, "learning_rate": 2.7427644738657635e-05, "loss": 0.8747, "step": 1491 }, { "epoch": 0.21, "learning_rate": 2.7423746164353366e-05, "loss": 0.8962, "step": 1492 }, { "epoch": 0.21, "learning_rate": 2.741984491552399e-05, "loss": 0.7454, "step": 1493 }, { "epoch": 0.21, "learning_rate": 2.7415940993009347e-05, "loss": 0.8167, "step": 1494 }, { "epoch": 0.21, "learning_rate": 2.7412034397649853e-05, "loss": 0.8864, "step": 1495 }, { "epoch": 0.21, "learning_rate": 2.7408125130286513e-05, "loss": 0.6116, "step": 1496 }, { "epoch": 0.21, "learning_rate": 2.7404213191760876e-05, "loss": 0.8516, "step": 1497 }, { "epoch": 0.21, "learning_rate": 2.7400298582915098e-05, "loss": 0.8901, "step": 1498 }, { "epoch": 0.21, "learning_rate": 2.7396381304591895e-05, "loss": 0.9604, "step": 1499 }, { "epoch": 0.21, "learning_rate": 2.7392461357634562e-05, "loss": 0.952, "step": 1500 }, { "epoch": 0.22, "learning_rate": 2.738853874288696e-05, "loss": 0.8627, "step": 1501 }, { "epoch": 0.22, "learning_rate": 2.738461346119354e-05, "loss": 0.8823, "step": 1502 }, { "epoch": 0.22, "learning_rate": 2.738068551339931e-05, "loss": 0.8248, "step": 1503 }, { "epoch": 0.22, "learning_rate": 2.7376754900349862e-05, "loss": 0.8454, "step": 1504 }, { "epoch": 0.22, "learning_rate": 2.7372821622891367e-05, "loss": 0.7472, "step": 1505 }, { "epoch": 0.22, "learning_rate": 2.7368885681870556e-05, "loss": 0.9093, "step": 1506 }, { "epoch": 0.22, "learning_rate": 2.7364947078134735e-05, "loss": 0.7771, "step": 1507 }, { "epoch": 0.22, "learning_rate": 2.7361005812531792e-05, "loss": 0.8482, "step": 1508 }, { "epoch": 0.22, "learning_rate": 2.735706188591019e-05, "loss": 0.8502, "step": 1509 }, { "epoch": 0.22, "learning_rate": 2.735311529911895e-05, "loss": 0.8011, "step": 1510 }, { "epoch": 0.22, "learning_rate": 2.7349166053007684e-05, "loss": 0.8772, "step": 1511 }, { "epoch": 0.22, "learning_rate": 2.7345214148426554e-05, "loss": 0.998, "step": 1512 }, { "epoch": 0.22, "learning_rate": 2.7341259586226322e-05, "loss": 0.8619, "step": 1513 }, { "epoch": 0.22, "learning_rate": 2.7337302367258292e-05, "loss": 1.0, "step": 1514 }, { "epoch": 0.22, "learning_rate": 2.733334249237437e-05, "loss": 0.9131, "step": 1515 }, { "epoch": 0.22, "learning_rate": 2.7329379962427006e-05, "loss": 1.0084, "step": 1516 }, { "epoch": 0.22, "learning_rate": 2.7325414778269247e-05, "loss": 1.0552, "step": 1517 }, { "epoch": 0.22, "learning_rate": 2.7321446940754688e-05, "loss": 0.9297, "step": 1518 }, { "epoch": 0.22, "learning_rate": 2.7317476450737512e-05, "loss": 0.8209, "step": 1519 }, { "epoch": 0.22, "learning_rate": 2.731350330907247e-05, "loss": 0.7977, "step": 1520 }, { "epoch": 0.22, "learning_rate": 2.7309527516614875e-05, "loss": 0.8265, "step": 1521 }, { "epoch": 0.22, "learning_rate": 2.7305549074220617e-05, "loss": 0.9442, "step": 1522 }, { "epoch": 0.22, "learning_rate": 2.730156798274616e-05, "loss": 0.9643, "step": 1523 }, { "epoch": 0.22, "learning_rate": 2.7297584243048536e-05, "loss": 0.8633, "step": 1524 }, { "epoch": 0.22, "learning_rate": 2.7293597855985338e-05, "loss": 0.976, "step": 1525 }, { "epoch": 0.22, "learning_rate": 2.7289608822414743e-05, "loss": 0.7874, "step": 1526 }, { "epoch": 0.22, "learning_rate": 2.728561714319549e-05, "loss": 0.9202, "step": 1527 }, { "epoch": 0.22, "learning_rate": 2.7281622819186885e-05, "loss": 0.9018, "step": 1528 }, { "epoch": 0.22, "learning_rate": 2.7277625851248813e-05, "loss": 0.899, "step": 1529 }, { "epoch": 0.22, "learning_rate": 2.727362624024172e-05, "loss": 1.0084, "step": 1530 }, { "epoch": 0.22, "learning_rate": 2.7269623987026616e-05, "loss": 0.9146, "step": 1531 }, { "epoch": 0.22, "learning_rate": 2.7265619092465102e-05, "loss": 0.8465, "step": 1532 }, { "epoch": 0.22, "learning_rate": 2.7261611557419318e-05, "loss": 0.9576, "step": 1533 }, { "epoch": 0.22, "learning_rate": 2.7257601382751997e-05, "loss": 0.9648, "step": 1534 }, { "epoch": 0.22, "learning_rate": 2.7253588569326426e-05, "loss": 0.8025, "step": 1535 }, { "epoch": 0.22, "learning_rate": 2.7249573118006462e-05, "loss": 1.1295, "step": 1536 }, { "epoch": 0.22, "learning_rate": 2.724555502965654e-05, "loss": 0.8884, "step": 1537 }, { "epoch": 0.22, "learning_rate": 2.724153430514165e-05, "loss": 0.9425, "step": 1538 }, { "epoch": 0.22, "learning_rate": 2.7237510945327352e-05, "loss": 0.918, "step": 1539 }, { "epoch": 0.22, "learning_rate": 2.7233484951079787e-05, "loss": 0.8733, "step": 1540 }, { "epoch": 0.22, "learning_rate": 2.7229456323265632e-05, "loss": 0.8432, "step": 1541 }, { "epoch": 0.22, "learning_rate": 2.722542506275217e-05, "loss": 1.0206, "step": 1542 }, { "epoch": 0.22, "learning_rate": 2.7221391170407225e-05, "loss": 0.8834, "step": 1543 }, { "epoch": 0.22, "learning_rate": 2.7217354647099188e-05, "loss": 0.7606, "step": 1544 }, { "epoch": 0.22, "learning_rate": 2.7213315493697036e-05, "loss": 0.7377, "step": 1545 }, { "epoch": 0.22, "learning_rate": 2.7209273711070285e-05, "loss": 1.0575, "step": 1546 }, { "epoch": 0.22, "learning_rate": 2.720522930008904e-05, "loss": 0.9163, "step": 1547 }, { "epoch": 0.22, "learning_rate": 2.7201182261623955e-05, "loss": 0.9844, "step": 1548 }, { "epoch": 0.22, "learning_rate": 2.7197132596546262e-05, "loss": 0.8404, "step": 1549 }, { "epoch": 0.22, "learning_rate": 2.7193080305727754e-05, "loss": 0.7607, "step": 1550 }, { "epoch": 0.22, "learning_rate": 2.718902539004079e-05, "loss": 0.8125, "step": 1551 }, { "epoch": 0.22, "learning_rate": 2.7184967850358288e-05, "loss": 0.986, "step": 1552 }, { "epoch": 0.22, "learning_rate": 2.7180907687553745e-05, "loss": 0.6917, "step": 1553 }, { "epoch": 0.22, "learning_rate": 2.7176844902501202e-05, "loss": 0.9012, "step": 1554 }, { "epoch": 0.22, "learning_rate": 2.717277949607528e-05, "loss": 1.0045, "step": 1555 }, { "epoch": 0.22, "learning_rate": 2.7168711469151166e-05, "loss": 0.8929, "step": 1556 }, { "epoch": 0.22, "learning_rate": 2.7164640822604602e-05, "loss": 0.9286, "step": 1557 }, { "epoch": 0.22, "learning_rate": 2.7160567557311896e-05, "loss": 0.8348, "step": 1558 }, { "epoch": 0.22, "learning_rate": 2.715649167414992e-05, "loss": 0.8292, "step": 1559 }, { "epoch": 0.22, "learning_rate": 2.7152413173996116e-05, "loss": 0.7614, "step": 1560 }, { "epoch": 0.22, "learning_rate": 2.714833205772848e-05, "loss": 0.9911, "step": 1561 }, { "epoch": 0.22, "learning_rate": 2.7144248326225576e-05, "loss": 0.8493, "step": 1562 }, { "epoch": 0.22, "learning_rate": 2.714016198036653e-05, "loss": 0.9746, "step": 1563 }, { "epoch": 0.22, "learning_rate": 2.713607302103103e-05, "loss": 0.88, "step": 1564 }, { "epoch": 0.22, "learning_rate": 2.7131981449099334e-05, "loss": 0.7818, "step": 1565 }, { "epoch": 0.22, "learning_rate": 2.7127887265452254e-05, "loss": 1.0128, "step": 1566 }, { "epoch": 0.22, "learning_rate": 2.712379047097116e-05, "loss": 0.9682, "step": 1567 }, { "epoch": 0.22, "learning_rate": 2.7119691066537998e-05, "loss": 0.8404, "step": 1568 }, { "epoch": 0.22, "learning_rate": 2.7115589053035262e-05, "loss": 0.8823, "step": 1569 }, { "epoch": 0.22, "learning_rate": 2.7111484431346015e-05, "loss": 0.8465, "step": 1570 }, { "epoch": 0.23, "learning_rate": 2.7107377202353892e-05, "loss": 0.8306, "step": 1571 }, { "epoch": 0.23, "learning_rate": 2.710326736694306e-05, "loss": 0.9431, "step": 1572 }, { "epoch": 0.23, "learning_rate": 2.709915492599828e-05, "loss": 0.9408, "step": 1573 }, { "epoch": 0.23, "learning_rate": 2.7095039880404843e-05, "loss": 1.0502, "step": 1574 }, { "epoch": 0.23, "learning_rate": 2.7090922231048628e-05, "loss": 0.9397, "step": 1575 }, { "epoch": 0.23, "learning_rate": 2.7086801978816063e-05, "loss": 0.7342, "step": 1576 }, { "epoch": 0.23, "learning_rate": 2.7082679124594135e-05, "loss": 0.9509, "step": 1577 }, { "epoch": 0.23, "learning_rate": 2.7078553669270395e-05, "loss": 0.8186, "step": 1578 }, { "epoch": 0.23, "learning_rate": 2.707442561373294e-05, "loss": 0.9983, "step": 1579 }, { "epoch": 0.23, "learning_rate": 2.707029495887045e-05, "loss": 0.9342, "step": 1580 }, { "epoch": 0.23, "learning_rate": 2.7066161705572145e-05, "loss": 0.7985, "step": 1581 }, { "epoch": 0.23, "learning_rate": 2.706202585472782e-05, "loss": 0.9252, "step": 1582 }, { "epoch": 0.23, "learning_rate": 2.7057887407227815e-05, "loss": 0.8064, "step": 1583 }, { "epoch": 0.23, "learning_rate": 2.7053746363963038e-05, "loss": 0.8806, "step": 1584 }, { "epoch": 0.23, "learning_rate": 2.7049602725824957e-05, "loss": 0.9671, "step": 1585 }, { "epoch": 0.23, "learning_rate": 2.704545649370558e-05, "loss": 0.9275, "step": 1586 }, { "epoch": 0.23, "learning_rate": 2.7041307668497508e-05, "loss": 0.8292, "step": 1587 }, { "epoch": 0.23, "learning_rate": 2.703715625109387e-05, "loss": 0.8382, "step": 1588 }, { "epoch": 0.23, "learning_rate": 2.7033002242388363e-05, "loss": 0.9046, "step": 1589 }, { "epoch": 0.23, "learning_rate": 2.7028845643275246e-05, "loss": 0.8856, "step": 1590 }, { "epoch": 0.23, "learning_rate": 2.7024686454649326e-05, "loss": 0.822, "step": 1591 }, { "epoch": 0.23, "learning_rate": 2.702052467740598e-05, "loss": 0.8379, "step": 1592 }, { "epoch": 0.23, "learning_rate": 2.7016360312441128e-05, "loss": 0.798, "step": 1593 }, { "epoch": 0.23, "learning_rate": 2.7012193360651263e-05, "loss": 1.0089, "step": 1594 }, { "epoch": 0.23, "learning_rate": 2.7008023822933423e-05, "loss": 0.8809, "step": 1595 }, { "epoch": 0.23, "learning_rate": 2.700385170018521e-05, "loss": 0.8432, "step": 1596 }, { "epoch": 0.23, "learning_rate": 2.6999676993304766e-05, "loss": 1.0145, "step": 1597 }, { "epoch": 0.23, "learning_rate": 2.699549970319082e-05, "loss": 0.8825, "step": 1598 }, { "epoch": 0.23, "learning_rate": 2.6991319830742626e-05, "loss": 1.0435, "step": 1599 }, { "epoch": 0.23, "learning_rate": 2.6987137376860014e-05, "loss": 0.9277, "step": 1600 }, { "epoch": 0.23, "learning_rate": 2.698295234244336e-05, "loss": 0.8555, "step": 1601 }, { "epoch": 0.23, "learning_rate": 2.6978764728393597e-05, "loss": 0.9565, "step": 1602 }, { "epoch": 0.23, "learning_rate": 2.6974574535612217e-05, "loss": 0.9118, "step": 1603 }, { "epoch": 0.23, "learning_rate": 2.6970381765001262e-05, "loss": 0.986, "step": 1604 }, { "epoch": 0.23, "learning_rate": 2.6966186417463337e-05, "loss": 1.0045, "step": 1605 }, { "epoch": 0.23, "learning_rate": 2.6961988493901595e-05, "loss": 0.9888, "step": 1606 }, { "epoch": 0.23, "learning_rate": 2.6957787995219738e-05, "loss": 0.909, "step": 1607 }, { "epoch": 0.23, "learning_rate": 2.6953584922322042e-05, "loss": 0.822, "step": 1608 }, { "epoch": 0.23, "learning_rate": 2.6949379276113312e-05, "loss": 0.8033, "step": 1609 }, { "epoch": 0.23, "learning_rate": 2.6945171057498926e-05, "loss": 0.745, "step": 1610 }, { "epoch": 0.23, "learning_rate": 2.694096026738481e-05, "loss": 0.9771, "step": 1611 }, { "epoch": 0.23, "learning_rate": 2.6936746906677438e-05, "loss": 0.9302, "step": 1612 }, { "epoch": 0.23, "learning_rate": 2.693253097628385e-05, "loss": 0.9509, "step": 1613 }, { "epoch": 0.23, "learning_rate": 2.6928312477111627e-05, "loss": 0.764, "step": 1614 }, { "epoch": 0.23, "learning_rate": 2.6924091410068905e-05, "loss": 1.0039, "step": 1615 }, { "epoch": 0.23, "learning_rate": 2.6919867776064376e-05, "loss": 0.9615, "step": 1616 }, { "epoch": 0.23, "learning_rate": 2.691564157600729e-05, "loss": 0.9978, "step": 1617 }, { "epoch": 0.23, "learning_rate": 2.691141281080744e-05, "loss": 0.957, "step": 1618 }, { "epoch": 0.23, "learning_rate": 2.690718148137517e-05, "loss": 0.6186, "step": 1619 }, { "epoch": 0.23, "learning_rate": 2.6902947588621384e-05, "loss": 0.7871, "step": 1620 }, { "epoch": 0.23, "learning_rate": 2.6898711133457536e-05, "loss": 0.9509, "step": 1621 }, { "epoch": 0.23, "learning_rate": 2.6894472116795626e-05, "loss": 0.9727, "step": 1622 }, { "epoch": 0.23, "learning_rate": 2.6890230539548215e-05, "loss": 0.7857, "step": 1623 }, { "epoch": 0.23, "learning_rate": 2.6885986402628402e-05, "loss": 0.7458, "step": 1624 }, { "epoch": 0.23, "learning_rate": 2.6881739706949853e-05, "loss": 0.9258, "step": 1625 }, { "epoch": 0.23, "learning_rate": 2.6877490453426773e-05, "loss": 0.8382, "step": 1626 }, { "epoch": 0.23, "learning_rate": 2.6873238642973917e-05, "loss": 0.9509, "step": 1627 }, { "epoch": 0.23, "learning_rate": 2.6868984276506604e-05, "loss": 0.8638, "step": 1628 }, { "epoch": 0.23, "learning_rate": 2.6864727354940684e-05, "loss": 0.7896, "step": 1629 }, { "epoch": 0.23, "learning_rate": 2.686046787919257e-05, "loss": 0.7977, "step": 1630 }, { "epoch": 0.23, "learning_rate": 2.6856205850179223e-05, "loss": 0.8158, "step": 1631 }, { "epoch": 0.23, "learning_rate": 2.685194126881815e-05, "loss": 0.9146, "step": 1632 }, { "epoch": 0.23, "learning_rate": 2.6847674136027416e-05, "loss": 0.8636, "step": 1633 }, { "epoch": 0.23, "learning_rate": 2.6843404452725628e-05, "loss": 0.8504, "step": 1634 }, { "epoch": 0.23, "learning_rate": 2.683913221983193e-05, "loss": 0.8873, "step": 1635 }, { "epoch": 0.23, "learning_rate": 2.6834857438266047e-05, "loss": 0.7651, "step": 1636 }, { "epoch": 0.23, "learning_rate": 2.6830580108948223e-05, "loss": 0.8697, "step": 1637 }, { "epoch": 0.23, "learning_rate": 2.6826300232799258e-05, "loss": 0.8958, "step": 1638 }, { "epoch": 0.23, "learning_rate": 2.6822017810740514e-05, "loss": 1.0022, "step": 1639 }, { "epoch": 0.23, "learning_rate": 2.6817732843693882e-05, "loss": 0.9057, "step": 1640 }, { "epoch": 0.24, "learning_rate": 2.681344533258181e-05, "loss": 0.798, "step": 1641 }, { "epoch": 0.24, "learning_rate": 2.6809155278327303e-05, "loss": 0.8393, "step": 1642 }, { "epoch": 0.24, "learning_rate": 2.6804862681853887e-05, "loss": 0.8167, "step": 1643 }, { "epoch": 0.24, "learning_rate": 2.680056754408567e-05, "loss": 0.976, "step": 1644 }, { "epoch": 0.24, "learning_rate": 2.679626986594727e-05, "loss": 0.9431, "step": 1645 }, { "epoch": 0.24, "learning_rate": 2.6791969648363882e-05, "loss": 0.9051, "step": 1646 }, { "epoch": 0.24, "learning_rate": 2.678766689226124e-05, "loss": 0.7749, "step": 1647 }, { "epoch": 0.24, "learning_rate": 2.6783361598565612e-05, "loss": 0.9107, "step": 1648 }, { "epoch": 0.24, "learning_rate": 2.6779053768203827e-05, "loss": 0.8789, "step": 1649 }, { "epoch": 0.24, "learning_rate": 2.6774743402103244e-05, "loss": 0.7199, "step": 1650 }, { "epoch": 0.24, "learning_rate": 2.6770430501191796e-05, "loss": 0.983, "step": 1651 }, { "epoch": 0.24, "learning_rate": 2.676611506639793e-05, "loss": 0.8968, "step": 1652 }, { "epoch": 0.24, "learning_rate": 2.6761797098650658e-05, "loss": 0.9587, "step": 1653 }, { "epoch": 0.24, "learning_rate": 2.6757476598879528e-05, "loss": 0.8125, "step": 1654 }, { "epoch": 0.24, "learning_rate": 2.675315356801464e-05, "loss": 0.7796, "step": 1655 }, { "epoch": 0.24, "learning_rate": 2.6748828006986633e-05, "loss": 0.9682, "step": 1656 }, { "epoch": 0.24, "learning_rate": 2.6744499916726695e-05, "loss": 0.685, "step": 1657 }, { "epoch": 0.24, "learning_rate": 2.6740169298166555e-05, "loss": 0.9604, "step": 1658 }, { "epoch": 0.24, "learning_rate": 2.6735836152238493e-05, "loss": 0.7785, "step": 1659 }, { "epoch": 0.24, "learning_rate": 2.673150047987532e-05, "loss": 0.8756, "step": 1660 }, { "epoch": 0.24, "learning_rate": 2.6727162282010404e-05, "loss": 0.9749, "step": 1661 }, { "epoch": 0.24, "learning_rate": 2.6722821559577655e-05, "loss": 1.1311, "step": 1662 }, { "epoch": 0.24, "learning_rate": 2.6718478313511514e-05, "loss": 0.8345, "step": 1663 }, { "epoch": 0.24, "learning_rate": 2.671413254474698e-05, "loss": 1.0117, "step": 1664 }, { "epoch": 0.24, "learning_rate": 2.6709784254219588e-05, "loss": 0.9565, "step": 1665 }, { "epoch": 0.24, "learning_rate": 2.6705433442865424e-05, "loss": 0.9291, "step": 1666 }, { "epoch": 0.24, "learning_rate": 2.67010801116211e-05, "loss": 1.0988, "step": 1667 }, { "epoch": 0.24, "learning_rate": 2.6696724261423787e-05, "loss": 0.9771, "step": 1668 }, { "epoch": 0.24, "learning_rate": 2.6692365893211188e-05, "loss": 0.9609, "step": 1669 }, { "epoch": 0.24, "learning_rate": 2.668800500792155e-05, "loss": 0.9439, "step": 1670 }, { "epoch": 0.24, "learning_rate": 2.6683641606493675e-05, "loss": 0.8895, "step": 1671 }, { "epoch": 0.24, "learning_rate": 2.667927568986688e-05, "loss": 0.7425, "step": 1672 }, { "epoch": 0.24, "learning_rate": 2.667490725898105e-05, "loss": 0.8002, "step": 1673 }, { "epoch": 0.24, "learning_rate": 2.6670536314776593e-05, "loss": 0.9682, "step": 1674 }, { "epoch": 0.24, "learning_rate": 2.666616285819447e-05, "loss": 0.935, "step": 1675 }, { "epoch": 0.24, "learning_rate": 2.666178689017618e-05, "loss": 0.7467, "step": 1676 }, { "epoch": 0.24, "learning_rate": 2.665740841166375e-05, "loss": 0.6758, "step": 1677 }, { "epoch": 0.24, "learning_rate": 2.6653027423599772e-05, "loss": 0.9643, "step": 1678 }, { "epoch": 0.24, "learning_rate": 2.6648643926927353e-05, "loss": 0.9838, "step": 1679 }, { "epoch": 0.24, "learning_rate": 2.6644257922590157e-05, "loss": 1.0301, "step": 1680 }, { "epoch": 0.24, "learning_rate": 2.6639869411532378e-05, "loss": 0.9129, "step": 1681 }, { "epoch": 0.24, "learning_rate": 2.663547839469876e-05, "loss": 0.8722, "step": 1682 }, { "epoch": 0.24, "learning_rate": 2.6631084873034578e-05, "loss": 0.7762, "step": 1683 }, { "epoch": 0.24, "learning_rate": 2.6626688847485647e-05, "loss": 0.8468, "step": 1684 }, { "epoch": 0.24, "learning_rate": 2.662229031899832e-05, "loss": 0.9263, "step": 1685 }, { "epoch": 0.24, "learning_rate": 2.6617889288519498e-05, "loss": 0.8019, "step": 1686 }, { "epoch": 0.24, "learning_rate": 2.661348575699661e-05, "loss": 0.8767, "step": 1687 }, { "epoch": 0.24, "learning_rate": 2.6609079725377625e-05, "loss": 0.8585, "step": 1688 }, { "epoch": 0.24, "learning_rate": 2.6604671194611055e-05, "loss": 0.9202, "step": 1689 }, { "epoch": 0.24, "learning_rate": 2.660026016564595e-05, "loss": 0.9459, "step": 1690 }, { "epoch": 0.24, "learning_rate": 2.659584663943189e-05, "loss": 0.7924, "step": 1691 }, { "epoch": 0.24, "learning_rate": 2.6591430616919005e-05, "loss": 0.9699, "step": 1692 }, { "epoch": 0.24, "learning_rate": 2.6587012099057953e-05, "loss": 0.8856, "step": 1693 }, { "epoch": 0.24, "learning_rate": 2.6582591086799922e-05, "loss": 0.9146, "step": 1694 }, { "epoch": 0.24, "learning_rate": 2.6578167581096662e-05, "loss": 1.0999, "step": 1695 }, { "epoch": 0.24, "learning_rate": 2.6573741582900435e-05, "loss": 0.8158, "step": 1696 }, { "epoch": 0.24, "learning_rate": 2.6569313093164055e-05, "loss": 0.9983, "step": 1697 }, { "epoch": 0.24, "learning_rate": 2.6564882112840856e-05, "loss": 0.933, "step": 1698 }, { "epoch": 0.24, "learning_rate": 2.6560448642884728e-05, "loss": 0.9799, "step": 1699 }, { "epoch": 0.24, "learning_rate": 2.6556012684250078e-05, "loss": 0.9051, "step": 1700 }, { "epoch": 0.24, "learning_rate": 2.655157423789187e-05, "loss": 0.6939, "step": 1701 }, { "epoch": 0.24, "learning_rate": 2.6547133304765584e-05, "loss": 0.8465, "step": 1702 }, { "epoch": 0.24, "learning_rate": 2.6542689885827245e-05, "loss": 0.9258, "step": 1703 }, { "epoch": 0.24, "learning_rate": 2.653824398203341e-05, "loss": 0.8923, "step": 1704 }, { "epoch": 0.24, "learning_rate": 2.6533795594341172e-05, "loss": 0.5596, "step": 1705 }, { "epoch": 0.24, "learning_rate": 2.6529344723708163e-05, "loss": 0.8996, "step": 1706 }, { "epoch": 0.24, "learning_rate": 2.652489137109254e-05, "loss": 0.9448, "step": 1707 }, { "epoch": 0.24, "learning_rate": 2.6520435537453e-05, "loss": 1.0173, "step": 1708 }, { "epoch": 0.24, "learning_rate": 2.651597722374878e-05, "loss": 0.8577, "step": 1709 }, { "epoch": 0.24, "learning_rate": 2.6511516430939636e-05, "loss": 0.9615, "step": 1710 }, { "epoch": 0.25, "learning_rate": 2.650705315998587e-05, "loss": 0.6359, "step": 1711 }, { "epoch": 0.25, "learning_rate": 2.6502587411848317e-05, "loss": 0.9849, "step": 1712 }, { "epoch": 0.25, "learning_rate": 2.6498119187488332e-05, "loss": 0.8125, "step": 1713 }, { "epoch": 0.25, "learning_rate": 2.6493648487867824e-05, "loss": 0.9704, "step": 1714 }, { "epoch": 0.25, "learning_rate": 2.6489175313949227e-05, "loss": 1.1027, "step": 1715 }, { "epoch": 0.25, "learning_rate": 2.648469966669549e-05, "loss": 1.005, "step": 1716 }, { "epoch": 0.25, "learning_rate": 2.648022154707012e-05, "loss": 0.8538, "step": 1717 }, { "epoch": 0.25, "learning_rate": 2.6475740956037144e-05, "loss": 0.9049, "step": 1718 }, { "epoch": 0.25, "learning_rate": 2.6471257894561115e-05, "loss": 0.8225, "step": 1719 }, { "epoch": 0.25, "learning_rate": 2.6466772363607133e-05, "loss": 0.8873, "step": 1720 }, { "epoch": 0.25, "learning_rate": 2.646228436414082e-05, "loss": 0.9436, "step": 1721 }, { "epoch": 0.25, "learning_rate": 2.6457793897128336e-05, "loss": 0.7461, "step": 1722 }, { "epoch": 0.25, "learning_rate": 2.645330096353636e-05, "loss": 0.8984, "step": 1723 }, { "epoch": 0.25, "learning_rate": 2.6448805564332107e-05, "loss": 0.7614, "step": 1724 }, { "epoch": 0.25, "learning_rate": 2.644430770048333e-05, "loss": 0.8825, "step": 1725 }, { "epoch": 0.25, "learning_rate": 2.643980737295831e-05, "loss": 1.0485, "step": 1726 }, { "epoch": 0.25, "learning_rate": 2.6435304582725855e-05, "loss": 1.0011, "step": 1727 }, { "epoch": 0.25, "learning_rate": 2.64307993307553e-05, "loss": 0.7056, "step": 1728 }, { "epoch": 0.25, "learning_rate": 2.6426291618016523e-05, "loss": 0.8831, "step": 1729 }, { "epoch": 0.25, "learning_rate": 2.6421781445479912e-05, "loss": 0.8923, "step": 1730 }, { "epoch": 0.25, "learning_rate": 2.6417268814116404e-05, "loss": 0.8384, "step": 1731 }, { "epoch": 0.25, "learning_rate": 2.6412753724897454e-05, "loss": 0.8186, "step": 1732 }, { "epoch": 0.25, "learning_rate": 2.6408236178795053e-05, "loss": 0.8386, "step": 1733 }, { "epoch": 0.25, "learning_rate": 2.64037161767817e-05, "loss": 0.9632, "step": 1734 }, { "epoch": 0.25, "learning_rate": 2.6399193719830463e-05, "loss": 1.0273, "step": 1735 }, { "epoch": 0.25, "learning_rate": 2.6394668808914902e-05, "loss": 0.9727, "step": 1736 }, { "epoch": 0.25, "learning_rate": 2.639014144500912e-05, "loss": 1.0402, "step": 1737 }, { "epoch": 0.25, "learning_rate": 2.638561162908775e-05, "loss": 1.0536, "step": 1738 }, { "epoch": 0.25, "learning_rate": 2.638107936212594e-05, "loss": 1.0469, "step": 1739 }, { "epoch": 0.25, "learning_rate": 2.637654464509939e-05, "loss": 0.8881, "step": 1740 }, { "epoch": 0.25, "learning_rate": 2.6372007478984294e-05, "loss": 0.726, "step": 1741 }, { "epoch": 0.25, "learning_rate": 2.6367467864757404e-05, "loss": 0.9651, "step": 1742 }, { "epoch": 0.25, "learning_rate": 2.6362925803395988e-05, "loss": 0.8237, "step": 1743 }, { "epoch": 0.25, "learning_rate": 2.6358381295877834e-05, "loss": 0.9548, "step": 1744 }, { "epoch": 0.25, "learning_rate": 2.635383434318126e-05, "loss": 1.0279, "step": 1745 }, { "epoch": 0.25, "learning_rate": 2.6349284946285122e-05, "loss": 0.8968, "step": 1746 }, { "epoch": 0.25, "learning_rate": 2.6344733106168782e-05, "loss": 0.7977, "step": 1747 }, { "epoch": 0.25, "learning_rate": 2.634017882381214e-05, "loss": 0.9364, "step": 1748 }, { "epoch": 0.25, "learning_rate": 2.6335622100195628e-05, "loss": 1.0312, "step": 1749 }, { "epoch": 0.25, "learning_rate": 2.633106293630019e-05, "loss": 0.8348, "step": 1750 }, { "epoch": 0.25, "learning_rate": 2.63265013331073e-05, "loss": 0.8237, "step": 1751 }, { "epoch": 0.25, "learning_rate": 2.632193729159896e-05, "loss": 0.952, "step": 1752 }, { "epoch": 0.25, "learning_rate": 2.63173708127577e-05, "loss": 0.656, "step": 1753 }, { "epoch": 0.25, "learning_rate": 2.6312801897566563e-05, "loss": 0.7796, "step": 1754 }, { "epoch": 0.25, "learning_rate": 2.6308230547009126e-05, "loss": 0.7718, "step": 1755 }, { "epoch": 0.25, "learning_rate": 2.6303656762069485e-05, "loss": 0.8443, "step": 1756 }, { "epoch": 0.25, "learning_rate": 2.6299080543732267e-05, "loss": 0.935, "step": 1757 }, { "epoch": 0.25, "learning_rate": 2.6294501892982618e-05, "loss": 0.9609, "step": 1758 }, { "epoch": 0.25, "learning_rate": 2.6289920810806207e-05, "loss": 0.9001, "step": 1759 }, { "epoch": 0.25, "learning_rate": 2.6285337298189223e-05, "loss": 0.8923, "step": 1760 }, { "epoch": 0.25, "learning_rate": 2.6280751356118394e-05, "loss": 0.981, "step": 1761 }, { "epoch": 0.25, "learning_rate": 2.6276162985580948e-05, "loss": 0.7214, "step": 1762 }, { "epoch": 0.25, "learning_rate": 2.6271572187564653e-05, "loss": 0.9314, "step": 1763 }, { "epoch": 0.25, "learning_rate": 2.6266978963057797e-05, "loss": 0.8265, "step": 1764 }, { "epoch": 0.25, "learning_rate": 2.626238331304919e-05, "loss": 0.8599, "step": 1765 }, { "epoch": 0.25, "learning_rate": 2.6257785238528147e-05, "loss": 0.9467, "step": 1766 }, { "epoch": 0.25, "learning_rate": 2.625318474048453e-05, "loss": 0.7416, "step": 1767 }, { "epoch": 0.25, "learning_rate": 2.6248581819908716e-05, "loss": 0.9241, "step": 1768 }, { "epoch": 0.25, "learning_rate": 2.6243976477791593e-05, "loss": 0.8583, "step": 1769 }, { "epoch": 0.25, "learning_rate": 2.623936871512458e-05, "loss": 1.0061, "step": 1770 }, { "epoch": 0.25, "learning_rate": 2.6234758532899615e-05, "loss": 0.9637, "step": 1771 }, { "epoch": 0.25, "learning_rate": 2.6230145932109155e-05, "loss": 0.9542, "step": 1772 }, { "epoch": 0.25, "learning_rate": 2.6225530913746177e-05, "loss": 0.9185, "step": 1773 }, { "epoch": 0.25, "learning_rate": 2.6220913478804178e-05, "loss": 0.755, "step": 1774 }, { "epoch": 0.25, "learning_rate": 2.621629362827719e-05, "loss": 0.8075, "step": 1775 }, { "epoch": 0.25, "learning_rate": 2.6211671363159737e-05, "loss": 1.0731, "step": 1776 }, { "epoch": 0.25, "learning_rate": 2.6207046684446888e-05, "loss": 0.9414, "step": 1777 }, { "epoch": 0.25, "learning_rate": 2.6202419593134222e-05, "loss": 0.8583, "step": 1778 }, { "epoch": 0.25, "learning_rate": 2.619779009021783e-05, "loss": 0.8588, "step": 1779 }, { "epoch": 0.25, "learning_rate": 2.6193158176694336e-05, "loss": 0.861, "step": 1780 }, { "epoch": 0.26, "learning_rate": 2.6188523853560876e-05, "loss": 0.9905, "step": 1781 }, { "epoch": 0.26, "learning_rate": 2.61838871218151e-05, "loss": 0.827, "step": 1782 }, { "epoch": 0.26, "learning_rate": 2.6179247982455186e-05, "loss": 0.9972, "step": 1783 }, { "epoch": 0.26, "learning_rate": 2.6174606436479823e-05, "loss": 0.8811, "step": 1784 }, { "epoch": 0.26, "learning_rate": 2.6169962484888227e-05, "loss": 0.9096, "step": 1785 }, { "epoch": 0.26, "learning_rate": 2.6165316128680116e-05, "loss": 0.7974, "step": 1786 }, { "epoch": 0.26, "learning_rate": 2.616066736885575e-05, "loss": 0.7824, "step": 1787 }, { "epoch": 0.26, "learning_rate": 2.6156016206415873e-05, "loss": 0.916, "step": 1788 }, { "epoch": 0.26, "learning_rate": 2.6151362642361782e-05, "loss": 0.9891, "step": 1789 }, { "epoch": 0.26, "learning_rate": 2.6146706677695267e-05, "loss": 0.8675, "step": 1790 }, { "epoch": 0.26, "learning_rate": 2.614204831341864e-05, "loss": 0.9922, "step": 1791 }, { "epoch": 0.26, "learning_rate": 2.613738755053474e-05, "loss": 0.9805, "step": 1792 }, { "epoch": 0.26, "learning_rate": 2.6132724390046895e-05, "loss": 0.9336, "step": 1793 }, { "epoch": 0.26, "learning_rate": 2.6128058832958993e-05, "loss": 1.053, "step": 1794 }, { "epoch": 0.26, "learning_rate": 2.6123390880275396e-05, "loss": 0.8756, "step": 1795 }, { "epoch": 0.26, "learning_rate": 2.6118720533001007e-05, "loss": 0.8873, "step": 1796 }, { "epoch": 0.26, "learning_rate": 2.6114047792141227e-05, "loss": 0.933, "step": 1797 }, { "epoch": 0.26, "learning_rate": 2.6109372658701993e-05, "loss": 0.9766, "step": 1798 }, { "epoch": 0.26, "learning_rate": 2.6104695133689735e-05, "loss": 0.8178, "step": 1799 }, { "epoch": 0.26, "learning_rate": 2.6100015218111416e-05, "loss": 1.0137, "step": 1800 }, { "epoch": 0.26, "learning_rate": 2.6095332912974504e-05, "loss": 0.8747, "step": 1801 }, { "epoch": 0.26, "learning_rate": 2.609064821928698e-05, "loss": 0.8873, "step": 1802 }, { "epoch": 0.26, "learning_rate": 2.608596113805735e-05, "loss": 0.7684, "step": 1803 }, { "epoch": 0.26, "learning_rate": 2.6081271670294618e-05, "loss": 0.8806, "step": 1804 }, { "epoch": 0.26, "learning_rate": 2.6076579817008312e-05, "loss": 0.9414, "step": 1805 }, { "epoch": 0.26, "learning_rate": 2.6071885579208477e-05, "loss": 1.0234, "step": 1806 }, { "epoch": 0.26, "learning_rate": 2.6067188957905668e-05, "loss": 0.9258, "step": 1807 }, { "epoch": 0.26, "learning_rate": 2.6062489954110942e-05, "loss": 1.0474, "step": 1808 }, { "epoch": 0.26, "learning_rate": 2.6057788568835886e-05, "loss": 0.8005, "step": 1809 }, { "epoch": 0.26, "learning_rate": 2.6053084803092584e-05, "loss": 0.9364, "step": 1810 }, { "epoch": 0.26, "learning_rate": 2.604837865789365e-05, "loss": 0.9799, "step": 1811 }, { "epoch": 0.26, "learning_rate": 2.6043670134252192e-05, "loss": 0.8449, "step": 1812 }, { "epoch": 0.26, "learning_rate": 2.6038959233181842e-05, "loss": 1.077, "step": 1813 }, { "epoch": 0.26, "learning_rate": 2.6034245955696743e-05, "loss": 0.8828, "step": 1814 }, { "epoch": 0.26, "learning_rate": 2.602953030281155e-05, "loss": 0.8195, "step": 1815 }, { "epoch": 0.26, "learning_rate": 2.6024812275541415e-05, "loss": 0.8739, "step": 1816 }, { "epoch": 0.26, "learning_rate": 2.602009187490202e-05, "loss": 0.9286, "step": 1817 }, { "epoch": 0.26, "learning_rate": 2.601536910190955e-05, "loss": 0.8675, "step": 1818 }, { "epoch": 0.26, "learning_rate": 2.60106439575807e-05, "loss": 0.8633, "step": 1819 }, { "epoch": 0.26, "learning_rate": 2.6005916442932676e-05, "loss": 1.0714, "step": 1820 }, { "epoch": 0.26, "learning_rate": 2.6001186558983198e-05, "loss": 0.9453, "step": 1821 }, { "epoch": 0.26, "learning_rate": 2.5996454306750487e-05, "loss": 0.7372, "step": 1822 }, { "epoch": 0.26, "learning_rate": 2.599171968725329e-05, "loss": 0.9408, "step": 1823 }, { "epoch": 0.26, "learning_rate": 2.598698270151084e-05, "loss": 0.923, "step": 1824 }, { "epoch": 0.26, "learning_rate": 2.5982243350542906e-05, "loss": 1.0357, "step": 1825 }, { "epoch": 0.26, "learning_rate": 2.5977501635369743e-05, "loss": 1.0112, "step": 1826 }, { "epoch": 0.26, "learning_rate": 2.597275755701213e-05, "loss": 1.0089, "step": 1827 }, { "epoch": 0.26, "learning_rate": 2.5968011116491353e-05, "loss": 0.8013, "step": 1828 }, { "epoch": 0.26, "learning_rate": 2.596326231482919e-05, "loss": 0.966, "step": 1829 }, { "epoch": 0.26, "learning_rate": 2.5958511153047956e-05, "loss": 0.7729, "step": 1830 }, { "epoch": 0.26, "learning_rate": 2.595375763217045e-05, "loss": 1.13, "step": 1831 }, { "epoch": 0.26, "learning_rate": 2.594900175321999e-05, "loss": 0.8521, "step": 1832 }, { "epoch": 0.26, "learning_rate": 2.5944243517220397e-05, "loss": 1.0251, "step": 1833 }, { "epoch": 0.26, "learning_rate": 2.5939482925196004e-05, "loss": 0.9888, "step": 1834 }, { "epoch": 0.26, "learning_rate": 2.5934719978171648e-05, "loss": 0.8984, "step": 1835 }, { "epoch": 0.26, "learning_rate": 2.592995467717267e-05, "loss": 0.9054, "step": 1836 }, { "epoch": 0.26, "learning_rate": 2.5925187023224923e-05, "loss": 0.8382, "step": 1837 }, { "epoch": 0.26, "learning_rate": 2.5920417017354773e-05, "loss": 0.817, "step": 1838 }, { "epoch": 0.26, "learning_rate": 2.5915644660589076e-05, "loss": 0.5381, "step": 1839 }, { "epoch": 0.26, "learning_rate": 2.59108699539552e-05, "loss": 1.12, "step": 1840 }, { "epoch": 0.26, "learning_rate": 2.5906092898481028e-05, "loss": 0.9688, "step": 1841 }, { "epoch": 0.26, "learning_rate": 2.5901313495194937e-05, "loss": 0.8845, "step": 1842 }, { "epoch": 0.26, "learning_rate": 2.5896531745125814e-05, "loss": 0.928, "step": 1843 }, { "epoch": 0.26, "learning_rate": 2.5891747649303057e-05, "loss": 0.7525, "step": 1844 }, { "epoch": 0.26, "learning_rate": 2.588696120875656e-05, "loss": 0.8094, "step": 1845 }, { "epoch": 0.26, "learning_rate": 2.5882172424516726e-05, "loss": 0.7533, "step": 1846 }, { "epoch": 0.26, "learning_rate": 2.5877381297614457e-05, "loss": 0.8728, "step": 1847 }, { "epoch": 0.26, "learning_rate": 2.587258782908117e-05, "loss": 1.0435, "step": 1848 }, { "epoch": 0.26, "learning_rate": 2.586779201994878e-05, "loss": 0.8376, "step": 1849 }, { "epoch": 0.27, "learning_rate": 2.5862993871249704e-05, "loss": 0.8125, "step": 1850 }, { "epoch": 0.27, "learning_rate": 2.585819338401686e-05, "loss": 0.9403, "step": 1851 }, { "epoch": 0.27, "learning_rate": 2.5853390559283686e-05, "loss": 0.894, "step": 1852 }, { "epoch": 0.27, "learning_rate": 2.58485853980841e-05, "loss": 0.793, "step": 1853 }, { "epoch": 0.27, "learning_rate": 2.584377790145254e-05, "loss": 0.885, "step": 1854 }, { "epoch": 0.27, "learning_rate": 2.5838968070423936e-05, "loss": 0.7796, "step": 1855 }, { "epoch": 0.27, "learning_rate": 2.5834155906033736e-05, "loss": 0.8803, "step": 1856 }, { "epoch": 0.27, "learning_rate": 2.5829341409317866e-05, "loss": 0.9392, "step": 1857 }, { "epoch": 0.27, "learning_rate": 2.582452458131278e-05, "loss": 0.9492, "step": 1858 }, { "epoch": 0.27, "learning_rate": 2.5819705423055416e-05, "loss": 0.7391, "step": 1859 }, { "epoch": 0.27, "learning_rate": 2.581488393558322e-05, "loss": 0.8817, "step": 1860 }, { "epoch": 0.27, "learning_rate": 2.581006011993414e-05, "loss": 1.0815, "step": 1861 }, { "epoch": 0.27, "learning_rate": 2.580523397714663e-05, "loss": 0.9392, "step": 1862 }, { "epoch": 0.27, "learning_rate": 2.580040550825962e-05, "loss": 0.8956, "step": 1863 }, { "epoch": 0.27, "learning_rate": 2.5795574714312578e-05, "loss": 0.9196, "step": 1864 }, { "epoch": 0.27, "learning_rate": 2.579074159634545e-05, "loss": 1.0045, "step": 1865 }, { "epoch": 0.27, "learning_rate": 2.578590615539869e-05, "loss": 0.7081, "step": 1866 }, { "epoch": 0.27, "learning_rate": 2.5781068392513237e-05, "loss": 0.9283, "step": 1867 }, { "epoch": 0.27, "learning_rate": 2.5776228308730555e-05, "loss": 0.9791, "step": 1868 }, { "epoch": 0.27, "learning_rate": 2.577138590509258e-05, "loss": 0.9738, "step": 1869 }, { "epoch": 0.27, "learning_rate": 2.576654118264178e-05, "loss": 0.6964, "step": 1870 }, { "epoch": 0.27, "learning_rate": 2.5761694142421087e-05, "loss": 0.8705, "step": 1871 }, { "epoch": 0.27, "learning_rate": 2.5756844785473958e-05, "loss": 0.995, "step": 1872 }, { "epoch": 0.27, "learning_rate": 2.5751993112844336e-05, "loss": 0.9107, "step": 1873 }, { "epoch": 0.27, "learning_rate": 2.574713912557667e-05, "loss": 0.8828, "step": 1874 }, { "epoch": 0.27, "learning_rate": 2.57422828247159e-05, "loss": 0.9727, "step": 1875 }, { "epoch": 0.27, "learning_rate": 2.5737424211307467e-05, "loss": 0.9235, "step": 1876 }, { "epoch": 0.27, "learning_rate": 2.5732563286397313e-05, "loss": 0.9436, "step": 1877 }, { "epoch": 0.27, "learning_rate": 2.5727700051031873e-05, "loss": 0.9542, "step": 1878 }, { "epoch": 0.27, "learning_rate": 2.5722834506258077e-05, "loss": 0.858, "step": 1879 }, { "epoch": 0.27, "learning_rate": 2.5717966653123365e-05, "loss": 0.802, "step": 1880 }, { "epoch": 0.27, "learning_rate": 2.5713096492675662e-05, "loss": 0.8867, "step": 1881 }, { "epoch": 0.27, "learning_rate": 2.5708224025963387e-05, "loss": 1.0854, "step": 1882 }, { "epoch": 0.27, "learning_rate": 2.5703349254035473e-05, "loss": 0.8722, "step": 1883 }, { "epoch": 0.27, "learning_rate": 2.569847217794133e-05, "loss": 0.8357, "step": 1884 }, { "epoch": 0.27, "learning_rate": 2.5693592798730872e-05, "loss": 0.8873, "step": 1885 }, { "epoch": 0.27, "learning_rate": 2.568871111745451e-05, "loss": 0.8765, "step": 1886 }, { "epoch": 0.27, "learning_rate": 2.5683827135163145e-05, "loss": 1.0491, "step": 1887 }, { "epoch": 0.27, "learning_rate": 2.5678940852908186e-05, "loss": 0.8206, "step": 1888 }, { "epoch": 0.27, "learning_rate": 2.567405227174152e-05, "loss": 0.8973, "step": 1889 }, { "epoch": 0.27, "learning_rate": 2.5669161392715545e-05, "loss": 0.8119, "step": 1890 }, { "epoch": 0.27, "learning_rate": 2.5664268216883142e-05, "loss": 0.8817, "step": 1891 }, { "epoch": 0.27, "learning_rate": 2.5659372745297694e-05, "loss": 0.8728, "step": 1892 }, { "epoch": 0.27, "learning_rate": 2.5654474979013066e-05, "loss": 0.9621, "step": 1893 }, { "epoch": 0.27, "learning_rate": 2.564957491908364e-05, "loss": 0.7807, "step": 1894 }, { "epoch": 0.27, "learning_rate": 2.564467256656427e-05, "loss": 0.8234, "step": 1895 }, { "epoch": 0.27, "learning_rate": 2.5639767922510306e-05, "loss": 0.7455, "step": 1896 }, { "epoch": 0.27, "learning_rate": 2.56348609879776e-05, "loss": 0.8326, "step": 1897 }, { "epoch": 0.27, "learning_rate": 2.5629951764022505e-05, "loss": 0.846, "step": 1898 }, { "epoch": 0.27, "learning_rate": 2.5625040251701836e-05, "loss": 0.8848, "step": 1899 }, { "epoch": 0.27, "learning_rate": 2.5620126452072932e-05, "loss": 0.9565, "step": 1900 }, { "epoch": 0.27, "learning_rate": 2.5615210366193613e-05, "loss": 1.0608, "step": 1901 }, { "epoch": 0.27, "learning_rate": 2.5610291995122187e-05, "loss": 0.8917, "step": 1902 }, { "epoch": 0.27, "learning_rate": 2.5605371339917454e-05, "loss": 0.8583, "step": 1903 }, { "epoch": 0.27, "learning_rate": 2.5600448401638718e-05, "loss": 1.0631, "step": 1904 }, { "epoch": 0.27, "learning_rate": 2.559552318134576e-05, "loss": 1.0329, "step": 1905 }, { "epoch": 0.27, "learning_rate": 2.5590595680098862e-05, "loss": 1.0033, "step": 1906 }, { "epoch": 0.27, "learning_rate": 2.5585665898958787e-05, "loss": 0.7522, "step": 1907 }, { "epoch": 0.27, "learning_rate": 2.5580733838986806e-05, "loss": 0.952, "step": 1908 }, { "epoch": 0.27, "learning_rate": 2.5575799501244656e-05, "loss": 1.0119, "step": 1909 }, { "epoch": 0.27, "learning_rate": 2.5570862886794583e-05, "loss": 1.0346, "step": 1910 }, { "epoch": 0.27, "learning_rate": 2.5565923996699317e-05, "loss": 0.7316, "step": 1911 }, { "epoch": 0.27, "learning_rate": 2.5560982832022082e-05, "loss": 0.9191, "step": 1912 }, { "epoch": 0.27, "learning_rate": 2.555603939382659e-05, "loss": 0.9297, "step": 1913 }, { "epoch": 0.27, "learning_rate": 2.555109368317704e-05, "loss": 1.0485, "step": 1914 }, { "epoch": 0.27, "learning_rate": 2.5546145701138114e-05, "loss": 0.8689, "step": 1915 }, { "epoch": 0.27, "learning_rate": 2.5541195448774998e-05, "loss": 0.8181, "step": 1916 }, { "epoch": 0.27, "learning_rate": 2.5536242927153353e-05, "loss": 0.947, "step": 1917 }, { "epoch": 0.27, "learning_rate": 2.5531288137339343e-05, "loss": 0.9855, "step": 1918 }, { "epoch": 0.27, "learning_rate": 2.5526331080399596e-05, "loss": 0.8689, "step": 1919 }, { "epoch": 0.28, "learning_rate": 2.552137175740126e-05, "loss": 0.6417, "step": 1920 }, { "epoch": 0.28, "learning_rate": 2.551641016941195e-05, "loss": 0.9464, "step": 1921 }, { "epoch": 0.28, "learning_rate": 2.5511446317499767e-05, "loss": 0.9688, "step": 1922 }, { "epoch": 0.28, "learning_rate": 2.5506480202733313e-05, "loss": 0.8136, "step": 1923 }, { "epoch": 0.28, "learning_rate": 2.5501511826181662e-05, "loss": 1.0212, "step": 1924 }, { "epoch": 0.28, "learning_rate": 2.5496541188914393e-05, "loss": 0.9961, "step": 1925 }, { "epoch": 0.28, "learning_rate": 2.5491568292001555e-05, "loss": 0.7991, "step": 1926 }, { "epoch": 0.28, "learning_rate": 2.5486593136513683e-05, "loss": 0.8867, "step": 1927 }, { "epoch": 0.28, "learning_rate": 2.5481615723521813e-05, "loss": 0.8387, "step": 1928 }, { "epoch": 0.28, "learning_rate": 2.5476636054097465e-05, "loss": 0.6847, "step": 1929 }, { "epoch": 0.28, "learning_rate": 2.5471654129312623e-05, "loss": 0.8041, "step": 1930 }, { "epoch": 0.28, "learning_rate": 2.5466669950239785e-05, "loss": 0.8209, "step": 1931 }, { "epoch": 0.28, "learning_rate": 2.5461683517951913e-05, "loss": 0.8454, "step": 1932 }, { "epoch": 0.28, "learning_rate": 2.5456694833522467e-05, "loss": 0.8292, "step": 1933 }, { "epoch": 0.28, "learning_rate": 2.5451703898025388e-05, "loss": 1.0089, "step": 1934 }, { "epoch": 0.28, "learning_rate": 2.54467107125351e-05, "loss": 0.904, "step": 1935 }, { "epoch": 0.28, "learning_rate": 2.5441715278126506e-05, "loss": 1.0804, "step": 1936 }, { "epoch": 0.28, "learning_rate": 2.5436717595875013e-05, "loss": 0.6655, "step": 1937 }, { "epoch": 0.28, "learning_rate": 2.5431717666856485e-05, "loss": 0.7946, "step": 1938 }, { "epoch": 0.28, "learning_rate": 2.5426715492147287e-05, "loss": 0.7782, "step": 1939 }, { "epoch": 0.28, "learning_rate": 2.542171107282427e-05, "loss": 0.9933, "step": 1940 }, { "epoch": 0.28, "learning_rate": 2.541670440996475e-05, "loss": 0.9241, "step": 1941 }, { "epoch": 0.28, "learning_rate": 2.541169550464655e-05, "loss": 0.9782, "step": 1942 }, { "epoch": 0.28, "learning_rate": 2.5406684357947956e-05, "loss": 0.8912, "step": 1943 }, { "epoch": 0.28, "learning_rate": 2.540167097094774e-05, "loss": 0.9961, "step": 1944 }, { "epoch": 0.28, "learning_rate": 2.5396655344725168e-05, "loss": 0.7718, "step": 1945 }, { "epoch": 0.28, "learning_rate": 2.539163748035998e-05, "loss": 0.726, "step": 1946 }, { "epoch": 0.28, "learning_rate": 2.538661737893239e-05, "loss": 0.8231, "step": 1947 }, { "epoch": 0.28, "learning_rate": 2.5381595041523118e-05, "loss": 0.8147, "step": 1948 }, { "epoch": 0.28, "learning_rate": 2.537657046921333e-05, "loss": 0.8306, "step": 1949 }, { "epoch": 0.28, "learning_rate": 2.5371543663084703e-05, "loss": 1.0128, "step": 1950 }, { "epoch": 0.28, "learning_rate": 2.536651462421938e-05, "loss": 0.8867, "step": 1951 }, { "epoch": 0.28, "learning_rate": 2.536148335369999e-05, "loss": 1.0285, "step": 1952 }, { "epoch": 0.28, "learning_rate": 2.535644985260964e-05, "loss": 0.8092, "step": 1953 }, { "epoch": 0.28, "learning_rate": 2.535141412203192e-05, "loss": 0.9587, "step": 1954 }, { "epoch": 0.28, "learning_rate": 2.5346376163050894e-05, "loss": 0.8259, "step": 1955 }, { "epoch": 0.28, "learning_rate": 2.5341335976751117e-05, "loss": 0.673, "step": 1956 }, { "epoch": 0.28, "learning_rate": 2.5336293564217612e-05, "loss": 0.9018, "step": 1957 }, { "epoch": 0.28, "learning_rate": 2.5331248926535887e-05, "loss": 0.8672, "step": 1958 }, { "epoch": 0.28, "learning_rate": 2.532620206479193e-05, "loss": 0.8917, "step": 1959 }, { "epoch": 0.28, "learning_rate": 2.5321152980072193e-05, "loss": 0.964, "step": 1960 }, { "epoch": 0.28, "learning_rate": 2.531610167346363e-05, "loss": 0.9247, "step": 1961 }, { "epoch": 0.28, "learning_rate": 2.5311048146053668e-05, "loss": 0.7902, "step": 1962 }, { "epoch": 0.28, "learning_rate": 2.5305992398930197e-05, "loss": 0.6867, "step": 1963 }, { "epoch": 0.28, "learning_rate": 2.530093443318159e-05, "loss": 0.772, "step": 1964 }, { "epoch": 0.28, "learning_rate": 2.5295874249896717e-05, "loss": 0.8736, "step": 1965 }, { "epoch": 0.28, "learning_rate": 2.5290811850164894e-05, "loss": 0.8622, "step": 1966 }, { "epoch": 0.28, "learning_rate": 2.5285747235075942e-05, "loss": 0.8797, "step": 1967 }, { "epoch": 0.28, "learning_rate": 2.528068040572014e-05, "loss": 1.0485, "step": 1968 }, { "epoch": 0.28, "learning_rate": 2.5275611363188255e-05, "loss": 0.8677, "step": 1969 }, { "epoch": 0.28, "learning_rate": 2.5270540108571522e-05, "loss": 0.9816, "step": 1970 }, { "epoch": 0.28, "learning_rate": 2.5265466642961668e-05, "loss": 1.0776, "step": 1971 }, { "epoch": 0.28, "learning_rate": 2.526039096745087e-05, "loss": 0.779, "step": 1972 }, { "epoch": 0.28, "learning_rate": 2.5255313083131802e-05, "loss": 0.7511, "step": 1973 }, { "epoch": 0.28, "learning_rate": 2.5250232991097614e-05, "loss": 0.6493, "step": 1974 }, { "epoch": 0.28, "learning_rate": 2.524515069244191e-05, "loss": 0.8013, "step": 1975 }, { "epoch": 0.28, "learning_rate": 2.524006618825879e-05, "loss": 0.8398, "step": 1976 }, { "epoch": 0.28, "learning_rate": 2.5234979479642826e-05, "loss": 0.8315, "step": 1977 }, { "epoch": 0.28, "learning_rate": 2.5229890567689053e-05, "loss": 1.106, "step": 1978 }, { "epoch": 0.28, "learning_rate": 2.5224799453492993e-05, "loss": 0.769, "step": 1979 }, { "epoch": 0.28, "learning_rate": 2.5219706138150638e-05, "loss": 0.8047, "step": 1980 }, { "epoch": 0.28, "learning_rate": 2.5214610622758445e-05, "loss": 0.7924, "step": 1981 }, { "epoch": 0.28, "learning_rate": 2.520951290841336e-05, "loss": 0.8789, "step": 1982 }, { "epoch": 0.28, "learning_rate": 2.520441299621279e-05, "loss": 1.0469, "step": 1983 }, { "epoch": 0.28, "learning_rate": 2.5199310887254624e-05, "loss": 0.8789, "step": 1984 }, { "epoch": 0.28, "learning_rate": 2.5194206582637213e-05, "loss": 0.8387, "step": 1985 }, { "epoch": 0.28, "learning_rate": 2.51891000834594e-05, "loss": 0.8465, "step": 1986 }, { "epoch": 0.28, "learning_rate": 2.5183991390820477e-05, "loss": 0.8214, "step": 1987 }, { "epoch": 0.28, "learning_rate": 2.517888050582022e-05, "loss": 0.9096, "step": 1988 }, { "epoch": 0.28, "learning_rate": 2.5173767429558877e-05, "loss": 0.899, "step": 1989 }, { "epoch": 0.29, "learning_rate": 2.516865216313717e-05, "loss": 1.0681, "step": 1990 }, { "epoch": 0.29, "learning_rate": 2.5163534707656285e-05, "loss": 0.8956, "step": 1991 }, { "epoch": 0.29, "learning_rate": 2.5158415064217895e-05, "loss": 0.971, "step": 1992 }, { "epoch": 0.29, "learning_rate": 2.5153293233924116e-05, "loss": 0.9247, "step": 1993 }, { "epoch": 0.29, "learning_rate": 2.5148169217877558e-05, "loss": 0.7224, "step": 1994 }, { "epoch": 0.29, "learning_rate": 2.5143043017181298e-05, "loss": 0.8834, "step": 1995 }, { "epoch": 0.29, "learning_rate": 2.5137914632938875e-05, "loss": 0.846, "step": 1996 }, { "epoch": 0.29, "learning_rate": 2.513278406625431e-05, "loss": 0.9157, "step": 1997 }, { "epoch": 0.29, "learning_rate": 2.5127651318232083e-05, "loss": 1.0346, "step": 1998 }, { "epoch": 0.29, "learning_rate": 2.5122516389977143e-05, "loss": 1.024, "step": 1999 }, { "epoch": 0.29, "learning_rate": 2.5117379282594925e-05, "loss": 0.995, "step": 2000 }, { "epoch": 0.29, "learning_rate": 2.5112239997191306e-05, "loss": 0.9169, "step": 2001 }, { "epoch": 0.29, "learning_rate": 2.5107098534872656e-05, "loss": 0.6794, "step": 2002 }, { "epoch": 0.29, "learning_rate": 2.5101954896745808e-05, "loss": 0.7335, "step": 2003 }, { "epoch": 0.29, "learning_rate": 2.5096809083918052e-05, "loss": 0.851, "step": 2004 }, { "epoch": 0.29, "learning_rate": 2.509166109749716e-05, "loss": 0.7896, "step": 2005 }, { "epoch": 0.29, "learning_rate": 2.508651093859136e-05, "loss": 0.9191, "step": 2006 }, { "epoch": 0.29, "learning_rate": 2.5081358608309357e-05, "loss": 0.8256, "step": 2007 }, { "epoch": 0.29, "learning_rate": 2.5076204107760324e-05, "loss": 0.9632, "step": 2008 }, { "epoch": 0.29, "learning_rate": 2.5071047438053896e-05, "loss": 0.8036, "step": 2009 }, { "epoch": 0.29, "learning_rate": 2.5065888600300165e-05, "loss": 0.9135, "step": 2010 }, { "epoch": 0.29, "learning_rate": 2.506072759560972e-05, "loss": 0.832, "step": 2011 }, { "epoch": 0.29, "learning_rate": 2.5055564425093584e-05, "loss": 0.8002, "step": 2012 }, { "epoch": 0.29, "learning_rate": 2.5050399089863264e-05, "loss": 0.9353, "step": 2013 }, { "epoch": 0.29, "learning_rate": 2.504523159103073e-05, "loss": 0.9286, "step": 2014 }, { "epoch": 0.29, "learning_rate": 2.504006192970841e-05, "loss": 0.822, "step": 2015 }, { "epoch": 0.29, "learning_rate": 2.5034890107009215e-05, "loss": 0.7238, "step": 2016 }, { "epoch": 0.29, "learning_rate": 2.5029716124046504e-05, "loss": 0.7651, "step": 2017 }, { "epoch": 0.29, "learning_rate": 2.502453998193411e-05, "loss": 0.9065, "step": 2018 }, { "epoch": 0.29, "learning_rate": 2.501936168178632e-05, "loss": 0.7743, "step": 2019 }, { "epoch": 0.29, "learning_rate": 2.501418122471791e-05, "loss": 0.9632, "step": 2020 }, { "epoch": 0.29, "learning_rate": 2.5008998611844087e-05, "loss": 0.8482, "step": 2021 }, { "epoch": 0.29, "learning_rate": 2.5003813844280553e-05, "loss": 0.8817, "step": 2022 }, { "epoch": 0.29, "learning_rate": 2.499862692314345e-05, "loss": 0.7274, "step": 2023 }, { "epoch": 0.29, "learning_rate": 2.4993437849549396e-05, "loss": 0.8366, "step": 2024 }, { "epoch": 0.29, "learning_rate": 2.4988246624615477e-05, "loss": 1.0151, "step": 2025 }, { "epoch": 0.29, "learning_rate": 2.498305324945923e-05, "loss": 0.8955, "step": 2026 }, { "epoch": 0.29, "learning_rate": 2.497785772519866e-05, "loss": 1.0335, "step": 2027 }, { "epoch": 0.29, "learning_rate": 2.4972660052952237e-05, "loss": 0.9294, "step": 2028 }, { "epoch": 0.29, "learning_rate": 2.4967460233838886e-05, "loss": 0.9135, "step": 2029 }, { "epoch": 0.29, "learning_rate": 2.4962258268978008e-05, "loss": 0.9648, "step": 2030 }, { "epoch": 0.29, "learning_rate": 2.4957054159489442e-05, "loss": 0.9565, "step": 2031 }, { "epoch": 0.29, "learning_rate": 2.4951847906493524e-05, "loss": 0.846, "step": 2032 }, { "epoch": 0.29, "learning_rate": 2.494663951111102e-05, "loss": 0.846, "step": 2033 }, { "epoch": 0.29, "learning_rate": 2.494142897446317e-05, "loss": 0.9291, "step": 2034 }, { "epoch": 0.29, "learning_rate": 2.493621629767167e-05, "loss": 0.7068, "step": 2035 }, { "epoch": 0.29, "learning_rate": 2.493100148185869e-05, "loss": 0.8834, "step": 2036 }, { "epoch": 0.29, "learning_rate": 2.4925784528146843e-05, "loss": 0.8577, "step": 2037 }, { "epoch": 0.29, "learning_rate": 2.4920565437659214e-05, "loss": 0.9135, "step": 2038 }, { "epoch": 0.29, "learning_rate": 2.4915344211519342e-05, "loss": 0.8697, "step": 2039 }, { "epoch": 0.29, "learning_rate": 2.491012085085122e-05, "loss": 0.9286, "step": 2040 }, { "epoch": 0.29, "learning_rate": 2.4904895356779326e-05, "loss": 0.8493, "step": 2041 }, { "epoch": 0.29, "learning_rate": 2.4899667730428567e-05, "loss": 0.8549, "step": 2042 }, { "epoch": 0.29, "learning_rate": 2.4894437972924325e-05, "loss": 0.8242, "step": 2043 }, { "epoch": 0.29, "learning_rate": 2.488920608539243e-05, "loss": 0.7665, "step": 2044 }, { "epoch": 0.29, "learning_rate": 2.488397206895919e-05, "loss": 0.8415, "step": 2045 }, { "epoch": 0.29, "learning_rate": 2.4878735924751356e-05, "loss": 0.8789, "step": 2046 }, { "epoch": 0.29, "learning_rate": 2.4873497653896137e-05, "loss": 0.764, "step": 2047 }, { "epoch": 0.29, "learning_rate": 2.4868257257521197e-05, "loss": 0.748, "step": 2048 }, { "epoch": 0.29, "learning_rate": 2.4863014736754683e-05, "loss": 0.8276, "step": 2049 }, { "epoch": 0.29, "learning_rate": 2.4857770092725163e-05, "loss": 0.8214, "step": 2050 }, { "epoch": 0.29, "learning_rate": 2.485252332656168e-05, "loss": 0.8903, "step": 2051 }, { "epoch": 0.29, "learning_rate": 2.4847274439393738e-05, "loss": 0.8184, "step": 2052 }, { "epoch": 0.29, "learning_rate": 2.4842023432351295e-05, "loss": 0.8733, "step": 2053 }, { "epoch": 0.29, "learning_rate": 2.4836770306564758e-05, "loss": 1.0619, "step": 2054 }, { "epoch": 0.29, "learning_rate": 2.4831515063164997e-05, "loss": 0.9342, "step": 2055 }, { "epoch": 0.29, "learning_rate": 2.482625770328333e-05, "loss": 0.8229, "step": 2056 }, { "epoch": 0.29, "learning_rate": 2.4820998228051545e-05, "loss": 0.9648, "step": 2057 }, { "epoch": 0.29, "learning_rate": 2.4815736638601875e-05, "loss": 0.899, "step": 2058 }, { "epoch": 0.29, "learning_rate": 2.481047293606701e-05, "loss": 0.7294, "step": 2059 }, { "epoch": 0.3, "learning_rate": 2.480520712158009e-05, "loss": 0.9833, "step": 2060 }, { "epoch": 0.3, "learning_rate": 2.4799939196274718e-05, "loss": 0.8516, "step": 2061 }, { "epoch": 0.3, "learning_rate": 2.479466916128495e-05, "loss": 0.8926, "step": 2062 }, { "epoch": 0.3, "learning_rate": 2.478939701774529e-05, "loss": 1.0424, "step": 2063 }, { "epoch": 0.3, "learning_rate": 2.4784122766790703e-05, "loss": 0.7723, "step": 2064 }, { "epoch": 0.3, "learning_rate": 2.47788464095566e-05, "loss": 0.9364, "step": 2065 }, { "epoch": 0.3, "learning_rate": 2.477356794717886e-05, "loss": 0.7999, "step": 2066 }, { "epoch": 0.3, "learning_rate": 2.4768287380793798e-05, "loss": 0.6184, "step": 2067 }, { "epoch": 0.3, "learning_rate": 2.476300471153819e-05, "loss": 0.7469, "step": 2068 }, { "epoch": 0.3, "learning_rate": 2.475771994054926e-05, "loss": 0.8315, "step": 2069 }, { "epoch": 0.3, "learning_rate": 2.4752433068964695e-05, "loss": 0.9648, "step": 2070 }, { "epoch": 0.3, "learning_rate": 2.474714409792263e-05, "loss": 0.8471, "step": 2071 }, { "epoch": 0.3, "learning_rate": 2.4741853028561642e-05, "loss": 0.7988, "step": 2072 }, { "epoch": 0.3, "learning_rate": 2.4736559862020773e-05, "loss": 0.8477, "step": 2073 }, { "epoch": 0.3, "learning_rate": 2.47312645994395e-05, "loss": 0.8689, "step": 2074 }, { "epoch": 0.3, "learning_rate": 2.472596724195778e-05, "loss": 0.8945, "step": 2075 }, { "epoch": 0.3, "learning_rate": 2.4720667790715988e-05, "loss": 0.87, "step": 2076 }, { "epoch": 0.3, "learning_rate": 2.471536624685497e-05, "loss": 0.8569, "step": 2077 }, { "epoch": 0.3, "learning_rate": 2.4710062611516014e-05, "loss": 0.9576, "step": 2078 }, { "epoch": 0.3, "learning_rate": 2.4704756885840874e-05, "loss": 1.0324, "step": 2079 }, { "epoch": 0.3, "learning_rate": 2.4699449070971725e-05, "loss": 0.8474, "step": 2080 }, { "epoch": 0.3, "learning_rate": 2.4694139168051216e-05, "loss": 0.9593, "step": 2081 }, { "epoch": 0.3, "learning_rate": 2.4688827178222438e-05, "loss": 0.863, "step": 2082 }, { "epoch": 0.3, "learning_rate": 2.4683513102628925e-05, "loss": 0.8013, "step": 2083 }, { "epoch": 0.3, "learning_rate": 2.4678196942414677e-05, "loss": 0.6528, "step": 2084 }, { "epoch": 0.3, "learning_rate": 2.4672878698724124e-05, "loss": 1.0033, "step": 2085 }, { "epoch": 0.3, "learning_rate": 2.466755837270215e-05, "loss": 0.9436, "step": 2086 }, { "epoch": 0.3, "learning_rate": 2.4662235965494098e-05, "loss": 0.8845, "step": 2087 }, { "epoch": 0.3, "learning_rate": 2.4656911478245743e-05, "loss": 0.7885, "step": 2088 }, { "epoch": 0.3, "learning_rate": 2.4651584912103315e-05, "loss": 0.8795, "step": 2089 }, { "epoch": 0.3, "learning_rate": 2.4646256268213502e-05, "loss": 0.7422, "step": 2090 }, { "epoch": 0.3, "learning_rate": 2.464092554772342e-05, "loss": 0.8566, "step": 2091 }, { "epoch": 0.3, "learning_rate": 2.4635592751780643e-05, "loss": 1.1205, "step": 2092 }, { "epoch": 0.3, "learning_rate": 2.4630257881533195e-05, "loss": 0.9169, "step": 2093 }, { "epoch": 0.3, "learning_rate": 2.4624920938129528e-05, "loss": 0.8209, "step": 2094 }, { "epoch": 0.3, "learning_rate": 2.4619581922718565e-05, "loss": 0.8778, "step": 2095 }, { "epoch": 0.3, "learning_rate": 2.4614240836449665e-05, "loss": 0.8438, "step": 2096 }, { "epoch": 0.3, "learning_rate": 2.4608897680472632e-05, "loss": 0.9503, "step": 2097 }, { "epoch": 0.3, "learning_rate": 2.46035524559377e-05, "loss": 0.9241, "step": 2098 }, { "epoch": 0.3, "learning_rate": 2.4598205163995585e-05, "loss": 0.8449, "step": 2099 }, { "epoch": 0.3, "learning_rate": 2.4592855805797415e-05, "loss": 0.9163, "step": 2100 }, { "epoch": 0.3, "learning_rate": 2.4587504382494775e-05, "loss": 0.9219, "step": 2101 }, { "epoch": 0.3, "learning_rate": 2.4582150895239696e-05, "loss": 0.9074, "step": 2102 }, { "epoch": 0.3, "learning_rate": 2.457679534518465e-05, "loss": 0.8867, "step": 2103 }, { "epoch": 0.3, "learning_rate": 2.4571437733482554e-05, "loss": 0.9152, "step": 2104 }, { "epoch": 0.3, "learning_rate": 2.4566078061286776e-05, "loss": 0.8661, "step": 2105 }, { "epoch": 0.3, "learning_rate": 2.456071632975111e-05, "loss": 0.9043, "step": 2106 }, { "epoch": 0.3, "learning_rate": 2.455535254002981e-05, "loss": 0.7773, "step": 2107 }, { "epoch": 0.3, "learning_rate": 2.4549986693277564e-05, "loss": 0.829, "step": 2108 }, { "epoch": 0.3, "learning_rate": 2.4544618790649516e-05, "loss": 0.8119, "step": 2109 }, { "epoch": 0.3, "learning_rate": 2.4539248833301227e-05, "loss": 0.8477, "step": 2110 }, { "epoch": 0.3, "learning_rate": 2.4533876822388727e-05, "loss": 0.9096, "step": 2111 }, { "epoch": 0.3, "learning_rate": 2.4528502759068474e-05, "loss": 0.8803, "step": 2112 }, { "epoch": 0.3, "learning_rate": 2.452312664449737e-05, "loss": 0.8666, "step": 2113 }, { "epoch": 0.3, "learning_rate": 2.451774847983276e-05, "loss": 0.9978, "step": 2114 }, { "epoch": 0.3, "learning_rate": 2.4512368266232432e-05, "loss": 1.0898, "step": 2115 }, { "epoch": 0.3, "learning_rate": 2.450698600485461e-05, "loss": 1.019, "step": 2116 }, { "epoch": 0.3, "learning_rate": 2.4501601696857965e-05, "loss": 0.9235, "step": 2117 }, { "epoch": 0.3, "learning_rate": 2.44962153434016e-05, "loss": 0.8571, "step": 2118 }, { "epoch": 0.3, "learning_rate": 2.4490826945645062e-05, "loss": 0.9634, "step": 2119 }, { "epoch": 0.3, "learning_rate": 2.448543650474835e-05, "loss": 0.784, "step": 2120 }, { "epoch": 0.3, "learning_rate": 2.4480044021871885e-05, "loss": 0.7754, "step": 2121 }, { "epoch": 0.3, "learning_rate": 2.4474649498176538e-05, "loss": 0.8811, "step": 2122 }, { "epoch": 0.3, "learning_rate": 2.446925293482361e-05, "loss": 0.7757, "step": 2123 }, { "epoch": 0.3, "learning_rate": 2.446385433297486e-05, "loss": 1.0156, "step": 2124 }, { "epoch": 0.3, "learning_rate": 2.4458453693792464e-05, "loss": 0.7637, "step": 2125 }, { "epoch": 0.3, "learning_rate": 2.4453051018439044e-05, "loss": 0.7369, "step": 2126 }, { "epoch": 0.3, "learning_rate": 2.444764630807767e-05, "loss": 0.8532, "step": 2127 }, { "epoch": 0.3, "learning_rate": 2.444223956387184e-05, "loss": 0.76, "step": 2128 }, { "epoch": 0.3, "learning_rate": 2.443683078698549e-05, "loss": 0.9559, "step": 2129 }, { "epoch": 0.31, "learning_rate": 2.4431419978582998e-05, "loss": 0.8036, "step": 2130 }, { "epoch": 0.31, "learning_rate": 2.442600713982917e-05, "loss": 0.8658, "step": 2131 }, { "epoch": 0.31, "learning_rate": 2.4420592271889268e-05, "loss": 0.7681, "step": 2132 }, { "epoch": 0.31, "learning_rate": 2.4415175375928975e-05, "loss": 0.8728, "step": 2133 }, { "epoch": 0.31, "learning_rate": 2.440975645311441e-05, "loss": 0.8237, "step": 2134 }, { "epoch": 0.31, "learning_rate": 2.440433550461213e-05, "loss": 0.7584, "step": 2135 }, { "epoch": 0.31, "learning_rate": 2.4398912531589147e-05, "loss": 0.9766, "step": 2136 }, { "epoch": 0.31, "learning_rate": 2.439348753521288e-05, "loss": 0.7863, "step": 2137 }, { "epoch": 0.31, "learning_rate": 2.4388060516651197e-05, "loss": 0.9414, "step": 2138 }, { "epoch": 0.31, "learning_rate": 2.4382631477072403e-05, "loss": 0.9715, "step": 2139 }, { "epoch": 0.31, "learning_rate": 2.437720041764524e-05, "loss": 0.7553, "step": 2140 }, { "epoch": 0.31, "learning_rate": 2.4371767339538875e-05, "loss": 0.9146, "step": 2141 }, { "epoch": 0.31, "learning_rate": 2.4366332243922915e-05, "loss": 0.9855, "step": 2142 }, { "epoch": 0.31, "learning_rate": 2.4360895131967403e-05, "loss": 0.8398, "step": 2143 }, { "epoch": 0.31, "learning_rate": 2.435545600484282e-05, "loss": 0.7564, "step": 2144 }, { "epoch": 0.31, "learning_rate": 2.435001486372007e-05, "loss": 0.8449, "step": 2145 }, { "epoch": 0.31, "learning_rate": 2.4344571709770494e-05, "loss": 0.6797, "step": 2146 }, { "epoch": 0.31, "learning_rate": 2.433912654416587e-05, "loss": 0.5896, "step": 2147 }, { "epoch": 0.31, "learning_rate": 2.4333679368078407e-05, "loss": 0.9877, "step": 2148 }, { "epoch": 0.31, "learning_rate": 2.4328230182680755e-05, "loss": 0.8008, "step": 2149 }, { "epoch": 0.31, "learning_rate": 2.432277898914598e-05, "loss": 0.9559, "step": 2150 }, { "epoch": 0.31, "learning_rate": 2.4317325788647593e-05, "loss": 0.9263, "step": 2151 }, { "epoch": 0.31, "learning_rate": 2.431187058235953e-05, "loss": 0.7955, "step": 2152 }, { "epoch": 0.31, "learning_rate": 2.430641337145616e-05, "loss": 0.8906, "step": 2153 }, { "epoch": 0.31, "learning_rate": 2.4300954157112297e-05, "loss": 0.8214, "step": 2154 }, { "epoch": 0.31, "learning_rate": 2.4295492940503158e-05, "loss": 0.916, "step": 2155 }, { "epoch": 0.31, "learning_rate": 2.429002972280442e-05, "loss": 0.8783, "step": 2156 }, { "epoch": 0.31, "learning_rate": 2.428456450519218e-05, "loss": 0.856, "step": 2157 }, { "epoch": 0.31, "learning_rate": 2.4279097288842956e-05, "loss": 0.9531, "step": 2158 }, { "epoch": 0.31, "learning_rate": 2.4273628074933706e-05, "loss": 0.7905, "step": 2159 }, { "epoch": 0.31, "learning_rate": 2.4268156864641823e-05, "loss": 0.815, "step": 2160 }, { "epoch": 0.31, "learning_rate": 2.4262683659145116e-05, "loss": 0.8103, "step": 2161 }, { "epoch": 0.31, "learning_rate": 2.425720845962183e-05, "loss": 1.0123, "step": 2162 }, { "epoch": 0.31, "learning_rate": 2.4251731267250643e-05, "loss": 0.7374, "step": 2163 }, { "epoch": 0.31, "learning_rate": 2.424625208321066e-05, "loss": 0.8337, "step": 2164 }, { "epoch": 0.31, "learning_rate": 2.4240770908681413e-05, "loss": 0.8717, "step": 2165 }, { "epoch": 0.31, "learning_rate": 2.4235287744842867e-05, "loss": 0.7789, "step": 2166 }, { "epoch": 0.31, "learning_rate": 2.4229802592875404e-05, "loss": 0.8259, "step": 2167 }, { "epoch": 0.31, "learning_rate": 2.4224315453959843e-05, "loss": 1.0926, "step": 2168 }, { "epoch": 0.31, "learning_rate": 2.4218826329277435e-05, "loss": 0.9509, "step": 2169 }, { "epoch": 0.31, "learning_rate": 2.4213335220009845e-05, "loss": 0.9247, "step": 2170 }, { "epoch": 0.31, "learning_rate": 2.4207842127339183e-05, "loss": 0.8404, "step": 2171 }, { "epoch": 0.31, "learning_rate": 2.4202347052447964e-05, "loss": 0.868, "step": 2172 }, { "epoch": 0.31, "learning_rate": 2.4196849996519144e-05, "loss": 0.8398, "step": 2173 }, { "epoch": 0.31, "learning_rate": 2.4191350960736112e-05, "loss": 0.8862, "step": 2174 }, { "epoch": 0.31, "learning_rate": 2.418584994628267e-05, "loss": 0.7679, "step": 2175 }, { "epoch": 0.31, "learning_rate": 2.418034695434305e-05, "loss": 0.8477, "step": 2176 }, { "epoch": 0.31, "learning_rate": 2.4174841986101903e-05, "loss": 0.8153, "step": 2177 }, { "epoch": 0.31, "learning_rate": 2.4169335042744323e-05, "loss": 1.0798, "step": 2178 }, { "epoch": 0.31, "learning_rate": 2.416382612545581e-05, "loss": 0.7091, "step": 2179 }, { "epoch": 0.31, "learning_rate": 2.4158315235422304e-05, "loss": 1.0022, "step": 2180 }, { "epoch": 0.31, "learning_rate": 2.4152802373830164e-05, "loss": 0.7325, "step": 2181 }, { "epoch": 0.31, "learning_rate": 2.4147287541866163e-05, "loss": 0.8301, "step": 2182 }, { "epoch": 0.31, "learning_rate": 2.414177074071752e-05, "loss": 0.9665, "step": 2183 }, { "epoch": 0.31, "learning_rate": 2.413625197157186e-05, "loss": 0.9883, "step": 2184 }, { "epoch": 0.31, "learning_rate": 2.4130731235617235e-05, "loss": 1.0547, "step": 2185 }, { "epoch": 0.31, "learning_rate": 2.412520853404213e-05, "loss": 1.0173, "step": 2186 }, { "epoch": 0.31, "learning_rate": 2.4119683868035436e-05, "loss": 0.816, "step": 2187 }, { "epoch": 0.31, "learning_rate": 2.4114157238786485e-05, "loss": 0.7539, "step": 2188 }, { "epoch": 0.31, "learning_rate": 2.4108628647485017e-05, "loss": 0.8633, "step": 2189 }, { "epoch": 0.31, "learning_rate": 2.410309809532121e-05, "loss": 0.8301, "step": 2190 }, { "epoch": 0.31, "learning_rate": 2.409756558348565e-05, "loss": 0.8616, "step": 2191 }, { "epoch": 0.31, "learning_rate": 2.4092031113169345e-05, "loss": 0.9079, "step": 2192 }, { "epoch": 0.31, "learning_rate": 2.408649468556374e-05, "loss": 0.894, "step": 2193 }, { "epoch": 0.31, "learning_rate": 2.408095630186068e-05, "loss": 0.8739, "step": 2194 }, { "epoch": 0.31, "learning_rate": 2.407541596325245e-05, "loss": 0.7824, "step": 2195 }, { "epoch": 0.31, "learning_rate": 2.4069873670931742e-05, "loss": 0.9174, "step": 2196 }, { "epoch": 0.31, "learning_rate": 2.406432942609168e-05, "loss": 0.8895, "step": 2197 }, { "epoch": 0.31, "learning_rate": 2.4058783229925793e-05, "loss": 0.7988, "step": 2198 }, { "epoch": 0.31, "learning_rate": 2.4053235083628052e-05, "loss": 0.7709, "step": 2199 }, { "epoch": 0.32, "learning_rate": 2.4047684988392836e-05, "loss": 0.7095, "step": 2200 }, { "epoch": 0.32, "learning_rate": 2.4042132945414932e-05, "loss": 0.7932, "step": 2201 }, { "epoch": 0.32, "learning_rate": 2.4036578955889562e-05, "loss": 0.7891, "step": 2202 }, { "epoch": 0.32, "learning_rate": 2.403102302101236e-05, "loss": 0.9171, "step": 2203 }, { "epoch": 0.32, "learning_rate": 2.402546514197939e-05, "loss": 0.8471, "step": 2204 }, { "epoch": 0.32, "learning_rate": 2.4019905319987123e-05, "loss": 0.8047, "step": 2205 }, { "epoch": 0.32, "learning_rate": 2.4014343556232444e-05, "loss": 1.0078, "step": 2206 }, { "epoch": 0.32, "learning_rate": 2.400877985191267e-05, "loss": 0.8255, "step": 2207 }, { "epoch": 0.32, "learning_rate": 2.4003214208225526e-05, "loss": 0.9235, "step": 2208 }, { "epoch": 0.32, "learning_rate": 2.399764662636916e-05, "loss": 0.7277, "step": 2209 }, { "epoch": 0.32, "learning_rate": 2.3992077107542123e-05, "loss": 0.8979, "step": 2210 }, { "epoch": 0.32, "learning_rate": 2.3986505652943413e-05, "loss": 0.9754, "step": 2211 }, { "epoch": 0.32, "learning_rate": 2.3980932263772416e-05, "loss": 0.7977, "step": 2212 }, { "epoch": 0.32, "learning_rate": 2.3975356941228944e-05, "loss": 0.8175, "step": 2213 }, { "epoch": 0.32, "learning_rate": 2.3969779686513232e-05, "loss": 0.9777, "step": 2214 }, { "epoch": 0.32, "learning_rate": 2.3964200500825916e-05, "loss": 0.8998, "step": 2215 }, { "epoch": 0.32, "learning_rate": 2.3958619385368068e-05, "loss": 0.9316, "step": 2216 }, { "epoch": 0.32, "learning_rate": 2.3953036341341153e-05, "loss": 1.0067, "step": 2217 }, { "epoch": 0.32, "learning_rate": 2.3947451369947067e-05, "loss": 0.8862, "step": 2218 }, { "epoch": 0.32, "learning_rate": 2.3941864472388117e-05, "loss": 1.0446, "step": 2219 }, { "epoch": 0.32, "learning_rate": 2.3936275649867023e-05, "loss": 0.9231, "step": 2220 }, { "epoch": 0.32, "learning_rate": 2.3930684903586922e-05, "loss": 0.9001, "step": 2221 }, { "epoch": 0.32, "learning_rate": 2.392509223475136e-05, "loss": 0.8761, "step": 2222 }, { "epoch": 0.32, "learning_rate": 2.3919497644564302e-05, "loss": 0.8295, "step": 2223 }, { "epoch": 0.32, "learning_rate": 2.3913901134230122e-05, "loss": 0.9727, "step": 2224 }, { "epoch": 0.32, "learning_rate": 2.3908302704953614e-05, "loss": 0.8627, "step": 2225 }, { "epoch": 0.32, "learning_rate": 2.3902702357939982e-05, "loss": 0.8449, "step": 2226 }, { "epoch": 0.32, "learning_rate": 2.3897100094394834e-05, "loss": 0.745, "step": 2227 }, { "epoch": 0.32, "learning_rate": 2.3891495915524214e-05, "loss": 0.9548, "step": 2228 }, { "epoch": 0.32, "learning_rate": 2.3885889822534546e-05, "loss": 0.8605, "step": 2229 }, { "epoch": 0.32, "learning_rate": 2.388028181663269e-05, "loss": 0.8527, "step": 2230 }, { "epoch": 0.32, "learning_rate": 2.387467189902592e-05, "loss": 0.8627, "step": 2231 }, { "epoch": 0.32, "learning_rate": 2.3869060070921898e-05, "loss": 0.7427, "step": 2232 }, { "epoch": 0.32, "learning_rate": 2.3863446333528722e-05, "loss": 0.8426, "step": 2233 }, { "epoch": 0.32, "learning_rate": 2.385783068805488e-05, "loss": 0.9269, "step": 2234 }, { "epoch": 0.32, "learning_rate": 2.3852213135709292e-05, "loss": 0.8315, "step": 2235 }, { "epoch": 0.32, "learning_rate": 2.3846593677701273e-05, "loss": 1.0608, "step": 2236 }, { "epoch": 0.32, "learning_rate": 2.3840972315240558e-05, "loss": 0.8153, "step": 2237 }, { "epoch": 0.32, "learning_rate": 2.383534904953728e-05, "loss": 0.9534, "step": 2238 }, { "epoch": 0.32, "learning_rate": 2.3829723881801996e-05, "loss": 0.9185, "step": 2239 }, { "epoch": 0.32, "learning_rate": 2.3824096813245657e-05, "loss": 0.8131, "step": 2240 }, { "epoch": 0.32, "learning_rate": 2.381846784507964e-05, "loss": 0.9174, "step": 2241 }, { "epoch": 0.32, "learning_rate": 2.3812836978515722e-05, "loss": 0.7913, "step": 2242 }, { "epoch": 0.32, "learning_rate": 2.380720421476608e-05, "loss": 0.8449, "step": 2243 }, { "epoch": 0.32, "learning_rate": 2.3801569555043317e-05, "loss": 0.9888, "step": 2244 }, { "epoch": 0.32, "learning_rate": 2.3795933000560435e-05, "loss": 0.7815, "step": 2245 }, { "epoch": 0.32, "learning_rate": 2.3790294552530845e-05, "loss": 0.8214, "step": 2246 }, { "epoch": 0.32, "learning_rate": 2.3784654212168364e-05, "loss": 0.8412, "step": 2247 }, { "epoch": 0.32, "learning_rate": 2.3779011980687215e-05, "loss": 0.7974, "step": 2248 }, { "epoch": 0.32, "learning_rate": 2.3773367859302035e-05, "loss": 0.8111, "step": 2249 }, { "epoch": 0.32, "learning_rate": 2.376772184922786e-05, "loss": 1.1044, "step": 2250 }, { "epoch": 0.32, "learning_rate": 2.376207395168014e-05, "loss": 0.9141, "step": 2251 }, { "epoch": 0.32, "learning_rate": 2.3756424167874724e-05, "loss": 0.9135, "step": 2252 }, { "epoch": 0.32, "learning_rate": 2.3750772499027875e-05, "loss": 0.8948, "step": 2253 }, { "epoch": 0.32, "learning_rate": 2.3745118946356254e-05, "loss": 0.9029, "step": 2254 }, { "epoch": 0.32, "learning_rate": 2.373946351107693e-05, "loss": 1.0798, "step": 2255 }, { "epoch": 0.32, "learning_rate": 2.373380619440738e-05, "loss": 0.9866, "step": 2256 }, { "epoch": 0.32, "learning_rate": 2.3728146997565488e-05, "loss": 0.6816, "step": 2257 }, { "epoch": 0.32, "learning_rate": 2.3722485921769535e-05, "loss": 0.7997, "step": 2258 }, { "epoch": 0.32, "learning_rate": 2.371682296823821e-05, "loss": 0.8803, "step": 2259 }, { "epoch": 0.32, "learning_rate": 2.3711158138190602e-05, "loss": 0.7941, "step": 2260 }, { "epoch": 0.32, "learning_rate": 2.3705491432846224e-05, "loss": 0.8181, "step": 2261 }, { "epoch": 0.32, "learning_rate": 2.3699822853424966e-05, "loss": 0.8945, "step": 2262 }, { "epoch": 0.32, "learning_rate": 2.3694152401147133e-05, "loss": 0.8125, "step": 2263 }, { "epoch": 0.32, "learning_rate": 2.3688480077233434e-05, "loss": 1.0251, "step": 2264 }, { "epoch": 0.32, "learning_rate": 2.3682805882904982e-05, "loss": 0.7213, "step": 2265 }, { "epoch": 0.32, "learning_rate": 2.3677129819383288e-05, "loss": 0.952, "step": 2266 }, { "epoch": 0.32, "learning_rate": 2.367145188789027e-05, "loss": 0.8186, "step": 2267 }, { "epoch": 0.32, "learning_rate": 2.366577208964825e-05, "loss": 0.8457, "step": 2268 }, { "epoch": 0.33, "learning_rate": 2.3660090425879936e-05, "loss": 0.9994, "step": 2269 }, { "epoch": 0.33, "learning_rate": 2.3654406897808468e-05, "loss": 0.8571, "step": 2270 }, { "epoch": 0.33, "learning_rate": 2.364872150665735e-05, "loss": 0.8669, "step": 2271 }, { "epoch": 0.33, "learning_rate": 2.3643034253650518e-05, "loss": 0.8923, "step": 2272 }, { "epoch": 0.33, "learning_rate": 2.3637345140012284e-05, "loss": 0.8251, "step": 2273 }, { "epoch": 0.33, "learning_rate": 2.3631654166967392e-05, "loss": 0.8432, "step": 2274 }, { "epoch": 0.33, "learning_rate": 2.3625961335740957e-05, "loss": 0.9487, "step": 2275 }, { "epoch": 0.33, "learning_rate": 2.3620266647558503e-05, "loss": 0.8717, "step": 2276 }, { "epoch": 0.33, "learning_rate": 2.361457010364596e-05, "loss": 0.7815, "step": 2277 }, { "epoch": 0.33, "learning_rate": 2.360887170522965e-05, "loss": 1.0156, "step": 2278 }, { "epoch": 0.33, "learning_rate": 2.3603171453536302e-05, "loss": 1.0312, "step": 2279 }, { "epoch": 0.33, "learning_rate": 2.3597469349793034e-05, "loss": 0.7612, "step": 2280 }, { "epoch": 0.33, "learning_rate": 2.3591765395227366e-05, "loss": 0.8242, "step": 2281 }, { "epoch": 0.33, "learning_rate": 2.3586059591067223e-05, "loss": 0.7969, "step": 2282 }, { "epoch": 0.33, "learning_rate": 2.3580351938540927e-05, "loss": 0.8532, "step": 2283 }, { "epoch": 0.33, "learning_rate": 2.3574642438877183e-05, "loss": 0.6883, "step": 2284 }, { "epoch": 0.33, "learning_rate": 2.3568931093305113e-05, "loss": 0.6574, "step": 2285 }, { "epoch": 0.33, "learning_rate": 2.3563217903054224e-05, "loss": 0.8041, "step": 2286 }, { "epoch": 0.33, "learning_rate": 2.3557502869354435e-05, "loss": 1.0123, "step": 2287 }, { "epoch": 0.33, "learning_rate": 2.355178599343604e-05, "loss": 1.053, "step": 2288 }, { "epoch": 0.33, "learning_rate": 2.354606727652974e-05, "loss": 0.8666, "step": 2289 }, { "epoch": 0.33, "learning_rate": 2.3540346719866642e-05, "loss": 0.8906, "step": 2290 }, { "epoch": 0.33, "learning_rate": 2.3534624324678234e-05, "loss": 1.082, "step": 2291 }, { "epoch": 0.33, "learning_rate": 2.352890009219641e-05, "loss": 1.1657, "step": 2292 }, { "epoch": 0.33, "learning_rate": 2.352317402365345e-05, "loss": 0.7634, "step": 2293 }, { "epoch": 0.33, "learning_rate": 2.3517446120282045e-05, "loss": 0.8775, "step": 2294 }, { "epoch": 0.33, "learning_rate": 2.3511716383315258e-05, "loss": 0.9275, "step": 2295 }, { "epoch": 0.33, "learning_rate": 2.350598481398657e-05, "loss": 0.8943, "step": 2296 }, { "epoch": 0.33, "learning_rate": 2.3500251413529842e-05, "loss": 0.9838, "step": 2297 }, { "epoch": 0.33, "learning_rate": 2.3494516183179332e-05, "loss": 0.8912, "step": 2298 }, { "epoch": 0.33, "learning_rate": 2.3488779124169694e-05, "loss": 0.8661, "step": 2299 }, { "epoch": 0.33, "learning_rate": 2.3483040237735976e-05, "loss": 0.8982, "step": 2300 }, { "epoch": 0.33, "learning_rate": 2.3477299525113617e-05, "loss": 0.8446, "step": 2301 }, { "epoch": 0.33, "learning_rate": 2.3471556987538448e-05, "loss": 0.7813, "step": 2302 }, { "epoch": 0.33, "learning_rate": 2.3465812626246702e-05, "loss": 0.6773, "step": 2303 }, { "epoch": 0.33, "learning_rate": 2.3460066442474995e-05, "loss": 0.8502, "step": 2304 }, { "epoch": 0.33, "learning_rate": 2.3454318437460336e-05, "loss": 0.8488, "step": 2305 }, { "epoch": 0.33, "learning_rate": 2.3448568612440122e-05, "loss": 0.9593, "step": 2306 }, { "epoch": 0.33, "learning_rate": 2.3442816968652166e-05, "loss": 0.8493, "step": 2307 }, { "epoch": 0.33, "learning_rate": 2.3437063507334634e-05, "loss": 0.7952, "step": 2308 }, { "epoch": 0.33, "learning_rate": 2.3431308229726125e-05, "loss": 0.9342, "step": 2309 }, { "epoch": 0.33, "learning_rate": 2.3425551137065588e-05, "loss": 1.0737, "step": 2310 }, { "epoch": 0.33, "learning_rate": 2.3419792230592394e-05, "loss": 0.764, "step": 2311 }, { "epoch": 0.33, "learning_rate": 2.341403151154629e-05, "loss": 0.6666, "step": 2312 }, { "epoch": 0.33, "learning_rate": 2.340826898116742e-05, "loss": 0.9621, "step": 2313 }, { "epoch": 0.33, "learning_rate": 2.3402504640696307e-05, "loss": 0.9439, "step": 2314 }, { "epoch": 0.33, "learning_rate": 2.3396738491373875e-05, "loss": 1.0848, "step": 2315 }, { "epoch": 0.33, "learning_rate": 2.3390970534441433e-05, "loss": 0.9408, "step": 2316 }, { "epoch": 0.33, "learning_rate": 2.3385200771140677e-05, "loss": 0.9866, "step": 2317 }, { "epoch": 0.33, "learning_rate": 2.33794292027137e-05, "loss": 0.856, "step": 2318 }, { "epoch": 0.33, "learning_rate": 2.337365583040297e-05, "loss": 0.7531, "step": 2319 }, { "epoch": 0.33, "learning_rate": 2.3367880655451363e-05, "loss": 0.8887, "step": 2320 }, { "epoch": 0.33, "learning_rate": 2.3362103679102116e-05, "loss": 0.7455, "step": 2321 }, { "epoch": 0.33, "learning_rate": 2.3356324902598886e-05, "loss": 0.9777, "step": 2322 }, { "epoch": 0.33, "learning_rate": 2.335054432718568e-05, "loss": 0.942, "step": 2323 }, { "epoch": 0.33, "learning_rate": 2.334476195410693e-05, "loss": 0.6406, "step": 2324 }, { "epoch": 0.33, "learning_rate": 2.3338977784607434e-05, "loss": 1.0117, "step": 2325 }, { "epoch": 0.33, "learning_rate": 2.3333191819932373e-05, "loss": 0.7974, "step": 2326 }, { "epoch": 0.33, "learning_rate": 2.3327404061327334e-05, "loss": 0.8521, "step": 2327 }, { "epoch": 0.33, "learning_rate": 2.3321614510038266e-05, "loss": 0.9459, "step": 2328 }, { "epoch": 0.33, "learning_rate": 2.3315823167311525e-05, "loss": 0.8304, "step": 2329 }, { "epoch": 0.33, "learning_rate": 2.3310030034393845e-05, "loss": 0.9102, "step": 2330 }, { "epoch": 0.33, "learning_rate": 2.330423511253233e-05, "loss": 0.7288, "step": 2331 }, { "epoch": 0.33, "learning_rate": 2.3298438402974503e-05, "loss": 0.8237, "step": 2332 }, { "epoch": 0.33, "learning_rate": 2.3292639906968238e-05, "loss": 0.7743, "step": 2333 }, { "epoch": 0.33, "learning_rate": 2.328683962576181e-05, "loss": 0.8767, "step": 2334 }, { "epoch": 0.33, "learning_rate": 2.328103756060388e-05, "loss": 0.8616, "step": 2335 }, { "epoch": 0.33, "learning_rate": 2.3275233712743487e-05, "loss": 0.7026, "step": 2336 }, { "epoch": 0.33, "learning_rate": 2.326942808343006e-05, "loss": 0.7882, "step": 2337 }, { "epoch": 0.33, "learning_rate": 2.32636206739134e-05, "loss": 0.9671, "step": 2338 }, { "epoch": 0.34, "learning_rate": 2.32578114854437e-05, "loss": 0.889, "step": 2339 }, { "epoch": 0.34, "learning_rate": 2.3252000519271536e-05, "loss": 0.7372, "step": 2340 }, { "epoch": 0.34, "learning_rate": 2.3246187776647866e-05, "loss": 0.8616, "step": 2341 }, { "epoch": 0.34, "learning_rate": 2.3240373258824027e-05, "loss": 1.0257, "step": 2342 }, { "epoch": 0.34, "learning_rate": 2.323455696705175e-05, "loss": 0.8382, "step": 2343 }, { "epoch": 0.34, "learning_rate": 2.3228738902583127e-05, "loss": 0.8722, "step": 2344 }, { "epoch": 0.34, "learning_rate": 2.322291906667065e-05, "loss": 0.7467, "step": 2345 }, { "epoch": 0.34, "learning_rate": 2.321709746056718e-05, "loss": 0.8172, "step": 2346 }, { "epoch": 0.34, "learning_rate": 2.3211274085525972e-05, "loss": 0.9208, "step": 2347 }, { "epoch": 0.34, "learning_rate": 2.3205448942800648e-05, "loss": 0.8276, "step": 2348 }, { "epoch": 0.34, "learning_rate": 2.3199622033645224e-05, "loss": 0.7768, "step": 2349 }, { "epoch": 0.34, "learning_rate": 2.3193793359314082e-05, "loss": 0.9403, "step": 2350 }, { "epoch": 0.34, "learning_rate": 2.3187962921061992e-05, "loss": 0.9403, "step": 2351 }, { "epoch": 0.34, "learning_rate": 2.3182130720144107e-05, "loss": 0.9275, "step": 2352 }, { "epoch": 0.34, "learning_rate": 2.317629675781596e-05, "loss": 0.9275, "step": 2353 }, { "epoch": 0.34, "learning_rate": 2.317046103533345e-05, "loss": 0.9057, "step": 2354 }, { "epoch": 0.34, "learning_rate": 2.3164623553952867e-05, "loss": 0.9185, "step": 2355 }, { "epoch": 0.34, "learning_rate": 2.315878431493087e-05, "loss": 0.9487, "step": 2356 }, { "epoch": 0.34, "learning_rate": 2.3152943319524515e-05, "loss": 0.9392, "step": 2357 }, { "epoch": 0.34, "learning_rate": 2.314710056899122e-05, "loss": 0.7963, "step": 2358 }, { "epoch": 0.34, "learning_rate": 2.3141256064588774e-05, "loss": 1.0508, "step": 2359 }, { "epoch": 0.34, "learning_rate": 2.3135409807575364e-05, "loss": 0.8839, "step": 2360 }, { "epoch": 0.34, "learning_rate": 2.3129561799209545e-05, "loss": 0.829, "step": 2361 }, { "epoch": 0.34, "learning_rate": 2.3123712040750247e-05, "loss": 0.7543, "step": 2362 }, { "epoch": 0.34, "learning_rate": 2.3117860533456774e-05, "loss": 0.8047, "step": 2363 }, { "epoch": 0.34, "learning_rate": 2.3112007278588812e-05, "loss": 1.0597, "step": 2364 }, { "epoch": 0.34, "learning_rate": 2.3106152277406432e-05, "loss": 0.9766, "step": 2365 }, { "epoch": 0.34, "learning_rate": 2.3100295531170058e-05, "loss": 0.966, "step": 2366 }, { "epoch": 0.34, "learning_rate": 2.3094437041140507e-05, "loss": 0.7296, "step": 2367 }, { "epoch": 0.34, "learning_rate": 2.308857680857896e-05, "loss": 0.9035, "step": 2368 }, { "epoch": 0.34, "learning_rate": 2.3082714834747002e-05, "loss": 0.9347, "step": 2369 }, { "epoch": 0.34, "learning_rate": 2.307685112090655e-05, "loss": 0.8253, "step": 2370 }, { "epoch": 0.34, "learning_rate": 2.307098566831992e-05, "loss": 0.7427, "step": 2371 }, { "epoch": 0.34, "learning_rate": 2.30651184782498e-05, "loss": 0.6785, "step": 2372 }, { "epoch": 0.34, "learning_rate": 2.3059249551959264e-05, "loss": 0.9632, "step": 2373 }, { "epoch": 0.34, "learning_rate": 2.3053378890711724e-05, "loss": 0.909, "step": 2374 }, { "epoch": 0.34, "learning_rate": 2.3047506495771002e-05, "loss": 0.8622, "step": 2375 }, { "epoch": 0.34, "learning_rate": 2.3041632368401277e-05, "loss": 0.8984, "step": 2376 }, { "epoch": 0.34, "learning_rate": 2.30357565098671e-05, "loss": 0.856, "step": 2377 }, { "epoch": 0.34, "learning_rate": 2.3029878921433402e-05, "loss": 0.8703, "step": 2378 }, { "epoch": 0.34, "learning_rate": 2.3023999604365485e-05, "loss": 0.9816, "step": 2379 }, { "epoch": 0.34, "learning_rate": 2.301811855992901e-05, "loss": 0.8588, "step": 2380 }, { "epoch": 0.34, "learning_rate": 2.3012235789390026e-05, "loss": 0.9732, "step": 2381 }, { "epoch": 0.34, "learning_rate": 2.300635129401495e-05, "loss": 1.0307, "step": 2382 }, { "epoch": 0.34, "learning_rate": 2.3000465075070574e-05, "loss": 0.7109, "step": 2383 }, { "epoch": 0.34, "learning_rate": 2.2994577133824045e-05, "loss": 0.8711, "step": 2384 }, { "epoch": 0.34, "learning_rate": 2.2988687471542886e-05, "loss": 0.8862, "step": 2385 }, { "epoch": 0.34, "learning_rate": 2.2982796089495013e-05, "loss": 0.817, "step": 2386 }, { "epoch": 0.34, "learning_rate": 2.297690298894868e-05, "loss": 0.8973, "step": 2387 }, { "epoch": 0.34, "learning_rate": 2.2971008171172534e-05, "loss": 0.8259, "step": 2388 }, { "epoch": 0.34, "learning_rate": 2.2965111637435578e-05, "loss": 0.8058, "step": 2389 }, { "epoch": 0.34, "learning_rate": 2.2959213389007194e-05, "loss": 0.736, "step": 2390 }, { "epoch": 0.34, "learning_rate": 2.2953313427157128e-05, "loss": 0.8368, "step": 2391 }, { "epoch": 0.34, "learning_rate": 2.2947411753155496e-05, "loss": 1.0273, "step": 2392 }, { "epoch": 0.34, "learning_rate": 2.2941508368272777e-05, "loss": 0.6525, "step": 2393 }, { "epoch": 0.34, "learning_rate": 2.2935603273779835e-05, "loss": 0.8814, "step": 2394 }, { "epoch": 0.34, "learning_rate": 2.292969647094788e-05, "loss": 0.8348, "step": 2395 }, { "epoch": 0.34, "learning_rate": 2.2923787961048505e-05, "loss": 1.0698, "step": 2396 }, { "epoch": 0.34, "learning_rate": 2.2917877745353662e-05, "loss": 0.7718, "step": 2397 }, { "epoch": 0.34, "learning_rate": 2.2911965825135683e-05, "loss": 0.7958, "step": 2398 }, { "epoch": 0.34, "learning_rate": 2.2906052201667248e-05, "loss": 0.7405, "step": 2399 }, { "epoch": 0.34, "learning_rate": 2.290013687622142e-05, "loss": 0.8627, "step": 2400 }, { "epoch": 0.34, "learning_rate": 2.289421985007162e-05, "loss": 0.7773, "step": 2401 }, { "epoch": 0.34, "learning_rate": 2.2888301124491636e-05, "loss": 0.9866, "step": 2402 }, { "epoch": 0.34, "learning_rate": 2.288238070075563e-05, "loss": 1.0312, "step": 2403 }, { "epoch": 0.34, "learning_rate": 2.2876458580138116e-05, "loss": 0.8636, "step": 2404 }, { "epoch": 0.34, "learning_rate": 2.2870534763913984e-05, "loss": 0.904, "step": 2405 }, { "epoch": 0.34, "learning_rate": 2.2864609253358477e-05, "loss": 0.7402, "step": 2406 }, { "epoch": 0.34, "learning_rate": 2.285868204974722e-05, "loss": 0.9018, "step": 2407 }, { "epoch": 0.34, "learning_rate": 2.285275315435619e-05, "loss": 0.6722, "step": 2408 }, { "epoch": 0.35, "learning_rate": 2.2846822568461727e-05, "loss": 0.7662, "step": 2409 }, { "epoch": 0.35, "learning_rate": 2.284089029334055e-05, "loss": 0.9185, "step": 2410 }, { "epoch": 0.35, "learning_rate": 2.2834956330269717e-05, "loss": 0.8583, "step": 2411 }, { "epoch": 0.35, "learning_rate": 2.2829020680526682e-05, "loss": 0.7573, "step": 2412 }, { "epoch": 0.35, "learning_rate": 2.282308334538923e-05, "loss": 0.9816, "step": 2413 }, { "epoch": 0.35, "learning_rate": 2.2817144326135517e-05, "loss": 0.8278, "step": 2414 }, { "epoch": 0.35, "learning_rate": 2.2811203624044078e-05, "loss": 0.858, "step": 2415 }, { "epoch": 0.35, "learning_rate": 2.28052612403938e-05, "loss": 0.76, "step": 2416 }, { "epoch": 0.35, "learning_rate": 2.2799317176463926e-05, "loss": 0.8398, "step": 2417 }, { "epoch": 0.35, "learning_rate": 2.2793371433534066e-05, "loss": 0.9085, "step": 2418 }, { "epoch": 0.35, "learning_rate": 2.278742401288419e-05, "loss": 0.9286, "step": 2419 }, { "epoch": 0.35, "learning_rate": 2.278147491579464e-05, "loss": 0.9057, "step": 2420 }, { "epoch": 0.35, "learning_rate": 2.2775524143546096e-05, "loss": 0.8418, "step": 2421 }, { "epoch": 0.35, "learning_rate": 2.2769571697419614e-05, "loss": 0.9297, "step": 2422 }, { "epoch": 0.35, "learning_rate": 2.2763617578696623e-05, "loss": 1.1083, "step": 2423 }, { "epoch": 0.35, "learning_rate": 2.2757661788658886e-05, "loss": 0.9877, "step": 2424 }, { "epoch": 0.35, "learning_rate": 2.275170432858853e-05, "loss": 0.8996, "step": 2425 }, { "epoch": 0.35, "learning_rate": 2.2745745199768067e-05, "loss": 0.7472, "step": 2426 }, { "epoch": 0.35, "learning_rate": 2.273978440348033e-05, "loss": 0.8976, "step": 2427 }, { "epoch": 0.35, "learning_rate": 2.2733821941008546e-05, "loss": 0.9782, "step": 2428 }, { "epoch": 0.35, "learning_rate": 2.272785781363628e-05, "loss": 0.8454, "step": 2429 }, { "epoch": 0.35, "learning_rate": 2.2721892022647464e-05, "loss": 0.9883, "step": 2430 }, { "epoch": 0.35, "learning_rate": 2.2715924569326375e-05, "loss": 0.87, "step": 2431 }, { "epoch": 0.35, "learning_rate": 2.2709955454957677e-05, "loss": 0.8072, "step": 2432 }, { "epoch": 0.35, "learning_rate": 2.270398468082635e-05, "loss": 0.8761, "step": 2433 }, { "epoch": 0.35, "learning_rate": 2.2698012248217772e-05, "loss": 0.8114, "step": 2434 }, { "epoch": 0.35, "learning_rate": 2.269203815841765e-05, "loss": 0.8929, "step": 2435 }, { "epoch": 0.35, "learning_rate": 2.2686062412712066e-05, "loss": 1.1953, "step": 2436 }, { "epoch": 0.35, "learning_rate": 2.268008501238744e-05, "loss": 0.9241, "step": 2437 }, { "epoch": 0.35, "learning_rate": 2.2674105958730566e-05, "loss": 0.7866, "step": 2438 }, { "epoch": 0.35, "learning_rate": 2.266812525302858e-05, "loss": 0.8979, "step": 2439 }, { "epoch": 0.35, "learning_rate": 2.266214289656899e-05, "loss": 0.8219, "step": 2440 }, { "epoch": 0.35, "learning_rate": 2.2656158890639637e-05, "loss": 0.9542, "step": 2441 }, { "epoch": 0.35, "learning_rate": 2.265017323652874e-05, "loss": 0.7952, "step": 2442 }, { "epoch": 0.35, "learning_rate": 2.264418593552485e-05, "loss": 0.9537, "step": 2443 }, { "epoch": 0.35, "learning_rate": 2.26381969889169e-05, "loss": 0.9051, "step": 2444 }, { "epoch": 0.35, "learning_rate": 2.263220639799415e-05, "loss": 0.7104, "step": 2445 }, { "epoch": 0.35, "learning_rate": 2.2626214164046238e-05, "loss": 0.87, "step": 2446 }, { "epoch": 0.35, "learning_rate": 2.2620220288363126e-05, "loss": 0.8292, "step": 2447 }, { "epoch": 0.35, "learning_rate": 2.261422477223516e-05, "loss": 0.7757, "step": 2448 }, { "epoch": 0.35, "learning_rate": 2.2608227616953027e-05, "loss": 1.0564, "step": 2449 }, { "epoch": 0.35, "learning_rate": 2.2602228823807756e-05, "loss": 0.9615, "step": 2450 }, { "epoch": 0.35, "learning_rate": 2.2596228394090755e-05, "loss": 0.8544, "step": 2451 }, { "epoch": 0.35, "learning_rate": 2.2590226329093747e-05, "loss": 0.8407, "step": 2452 }, { "epoch": 0.35, "learning_rate": 2.2584222630108846e-05, "loss": 0.8638, "step": 2453 }, { "epoch": 0.35, "learning_rate": 2.2578217298428494e-05, "loss": 1.0251, "step": 2454 }, { "epoch": 0.35, "learning_rate": 2.2572210335345488e-05, "loss": 0.9475, "step": 2455 }, { "epoch": 0.35, "learning_rate": 2.2566201742152976e-05, "loss": 0.87, "step": 2456 }, { "epoch": 0.35, "learning_rate": 2.256019152014447e-05, "loss": 0.8292, "step": 2457 }, { "epoch": 0.35, "learning_rate": 2.2554179670613807e-05, "loss": 0.8917, "step": 2458 }, { "epoch": 0.35, "learning_rate": 2.2548166194855205e-05, "loss": 1.0022, "step": 2459 }, { "epoch": 0.35, "learning_rate": 2.2542151094163208e-05, "loss": 0.8415, "step": 2460 }, { "epoch": 0.35, "learning_rate": 2.2536134369832717e-05, "loss": 0.9526, "step": 2461 }, { "epoch": 0.35, "learning_rate": 2.2530116023158992e-05, "loss": 0.9431, "step": 2462 }, { "epoch": 0.35, "learning_rate": 2.252409605543762e-05, "loss": 0.8845, "step": 2463 }, { "epoch": 0.35, "learning_rate": 2.2518074467964567e-05, "loss": 0.7444, "step": 2464 }, { "epoch": 0.35, "learning_rate": 2.251205126203612e-05, "loss": 0.8672, "step": 2465 }, { "epoch": 0.35, "learning_rate": 2.250602643894894e-05, "loss": 0.8203, "step": 2466 }, { "epoch": 0.35, "learning_rate": 2.25e-05, "loss": 0.909, "step": 2467 }, { "epoch": 0.35, "learning_rate": 2.2493971946486663e-05, "loss": 1.0156, "step": 2468 }, { "epoch": 0.35, "learning_rate": 2.248794227970661e-05, "loss": 0.8209, "step": 2469 }, { "epoch": 0.35, "learning_rate": 2.2481911000957885e-05, "loss": 0.8795, "step": 2470 }, { "epoch": 0.35, "learning_rate": 2.2475878111538873e-05, "loss": 0.8633, "step": 2471 }, { "epoch": 0.35, "learning_rate": 2.2469843612748297e-05, "loss": 0.8041, "step": 2472 }, { "epoch": 0.35, "learning_rate": 2.2463807505885243e-05, "loss": 0.7651, "step": 2473 }, { "epoch": 0.35, "learning_rate": 2.2457769792249132e-05, "loss": 0.9169, "step": 2474 }, { "epoch": 0.35, "learning_rate": 2.2451730473139735e-05, "loss": 0.9397, "step": 2475 }, { "epoch": 0.35, "learning_rate": 2.2445689549857178e-05, "loss": 0.8281, "step": 2476 }, { "epoch": 0.35, "learning_rate": 2.24396470237019e-05, "loss": 0.9102, "step": 2477 }, { "epoch": 0.35, "learning_rate": 2.243360289597473e-05, "loss": 0.8912, "step": 2478 }, { "epoch": 0.36, "learning_rate": 2.2427557167976806e-05, "loss": 0.976, "step": 2479 }, { "epoch": 0.36, "learning_rate": 2.2421509841009628e-05, "loss": 0.899, "step": 2480 }, { "epoch": 0.36, "learning_rate": 2.2415460916375035e-05, "loss": 0.8209, "step": 2481 }, { "epoch": 0.36, "learning_rate": 2.240941039537521e-05, "loss": 0.8677, "step": 2482 }, { "epoch": 0.36, "learning_rate": 2.2403358279312684e-05, "loss": 0.899, "step": 2483 }, { "epoch": 0.36, "learning_rate": 2.239730456949032e-05, "loss": 0.9275, "step": 2484 }, { "epoch": 0.36, "learning_rate": 2.239124926721134e-05, "loss": 0.9754, "step": 2485 }, { "epoch": 0.36, "learning_rate": 2.23851923737793e-05, "loss": 0.8789, "step": 2486 }, { "epoch": 0.36, "learning_rate": 2.2379133890498095e-05, "loss": 0.8008, "step": 2487 }, { "epoch": 0.36, "learning_rate": 2.237307381867197e-05, "loss": 0.7902, "step": 2488 }, { "epoch": 0.36, "learning_rate": 2.2367012159605504e-05, "loss": 0.9023, "step": 2489 }, { "epoch": 0.36, "learning_rate": 2.2360948914603628e-05, "loss": 0.8421, "step": 2490 }, { "epoch": 0.36, "learning_rate": 2.2354884084971606e-05, "loss": 0.7969, "step": 2491 }, { "epoch": 0.36, "learning_rate": 2.234881767201504e-05, "loss": 0.8845, "step": 2492 }, { "epoch": 0.36, "learning_rate": 2.2342749677039892e-05, "loss": 0.8298, "step": 2493 }, { "epoch": 0.36, "learning_rate": 2.233668010135244e-05, "loss": 0.7681, "step": 2494 }, { "epoch": 0.36, "learning_rate": 2.2330608946259318e-05, "loss": 0.8996, "step": 2495 }, { "epoch": 0.36, "learning_rate": 2.232453621306749e-05, "loss": 0.8136, "step": 2496 }, { "epoch": 0.36, "learning_rate": 2.231846190308427e-05, "loss": 0.8292, "step": 2497 }, { "epoch": 0.36, "learning_rate": 2.231238601761731e-05, "loss": 0.8262, "step": 2498 }, { "epoch": 0.36, "learning_rate": 2.2306308557974594e-05, "loss": 0.9135, "step": 2499 }, { "epoch": 0.36, "learning_rate": 2.230022952546445e-05, "loss": 0.755, "step": 2500 }, { "epoch": 0.36, "learning_rate": 2.2294148921395534e-05, "loss": 1.0753, "step": 2501 }, { "epoch": 0.36, "learning_rate": 2.2288066747076867e-05, "loss": 0.9224, "step": 2502 }, { "epoch": 0.36, "learning_rate": 2.2281983003817775e-05, "loss": 0.9169, "step": 2503 }, { "epoch": 0.36, "learning_rate": 2.2275897692927943e-05, "loss": 0.6956, "step": 2504 }, { "epoch": 0.36, "learning_rate": 2.2269810815717393e-05, "loss": 1.0363, "step": 2505 }, { "epoch": 0.36, "learning_rate": 2.2263722373496466e-05, "loss": 0.9085, "step": 2506 }, { "epoch": 0.36, "learning_rate": 2.225763236757587e-05, "loss": 0.9224, "step": 2507 }, { "epoch": 0.36, "learning_rate": 2.225154079926662e-05, "loss": 0.7252, "step": 2508 }, { "epoch": 0.36, "learning_rate": 2.224544766988008e-05, "loss": 0.8248, "step": 2509 }, { "epoch": 0.36, "learning_rate": 2.2239352980727958e-05, "loss": 1.0413, "step": 2510 }, { "epoch": 0.36, "learning_rate": 2.2233256733122285e-05, "loss": 0.9004, "step": 2511 }, { "epoch": 0.36, "learning_rate": 2.2227158928375437e-05, "loss": 0.9202, "step": 2512 }, { "epoch": 0.36, "learning_rate": 2.222105956780011e-05, "loss": 0.885, "step": 2513 }, { "epoch": 0.36, "learning_rate": 2.2214958652709354e-05, "loss": 0.7843, "step": 2514 }, { "epoch": 0.36, "learning_rate": 2.220885618441654e-05, "loss": 1.0112, "step": 2515 }, { "epoch": 0.36, "learning_rate": 2.2202752164235384e-05, "loss": 0.9018, "step": 2516 }, { "epoch": 0.36, "learning_rate": 2.2196646593479926e-05, "loss": 0.8251, "step": 2517 }, { "epoch": 0.36, "learning_rate": 2.219053947346455e-05, "loss": 0.9632, "step": 2518 }, { "epoch": 0.36, "learning_rate": 2.2184430805503955e-05, "loss": 0.8711, "step": 2519 }, { "epoch": 0.36, "learning_rate": 2.21783205909132e-05, "loss": 0.9743, "step": 2520 }, { "epoch": 0.36, "learning_rate": 2.217220883100766e-05, "loss": 0.9916, "step": 2521 }, { "epoch": 0.36, "learning_rate": 2.2166095527103034e-05, "loss": 1.005, "step": 2522 }, { "epoch": 0.36, "learning_rate": 2.2159980680515387e-05, "loss": 0.9244, "step": 2523 }, { "epoch": 0.36, "learning_rate": 2.2153864292561073e-05, "loss": 0.7126, "step": 2524 }, { "epoch": 0.36, "learning_rate": 2.214774636455681e-05, "loss": 0.9129, "step": 2525 }, { "epoch": 0.36, "learning_rate": 2.214162689781963e-05, "loss": 0.8343, "step": 2526 }, { "epoch": 0.36, "learning_rate": 2.213550589366691e-05, "loss": 0.9057, "step": 2527 }, { "epoch": 0.36, "learning_rate": 2.212938335341635e-05, "loss": 0.9347, "step": 2528 }, { "epoch": 0.36, "learning_rate": 2.2123259278385977e-05, "loss": 0.851, "step": 2529 }, { "epoch": 0.36, "learning_rate": 2.2117133669894156e-05, "loss": 0.6283, "step": 2530 }, { "epoch": 0.36, "learning_rate": 2.2111006529259574e-05, "loss": 0.8426, "step": 2531 }, { "epoch": 0.36, "learning_rate": 2.210487785780126e-05, "loss": 0.8387, "step": 2532 }, { "epoch": 0.36, "learning_rate": 2.2098747656838557e-05, "loss": 0.6931, "step": 2533 }, { "epoch": 0.36, "learning_rate": 2.2092615927691155e-05, "loss": 0.9621, "step": 2534 }, { "epoch": 0.36, "learning_rate": 2.2086482671679055e-05, "loss": 0.9319, "step": 2535 }, { "epoch": 0.36, "learning_rate": 2.20803478901226e-05, "loss": 0.9113, "step": 2536 }, { "epoch": 0.36, "learning_rate": 2.2074211584342456e-05, "loss": 0.7584, "step": 2537 }, { "epoch": 0.36, "learning_rate": 2.206807375565962e-05, "loss": 0.8705, "step": 2538 }, { "epoch": 0.36, "learning_rate": 2.206193440539541e-05, "loss": 0.8449, "step": 2539 }, { "epoch": 0.36, "learning_rate": 2.2055793534871474e-05, "loss": 0.75, "step": 2540 }, { "epoch": 0.36, "learning_rate": 2.2049651145409803e-05, "loss": 0.8181, "step": 2541 }, { "epoch": 0.36, "learning_rate": 2.204350723833269e-05, "loss": 0.8583, "step": 2542 }, { "epoch": 0.36, "learning_rate": 2.2037361814962762e-05, "loss": 1.0725, "step": 2543 }, { "epoch": 0.36, "learning_rate": 2.2031214876622988e-05, "loss": 0.9023, "step": 2544 }, { "epoch": 0.36, "learning_rate": 2.202506642463664e-05, "loss": 0.9916, "step": 2545 }, { "epoch": 0.36, "learning_rate": 2.2018916460327344e-05, "loss": 0.8544, "step": 2546 }, { "epoch": 0.36, "learning_rate": 2.2012764985019014e-05, "loss": 0.8549, "step": 2547 }, { "epoch": 0.36, "learning_rate": 2.200661200003593e-05, "loss": 0.8407, "step": 2548 }, { "epoch": 0.37, "learning_rate": 2.2000457506702668e-05, "loss": 0.9392, "step": 2549 }, { "epoch": 0.37, "learning_rate": 2.1994301506344143e-05, "loss": 0.7394, "step": 2550 }, { "epoch": 0.37, "learning_rate": 2.1988144000285576e-05, "loss": 1.034, "step": 2551 }, { "epoch": 0.37, "learning_rate": 2.198198498985254e-05, "loss": 0.728, "step": 2552 }, { "epoch": 0.37, "learning_rate": 2.1975824476370916e-05, "loss": 0.8147, "step": 2553 }, { "epoch": 0.37, "learning_rate": 2.1969662461166905e-05, "loss": 0.7218, "step": 2554 }, { "epoch": 0.37, "learning_rate": 2.1963498945567042e-05, "loss": 0.9397, "step": 2555 }, { "epoch": 0.37, "learning_rate": 2.1957333930898168e-05, "loss": 0.9085, "step": 2556 }, { "epoch": 0.37, "learning_rate": 2.195116741848747e-05, "loss": 0.7868, "step": 2557 }, { "epoch": 0.37, "learning_rate": 2.194499940966244e-05, "loss": 0.8772, "step": 2558 }, { "epoch": 0.37, "learning_rate": 2.1938829905750908e-05, "loss": 0.9169, "step": 2559 }, { "epoch": 0.37, "learning_rate": 2.1932658908080993e-05, "loss": 0.8839, "step": 2560 }, { "epoch": 0.37, "learning_rate": 2.192648641798118e-05, "loss": 0.87, "step": 2561 }, { "epoch": 0.37, "learning_rate": 2.1920312436780244e-05, "loss": 0.8895, "step": 2562 }, { "epoch": 0.37, "learning_rate": 2.191413696580729e-05, "loss": 0.9972, "step": 2563 }, { "epoch": 0.37, "learning_rate": 2.1907960006391746e-05, "loss": 0.995, "step": 2564 }, { "epoch": 0.37, "learning_rate": 2.1901781559863356e-05, "loss": 0.9029, "step": 2565 }, { "epoch": 0.37, "learning_rate": 2.1895601627552196e-05, "loss": 1.0011, "step": 2566 }, { "epoch": 0.37, "learning_rate": 2.188942021078864e-05, "loss": 1.0195, "step": 2567 }, { "epoch": 0.37, "learning_rate": 2.18832373109034e-05, "loss": 0.8588, "step": 2568 }, { "epoch": 0.37, "learning_rate": 2.18770529292275e-05, "loss": 0.8951, "step": 2569 }, { "epoch": 0.37, "learning_rate": 2.187086706709228e-05, "loss": 0.7478, "step": 2570 }, { "epoch": 0.37, "learning_rate": 2.186467972582942e-05, "loss": 0.8544, "step": 2571 }, { "epoch": 0.37, "learning_rate": 2.1858490906770878e-05, "loss": 0.851, "step": 2572 }, { "epoch": 0.37, "learning_rate": 2.185230061124897e-05, "loss": 0.7966, "step": 2573 }, { "epoch": 0.37, "learning_rate": 2.1846108840596313e-05, "loss": 0.74, "step": 2574 }, { "epoch": 0.37, "learning_rate": 2.1839915596145835e-05, "loss": 0.7729, "step": 2575 }, { "epoch": 0.37, "learning_rate": 2.1833720879230782e-05, "loss": 0.8499, "step": 2576 }, { "epoch": 0.37, "learning_rate": 2.1827524691184743e-05, "loss": 1.0424, "step": 2577 }, { "epoch": 0.37, "learning_rate": 2.182132703334159e-05, "loss": 0.8036, "step": 2578 }, { "epoch": 0.37, "learning_rate": 2.1815127907035526e-05, "loss": 0.8627, "step": 2579 }, { "epoch": 0.37, "learning_rate": 2.1808927313601075e-05, "loss": 0.8906, "step": 2580 }, { "epoch": 0.37, "learning_rate": 2.1802725254373063e-05, "loss": 0.8652, "step": 2581 }, { "epoch": 0.37, "learning_rate": 2.1796521730686648e-05, "loss": 0.791, "step": 2582 }, { "epoch": 0.37, "learning_rate": 2.179031674387729e-05, "loss": 0.8443, "step": 2583 }, { "epoch": 0.37, "learning_rate": 2.178411029528078e-05, "loss": 0.9007, "step": 2584 }, { "epoch": 0.37, "learning_rate": 2.1777902386233193e-05, "loss": 0.6631, "step": 2585 }, { "epoch": 0.37, "learning_rate": 2.1771693018070958e-05, "loss": 0.76, "step": 2586 }, { "epoch": 0.37, "learning_rate": 2.1765482192130787e-05, "loss": 0.8465, "step": 2587 }, { "epoch": 0.37, "learning_rate": 2.175926990974971e-05, "loss": 0.9464, "step": 2588 }, { "epoch": 0.37, "learning_rate": 2.17530561722651e-05, "loss": 0.8613, "step": 2589 }, { "epoch": 0.37, "learning_rate": 2.1746840981014606e-05, "loss": 0.7398, "step": 2590 }, { "epoch": 0.37, "learning_rate": 2.1740624337336203e-05, "loss": 0.8337, "step": 2591 }, { "epoch": 0.37, "learning_rate": 2.173440624256819e-05, "loss": 0.9821, "step": 2592 }, { "epoch": 0.37, "learning_rate": 2.172818669804916e-05, "loss": 0.7963, "step": 2593 }, { "epoch": 0.37, "learning_rate": 2.172196570511803e-05, "loss": 0.8878, "step": 2594 }, { "epoch": 0.37, "learning_rate": 2.171574326511403e-05, "loss": 0.885, "step": 2595 }, { "epoch": 0.37, "learning_rate": 2.170951937937669e-05, "loss": 0.8011, "step": 2596 }, { "epoch": 0.37, "learning_rate": 2.1703294049245865e-05, "loss": 0.8421, "step": 2597 }, { "epoch": 0.37, "learning_rate": 2.169706727606171e-05, "loss": 0.9833, "step": 2598 }, { "epoch": 0.37, "learning_rate": 2.16908390611647e-05, "loss": 0.6934, "step": 2599 }, { "epoch": 0.37, "learning_rate": 2.168460940589561e-05, "loss": 0.7818, "step": 2600 }, { "epoch": 0.37, "learning_rate": 2.1678378311595532e-05, "loss": 0.9012, "step": 2601 }, { "epoch": 0.37, "learning_rate": 2.167214577960587e-05, "loss": 0.7634, "step": 2602 }, { "epoch": 0.37, "learning_rate": 2.166591181126833e-05, "loss": 0.8934, "step": 2603 }, { "epoch": 0.37, "learning_rate": 2.1659676407924938e-05, "loss": 0.9732, "step": 2604 }, { "epoch": 0.37, "learning_rate": 2.1653439570918013e-05, "loss": 0.8571, "step": 2605 }, { "epoch": 0.37, "learning_rate": 2.1647201301590192e-05, "loss": 0.8404, "step": 2606 }, { "epoch": 0.37, "learning_rate": 2.1640961601284432e-05, "loss": 0.8647, "step": 2607 }, { "epoch": 0.37, "learning_rate": 2.163472047134397e-05, "loss": 0.8979, "step": 2608 }, { "epoch": 0.37, "learning_rate": 2.162847791311238e-05, "loss": 0.8817, "step": 2609 }, { "epoch": 0.37, "learning_rate": 2.162223392793352e-05, "loss": 0.9989, "step": 2610 }, { "epoch": 0.37, "learning_rate": 2.1615988517151573e-05, "loss": 0.7157, "step": 2611 }, { "epoch": 0.37, "learning_rate": 2.1609741682111017e-05, "loss": 0.8248, "step": 2612 }, { "epoch": 0.37, "learning_rate": 2.160349342415664e-05, "loss": 0.8415, "step": 2613 }, { "epoch": 0.37, "learning_rate": 2.1597243744633543e-05, "loss": 1.0564, "step": 2614 }, { "epoch": 0.37, "learning_rate": 2.1590992644887123e-05, "loss": 0.7899, "step": 2615 }, { "epoch": 0.37, "learning_rate": 2.158474012626309e-05, "loss": 0.9319, "step": 2616 }, { "epoch": 0.37, "learning_rate": 2.1578486190107457e-05, "loss": 0.8839, "step": 2617 }, { "epoch": 0.38, "learning_rate": 2.1572230837766534e-05, "loss": 1.0396, "step": 2618 }, { "epoch": 0.38, "learning_rate": 2.1565974070586954e-05, "loss": 0.7729, "step": 2619 }, { "epoch": 0.38, "learning_rate": 2.1559715889915637e-05, "loss": 0.7045, "step": 2620 }, { "epoch": 0.38, "learning_rate": 2.1553456297099817e-05, "loss": 0.7595, "step": 2621 }, { "epoch": 0.38, "learning_rate": 2.154719529348703e-05, "loss": 0.7907, "step": 2622 }, { "epoch": 0.38, "learning_rate": 2.1540932880425107e-05, "loss": 1.0608, "step": 2623 }, { "epoch": 0.38, "learning_rate": 2.1534669059262207e-05, "loss": 0.8655, "step": 2624 }, { "epoch": 0.38, "learning_rate": 2.1528403831346765e-05, "loss": 0.786, "step": 2625 }, { "epoch": 0.38, "learning_rate": 2.1522137198027524e-05, "loss": 0.8976, "step": 2626 }, { "epoch": 0.38, "learning_rate": 2.151586916065355e-05, "loss": 0.8945, "step": 2627 }, { "epoch": 0.38, "learning_rate": 2.1509599720574183e-05, "loss": 0.9501, "step": 2628 }, { "epoch": 0.38, "learning_rate": 2.1503328879139092e-05, "loss": 0.8728, "step": 2629 }, { "epoch": 0.38, "learning_rate": 2.1497056637698215e-05, "loss": 0.7935, "step": 2630 }, { "epoch": 0.38, "learning_rate": 2.149078299760183e-05, "loss": 0.8242, "step": 2631 }, { "epoch": 0.38, "learning_rate": 2.1484507960200486e-05, "loss": 0.764, "step": 2632 }, { "epoch": 0.38, "learning_rate": 2.147823152684504e-05, "loss": 0.8834, "step": 2633 }, { "epoch": 0.38, "learning_rate": 2.1471953698886663e-05, "loss": 0.9325, "step": 2634 }, { "epoch": 0.38, "learning_rate": 2.1465674477676803e-05, "loss": 0.7684, "step": 2635 }, { "epoch": 0.38, "learning_rate": 2.1459393864567235e-05, "loss": 0.8435, "step": 2636 }, { "epoch": 0.38, "learning_rate": 2.145311186091001e-05, "loss": 0.822, "step": 2637 }, { "epoch": 0.38, "learning_rate": 2.1446828468057494e-05, "loss": 0.8929, "step": 2638 }, { "epoch": 0.38, "learning_rate": 2.144054368736234e-05, "loss": 0.9668, "step": 2639 }, { "epoch": 0.38, "learning_rate": 2.1434257520177514e-05, "loss": 1.0167, "step": 2640 }, { "epoch": 0.38, "learning_rate": 2.1427969967856262e-05, "loss": 0.8834, "step": 2641 }, { "epoch": 0.38, "learning_rate": 2.1421681031752145e-05, "loss": 0.9308, "step": 2642 }, { "epoch": 0.38, "learning_rate": 2.1415390713219015e-05, "loss": 0.7628, "step": 2643 }, { "epoch": 0.38, "learning_rate": 2.140909901361102e-05, "loss": 0.8789, "step": 2644 }, { "epoch": 0.38, "learning_rate": 2.1402805934282607e-05, "loss": 0.8008, "step": 2645 }, { "epoch": 0.38, "learning_rate": 2.1396511476588527e-05, "loss": 0.7757, "step": 2646 }, { "epoch": 0.38, "learning_rate": 2.1390215641883805e-05, "loss": 0.7813, "step": 2647 }, { "epoch": 0.38, "learning_rate": 2.1383918431523794e-05, "loss": 0.7257, "step": 2648 }, { "epoch": 0.38, "learning_rate": 2.137761984686412e-05, "loss": 0.8002, "step": 2649 }, { "epoch": 0.38, "learning_rate": 2.137131988926072e-05, "loss": 0.828, "step": 2650 }, { "epoch": 0.38, "learning_rate": 2.1365018560069808e-05, "loss": 0.9012, "step": 2651 }, { "epoch": 0.38, "learning_rate": 2.135871586064791e-05, "loss": 0.8521, "step": 2652 }, { "epoch": 0.38, "learning_rate": 2.1352411792351843e-05, "loss": 0.6929, "step": 2653 }, { "epoch": 0.38, "learning_rate": 2.1346106356538717e-05, "loss": 0.7596, "step": 2654 }, { "epoch": 0.38, "learning_rate": 2.1339799554565927e-05, "loss": 0.9604, "step": 2655 }, { "epoch": 0.38, "learning_rate": 2.1333491387791186e-05, "loss": 0.8125, "step": 2656 }, { "epoch": 0.38, "learning_rate": 2.1327181857572476e-05, "loss": 0.8052, "step": 2657 }, { "epoch": 0.38, "learning_rate": 2.1320870965268083e-05, "loss": 0.7676, "step": 2658 }, { "epoch": 0.38, "learning_rate": 2.1314558712236598e-05, "loss": 0.8449, "step": 2659 }, { "epoch": 0.38, "learning_rate": 2.130824509983687e-05, "loss": 0.8075, "step": 2660 }, { "epoch": 0.38, "learning_rate": 2.1301930129428083e-05, "loss": 0.7634, "step": 2661 }, { "epoch": 0.38, "learning_rate": 2.129561380236969e-05, "loss": 0.7514, "step": 2662 }, { "epoch": 0.38, "learning_rate": 2.1289296120021433e-05, "loss": 0.9141, "step": 2663 }, { "epoch": 0.38, "learning_rate": 2.1282977083743362e-05, "loss": 0.8164, "step": 2664 }, { "epoch": 0.38, "learning_rate": 2.12766566948958e-05, "loss": 0.656, "step": 2665 }, { "epoch": 0.38, "learning_rate": 2.1270334954839382e-05, "loss": 0.8691, "step": 2666 }, { "epoch": 0.38, "learning_rate": 2.1264011864935007e-05, "loss": 0.74, "step": 2667 }, { "epoch": 0.38, "learning_rate": 2.125768742654389e-05, "loss": 0.6501, "step": 2668 }, { "epoch": 0.38, "learning_rate": 2.1251361641027522e-05, "loss": 0.8253, "step": 2669 }, { "epoch": 0.38, "learning_rate": 2.1245034509747694e-05, "loss": 0.9272, "step": 2670 }, { "epoch": 0.38, "learning_rate": 2.1238706034066476e-05, "loss": 0.9342, "step": 2671 }, { "epoch": 0.38, "learning_rate": 2.123237621534623e-05, "loss": 0.9403, "step": 2672 }, { "epoch": 0.38, "learning_rate": 2.1226045054949615e-05, "loss": 0.8167, "step": 2673 }, { "epoch": 0.38, "learning_rate": 2.121971255423957e-05, "loss": 0.5947, "step": 2674 }, { "epoch": 0.38, "learning_rate": 2.1213378714579324e-05, "loss": 0.7771, "step": 2675 }, { "epoch": 0.38, "learning_rate": 2.1207043537332395e-05, "loss": 0.7846, "step": 2676 }, { "epoch": 0.38, "learning_rate": 2.1200707023862603e-05, "loss": 0.9202, "step": 2677 }, { "epoch": 0.38, "learning_rate": 2.1194369175534022e-05, "loss": 0.7483, "step": 2678 }, { "epoch": 0.38, "learning_rate": 2.1188029993711055e-05, "loss": 0.8365, "step": 2679 }, { "epoch": 0.38, "learning_rate": 2.118168947975835e-05, "loss": 0.8563, "step": 2680 }, { "epoch": 0.38, "learning_rate": 2.117534763504088e-05, "loss": 1.0681, "step": 2681 }, { "epoch": 0.38, "learning_rate": 2.116900446092388e-05, "loss": 0.9191, "step": 2682 }, { "epoch": 0.38, "learning_rate": 2.116265995877287e-05, "loss": 0.7573, "step": 2683 }, { "epoch": 0.38, "learning_rate": 2.1156314129953683e-05, "loss": 1.034, "step": 2684 }, { "epoch": 0.38, "learning_rate": 2.11499669758324e-05, "loss": 0.8979, "step": 2685 }, { "epoch": 0.38, "learning_rate": 2.1143618497775426e-05, "loss": 0.7924, "step": 2686 }, { "epoch": 0.38, "learning_rate": 2.1137268697149414e-05, "loss": 0.8772, "step": 2687 }, { "epoch": 0.39, "learning_rate": 2.113091757532133e-05, "loss": 0.8984, "step": 2688 }, { "epoch": 0.39, "learning_rate": 2.1124565133658403e-05, "loss": 0.6373, "step": 2689 }, { "epoch": 0.39, "learning_rate": 2.1118211373528164e-05, "loss": 0.9699, "step": 2690 }, { "epoch": 0.39, "learning_rate": 2.111185629629842e-05, "loss": 0.9983, "step": 2691 }, { "epoch": 0.39, "learning_rate": 2.1105499903337257e-05, "loss": 0.8917, "step": 2692 }, { "epoch": 0.39, "learning_rate": 2.1099142196013054e-05, "loss": 0.8287, "step": 2693 }, { "epoch": 0.39, "learning_rate": 2.1092783175694467e-05, "loss": 0.7091, "step": 2694 }, { "epoch": 0.39, "learning_rate": 2.1086422843750436e-05, "loss": 0.8828, "step": 2695 }, { "epoch": 0.39, "learning_rate": 2.108006120155018e-05, "loss": 1.01, "step": 2696 }, { "epoch": 0.39, "learning_rate": 2.1073698250463206e-05, "loss": 0.8694, "step": 2697 }, { "epoch": 0.39, "learning_rate": 2.1067333991859302e-05, "loss": 0.9286, "step": 2698 }, { "epoch": 0.39, "learning_rate": 2.106096842710853e-05, "loss": 0.9375, "step": 2699 }, { "epoch": 0.39, "learning_rate": 2.1054601557581243e-05, "loss": 0.9224, "step": 2700 }, { "epoch": 0.39, "learning_rate": 2.104823338464807e-05, "loss": 0.9302, "step": 2701 }, { "epoch": 0.39, "learning_rate": 2.1041863909679915e-05, "loss": 0.8242, "step": 2702 }, { "epoch": 0.39, "learning_rate": 2.1035493134047978e-05, "loss": 0.9612, "step": 2703 }, { "epoch": 0.39, "learning_rate": 2.1029121059123725e-05, "loss": 0.909, "step": 2704 }, { "epoch": 0.39, "learning_rate": 2.10227476862789e-05, "loss": 0.6897, "step": 2705 }, { "epoch": 0.39, "learning_rate": 2.1016373016885545e-05, "loss": 0.9696, "step": 2706 }, { "epoch": 0.39, "learning_rate": 2.1009997052315955e-05, "loss": 0.9305, "step": 2707 }, { "epoch": 0.39, "learning_rate": 2.1003619793942727e-05, "loss": 0.9353, "step": 2708 }, { "epoch": 0.39, "learning_rate": 2.0997241243138725e-05, "loss": 0.7098, "step": 2709 }, { "epoch": 0.39, "learning_rate": 2.0990861401277087e-05, "loss": 0.7659, "step": 2710 }, { "epoch": 0.39, "learning_rate": 2.0984480269731246e-05, "loss": 0.8146, "step": 2711 }, { "epoch": 0.39, "learning_rate": 2.0978097849874895e-05, "loss": 0.7162, "step": 2712 }, { "epoch": 0.39, "learning_rate": 2.097171414308201e-05, "loss": 0.8482, "step": 2713 }, { "epoch": 0.39, "learning_rate": 2.0965329150726843e-05, "loss": 0.8683, "step": 2714 }, { "epoch": 0.39, "learning_rate": 2.095894287418394e-05, "loss": 0.9989, "step": 2715 }, { "epoch": 0.39, "learning_rate": 2.095255531482809e-05, "loss": 0.7974, "step": 2716 }, { "epoch": 0.39, "learning_rate": 2.0946166474034382e-05, "loss": 0.7695, "step": 2717 }, { "epoch": 0.39, "learning_rate": 2.0939776353178177e-05, "loss": 0.8867, "step": 2718 }, { "epoch": 0.39, "learning_rate": 2.093338495363511e-05, "loss": 0.8228, "step": 2719 }, { "epoch": 0.39, "learning_rate": 2.0926992276781097e-05, "loss": 0.7797, "step": 2720 }, { "epoch": 0.39, "learning_rate": 2.0920598323992314e-05, "loss": 0.7154, "step": 2721 }, { "epoch": 0.39, "learning_rate": 2.091420309664522e-05, "loss": 0.721, "step": 2722 }, { "epoch": 0.39, "learning_rate": 2.0907806596116553e-05, "loss": 0.8778, "step": 2723 }, { "epoch": 0.39, "learning_rate": 2.090140882378333e-05, "loss": 0.7679, "step": 2724 }, { "epoch": 0.39, "learning_rate": 2.089500978102282e-05, "loss": 0.8415, "step": 2725 }, { "epoch": 0.39, "learning_rate": 2.088860946921258e-05, "loss": 0.873, "step": 2726 }, { "epoch": 0.39, "learning_rate": 2.0882207889730442e-05, "loss": 0.7508, "step": 2727 }, { "epoch": 0.39, "learning_rate": 2.087580504395451e-05, "loss": 0.7547, "step": 2728 }, { "epoch": 0.39, "learning_rate": 2.0869400933263155e-05, "loss": 0.8951, "step": 2729 }, { "epoch": 0.39, "learning_rate": 2.086299555903502e-05, "loss": 0.87, "step": 2730 }, { "epoch": 0.39, "learning_rate": 2.0856588922649032e-05, "loss": 0.7489, "step": 2731 }, { "epoch": 0.39, "learning_rate": 2.085018102548438e-05, "loss": 0.7433, "step": 2732 }, { "epoch": 0.39, "learning_rate": 2.0843771868920515e-05, "loss": 0.9152, "step": 2733 }, { "epoch": 0.39, "learning_rate": 2.0837361454337185e-05, "loss": 0.7706, "step": 2734 }, { "epoch": 0.39, "learning_rate": 2.083094978311438e-05, "loss": 0.8465, "step": 2735 }, { "epoch": 0.39, "learning_rate": 2.0824536856632384e-05, "loss": 0.769, "step": 2736 }, { "epoch": 0.39, "learning_rate": 2.0818122676271735e-05, "loss": 0.8337, "step": 2737 }, { "epoch": 0.39, "learning_rate": 2.0811707243413252e-05, "loss": 0.7885, "step": 2738 }, { "epoch": 0.39, "learning_rate": 2.0805290559438016e-05, "loss": 0.7204, "step": 2739 }, { "epoch": 0.39, "learning_rate": 2.0798872625727384e-05, "loss": 0.8566, "step": 2740 }, { "epoch": 0.39, "learning_rate": 2.0792453443662972e-05, "loss": 0.8778, "step": 2741 }, { "epoch": 0.39, "learning_rate": 2.0786033014626674e-05, "loss": 0.7439, "step": 2742 }, { "epoch": 0.39, "learning_rate": 2.077961134000065e-05, "loss": 0.7266, "step": 2743 }, { "epoch": 0.39, "learning_rate": 2.0773188421167333e-05, "loss": 0.7695, "step": 2744 }, { "epoch": 0.39, "learning_rate": 2.076676425950941e-05, "loss": 0.8884, "step": 2745 }, { "epoch": 0.39, "learning_rate": 2.0760338856409852e-05, "loss": 0.7581, "step": 2746 }, { "epoch": 0.39, "learning_rate": 2.0753912213251875e-05, "loss": 0.7411, "step": 2747 }, { "epoch": 0.39, "learning_rate": 2.074748433141899e-05, "loss": 0.8619, "step": 2748 }, { "epoch": 0.39, "learning_rate": 2.0741055212294958e-05, "loss": 0.7875, "step": 2749 }, { "epoch": 0.39, "learning_rate": 2.0734624857263806e-05, "loss": 0.8633, "step": 2750 }, { "epoch": 0.39, "learning_rate": 2.072819326770983e-05, "loss": 0.8934, "step": 2751 }, { "epoch": 0.39, "learning_rate": 2.0721760445017598e-05, "loss": 0.8178, "step": 2752 }, { "epoch": 0.39, "learning_rate": 2.0715326390571933e-05, "loss": 0.7863, "step": 2753 }, { "epoch": 0.39, "learning_rate": 2.070889110575793e-05, "loss": 0.877, "step": 2754 }, { "epoch": 0.39, "learning_rate": 2.070245459196094e-05, "loss": 0.74, "step": 2755 }, { "epoch": 0.39, "learning_rate": 2.0696016850566596e-05, "loss": 0.7944, "step": 2756 }, { "epoch": 0.39, "learning_rate": 2.0689577882960778e-05, "loss": 0.9107, "step": 2757 }, { "epoch": 0.4, "learning_rate": 2.068313769052963e-05, "loss": 0.8884, "step": 2758 }, { "epoch": 0.4, "learning_rate": 2.0676696274659585e-05, "loss": 0.9163, "step": 2759 }, { "epoch": 0.4, "learning_rate": 2.0670253636737295e-05, "loss": 0.7566, "step": 2760 }, { "epoch": 0.4, "learning_rate": 2.0663809778149726e-05, "loss": 0.7606, "step": 2761 }, { "epoch": 0.4, "learning_rate": 2.0657364700284064e-05, "loss": 0.8147, "step": 2762 }, { "epoch": 0.4, "learning_rate": 2.065091840452778e-05, "loss": 0.6992, "step": 2763 }, { "epoch": 0.4, "learning_rate": 2.0644470892268596e-05, "loss": 0.7324, "step": 2764 }, { "epoch": 0.4, "learning_rate": 2.0638022164894518e-05, "loss": 1.0307, "step": 2765 }, { "epoch": 0.4, "learning_rate": 2.0631572223793783e-05, "loss": 0.7773, "step": 2766 }, { "epoch": 0.4, "learning_rate": 2.062512107035491e-05, "loss": 0.8122, "step": 2767 }, { "epoch": 0.4, "learning_rate": 2.0618668705966666e-05, "loss": 0.7294, "step": 2768 }, { "epoch": 0.4, "learning_rate": 2.061221513201809e-05, "loss": 0.9978, "step": 2769 }, { "epoch": 0.4, "learning_rate": 2.0605760349898484e-05, "loss": 0.8661, "step": 2770 }, { "epoch": 0.4, "learning_rate": 2.059930436099739e-05, "loss": 1.0028, "step": 2771 }, { "epoch": 0.4, "learning_rate": 2.059284716670463e-05, "loss": 0.863, "step": 2772 }, { "epoch": 0.4, "learning_rate": 2.0586388768410276e-05, "loss": 0.8756, "step": 2773 }, { "epoch": 0.4, "learning_rate": 2.0579929167504664e-05, "loss": 0.9654, "step": 2774 }, { "epoch": 0.4, "learning_rate": 2.057346836537838e-05, "loss": 0.7377, "step": 2775 }, { "epoch": 0.4, "learning_rate": 2.0567006363422282e-05, "loss": 0.9464, "step": 2776 }, { "epoch": 0.4, "learning_rate": 2.0560543163027474e-05, "loss": 0.7712, "step": 2777 }, { "epoch": 0.4, "learning_rate": 2.0554078765585325e-05, "loss": 0.7573, "step": 2778 }, { "epoch": 0.4, "learning_rate": 2.0547613172487464e-05, "loss": 1.0558, "step": 2779 }, { "epoch": 0.4, "learning_rate": 2.0541146385125764e-05, "loss": 0.9453, "step": 2780 }, { "epoch": 0.4, "learning_rate": 2.0534678404892372e-05, "loss": 0.9124, "step": 2781 }, { "epoch": 0.4, "learning_rate": 2.052820923317968e-05, "loss": 0.9085, "step": 2782 }, { "epoch": 0.4, "learning_rate": 2.0521738871380345e-05, "loss": 0.9381, "step": 2783 }, { "epoch": 0.4, "learning_rate": 2.0515267320887268e-05, "loss": 0.942, "step": 2784 }, { "epoch": 0.4, "learning_rate": 2.050879458309362e-05, "loss": 0.8103, "step": 2785 }, { "epoch": 0.4, "learning_rate": 2.050232065939282e-05, "loss": 0.8728, "step": 2786 }, { "epoch": 0.4, "learning_rate": 2.0495845551178546e-05, "loss": 0.9241, "step": 2787 }, { "epoch": 0.4, "learning_rate": 2.0489369259844727e-05, "loss": 0.7941, "step": 2788 }, { "epoch": 0.4, "learning_rate": 2.048289178678554e-05, "loss": 0.7857, "step": 2789 }, { "epoch": 0.4, "learning_rate": 2.047641313339544e-05, "loss": 0.9615, "step": 2790 }, { "epoch": 0.4, "learning_rate": 2.0469933301069105e-05, "loss": 1.0893, "step": 2791 }, { "epoch": 0.4, "learning_rate": 2.0463452291201498e-05, "loss": 0.9498, "step": 2792 }, { "epoch": 0.4, "learning_rate": 2.0456970105187806e-05, "loss": 0.9353, "step": 2793 }, { "epoch": 0.4, "learning_rate": 2.045048674442349e-05, "loss": 0.9286, "step": 2794 }, { "epoch": 0.4, "learning_rate": 2.044400221030426e-05, "loss": 0.6708, "step": 2795 }, { "epoch": 0.4, "learning_rate": 2.043751650422607e-05, "loss": 0.8733, "step": 2796 }, { "epoch": 0.4, "learning_rate": 2.0431029627585133e-05, "loss": 0.8929, "step": 2797 }, { "epoch": 0.4, "learning_rate": 2.0424541581777915e-05, "loss": 0.8284, "step": 2798 }, { "epoch": 0.4, "learning_rate": 2.0418052368201135e-05, "loss": 0.7573, "step": 2799 }, { "epoch": 0.4, "learning_rate": 2.0411561988251754e-05, "loss": 0.8532, "step": 2800 }, { "epoch": 0.4, "learning_rate": 2.040507044332699e-05, "loss": 0.762, "step": 2801 }, { "epoch": 0.4, "learning_rate": 2.0398577734824313e-05, "loss": 0.8664, "step": 2802 }, { "epoch": 0.4, "learning_rate": 2.0392083864141445e-05, "loss": 0.7849, "step": 2803 }, { "epoch": 0.4, "learning_rate": 2.038558883267636e-05, "loss": 0.7835, "step": 2804 }, { "epoch": 0.4, "learning_rate": 2.0379092641827263e-05, "loss": 0.8438, "step": 2805 }, { "epoch": 0.4, "learning_rate": 2.037259529299264e-05, "loss": 0.9113, "step": 2806 }, { "epoch": 0.4, "learning_rate": 2.0366096787571198e-05, "loss": 0.875, "step": 2807 }, { "epoch": 0.4, "learning_rate": 2.035959712696191e-05, "loss": 0.7935, "step": 2808 }, { "epoch": 0.4, "learning_rate": 2.0353096312563983e-05, "loss": 0.9581, "step": 2809 }, { "epoch": 0.4, "learning_rate": 2.0346594345776894e-05, "loss": 0.8761, "step": 2810 }, { "epoch": 0.4, "learning_rate": 2.0340091228000352e-05, "loss": 0.9381, "step": 2811 }, { "epoch": 0.4, "learning_rate": 2.033358696063431e-05, "loss": 0.6688, "step": 2812 }, { "epoch": 0.4, "learning_rate": 2.032708154507899e-05, "loss": 0.9079, "step": 2813 }, { "epoch": 0.4, "learning_rate": 2.0320574982734824e-05, "loss": 0.8114, "step": 2814 }, { "epoch": 0.4, "learning_rate": 2.031406727500254e-05, "loss": 0.9799, "step": 2815 }, { "epoch": 0.4, "learning_rate": 2.030755842328307e-05, "loss": 0.904, "step": 2816 }, { "epoch": 0.4, "learning_rate": 2.030104842897762e-05, "loss": 0.7506, "step": 2817 }, { "epoch": 0.4, "learning_rate": 2.029453729348761e-05, "loss": 0.7221, "step": 2818 }, { "epoch": 0.4, "learning_rate": 2.028802501821475e-05, "loss": 0.8142, "step": 2819 }, { "epoch": 0.4, "learning_rate": 2.028151160456096e-05, "loss": 0.7589, "step": 2820 }, { "epoch": 0.4, "learning_rate": 2.027499705392842e-05, "loss": 0.8022, "step": 2821 }, { "epoch": 0.4, "learning_rate": 2.0268481367719546e-05, "loss": 0.8689, "step": 2822 }, { "epoch": 0.4, "learning_rate": 2.026196454733701e-05, "loss": 0.8225, "step": 2823 }, { "epoch": 0.4, "learning_rate": 2.025544659418372e-05, "loss": 0.6956, "step": 2824 }, { "epoch": 0.4, "learning_rate": 2.0248927509662834e-05, "loss": 0.6992, "step": 2825 }, { "epoch": 0.4, "learning_rate": 2.024240729517774e-05, "loss": 0.8917, "step": 2826 }, { "epoch": 0.4, "learning_rate": 2.023588595213209e-05, "loss": 0.8438, "step": 2827 }, { "epoch": 0.41, "learning_rate": 2.0229363481929758e-05, "loss": 0.9252, "step": 2828 }, { "epoch": 0.41, "learning_rate": 2.0222839885974883e-05, "loss": 0.7746, "step": 2829 }, { "epoch": 0.41, "learning_rate": 2.0216315165671817e-05, "loss": 0.7963, "step": 2830 }, { "epoch": 0.41, "learning_rate": 2.0209789322425183e-05, "loss": 0.8292, "step": 2831 }, { "epoch": 0.41, "learning_rate": 2.020326235763983e-05, "loss": 0.9503, "step": 2832 }, { "epoch": 0.41, "learning_rate": 2.0196734272720857e-05, "loss": 0.9051, "step": 2833 }, { "epoch": 0.41, "learning_rate": 2.0190205069073584e-05, "loss": 0.8019, "step": 2834 }, { "epoch": 0.41, "learning_rate": 2.0183674748103606e-05, "loss": 0.875, "step": 2835 }, { "epoch": 0.41, "learning_rate": 2.0177143311216728e-05, "loss": 0.6136, "step": 2836 }, { "epoch": 0.41, "learning_rate": 2.0170610759819007e-05, "loss": 1.0536, "step": 2837 }, { "epoch": 0.41, "learning_rate": 2.016407709531675e-05, "loss": 1.0128, "step": 2838 }, { "epoch": 0.41, "learning_rate": 2.0157542319116474e-05, "loss": 0.9688, "step": 2839 }, { "epoch": 0.41, "learning_rate": 2.0151006432624975e-05, "loss": 0.7185, "step": 2840 }, { "epoch": 0.41, "learning_rate": 2.0144469437249255e-05, "loss": 0.9012, "step": 2841 }, { "epoch": 0.41, "learning_rate": 2.0137931334396574e-05, "loss": 0.9213, "step": 2842 }, { "epoch": 0.41, "learning_rate": 2.0131392125474412e-05, "loss": 0.7891, "step": 2843 }, { "epoch": 0.41, "learning_rate": 2.012485181189052e-05, "loss": 0.8541, "step": 2844 }, { "epoch": 0.41, "learning_rate": 2.0118310395052844e-05, "loss": 1.0555, "step": 2845 }, { "epoch": 0.41, "learning_rate": 2.01117678763696e-05, "loss": 1.0301, "step": 2846 }, { "epoch": 0.41, "learning_rate": 2.0105224257249233e-05, "loss": 0.8652, "step": 2847 }, { "epoch": 0.41, "learning_rate": 2.0098679539100415e-05, "loss": 0.8767, "step": 2848 }, { "epoch": 0.41, "learning_rate": 2.0092133723332066e-05, "loss": 0.9475, "step": 2849 }, { "epoch": 0.41, "learning_rate": 2.0085586811353336e-05, "loss": 0.7475, "step": 2850 }, { "epoch": 0.41, "learning_rate": 2.0079038804573614e-05, "loss": 0.9503, "step": 2851 }, { "epoch": 0.41, "learning_rate": 2.0072489704402527e-05, "loss": 0.7768, "step": 2852 }, { "epoch": 0.41, "learning_rate": 2.0065939512249928e-05, "loss": 0.8878, "step": 2853 }, { "epoch": 0.41, "learning_rate": 2.0059388229525915e-05, "loss": 0.7031, "step": 2854 }, { "epoch": 0.41, "learning_rate": 2.0052835857640816e-05, "loss": 0.9453, "step": 2855 }, { "epoch": 0.41, "learning_rate": 2.00462823980052e-05, "loss": 0.9102, "step": 2856 }, { "epoch": 0.41, "learning_rate": 2.0039727852029857e-05, "loss": 0.74, "step": 2857 }, { "epoch": 0.41, "learning_rate": 2.0033172221125818e-05, "loss": 0.7762, "step": 2858 }, { "epoch": 0.41, "learning_rate": 2.0026615506704355e-05, "loss": 0.8069, "step": 2859 }, { "epoch": 0.41, "learning_rate": 2.0020057710176964e-05, "loss": 0.8839, "step": 2860 }, { "epoch": 0.41, "learning_rate": 2.001349883295537e-05, "loss": 0.6903, "step": 2861 }, { "epoch": 0.41, "learning_rate": 2.0006938876451547e-05, "loss": 1.0067, "step": 2862 }, { "epoch": 0.41, "learning_rate": 2.000037784207769e-05, "loss": 0.7208, "step": 2863 }, { "epoch": 0.41, "learning_rate": 1.9993815731246212e-05, "loss": 0.8298, "step": 2864 }, { "epoch": 0.41, "learning_rate": 1.9987252545369793e-05, "loss": 0.9637, "step": 2865 }, { "epoch": 0.41, "learning_rate": 1.9980688285861314e-05, "loss": 0.7603, "step": 2866 }, { "epoch": 0.41, "learning_rate": 1.9974122954133903e-05, "loss": 0.9235, "step": 2867 }, { "epoch": 0.41, "learning_rate": 1.9967556551600904e-05, "loss": 0.8331, "step": 2868 }, { "epoch": 0.41, "learning_rate": 1.9960989079675914e-05, "loss": 0.8599, "step": 2869 }, { "epoch": 0.41, "learning_rate": 1.9954420539772736e-05, "loss": 0.7567, "step": 2870 }, { "epoch": 0.41, "learning_rate": 1.994785093330542e-05, "loss": 0.7969, "step": 2871 }, { "epoch": 0.41, "learning_rate": 1.994128026168824e-05, "loss": 0.9135, "step": 2872 }, { "epoch": 0.41, "learning_rate": 1.9934708526335695e-05, "loss": 0.7344, "step": 2873 }, { "epoch": 0.41, "learning_rate": 1.9928135728662524e-05, "loss": 0.971, "step": 2874 }, { "epoch": 0.41, "learning_rate": 1.9921561870083677e-05, "loss": 0.8435, "step": 2875 }, { "epoch": 0.41, "learning_rate": 1.9914986952014353e-05, "loss": 0.8094, "step": 2876 }, { "epoch": 0.41, "learning_rate": 1.9908410975869963e-05, "loss": 0.7648, "step": 2877 }, { "epoch": 0.41, "learning_rate": 1.9901833943066158e-05, "loss": 0.8404, "step": 2878 }, { "epoch": 0.41, "learning_rate": 1.9895255855018804e-05, "loss": 0.9955, "step": 2879 }, { "epoch": 0.41, "learning_rate": 1.9888676713144004e-05, "loss": 0.7394, "step": 2880 }, { "epoch": 0.41, "learning_rate": 1.988209651885808e-05, "loss": 0.8546, "step": 2881 }, { "epoch": 0.41, "learning_rate": 1.987551527357759e-05, "loss": 0.755, "step": 2882 }, { "epoch": 0.41, "learning_rate": 1.9868932978719316e-05, "loss": 0.7564, "step": 2883 }, { "epoch": 0.41, "learning_rate": 1.986234963570025e-05, "loss": 0.7517, "step": 2884 }, { "epoch": 0.41, "learning_rate": 1.9855765245937644e-05, "loss": 0.817, "step": 2885 }, { "epoch": 0.41, "learning_rate": 1.9849179810848932e-05, "loss": 0.8527, "step": 2886 }, { "epoch": 0.41, "learning_rate": 1.984259333185181e-05, "loss": 0.8092, "step": 2887 }, { "epoch": 0.41, "learning_rate": 1.9836005810364184e-05, "loss": 0.8845, "step": 2888 }, { "epoch": 0.41, "learning_rate": 1.9829417247804168e-05, "loss": 0.8666, "step": 2889 }, { "epoch": 0.41, "learning_rate": 1.982282764559014e-05, "loss": 0.654, "step": 2890 }, { "epoch": 0.41, "learning_rate": 1.9816237005140656e-05, "loss": 0.7347, "step": 2891 }, { "epoch": 0.41, "learning_rate": 1.980964532787454e-05, "loss": 0.7835, "step": 2892 }, { "epoch": 0.41, "learning_rate": 1.9803052615210793e-05, "loss": 0.7436, "step": 2893 }, { "epoch": 0.41, "learning_rate": 1.979645886856868e-05, "loss": 0.7034, "step": 2894 }, { "epoch": 0.41, "learning_rate": 1.9789864089367668e-05, "loss": 0.8192, "step": 2895 }, { "epoch": 0.41, "learning_rate": 1.9783268279027445e-05, "loss": 0.9492, "step": 2896 }, { "epoch": 0.41, "learning_rate": 1.9776671438967934e-05, "loss": 0.7059, "step": 2897 }, { "epoch": 0.42, "learning_rate": 1.977007357060926e-05, "loss": 0.7645, "step": 2898 }, { "epoch": 0.42, "learning_rate": 1.976347467537179e-05, "loss": 0.7687, "step": 2899 }, { "epoch": 0.42, "learning_rate": 1.9756874754676103e-05, "loss": 0.8376, "step": 2900 }, { "epoch": 0.42, "learning_rate": 1.9750273809942993e-05, "loss": 0.8546, "step": 2901 }, { "epoch": 0.42, "learning_rate": 1.974367184259348e-05, "loss": 0.8064, "step": 2902 }, { "epoch": 0.42, "learning_rate": 1.9737068854048813e-05, "loss": 0.865, "step": 2903 }, { "epoch": 0.42, "learning_rate": 1.9730464845730443e-05, "loss": 0.7681, "step": 2904 }, { "epoch": 0.42, "learning_rate": 1.9723859819060054e-05, "loss": 0.7751, "step": 2905 }, { "epoch": 0.42, "learning_rate": 1.971725377545954e-05, "loss": 0.8493, "step": 2906 }, { "epoch": 0.42, "learning_rate": 1.9710646716351027e-05, "loss": 0.8348, "step": 2907 }, { "epoch": 0.42, "learning_rate": 1.9704038643156844e-05, "loss": 0.772, "step": 2908 }, { "epoch": 0.42, "learning_rate": 1.9697429557299542e-05, "loss": 0.6142, "step": 2909 }, { "epoch": 0.42, "learning_rate": 1.9690819460201908e-05, "loss": 0.7595, "step": 2910 }, { "epoch": 0.42, "learning_rate": 1.968420835328692e-05, "loss": 0.9414, "step": 2911 }, { "epoch": 0.42, "learning_rate": 1.9677596237977795e-05, "loss": 0.8248, "step": 2912 }, { "epoch": 0.42, "learning_rate": 1.9670983115697943e-05, "loss": 0.9071, "step": 2913 }, { "epoch": 0.42, "learning_rate": 1.9664368987871023e-05, "loss": 0.9157, "step": 2914 }, { "epoch": 0.42, "learning_rate": 1.9657753855920882e-05, "loss": 0.9252, "step": 2915 }, { "epoch": 0.42, "learning_rate": 1.9651137721271597e-05, "loss": 0.942, "step": 2916 }, { "epoch": 0.42, "learning_rate": 1.964452058534746e-05, "loss": 0.8231, "step": 2917 }, { "epoch": 0.42, "learning_rate": 1.963790244957297e-05, "loss": 0.8566, "step": 2918 }, { "epoch": 0.42, "learning_rate": 1.9631283315372863e-05, "loss": 0.8092, "step": 2919 }, { "epoch": 0.42, "learning_rate": 1.9624663184172055e-05, "loss": 0.8756, "step": 2920 }, { "epoch": 0.42, "learning_rate": 1.961804205739571e-05, "loss": 0.7907, "step": 2921 }, { "epoch": 0.42, "learning_rate": 1.961141993646919e-05, "loss": 0.858, "step": 2922 }, { "epoch": 0.42, "learning_rate": 1.960479682281807e-05, "loss": 0.9051, "step": 2923 }, { "epoch": 0.42, "learning_rate": 1.959817271786815e-05, "loss": 0.9704, "step": 2924 }, { "epoch": 0.42, "learning_rate": 1.9591547623045424e-05, "loss": 0.5717, "step": 2925 }, { "epoch": 0.42, "learning_rate": 1.9584921539776123e-05, "loss": 0.7634, "step": 2926 }, { "epoch": 0.42, "learning_rate": 1.957829446948667e-05, "loss": 0.8834, "step": 2927 }, { "epoch": 0.42, "learning_rate": 1.9571666413603716e-05, "loss": 0.8945, "step": 2928 }, { "epoch": 0.42, "learning_rate": 1.9565037373554114e-05, "loss": 0.7693, "step": 2929 }, { "epoch": 0.42, "learning_rate": 1.955840735076493e-05, "loss": 0.7129, "step": 2930 }, { "epoch": 0.42, "learning_rate": 1.955177634666345e-05, "loss": 0.9927, "step": 2931 }, { "epoch": 0.42, "learning_rate": 1.9545144362677157e-05, "loss": 0.7422, "step": 2932 }, { "epoch": 0.42, "learning_rate": 1.9538511400233763e-05, "loss": 0.7612, "step": 2933 }, { "epoch": 0.42, "learning_rate": 1.9531877460761172e-05, "loss": 0.7467, "step": 2934 }, { "epoch": 0.42, "learning_rate": 1.952524254568751e-05, "loss": 0.9598, "step": 2935 }, { "epoch": 0.42, "learning_rate": 1.951860665644111e-05, "loss": 0.7511, "step": 2936 }, { "epoch": 0.42, "learning_rate": 1.951196979445052e-05, "loss": 0.6674, "step": 2937 }, { "epoch": 0.42, "learning_rate": 1.950533196114448e-05, "loss": 0.9118, "step": 2938 }, { "epoch": 0.42, "learning_rate": 1.9498693157951964e-05, "loss": 0.7155, "step": 2939 }, { "epoch": 0.42, "learning_rate": 1.9492053386302136e-05, "loss": 0.8209, "step": 2940 }, { "epoch": 0.42, "learning_rate": 1.948541264762437e-05, "loss": 0.7296, "step": 2941 }, { "epoch": 0.42, "learning_rate": 1.9478770943348263e-05, "loss": 0.8457, "step": 2942 }, { "epoch": 0.42, "learning_rate": 1.94721282749036e-05, "loss": 0.9526, "step": 2943 }, { "epoch": 0.42, "learning_rate": 1.946548464372039e-05, "loss": 0.822, "step": 2944 }, { "epoch": 0.42, "learning_rate": 1.945884005122884e-05, "loss": 0.7321, "step": 2945 }, { "epoch": 0.42, "learning_rate": 1.9452194498859363e-05, "loss": 0.8767, "step": 2946 }, { "epoch": 0.42, "learning_rate": 1.944554798804259e-05, "loss": 0.764, "step": 2947 }, { "epoch": 0.42, "learning_rate": 1.9438900520209338e-05, "loss": 0.9431, "step": 2948 }, { "epoch": 0.42, "learning_rate": 1.943225209679066e-05, "loss": 0.8175, "step": 2949 }, { "epoch": 0.42, "learning_rate": 1.942560271921778e-05, "loss": 0.7475, "step": 2950 }, { "epoch": 0.42, "learning_rate": 1.9418952388922154e-05, "loss": 0.986, "step": 2951 }, { "epoch": 0.42, "learning_rate": 1.9412301107335425e-05, "loss": 0.757, "step": 2952 }, { "epoch": 0.42, "learning_rate": 1.9405648875889463e-05, "loss": 0.8404, "step": 2953 }, { "epoch": 0.42, "learning_rate": 1.939899569601632e-05, "loss": 0.8856, "step": 2954 }, { "epoch": 0.42, "learning_rate": 1.9392341569148254e-05, "loss": 0.8767, "step": 2955 }, { "epoch": 0.42, "learning_rate": 1.938568649671775e-05, "loss": 0.9777, "step": 2956 }, { "epoch": 0.42, "learning_rate": 1.937903048015747e-05, "loss": 0.7796, "step": 2957 }, { "epoch": 0.42, "learning_rate": 1.9372373520900293e-05, "loss": 0.9177, "step": 2958 }, { "epoch": 0.42, "learning_rate": 1.9365715620379288e-05, "loss": 0.9766, "step": 2959 }, { "epoch": 0.42, "learning_rate": 1.9359056780027754e-05, "loss": 0.7514, "step": 2960 }, { "epoch": 0.42, "learning_rate": 1.9352397001279162e-05, "loss": 0.8354, "step": 2961 }, { "epoch": 0.42, "learning_rate": 1.9345736285567202e-05, "loss": 0.7655, "step": 2962 }, { "epoch": 0.42, "learning_rate": 1.9339074634325754e-05, "loss": 0.8795, "step": 2963 }, { "epoch": 0.42, "learning_rate": 1.933241204898892e-05, "loss": 0.9459, "step": 2964 }, { "epoch": 0.42, "learning_rate": 1.932574853099098e-05, "loss": 0.8393, "step": 2965 }, { "epoch": 0.42, "learning_rate": 1.9319084081766427e-05, "loss": 0.9364, "step": 2966 }, { "epoch": 0.43, "learning_rate": 1.9312418702749956e-05, "loss": 0.7595, "step": 2967 }, { "epoch": 0.43, "learning_rate": 1.930575239537645e-05, "loss": 0.7733, "step": 2968 }, { "epoch": 0.43, "learning_rate": 1.9299085161081003e-05, "loss": 0.6858, "step": 2969 }, { "epoch": 0.43, "learning_rate": 1.9292417001298906e-05, "loss": 0.9308, "step": 2970 }, { "epoch": 0.43, "learning_rate": 1.9285747917465654e-05, "loss": 0.7938, "step": 2971 }, { "epoch": 0.43, "learning_rate": 1.9279077911016925e-05, "loss": 0.8153, "step": 2972 }, { "epoch": 0.43, "learning_rate": 1.927240698338862e-05, "loss": 0.8245, "step": 2973 }, { "epoch": 0.43, "learning_rate": 1.926573513601681e-05, "loss": 0.9269, "step": 2974 }, { "epoch": 0.43, "learning_rate": 1.925906237033779e-05, "loss": 1.048, "step": 2975 }, { "epoch": 0.43, "learning_rate": 1.9252388687788035e-05, "loss": 0.7729, "step": 2976 }, { "epoch": 0.43, "learning_rate": 1.9245714089804225e-05, "loss": 0.928, "step": 2977 }, { "epoch": 0.43, "learning_rate": 1.923903857782324e-05, "loss": 0.9185, "step": 2978 }, { "epoch": 0.43, "learning_rate": 1.9232362153282147e-05, "loss": 0.8161, "step": 2979 }, { "epoch": 0.43, "learning_rate": 1.9225684817618218e-05, "loss": 0.8465, "step": 2980 }, { "epoch": 0.43, "learning_rate": 1.9219006572268913e-05, "loss": 0.8407, "step": 2981 }, { "epoch": 0.43, "learning_rate": 1.9212327418671905e-05, "loss": 0.6804, "step": 2982 }, { "epoch": 0.43, "learning_rate": 1.9205647358265036e-05, "loss": 0.8694, "step": 2983 }, { "epoch": 0.43, "learning_rate": 1.9198966392486364e-05, "loss": 0.7885, "step": 2984 }, { "epoch": 0.43, "learning_rate": 1.9192284522774144e-05, "loss": 0.8186, "step": 2985 }, { "epoch": 0.43, "learning_rate": 1.9185601750566802e-05, "loss": 0.9074, "step": 2986 }, { "epoch": 0.43, "learning_rate": 1.9178918077302988e-05, "loss": 0.7896, "step": 2987 }, { "epoch": 0.43, "learning_rate": 1.9172233504421513e-05, "loss": 0.8588, "step": 2988 }, { "epoch": 0.43, "learning_rate": 1.916554803336142e-05, "loss": 0.7483, "step": 2989 }, { "epoch": 0.43, "learning_rate": 1.9158861665561912e-05, "loss": 0.9676, "step": 2990 }, { "epoch": 0.43, "learning_rate": 1.9152174402462404e-05, "loss": 0.6939, "step": 2991 }, { "epoch": 0.43, "learning_rate": 1.91454862455025e-05, "loss": 0.6895, "step": 2992 }, { "epoch": 0.43, "learning_rate": 1.913879719612199e-05, "loss": 0.918, "step": 2993 }, { "epoch": 0.43, "learning_rate": 1.9132107255760865e-05, "loss": 0.8583, "step": 2994 }, { "epoch": 0.43, "learning_rate": 1.91254164258593e-05, "loss": 0.87, "step": 2995 }, { "epoch": 0.43, "learning_rate": 1.911872470785767e-05, "loss": 0.75, "step": 2996 }, { "epoch": 0.43, "learning_rate": 1.9112032103196525e-05, "loss": 0.7868, "step": 2997 }, { "epoch": 0.43, "learning_rate": 1.9105338613316634e-05, "loss": 0.7835, "step": 2998 }, { "epoch": 0.43, "learning_rate": 1.9098644239658927e-05, "loss": 0.9788, "step": 2999 }, { "epoch": 0.43, "learning_rate": 1.909194898366454e-05, "loss": 0.721, "step": 3000 }, { "epoch": 0.43, "learning_rate": 1.90852528467748e-05, "loss": 0.952, "step": 3001 }, { "epoch": 0.43, "learning_rate": 1.907855583043121e-05, "loss": 0.8432, "step": 3002 }, { "epoch": 0.43, "learning_rate": 1.907185793607549e-05, "loss": 0.8186, "step": 3003 }, { "epoch": 0.43, "learning_rate": 1.9065159165149506e-05, "loss": 0.7087, "step": 3004 }, { "epoch": 0.43, "learning_rate": 1.9058459519095358e-05, "loss": 0.8357, "step": 3005 }, { "epoch": 0.43, "learning_rate": 1.9051758999355304e-05, "loss": 0.7274, "step": 3006 }, { "epoch": 0.43, "learning_rate": 1.9045057607371806e-05, "loss": 0.9152, "step": 3007 }, { "epoch": 0.43, "learning_rate": 1.9038355344587505e-05, "loss": 0.9637, "step": 3008 }, { "epoch": 0.43, "learning_rate": 1.9031652212445226e-05, "loss": 0.6621, "step": 3009 }, { "epoch": 0.43, "learning_rate": 1.9024948212388e-05, "loss": 0.8588, "step": 3010 }, { "epoch": 0.43, "learning_rate": 1.901824334585902e-05, "loss": 0.7028, "step": 3011 }, { "epoch": 0.43, "learning_rate": 1.901153761430169e-05, "loss": 0.9249, "step": 3012 }, { "epoch": 0.43, "learning_rate": 1.900483101915958e-05, "loss": 0.7254, "step": 3013 }, { "epoch": 0.43, "learning_rate": 1.8998123561876456e-05, "loss": 0.728, "step": 3014 }, { "epoch": 0.43, "learning_rate": 1.8991415243896266e-05, "loss": 0.8524, "step": 3015 }, { "epoch": 0.43, "learning_rate": 1.8984706066663144e-05, "loss": 0.8588, "step": 3016 }, { "epoch": 0.43, "learning_rate": 1.8977996031621416e-05, "loss": 0.8376, "step": 3017 }, { "epoch": 0.43, "learning_rate": 1.897128514021558e-05, "loss": 0.7408, "step": 3018 }, { "epoch": 0.43, "learning_rate": 1.896457339389033e-05, "loss": 0.7193, "step": 3019 }, { "epoch": 0.43, "learning_rate": 1.8957860794090536e-05, "loss": 0.8817, "step": 3020 }, { "epoch": 0.43, "learning_rate": 1.8951147342261254e-05, "loss": 0.7963, "step": 3021 }, { "epoch": 0.43, "learning_rate": 1.894443303984772e-05, "loss": 0.8153, "step": 3022 }, { "epoch": 0.43, "learning_rate": 1.893771788829537e-05, "loss": 0.8867, "step": 3023 }, { "epoch": 0.43, "learning_rate": 1.8931001889049798e-05, "loss": 0.8142, "step": 3024 }, { "epoch": 0.43, "learning_rate": 1.8924285043556796e-05, "loss": 0.7877, "step": 3025 }, { "epoch": 0.43, "learning_rate": 1.8917567353262338e-05, "loss": 0.6814, "step": 3026 }, { "epoch": 0.43, "learning_rate": 1.8910848819612566e-05, "loss": 0.7874, "step": 3027 }, { "epoch": 0.43, "learning_rate": 1.890412944405383e-05, "loss": 0.7628, "step": 3028 }, { "epoch": 0.43, "learning_rate": 1.8897409228032634e-05, "loss": 0.8544, "step": 3029 }, { "epoch": 0.43, "learning_rate": 1.889068817299568e-05, "loss": 0.9559, "step": 3030 }, { "epoch": 0.43, "learning_rate": 1.8883966280389835e-05, "loss": 1.0765, "step": 3031 }, { "epoch": 0.43, "learning_rate": 1.887724355166217e-05, "loss": 0.9459, "step": 3032 }, { "epoch": 0.43, "learning_rate": 1.8870519988259913e-05, "loss": 0.7469, "step": 3033 }, { "epoch": 0.43, "learning_rate": 1.8863795591630484e-05, "loss": 0.8834, "step": 3034 }, { "epoch": 0.43, "learning_rate": 1.8857070363221474e-05, "loss": 0.9771, "step": 3035 }, { "epoch": 0.43, "learning_rate": 1.8850344304480665e-05, "loss": 0.6948, "step": 3036 }, { "epoch": 0.44, "learning_rate": 1.8843617416856013e-05, "loss": 0.9132, "step": 3037 }, { "epoch": 0.44, "learning_rate": 1.883688970179564e-05, "loss": 0.9392, "step": 3038 }, { "epoch": 0.44, "learning_rate": 1.8830161160747864e-05, "loss": 0.5991, "step": 3039 }, { "epoch": 0.44, "learning_rate": 1.8823431795161173e-05, "loss": 0.7651, "step": 3040 }, { "epoch": 0.44, "learning_rate": 1.881670160648423e-05, "loss": 0.8917, "step": 3041 }, { "epoch": 0.44, "learning_rate": 1.880997059616588e-05, "loss": 0.9291, "step": 3042 }, { "epoch": 0.44, "learning_rate": 1.880323876565514e-05, "loss": 0.88, "step": 3043 }, { "epoch": 0.44, "learning_rate": 1.8796506116401214e-05, "loss": 0.7846, "step": 3044 }, { "epoch": 0.44, "learning_rate": 1.878977264985347e-05, "loss": 0.8912, "step": 3045 }, { "epoch": 0.44, "learning_rate": 1.878303836746146e-05, "loss": 0.7559, "step": 3046 }, { "epoch": 0.44, "learning_rate": 1.8776303270674894e-05, "loss": 0.8507, "step": 3047 }, { "epoch": 0.44, "learning_rate": 1.876956736094369e-05, "loss": 0.779, "step": 3048 }, { "epoch": 0.44, "learning_rate": 1.8762830639717918e-05, "loss": 0.9397, "step": 3049 }, { "epoch": 0.44, "learning_rate": 1.875609310844782e-05, "loss": 0.8862, "step": 3050 }, { "epoch": 0.44, "learning_rate": 1.8749354768583823e-05, "loss": 0.7461, "step": 3051 }, { "epoch": 0.44, "learning_rate": 1.874261562157653e-05, "loss": 0.6881, "step": 3052 }, { "epoch": 0.44, "learning_rate": 1.873587566887671e-05, "loss": 0.9263, "step": 3053 }, { "epoch": 0.44, "learning_rate": 1.8729134911935308e-05, "loss": 0.8131, "step": 3054 }, { "epoch": 0.44, "learning_rate": 1.8722393352203438e-05, "loss": 0.9079, "step": 3055 }, { "epoch": 0.44, "learning_rate": 1.87156509911324e-05, "loss": 1.1066, "step": 3056 }, { "epoch": 0.44, "learning_rate": 1.8708907830173646e-05, "loss": 0.7818, "step": 3057 }, { "epoch": 0.44, "learning_rate": 1.870216387077882e-05, "loss": 0.6913, "step": 3058 }, { "epoch": 0.44, "learning_rate": 1.8695419114399724e-05, "loss": 0.9478, "step": 3059 }, { "epoch": 0.44, "learning_rate": 1.8688673562488338e-05, "loss": 0.7249, "step": 3060 }, { "epoch": 0.44, "learning_rate": 1.868192721649682e-05, "loss": 0.7478, "step": 3061 }, { "epoch": 0.44, "learning_rate": 1.8675180077877485e-05, "loss": 0.6879, "step": 3062 }, { "epoch": 0.44, "learning_rate": 1.8668432148082823e-05, "loss": 0.9741, "step": 3063 }, { "epoch": 0.44, "learning_rate": 1.8661683428565503e-05, "loss": 0.9102, "step": 3064 }, { "epoch": 0.44, "learning_rate": 1.865493392077835e-05, "loss": 0.986, "step": 3065 }, { "epoch": 0.44, "learning_rate": 1.864818362617437e-05, "loss": 0.8789, "step": 3066 }, { "epoch": 0.44, "learning_rate": 1.8641432546206735e-05, "loss": 0.8878, "step": 3067 }, { "epoch": 0.44, "learning_rate": 1.8634680682328785e-05, "loss": 0.786, "step": 3068 }, { "epoch": 0.44, "learning_rate": 1.8627928035994026e-05, "loss": 0.8951, "step": 3069 }, { "epoch": 0.44, "learning_rate": 1.8621174608656137e-05, "loss": 0.8292, "step": 3070 }, { "epoch": 0.44, "learning_rate": 1.861442040176897e-05, "loss": 0.9565, "step": 3071 }, { "epoch": 0.44, "learning_rate": 1.8607665416786528e-05, "loss": 1.0056, "step": 3072 }, { "epoch": 0.44, "learning_rate": 1.8600909655163005e-05, "loss": 1.1551, "step": 3073 }, { "epoch": 0.44, "learning_rate": 1.8594153118352736e-05, "loss": 0.6526, "step": 3074 }, { "epoch": 0.44, "learning_rate": 1.8587395807810244e-05, "loss": 0.8259, "step": 3075 }, { "epoch": 0.44, "learning_rate": 1.8580637724990205e-05, "loss": 0.9565, "step": 3076 }, { "epoch": 0.44, "learning_rate": 1.8573878871347474e-05, "loss": 0.8384, "step": 3077 }, { "epoch": 0.44, "learning_rate": 1.8567119248337066e-05, "loss": 0.793, "step": 3078 }, { "epoch": 0.44, "learning_rate": 1.856035885741415e-05, "loss": 0.7983, "step": 3079 }, { "epoch": 0.44, "learning_rate": 1.855359770003408e-05, "loss": 0.7416, "step": 3080 }, { "epoch": 0.44, "learning_rate": 1.8546835777652362e-05, "loss": 1.0636, "step": 3081 }, { "epoch": 0.44, "learning_rate": 1.8540073091724674e-05, "loss": 0.822, "step": 3082 }, { "epoch": 0.44, "learning_rate": 1.853330964370685e-05, "loss": 0.7472, "step": 3083 }, { "epoch": 0.44, "learning_rate": 1.8526545435054898e-05, "loss": 0.736, "step": 3084 }, { "epoch": 0.44, "learning_rate": 1.8519780467224977e-05, "loss": 0.9467, "step": 3085 }, { "epoch": 0.44, "learning_rate": 1.8513014741673425e-05, "loss": 0.7174, "step": 3086 }, { "epoch": 0.44, "learning_rate": 1.8506248259856735e-05, "loss": 0.9358, "step": 3087 }, { "epoch": 0.44, "learning_rate": 1.849948102323156e-05, "loss": 0.7919, "step": 3088 }, { "epoch": 0.44, "learning_rate": 1.849271303325472e-05, "loss": 0.9358, "step": 3089 }, { "epoch": 0.44, "learning_rate": 1.8485944291383192e-05, "loss": 0.9364, "step": 3090 }, { "epoch": 0.44, "learning_rate": 1.8479174799074124e-05, "loss": 0.7254, "step": 3091 }, { "epoch": 0.44, "learning_rate": 1.8472404557784817e-05, "loss": 0.6794, "step": 3092 }, { "epoch": 0.44, "learning_rate": 1.846563356897274e-05, "loss": 1.0525, "step": 3093 }, { "epoch": 0.44, "learning_rate": 1.8458861834095515e-05, "loss": 0.7729, "step": 3094 }, { "epoch": 0.44, "learning_rate": 1.8452089354610932e-05, "loss": 0.8025, "step": 3095 }, { "epoch": 0.44, "learning_rate": 1.8445316131976937e-05, "loss": 0.9933, "step": 3096 }, { "epoch": 0.44, "learning_rate": 1.8438542167651633e-05, "loss": 0.7656, "step": 3097 }, { "epoch": 0.44, "learning_rate": 1.8431767463093297e-05, "loss": 0.7637, "step": 3098 }, { "epoch": 0.44, "learning_rate": 1.8424992019760342e-05, "loss": 0.9199, "step": 3099 }, { "epoch": 0.44, "learning_rate": 1.8418215839111367e-05, "loss": 0.7723, "step": 3100 }, { "epoch": 0.44, "learning_rate": 1.8411438922605103e-05, "loss": 0.7852, "step": 3101 }, { "epoch": 0.44, "learning_rate": 1.8404661271700465e-05, "loss": 0.9169, "step": 3102 }, { "epoch": 0.44, "learning_rate": 1.8397882887856506e-05, "loss": 0.8128, "step": 3103 }, { "epoch": 0.44, "learning_rate": 1.8391103772532445e-05, "loss": 0.8616, "step": 3104 }, { "epoch": 0.44, "learning_rate": 1.8384323927187657e-05, "loss": 1.0262, "step": 3105 }, { "epoch": 0.44, "learning_rate": 1.837754335328168e-05, "loss": 0.6853, "step": 3106 }, { "epoch": 0.45, "learning_rate": 1.8370762052274198e-05, "loss": 0.9224, "step": 3107 }, { "epoch": 0.45, "learning_rate": 1.8363980025625062e-05, "loss": 0.7748, "step": 3108 }, { "epoch": 0.45, "learning_rate": 1.8357197274794272e-05, "loss": 0.9738, "step": 3109 }, { "epoch": 0.45, "learning_rate": 1.835041380124198e-05, "loss": 0.9643, "step": 3110 }, { "epoch": 0.45, "learning_rate": 1.834362960642851e-05, "loss": 0.8951, "step": 3111 }, { "epoch": 0.45, "learning_rate": 1.8336844691814333e-05, "loss": 0.8652, "step": 3112 }, { "epoch": 0.45, "learning_rate": 1.833005905886006e-05, "loss": 0.7031, "step": 3113 }, { "epoch": 0.45, "learning_rate": 1.8323272709026484e-05, "loss": 0.8834, "step": 3114 }, { "epoch": 0.45, "learning_rate": 1.831648564377453e-05, "loss": 0.8454, "step": 3115 }, { "epoch": 0.45, "learning_rate": 1.830969786456529e-05, "loss": 0.9319, "step": 3116 }, { "epoch": 0.45, "learning_rate": 1.8302909372859995e-05, "loss": 0.7162, "step": 3117 }, { "epoch": 0.45, "learning_rate": 1.8296120170120053e-05, "loss": 0.8817, "step": 3118 }, { "epoch": 0.45, "learning_rate": 1.8289330257807e-05, "loss": 0.906, "step": 3119 }, { "epoch": 0.45, "learning_rate": 1.828253963738254e-05, "loss": 0.9626, "step": 3120 }, { "epoch": 0.45, "learning_rate": 1.827574831030853e-05, "loss": 0.986, "step": 3121 }, { "epoch": 0.45, "learning_rate": 1.8268956278046968e-05, "loss": 0.8482, "step": 3122 }, { "epoch": 0.45, "learning_rate": 1.826216354206002e-05, "loss": 0.7852, "step": 3123 }, { "epoch": 0.45, "learning_rate": 1.8255370103809982e-05, "loss": 0.8521, "step": 3124 }, { "epoch": 0.45, "learning_rate": 1.824857596475932e-05, "loss": 0.8198, "step": 3125 }, { "epoch": 0.45, "learning_rate": 1.8241781126370638e-05, "loss": 0.7218, "step": 3126 }, { "epoch": 0.45, "learning_rate": 1.823498559010671e-05, "loss": 0.7648, "step": 3127 }, { "epoch": 0.45, "learning_rate": 1.822818935743043e-05, "loss": 0.8147, "step": 3128 }, { "epoch": 0.45, "learning_rate": 1.822139242980487e-05, "loss": 0.7946, "step": 3129 }, { "epoch": 0.45, "learning_rate": 1.8214594808693234e-05, "loss": 0.9704, "step": 3130 }, { "epoch": 0.45, "learning_rate": 1.8207796495558887e-05, "loss": 0.7716, "step": 3131 }, { "epoch": 0.45, "learning_rate": 1.8200997491865337e-05, "loss": 0.7729, "step": 3132 }, { "epoch": 0.45, "learning_rate": 1.8194197799076237e-05, "loss": 0.7492, "step": 3133 }, { "epoch": 0.45, "learning_rate": 1.8187397418655395e-05, "loss": 0.757, "step": 3134 }, { "epoch": 0.45, "learning_rate": 1.8180596352066764e-05, "loss": 0.7104, "step": 3135 }, { "epoch": 0.45, "learning_rate": 1.817379460077445e-05, "loss": 0.8315, "step": 3136 }, { "epoch": 0.45, "learning_rate": 1.8166992166242697e-05, "loss": 1.0067, "step": 3137 }, { "epoch": 0.45, "learning_rate": 1.8160189049935895e-05, "loss": 0.8214, "step": 3138 }, { "epoch": 0.45, "learning_rate": 1.81533852533186e-05, "loss": 0.6967, "step": 3139 }, { "epoch": 0.45, "learning_rate": 1.8146580777855487e-05, "loss": 0.7924, "step": 3140 }, { "epoch": 0.45, "learning_rate": 1.813977562501141e-05, "loss": 0.9143, "step": 3141 }, { "epoch": 0.45, "learning_rate": 1.8132969796251326e-05, "loss": 0.9219, "step": 3142 }, { "epoch": 0.45, "learning_rate": 1.8126163293040377e-05, "loss": 0.7656, "step": 3143 }, { "epoch": 0.45, "learning_rate": 1.8119356116843836e-05, "loss": 0.8984, "step": 3144 }, { "epoch": 0.45, "learning_rate": 1.8112548269127112e-05, "loss": 0.88, "step": 3145 }, { "epoch": 0.45, "learning_rate": 1.8105739751355773e-05, "loss": 0.8259, "step": 3146 }, { "epoch": 0.45, "learning_rate": 1.8098930564995512e-05, "loss": 0.9877, "step": 3147 }, { "epoch": 0.45, "learning_rate": 1.8092120711512196e-05, "loss": 0.9129, "step": 3148 }, { "epoch": 0.45, "learning_rate": 1.8085310192371806e-05, "loss": 0.9152, "step": 3149 }, { "epoch": 0.45, "learning_rate": 1.8078499009040485e-05, "loss": 0.7598, "step": 3150 }, { "epoch": 0.45, "learning_rate": 1.8071687162984497e-05, "loss": 0.7003, "step": 3151 }, { "epoch": 0.45, "learning_rate": 1.8064874655670284e-05, "loss": 0.6507, "step": 3152 }, { "epoch": 0.45, "learning_rate": 1.8058061488564402e-05, "loss": 0.8588, "step": 3153 }, { "epoch": 0.45, "learning_rate": 1.8051247663133552e-05, "loss": 0.9598, "step": 3154 }, { "epoch": 0.45, "learning_rate": 1.804443318084459e-05, "loss": 0.9085, "step": 3155 }, { "epoch": 0.45, "learning_rate": 1.8037618043164498e-05, "loss": 0.8298, "step": 3156 }, { "epoch": 0.45, "learning_rate": 1.8030802251560416e-05, "loss": 0.8956, "step": 3157 }, { "epoch": 0.45, "learning_rate": 1.802398580749961e-05, "loss": 0.9037, "step": 3158 }, { "epoch": 0.45, "learning_rate": 1.8017168712449488e-05, "loss": 0.9269, "step": 3159 }, { "epoch": 0.45, "learning_rate": 1.801035096787761e-05, "loss": 0.7885, "step": 3160 }, { "epoch": 0.45, "learning_rate": 1.800353257525166e-05, "loss": 0.8281, "step": 3161 }, { "epoch": 0.45, "learning_rate": 1.7996713536039473e-05, "loss": 0.9141, "step": 3162 }, { "epoch": 0.45, "learning_rate": 1.7989893851709016e-05, "loss": 0.8443, "step": 3163 }, { "epoch": 0.45, "learning_rate": 1.7983073523728403e-05, "loss": 0.8393, "step": 3164 }, { "epoch": 0.45, "learning_rate": 1.7976252553565882e-05, "loss": 0.9063, "step": 3165 }, { "epoch": 0.45, "learning_rate": 1.7969430942689838e-05, "loss": 0.7687, "step": 3166 }, { "epoch": 0.45, "learning_rate": 1.796260869256879e-05, "loss": 0.8086, "step": 3167 }, { "epoch": 0.45, "learning_rate": 1.7955785804671407e-05, "loss": 0.7801, "step": 3168 }, { "epoch": 0.45, "learning_rate": 1.7948962280466483e-05, "loss": 0.7282, "step": 3169 }, { "epoch": 0.45, "learning_rate": 1.7942138121422955e-05, "loss": 0.8052, "step": 3170 }, { "epoch": 0.45, "learning_rate": 1.79353133290099e-05, "loss": 0.7952, "step": 3171 }, { "epoch": 0.45, "learning_rate": 1.7928487904696515e-05, "loss": 0.8934, "step": 3172 }, { "epoch": 0.45, "learning_rate": 1.7921661849952163e-05, "loss": 0.7985, "step": 3173 }, { "epoch": 0.45, "learning_rate": 1.7914835166246305e-05, "loss": 0.8834, "step": 3174 }, { "epoch": 0.45, "learning_rate": 1.7908007855048574e-05, "loss": 0.774, "step": 3175 }, { "epoch": 0.45, "learning_rate": 1.790117991782871e-05, "loss": 0.9827, "step": 3176 }, { "epoch": 0.46, "learning_rate": 1.7894351356056603e-05, "loss": 0.798, "step": 3177 }, { "epoch": 0.46, "learning_rate": 1.7887522171202273e-05, "loss": 0.7054, "step": 3178 }, { "epoch": 0.46, "learning_rate": 1.7880692364735878e-05, "loss": 0.8633, "step": 3179 }, { "epoch": 0.46, "learning_rate": 1.78738619381277e-05, "loss": 0.8677, "step": 3180 }, { "epoch": 0.46, "learning_rate": 1.7867030892848162e-05, "loss": 0.7737, "step": 3181 }, { "epoch": 0.46, "learning_rate": 1.7860199230367825e-05, "loss": 0.7709, "step": 3182 }, { "epoch": 0.46, "learning_rate": 1.785336695215737e-05, "loss": 0.9994, "step": 3183 }, { "epoch": 0.46, "learning_rate": 1.784653405968762e-05, "loss": 0.9353, "step": 3184 }, { "epoch": 0.46, "learning_rate": 1.7839700554429527e-05, "loss": 0.7773, "step": 3185 }, { "epoch": 0.46, "learning_rate": 1.783286643785418e-05, "loss": 0.8744, "step": 3186 }, { "epoch": 0.46, "learning_rate": 1.7826031711432788e-05, "loss": 0.7863, "step": 3187 }, { "epoch": 0.46, "learning_rate": 1.7819196376636703e-05, "loss": 0.7779, "step": 3188 }, { "epoch": 0.46, "learning_rate": 1.78123604349374e-05, "loss": 0.8131, "step": 3189 }, { "epoch": 0.46, "learning_rate": 1.7805523887806495e-05, "loss": 0.9609, "step": 3190 }, { "epoch": 0.46, "learning_rate": 1.7798686736715723e-05, "loss": 0.853, "step": 3191 }, { "epoch": 0.46, "learning_rate": 1.7791848983136948e-05, "loss": 0.858, "step": 3192 }, { "epoch": 0.46, "learning_rate": 1.778501062854218e-05, "loss": 0.957, "step": 3193 }, { "epoch": 0.46, "learning_rate": 1.7778171674403536e-05, "loss": 0.9234, "step": 3194 }, { "epoch": 0.46, "learning_rate": 1.777133212219329e-05, "loss": 0.7386, "step": 3195 }, { "epoch": 0.46, "learning_rate": 1.7764491973383803e-05, "loss": 0.9537, "step": 3196 }, { "epoch": 0.46, "learning_rate": 1.775765122944762e-05, "loss": 0.7715, "step": 3197 }, { "epoch": 0.46, "learning_rate": 1.775080989185736e-05, "loss": 0.7813, "step": 3198 }, { "epoch": 0.46, "learning_rate": 1.77439679620858e-05, "loss": 0.719, "step": 3199 }, { "epoch": 0.46, "learning_rate": 1.7737125441605846e-05, "loss": 0.8973, "step": 3200 }, { "epoch": 0.46, "learning_rate": 1.7730282331890514e-05, "loss": 0.793, "step": 3201 }, { "epoch": 0.46, "learning_rate": 1.7723438634412964e-05, "loss": 0.7503, "step": 3202 }, { "epoch": 0.46, "learning_rate": 1.7716594350646466e-05, "loss": 0.8175, "step": 3203 }, { "epoch": 0.46, "learning_rate": 1.7709749482064433e-05, "loss": 0.8019, "step": 3204 }, { "epoch": 0.46, "learning_rate": 1.770290403014039e-05, "loss": 0.8622, "step": 3205 }, { "epoch": 0.46, "learning_rate": 1.7696057996347997e-05, "loss": 0.957, "step": 3206 }, { "epoch": 0.46, "learning_rate": 1.7689211382161035e-05, "loss": 0.7958, "step": 3207 }, { "epoch": 0.46, "learning_rate": 1.7682364189053414e-05, "loss": 0.8371, "step": 3208 }, { "epoch": 0.46, "learning_rate": 1.7675516418499153e-05, "loss": 0.8415, "step": 3209 }, { "epoch": 0.46, "learning_rate": 1.7668668071972418e-05, "loss": 0.8133, "step": 3210 }, { "epoch": 0.46, "learning_rate": 1.7661819150947495e-05, "loss": 0.7634, "step": 3211 }, { "epoch": 0.46, "learning_rate": 1.765496965689877e-05, "loss": 0.7902, "step": 3212 }, { "epoch": 0.46, "learning_rate": 1.7648119591300778e-05, "loss": 0.8396, "step": 3213 }, { "epoch": 0.46, "learning_rate": 1.764126895562817e-05, "loss": 0.6854, "step": 3214 }, { "epoch": 0.46, "learning_rate": 1.763441775135572e-05, "loss": 0.8789, "step": 3215 }, { "epoch": 0.46, "learning_rate": 1.7627565979958317e-05, "loss": 0.6987, "step": 3216 }, { "epoch": 0.46, "learning_rate": 1.7620713642910978e-05, "loss": 0.6964, "step": 3217 }, { "epoch": 0.46, "learning_rate": 1.761386074168885e-05, "loss": 0.8147, "step": 3218 }, { "epoch": 0.46, "learning_rate": 1.7607007277767184e-05, "loss": 0.7807, "step": 3219 }, { "epoch": 0.46, "learning_rate": 1.7600153252621365e-05, "loss": 0.8256, "step": 3220 }, { "epoch": 0.46, "learning_rate": 1.7593298667726895e-05, "loss": 0.909, "step": 3221 }, { "epoch": 0.46, "learning_rate": 1.75864435245594e-05, "loss": 0.7905, "step": 3222 }, { "epoch": 0.46, "learning_rate": 1.7579587824594612e-05, "loss": 0.7656, "step": 3223 }, { "epoch": 0.46, "learning_rate": 1.75727315693084e-05, "loss": 0.7863, "step": 3224 }, { "epoch": 0.46, "learning_rate": 1.7565874760176755e-05, "loss": 0.7773, "step": 3225 }, { "epoch": 0.46, "learning_rate": 1.7559017398675758e-05, "loss": 0.7489, "step": 3226 }, { "epoch": 0.46, "learning_rate": 1.755215948628165e-05, "loss": 0.7991, "step": 3227 }, { "epoch": 0.46, "learning_rate": 1.7545301024470757e-05, "loss": 0.7598, "step": 3228 }, { "epoch": 0.46, "learning_rate": 1.7538442014719545e-05, "loss": 0.7606, "step": 3229 }, { "epoch": 0.46, "learning_rate": 1.7531582458504576e-05, "loss": 0.7433, "step": 3230 }, { "epoch": 0.46, "learning_rate": 1.7524722357302555e-05, "loss": 0.851, "step": 3231 }, { "epoch": 0.46, "learning_rate": 1.7517861712590286e-05, "loss": 1.0592, "step": 3232 }, { "epoch": 0.46, "learning_rate": 1.7511000525844697e-05, "loss": 0.8571, "step": 3233 }, { "epoch": 0.46, "learning_rate": 1.7504138798542833e-05, "loss": 0.9375, "step": 3234 }, { "epoch": 0.46, "learning_rate": 1.7497276532161853e-05, "loss": 0.9297, "step": 3235 }, { "epoch": 0.46, "learning_rate": 1.749041372817904e-05, "loss": 0.8225, "step": 3236 }, { "epoch": 0.46, "learning_rate": 1.7483550388071777e-05, "loss": 0.7469, "step": 3237 }, { "epoch": 0.46, "learning_rate": 1.747668651331757e-05, "loss": 0.6458, "step": 3238 }, { "epoch": 0.46, "learning_rate": 1.7469822105394043e-05, "loss": 0.7732, "step": 3239 }, { "epoch": 0.46, "learning_rate": 1.7462957165778938e-05, "loss": 0.9492, "step": 3240 }, { "epoch": 0.46, "learning_rate": 1.7456091695950106e-05, "loss": 0.8862, "step": 3241 }, { "epoch": 0.46, "learning_rate": 1.744922569738551e-05, "loss": 0.7252, "step": 3242 }, { "epoch": 0.46, "learning_rate": 1.744235917156323e-05, "loss": 0.9732, "step": 3243 }, { "epoch": 0.46, "learning_rate": 1.7435492119961455e-05, "loss": 0.8225, "step": 3244 }, { "epoch": 0.46, "learning_rate": 1.7428624544058505e-05, "loss": 0.7539, "step": 3245 }, { "epoch": 0.46, "learning_rate": 1.742175644533278e-05, "loss": 0.7776, "step": 3246 }, { "epoch": 0.47, "learning_rate": 1.7414887825262828e-05, "loss": 0.8167, "step": 3247 }, { "epoch": 0.47, "learning_rate": 1.7408018685327282e-05, "loss": 0.7324, "step": 3248 }, { "epoch": 0.47, "learning_rate": 1.7401149027004907e-05, "loss": 0.798, "step": 3249 }, { "epoch": 0.47, "learning_rate": 1.7394278851774563e-05, "loss": 0.7517, "step": 3250 }, { "epoch": 0.47, "learning_rate": 1.738740816111523e-05, "loss": 0.6696, "step": 3251 }, { "epoch": 0.47, "learning_rate": 1.7380536956506002e-05, "loss": 0.8097, "step": 3252 }, { "epoch": 0.47, "learning_rate": 1.7373665239426074e-05, "loss": 0.7405, "step": 3253 }, { "epoch": 0.47, "learning_rate": 1.7366793011354763e-05, "loss": 0.721, "step": 3254 }, { "epoch": 0.47, "learning_rate": 1.735992027377148e-05, "loss": 0.9749, "step": 3255 }, { "epoch": 0.47, "learning_rate": 1.7353047028155768e-05, "loss": 0.7296, "step": 3256 }, { "epoch": 0.47, "learning_rate": 1.7346173275987254e-05, "loss": 0.8248, "step": 3257 }, { "epoch": 0.47, "learning_rate": 1.7339299018745692e-05, "loss": 1.0089, "step": 3258 }, { "epoch": 0.47, "learning_rate": 1.7332424257910944e-05, "loss": 0.7879, "step": 3259 }, { "epoch": 0.47, "learning_rate": 1.732554899496297e-05, "loss": 1.0234, "step": 3260 }, { "epoch": 0.47, "learning_rate": 1.731867323138185e-05, "loss": 0.683, "step": 3261 }, { "epoch": 0.47, "learning_rate": 1.731179696864775e-05, "loss": 0.7952, "step": 3262 }, { "epoch": 0.47, "learning_rate": 1.730492020824098e-05, "loss": 0.6214, "step": 3263 }, { "epoch": 0.47, "learning_rate": 1.7298042951641924e-05, "loss": 0.7623, "step": 3264 }, { "epoch": 0.47, "learning_rate": 1.729116520033109e-05, "loss": 0.7556, "step": 3265 }, { "epoch": 0.47, "learning_rate": 1.7284286955789086e-05, "loss": 0.8181, "step": 3266 }, { "epoch": 0.47, "learning_rate": 1.727740821949662e-05, "loss": 0.8147, "step": 3267 }, { "epoch": 0.47, "learning_rate": 1.7270528992934535e-05, "loss": 0.8923, "step": 3268 }, { "epoch": 0.47, "learning_rate": 1.7263649277583735e-05, "loss": 0.8133, "step": 3269 }, { "epoch": 0.47, "learning_rate": 1.725676907492527e-05, "loss": 0.8315, "step": 3270 }, { "epoch": 0.47, "learning_rate": 1.724988838644026e-05, "loss": 0.8432, "step": 3271 }, { "epoch": 0.47, "learning_rate": 1.7243007213609963e-05, "loss": 0.8873, "step": 3272 }, { "epoch": 0.47, "learning_rate": 1.7236125557915717e-05, "loss": 0.8418, "step": 3273 }, { "epoch": 0.47, "learning_rate": 1.7229243420838974e-05, "loss": 0.7751, "step": 3274 }, { "epoch": 0.47, "learning_rate": 1.722236080386129e-05, "loss": 0.8432, "step": 3275 }, { "epoch": 0.47, "learning_rate": 1.7215477708464312e-05, "loss": 0.745, "step": 3276 }, { "epoch": 0.47, "learning_rate": 1.7208594136129816e-05, "loss": 0.851, "step": 3277 }, { "epoch": 0.47, "learning_rate": 1.7201710088339654e-05, "loss": 0.9068, "step": 3278 }, { "epoch": 0.47, "learning_rate": 1.7194825566575797e-05, "loss": 0.8912, "step": 3279 }, { "epoch": 0.47, "learning_rate": 1.7187940572320303e-05, "loss": 0.8186, "step": 3280 }, { "epoch": 0.47, "learning_rate": 1.718105510705535e-05, "loss": 0.9788, "step": 3281 }, { "epoch": 0.47, "learning_rate": 1.7174169172263206e-05, "loss": 0.8136, "step": 3282 }, { "epoch": 0.47, "learning_rate": 1.716728276942624e-05, "loss": 0.8119, "step": 3283 }, { "epoch": 0.47, "learning_rate": 1.7160395900026925e-05, "loss": 0.8192, "step": 3284 }, { "epoch": 0.47, "learning_rate": 1.7153508565547835e-05, "loss": 0.9001, "step": 3285 }, { "epoch": 0.47, "learning_rate": 1.7146620767471645e-05, "loss": 0.8599, "step": 3286 }, { "epoch": 0.47, "learning_rate": 1.713973250728112e-05, "loss": 0.7651, "step": 3287 }, { "epoch": 0.47, "learning_rate": 1.7132843786459135e-05, "loss": 0.9487, "step": 3288 }, { "epoch": 0.47, "learning_rate": 1.7125954606488668e-05, "loss": 0.9754, "step": 3289 }, { "epoch": 0.47, "learning_rate": 1.711906496885278e-05, "loss": 0.8471, "step": 3290 }, { "epoch": 0.47, "learning_rate": 1.7112174875034644e-05, "loss": 0.8443, "step": 3291 }, { "epoch": 0.47, "learning_rate": 1.7105284326517527e-05, "loss": 0.7268, "step": 3292 }, { "epoch": 0.47, "learning_rate": 1.7098393324784788e-05, "loss": 0.7377, "step": 3293 }, { "epoch": 0.47, "learning_rate": 1.7091501871319896e-05, "loss": 0.8834, "step": 3294 }, { "epoch": 0.47, "learning_rate": 1.7084609967606407e-05, "loss": 0.8571, "step": 3295 }, { "epoch": 0.47, "learning_rate": 1.7077717615127978e-05, "loss": 0.6814, "step": 3296 }, { "epoch": 0.47, "learning_rate": 1.7070824815368364e-05, "loss": 0.8491, "step": 3297 }, { "epoch": 0.47, "learning_rate": 1.706393156981141e-05, "loss": 0.8566, "step": 3298 }, { "epoch": 0.47, "learning_rate": 1.7057037879941066e-05, "loss": 0.9994, "step": 3299 }, { "epoch": 0.47, "learning_rate": 1.7050143747241372e-05, "loss": 0.9576, "step": 3300 }, { "epoch": 0.47, "learning_rate": 1.7043249173196456e-05, "loss": 0.8521, "step": 3301 }, { "epoch": 0.47, "learning_rate": 1.7036354159290555e-05, "loss": 0.8376, "step": 3302 }, { "epoch": 0.47, "learning_rate": 1.7029458707008e-05, "loss": 0.8661, "step": 3303 }, { "epoch": 0.47, "learning_rate": 1.702256281783321e-05, "loss": 0.8613, "step": 3304 }, { "epoch": 0.47, "learning_rate": 1.7015666493250688e-05, "loss": 0.9252, "step": 3305 }, { "epoch": 0.47, "learning_rate": 1.7008769734745056e-05, "loss": 0.7303, "step": 3306 }, { "epoch": 0.47, "learning_rate": 1.7001872543801008e-05, "loss": 0.7475, "step": 3307 }, { "epoch": 0.47, "learning_rate": 1.699497492190334e-05, "loss": 1.144, "step": 3308 }, { "epoch": 0.47, "learning_rate": 1.6988076870536934e-05, "loss": 0.7846, "step": 3309 }, { "epoch": 0.47, "learning_rate": 1.6981178391186777e-05, "loss": 0.9554, "step": 3310 }, { "epoch": 0.47, "learning_rate": 1.6974279485337946e-05, "loss": 0.7695, "step": 3311 }, { "epoch": 0.47, "learning_rate": 1.6967380154475592e-05, "loss": 0.9978, "step": 3312 }, { "epoch": 0.47, "learning_rate": 1.6960480400084977e-05, "loss": 0.9364, "step": 3313 }, { "epoch": 0.47, "learning_rate": 1.6953580223651445e-05, "loss": 0.7556, "step": 3314 }, { "epoch": 0.47, "learning_rate": 1.6946679626660437e-05, "loss": 0.865, "step": 3315 }, { "epoch": 0.48, "learning_rate": 1.6939778610597476e-05, "loss": 0.8041, "step": 3316 }, { "epoch": 0.48, "learning_rate": 1.6932877176948183e-05, "loss": 0.7687, "step": 3317 }, { "epoch": 0.48, "learning_rate": 1.6925975327198265e-05, "loss": 0.8722, "step": 3318 }, { "epoch": 0.48, "learning_rate": 1.691907306283352e-05, "loss": 0.942, "step": 3319 }, { "epoch": 0.48, "learning_rate": 1.6912170385339836e-05, "loss": 0.9905, "step": 3320 }, { "epoch": 0.48, "learning_rate": 1.6905267296203182e-05, "loss": 0.8499, "step": 3321 }, { "epoch": 0.48, "learning_rate": 1.6898363796909633e-05, "loss": 0.9833, "step": 3322 }, { "epoch": 0.48, "learning_rate": 1.689145988894533e-05, "loss": 0.8781, "step": 3323 }, { "epoch": 0.48, "learning_rate": 1.6884555573796525e-05, "loss": 0.8605, "step": 3324 }, { "epoch": 0.48, "learning_rate": 1.6877650852949533e-05, "loss": 0.7277, "step": 3325 }, { "epoch": 0.48, "learning_rate": 1.6870745727890784e-05, "loss": 0.7743, "step": 3326 }, { "epoch": 0.48, "learning_rate": 1.686384020010677e-05, "loss": 0.7243, "step": 3327 }, { "epoch": 0.48, "learning_rate": 1.6856934271084083e-05, "loss": 0.8309, "step": 3328 }, { "epoch": 0.48, "learning_rate": 1.6850027942309405e-05, "loss": 0.7765, "step": 3329 }, { "epoch": 0.48, "learning_rate": 1.6843121215269482e-05, "loss": 0.8382, "step": 3330 }, { "epoch": 0.48, "learning_rate": 1.683621409145118e-05, "loss": 1.0268, "step": 3331 }, { "epoch": 0.48, "learning_rate": 1.682930657234142e-05, "loss": 0.8209, "step": 3332 }, { "epoch": 0.48, "learning_rate": 1.6822398659427226e-05, "loss": 1.0218, "step": 3333 }, { "epoch": 0.48, "learning_rate": 1.6815490354195695e-05, "loss": 0.9118, "step": 3334 }, { "epoch": 0.48, "learning_rate": 1.680858165813402e-05, "loss": 0.9576, "step": 3335 }, { "epoch": 0.48, "learning_rate": 1.6801672572729468e-05, "loss": 0.697, "step": 3336 }, { "epoch": 0.48, "learning_rate": 1.6794763099469396e-05, "loss": 0.7179, "step": 3337 }, { "epoch": 0.48, "learning_rate": 1.6787853239841242e-05, "loss": 0.8133, "step": 3338 }, { "epoch": 0.48, "learning_rate": 1.6780942995332528e-05, "loss": 0.8253, "step": 3339 }, { "epoch": 0.48, "learning_rate": 1.677403236743086e-05, "loss": 0.9085, "step": 3340 }, { "epoch": 0.48, "learning_rate": 1.6767121357623922e-05, "loss": 0.923, "step": 3341 }, { "epoch": 0.48, "learning_rate": 1.6760209967399484e-05, "loss": 0.9249, "step": 3342 }, { "epoch": 0.48, "learning_rate": 1.6753298198245397e-05, "loss": 0.8945, "step": 3343 }, { "epoch": 0.48, "learning_rate": 1.67463860516496e-05, "loss": 0.9866, "step": 3344 }, { "epoch": 0.48, "learning_rate": 1.6739473529100104e-05, "loss": 0.8705, "step": 3345 }, { "epoch": 0.48, "learning_rate": 1.673256063208499e-05, "loss": 0.8622, "step": 3346 }, { "epoch": 0.48, "learning_rate": 1.6725647362092463e-05, "loss": 0.9408, "step": 3347 }, { "epoch": 0.48, "learning_rate": 1.6718733720610752e-05, "loss": 0.8465, "step": 3348 }, { "epoch": 0.48, "learning_rate": 1.6711819709128208e-05, "loss": 0.7952, "step": 3349 }, { "epoch": 0.48, "learning_rate": 1.6704905329133232e-05, "loss": 0.8906, "step": 3350 }, { "epoch": 0.48, "learning_rate": 1.6697990582114338e-05, "loss": 0.9068, "step": 3351 }, { "epoch": 0.48, "learning_rate": 1.669107546956009e-05, "loss": 0.8002, "step": 3352 }, { "epoch": 0.48, "learning_rate": 1.6684159992959136e-05, "loss": 0.8373, "step": 3353 }, { "epoch": 0.48, "learning_rate": 1.6677244153800218e-05, "loss": 0.8153, "step": 3354 }, { "epoch": 0.48, "learning_rate": 1.667032795357213e-05, "loss": 0.7857, "step": 3355 }, { "epoch": 0.48, "learning_rate": 1.6663411393763778e-05, "loss": 0.7614, "step": 3356 }, { "epoch": 0.48, "learning_rate": 1.665649447586411e-05, "loss": 0.8555, "step": 3357 }, { "epoch": 0.48, "learning_rate": 1.6649577201362176e-05, "loss": 0.894, "step": 3358 }, { "epoch": 0.48, "learning_rate": 1.6642659571747087e-05, "loss": 0.8655, "step": 3359 }, { "epoch": 0.48, "learning_rate": 1.6635741588508046e-05, "loss": 0.7132, "step": 3360 }, { "epoch": 0.48, "learning_rate": 1.6628823253134318e-05, "loss": 0.8175, "step": 3361 }, { "epoch": 0.48, "learning_rate": 1.6621904567115252e-05, "loss": 0.8588, "step": 3362 }, { "epoch": 0.48, "learning_rate": 1.6614985531940267e-05, "loss": 0.8761, "step": 3363 }, { "epoch": 0.48, "learning_rate": 1.660806614909886e-05, "loss": 0.7299, "step": 3364 }, { "epoch": 0.48, "learning_rate": 1.6601146420080612e-05, "loss": 0.7734, "step": 3365 }, { "epoch": 0.48, "learning_rate": 1.6594226346375156e-05, "loss": 0.9213, "step": 3366 }, { "epoch": 0.48, "learning_rate": 1.6587305929472214e-05, "loss": 0.8036, "step": 3367 }, { "epoch": 0.48, "learning_rate": 1.6580385170861592e-05, "loss": 0.8912, "step": 3368 }, { "epoch": 0.48, "learning_rate": 1.657346407203315e-05, "loss": 0.8828, "step": 3369 }, { "epoch": 0.48, "learning_rate": 1.656654263447683e-05, "loss": 0.885, "step": 3370 }, { "epoch": 0.48, "learning_rate": 1.6559620859682644e-05, "loss": 1.0424, "step": 3371 }, { "epoch": 0.48, "learning_rate": 1.6552698749140685e-05, "loss": 0.8744, "step": 3372 }, { "epoch": 0.48, "learning_rate": 1.6545776304341107e-05, "loss": 0.9297, "step": 3373 }, { "epoch": 0.48, "learning_rate": 1.6538853526774147e-05, "loss": 0.8248, "step": 3374 }, { "epoch": 0.48, "learning_rate": 1.6531930417930097e-05, "loss": 1.0067, "step": 3375 }, { "epoch": 0.48, "learning_rate": 1.6525006979299342e-05, "loss": 0.8806, "step": 3376 }, { "epoch": 0.48, "learning_rate": 1.651808321237232e-05, "loss": 1.0246, "step": 3377 }, { "epoch": 0.48, "learning_rate": 1.6511159118639548e-05, "loss": 0.8527, "step": 3378 }, { "epoch": 0.48, "learning_rate": 1.650423469959162e-05, "loss": 0.7042, "step": 3379 }, { "epoch": 0.48, "learning_rate": 1.6497309956719174e-05, "loss": 0.8666, "step": 3380 }, { "epoch": 0.48, "learning_rate": 1.6490384891512958e-05, "loss": 0.7277, "step": 3381 }, { "epoch": 0.48, "learning_rate": 1.648345950546375e-05, "loss": 0.76, "step": 3382 }, { "epoch": 0.48, "learning_rate": 1.6476533800062423e-05, "loss": 0.8164, "step": 3383 }, { "epoch": 0.48, "learning_rate": 1.64696077767999e-05, "loss": 0.9827, "step": 3384 }, { "epoch": 0.48, "learning_rate": 1.64626814371672e-05, "loss": 0.8672, "step": 3385 }, { "epoch": 0.49, "learning_rate": 1.6455754782655377e-05, "loss": 0.8404, "step": 3386 }, { "epoch": 0.49, "learning_rate": 1.6448827814755576e-05, "loss": 0.8917, "step": 3387 }, { "epoch": 0.49, "learning_rate": 1.6441900534958995e-05, "loss": 0.9235, "step": 3388 }, { "epoch": 0.49, "learning_rate": 1.643497294475691e-05, "loss": 0.7985, "step": 3389 }, { "epoch": 0.49, "learning_rate": 1.6428045045640672e-05, "loss": 0.7907, "step": 3390 }, { "epoch": 0.49, "learning_rate": 1.6421116839101664e-05, "loss": 0.7559, "step": 3391 }, { "epoch": 0.49, "learning_rate": 1.6414188326631368e-05, "loss": 0.8281, "step": 3392 }, { "epoch": 0.49, "learning_rate": 1.640725950972132e-05, "loss": 0.6378, "step": 3393 }, { "epoch": 0.49, "learning_rate": 1.6400330389863128e-05, "loss": 0.9849, "step": 3394 }, { "epoch": 0.49, "learning_rate": 1.6393400968548458e-05, "loss": 0.827, "step": 3395 }, { "epoch": 0.49, "learning_rate": 1.6386471247269033e-05, "loss": 0.6842, "step": 3396 }, { "epoch": 0.49, "learning_rate": 1.6379541227516665e-05, "loss": 0.9782, "step": 3397 }, { "epoch": 0.49, "learning_rate": 1.6372610910783202e-05, "loss": 0.8482, "step": 3398 }, { "epoch": 0.49, "learning_rate": 1.6365680298560588e-05, "loss": 0.8549, "step": 3399 }, { "epoch": 0.49, "learning_rate": 1.635874939234079e-05, "loss": 0.8055, "step": 3400 }, { "epoch": 0.49, "learning_rate": 1.6351818193615876e-05, "loss": 0.8301, "step": 3401 }, { "epoch": 0.49, "learning_rate": 1.6344886703877957e-05, "loss": 0.8772, "step": 3402 }, { "epoch": 0.49, "learning_rate": 1.6337954924619213e-05, "loss": 0.8136, "step": 3403 }, { "epoch": 0.49, "learning_rate": 1.6331022857331886e-05, "loss": 0.894, "step": 3404 }, { "epoch": 0.49, "learning_rate": 1.632409050350826e-05, "loss": 0.9235, "step": 3405 }, { "epoch": 0.49, "learning_rate": 1.6317157864640732e-05, "loss": 0.8783, "step": 3406 }, { "epoch": 0.49, "learning_rate": 1.6310224942221704e-05, "loss": 0.7076, "step": 3407 }, { "epoch": 0.49, "learning_rate": 1.6303291737743673e-05, "loss": 0.7037, "step": 3408 }, { "epoch": 0.49, "learning_rate": 1.6296358252699175e-05, "loss": 0.8856, "step": 3409 }, { "epoch": 0.49, "learning_rate": 1.628942448858083e-05, "loss": 0.8304, "step": 3410 }, { "epoch": 0.49, "learning_rate": 1.62824904468813e-05, "loss": 0.8906, "step": 3411 }, { "epoch": 0.49, "learning_rate": 1.627555612909331e-05, "loss": 0.9018, "step": 3412 }, { "epoch": 0.49, "learning_rate": 1.6268621536709653e-05, "loss": 0.9286, "step": 3413 }, { "epoch": 0.49, "learning_rate": 1.6261686671223174e-05, "loss": 0.841, "step": 3414 }, { "epoch": 0.49, "learning_rate": 1.6254751534126777e-05, "loss": 0.7408, "step": 3415 }, { "epoch": 0.49, "learning_rate": 1.6247816126913424e-05, "loss": 0.8362, "step": 3416 }, { "epoch": 0.49, "learning_rate": 1.6240880451076138e-05, "loss": 0.8493, "step": 3417 }, { "epoch": 0.49, "learning_rate": 1.623394450810799e-05, "loss": 0.8404, "step": 3418 }, { "epoch": 0.49, "learning_rate": 1.6227008299502137e-05, "loss": 0.9609, "step": 3419 }, { "epoch": 0.49, "learning_rate": 1.6220071826751754e-05, "loss": 0.9375, "step": 3420 }, { "epoch": 0.49, "learning_rate": 1.6213135091350103e-05, "loss": 0.7846, "step": 3421 }, { "epoch": 0.49, "learning_rate": 1.6206198094790483e-05, "loss": 0.8583, "step": 3422 }, { "epoch": 0.49, "learning_rate": 1.6199260838566263e-05, "loss": 0.7896, "step": 3423 }, { "epoch": 0.49, "learning_rate": 1.6192323324170866e-05, "loss": 0.9481, "step": 3424 }, { "epoch": 0.49, "learning_rate": 1.6185385553097754e-05, "loss": 0.7031, "step": 3425 }, { "epoch": 0.49, "learning_rate": 1.6178447526840472e-05, "loss": 0.8376, "step": 3426 }, { "epoch": 0.49, "learning_rate": 1.6171509246892598e-05, "loss": 0.986, "step": 3427 }, { "epoch": 0.49, "learning_rate": 1.6164570714747776e-05, "loss": 0.9727, "step": 3428 }, { "epoch": 0.49, "learning_rate": 1.6157631931899696e-05, "loss": 0.6922, "step": 3429 }, { "epoch": 0.49, "learning_rate": 1.6150692899842107e-05, "loss": 0.6719, "step": 3430 }, { "epoch": 0.49, "learning_rate": 1.6143753620068813e-05, "loss": 0.7213, "step": 3431 }, { "epoch": 0.49, "learning_rate": 1.6136814094073667e-05, "loss": 0.8237, "step": 3432 }, { "epoch": 0.49, "learning_rate": 1.6129874323350584e-05, "loss": 0.9113, "step": 3433 }, { "epoch": 0.49, "learning_rate": 1.6122934309393513e-05, "loss": 0.726, "step": 3434 }, { "epoch": 0.49, "learning_rate": 1.6115994053696474e-05, "loss": 0.9314, "step": 3435 }, { "epoch": 0.49, "learning_rate": 1.6109053557753533e-05, "loss": 0.8114, "step": 3436 }, { "epoch": 0.49, "learning_rate": 1.6102112823058804e-05, "loss": 0.9581, "step": 3437 }, { "epoch": 0.49, "learning_rate": 1.609517185110646e-05, "loss": 0.8672, "step": 3438 }, { "epoch": 0.49, "learning_rate": 1.6088230643390718e-05, "loss": 0.7182, "step": 3439 }, { "epoch": 0.49, "learning_rate": 1.608128920140585e-05, "loss": 0.7425, "step": 3440 }, { "epoch": 0.49, "learning_rate": 1.607434752664617e-05, "loss": 0.8996, "step": 3441 }, { "epoch": 0.49, "learning_rate": 1.6067405620606053e-05, "loss": 0.6685, "step": 3442 }, { "epoch": 0.49, "learning_rate": 1.606046348477992e-05, "loss": 1.0089, "step": 3443 }, { "epoch": 0.49, "learning_rate": 1.6053521120662248e-05, "loss": 0.9102, "step": 3444 }, { "epoch": 0.49, "learning_rate": 1.6046578529747544e-05, "loss": 0.8566, "step": 3445 }, { "epoch": 0.49, "learning_rate": 1.603963571353038e-05, "loss": 0.7528, "step": 3446 }, { "epoch": 0.49, "learning_rate": 1.6032692673505374e-05, "loss": 0.8209, "step": 3447 }, { "epoch": 0.49, "learning_rate": 1.602574941116719e-05, "loss": 0.7296, "step": 3448 }, { "epoch": 0.49, "learning_rate": 1.6018805928010547e-05, "loss": 0.7679, "step": 3449 }, { "epoch": 0.49, "learning_rate": 1.6011862225530194e-05, "loss": 0.661, "step": 3450 }, { "epoch": 0.49, "learning_rate": 1.6004918305220946e-05, "loss": 0.8544, "step": 3451 }, { "epoch": 0.49, "learning_rate": 1.5997974168577653e-05, "loss": 0.9208, "step": 3452 }, { "epoch": 0.49, "learning_rate": 1.5991029817095217e-05, "loss": 0.7589, "step": 3453 }, { "epoch": 0.49, "learning_rate": 1.598408525226859e-05, "loss": 0.7813, "step": 3454 }, { "epoch": 0.49, "learning_rate": 1.5977140475592753e-05, "loss": 0.7907, "step": 3455 }, { "epoch": 0.5, "learning_rate": 1.5970195488562757e-05, "loss": 0.7623, "step": 3456 }, { "epoch": 0.5, "learning_rate": 1.596325029267368e-05, "loss": 0.885, "step": 3457 }, { "epoch": 0.5, "learning_rate": 1.5956304889420654e-05, "loss": 0.7757, "step": 3458 }, { "epoch": 0.5, "learning_rate": 1.5949359280298847e-05, "loss": 0.9152, "step": 3459 }, { "epoch": 0.5, "learning_rate": 1.5942413466803487e-05, "loss": 0.8119, "step": 3460 }, { "epoch": 0.5, "learning_rate": 1.5935467450429817e-05, "loss": 0.87, "step": 3461 }, { "epoch": 0.5, "learning_rate": 1.592852123267316e-05, "loss": 0.7542, "step": 3462 }, { "epoch": 0.5, "learning_rate": 1.592157481502886e-05, "loss": 0.8929, "step": 3463 }, { "epoch": 0.5, "learning_rate": 1.5914628198992307e-05, "loss": 0.8705, "step": 3464 }, { "epoch": 0.5, "learning_rate": 1.590768138605894e-05, "loss": 0.7425, "step": 3465 }, { "epoch": 0.5, "learning_rate": 1.5900734377724227e-05, "loss": 0.7056, "step": 3466 }, { "epoch": 0.5, "learning_rate": 1.5893787175483693e-05, "loss": 0.9051, "step": 3467 }, { "epoch": 0.5, "learning_rate": 1.5886839780832896e-05, "loss": 0.8661, "step": 3468 }, { "epoch": 0.5, "learning_rate": 1.5879892195267446e-05, "loss": 0.7533, "step": 3469 }, { "epoch": 0.5, "learning_rate": 1.5872944420282975e-05, "loss": 0.7801, "step": 3470 }, { "epoch": 0.5, "learning_rate": 1.5865996457375175e-05, "loss": 0.8956, "step": 3471 }, { "epoch": 0.5, "learning_rate": 1.5859048308039764e-05, "loss": 0.7734, "step": 3472 }, { "epoch": 0.5, "learning_rate": 1.5852099973772513e-05, "loss": 0.6671, "step": 3473 }, { "epoch": 0.5, "learning_rate": 1.584515145606923e-05, "loss": 0.8728, "step": 3474 }, { "epoch": 0.5, "learning_rate": 1.583820275642574e-05, "loss": 0.9375, "step": 3475 }, { "epoch": 0.5, "learning_rate": 1.583125387633795e-05, "loss": 0.7882, "step": 3476 }, { "epoch": 0.5, "learning_rate": 1.5824304817301767e-05, "loss": 0.8225, "step": 3477 }, { "epoch": 0.5, "learning_rate": 1.5817355580813157e-05, "loss": 0.7564, "step": 3478 }, { "epoch": 0.5, "learning_rate": 1.581040616836811e-05, "loss": 0.597, "step": 3479 }, { "epoch": 0.5, "learning_rate": 1.5803456581462675e-05, "loss": 0.8267, "step": 3480 }, { "epoch": 0.5, "learning_rate": 1.579650682159292e-05, "loss": 0.7879, "step": 3481 }, { "epoch": 0.5, "learning_rate": 1.5789556890254957e-05, "loss": 0.6674, "step": 3482 }, { "epoch": 0.5, "learning_rate": 1.5782606788944935e-05, "loss": 0.8499, "step": 3483 }, { "epoch": 0.5, "learning_rate": 1.577565651915903e-05, "loss": 0.9116, "step": 3484 }, { "epoch": 0.5, "learning_rate": 1.5768706082393483e-05, "loss": 0.8108, "step": 3485 }, { "epoch": 0.5, "learning_rate": 1.576175548014453e-05, "loss": 0.7997, "step": 3486 }, { "epoch": 0.5, "learning_rate": 1.5754804713908477e-05, "loss": 0.9492, "step": 3487 }, { "epoch": 0.5, "learning_rate": 1.5747853785181646e-05, "loss": 0.7852, "step": 3488 }, { "epoch": 0.5, "learning_rate": 1.57409026954604e-05, "loss": 0.8471, "step": 3489 }, { "epoch": 0.5, "learning_rate": 1.5733951446241143e-05, "loss": 0.6758, "step": 3490 }, { "epoch": 0.5, "learning_rate": 1.57270000390203e-05, "loss": 0.832, "step": 3491 }, { "epoch": 0.5, "learning_rate": 1.5720048475294337e-05, "loss": 0.8331, "step": 3492 }, { "epoch": 0.5, "learning_rate": 1.5713096756559755e-05, "loss": 0.8064, "step": 3493 }, { "epoch": 0.5, "learning_rate": 1.5706144884313092e-05, "loss": 0.9342, "step": 3494 }, { "epoch": 0.5, "learning_rate": 1.569919286005091e-05, "loss": 0.971, "step": 3495 }, { "epoch": 0.5, "learning_rate": 1.56922406852698e-05, "loss": 0.8242, "step": 3496 }, { "epoch": 0.5, "learning_rate": 1.5685288361466405e-05, "loss": 0.7575, "step": 3497 }, { "epoch": 0.5, "learning_rate": 1.5678335890137388e-05, "loss": 0.702, "step": 3498 }, { "epoch": 0.5, "learning_rate": 1.5671383272779436e-05, "loss": 1.1147, "step": 3499 }, { "epoch": 0.5, "learning_rate": 1.5664430510889278e-05, "loss": 0.8133, "step": 3500 }, { "epoch": 0.5, "learning_rate": 1.5657477605963678e-05, "loss": 0.8764, "step": 3501 }, { "epoch": 0.5, "learning_rate": 1.5650524559499415e-05, "loss": 0.8722, "step": 3502 }, { "epoch": 0.5, "learning_rate": 1.5643571372993318e-05, "loss": 0.8002, "step": 3503 }, { "epoch": 0.5, "learning_rate": 1.5636618047942225e-05, "loss": 0.8393, "step": 3504 }, { "epoch": 0.5, "learning_rate": 1.562966458584302e-05, "loss": 0.7787, "step": 3505 }, { "epoch": 0.5, "learning_rate": 1.5622710988192617e-05, "loss": 0.7081, "step": 3506 }, { "epoch": 0.5, "learning_rate": 1.5615757256487942e-05, "loss": 0.8499, "step": 3507 }, { "epoch": 0.5, "learning_rate": 1.5608803392225972e-05, "loss": 0.9252, "step": 3508 }, { "epoch": 0.5, "learning_rate": 1.560184939690369e-05, "loss": 0.8945, "step": 3509 }, { "epoch": 0.5, "learning_rate": 1.5594895272018134e-05, "loss": 0.9933, "step": 3510 }, { "epoch": 0.5, "learning_rate": 1.5587941019066345e-05, "loss": 0.6981, "step": 3511 }, { "epoch": 0.5, "learning_rate": 1.5580986639545407e-05, "loss": 0.9531, "step": 3512 }, { "epoch": 0.5, "learning_rate": 1.5574032134952414e-05, "loss": 0.692, "step": 3513 }, { "epoch": 0.5, "learning_rate": 1.5567077506784514e-05, "loss": 0.9894, "step": 3514 }, { "epoch": 0.5, "learning_rate": 1.5560122756538855e-05, "loss": 0.8761, "step": 3515 }, { "epoch": 0.5, "learning_rate": 1.555316788571263e-05, "loss": 0.7991, "step": 3516 }, { "epoch": 0.5, "learning_rate": 1.554621289580305e-05, "loss": 0.8973, "step": 3517 }, { "epoch": 0.5, "learning_rate": 1.5539257788307348e-05, "loss": 0.7098, "step": 3518 }, { "epoch": 0.5, "learning_rate": 1.553230256472279e-05, "loss": 0.7958, "step": 3519 }, { "epoch": 0.5, "learning_rate": 1.5525347226546663e-05, "loss": 0.671, "step": 3520 }, { "epoch": 0.5, "learning_rate": 1.5518391775276276e-05, "loss": 1.0458, "step": 3521 }, { "epoch": 0.5, "learning_rate": 1.551143621240897e-05, "loss": 0.8465, "step": 3522 }, { "epoch": 0.5, "learning_rate": 1.55044805394421e-05, "loss": 0.7985, "step": 3523 }, { "epoch": 0.5, "learning_rate": 1.549752475787306e-05, "loss": 0.8382, "step": 3524 }, { "epoch": 0.5, "learning_rate": 1.5490568869199243e-05, "loss": 0.6998, "step": 3525 }, { "epoch": 0.51, "learning_rate": 1.5483612874918097e-05, "loss": 0.8426, "step": 3526 }, { "epoch": 0.51, "learning_rate": 1.5476656776527063e-05, "loss": 0.7762, "step": 3527 }, { "epoch": 0.51, "learning_rate": 1.5469700575523623e-05, "loss": 0.7327, "step": 3528 }, { "epoch": 0.51, "learning_rate": 1.546274427340526e-05, "loss": 0.798, "step": 3529 }, { "epoch": 0.51, "learning_rate": 1.5455787871669524e-05, "loss": 0.875, "step": 3530 }, { "epoch": 0.51, "learning_rate": 1.5448831371813926e-05, "loss": 0.8108, "step": 3531 }, { "epoch": 0.51, "learning_rate": 1.5441874775336044e-05, "loss": 0.9188, "step": 3532 }, { "epoch": 0.51, "learning_rate": 1.5434918083733462e-05, "loss": 0.8471, "step": 3533 }, { "epoch": 0.51, "learning_rate": 1.5427961298503767e-05, "loss": 0.8917, "step": 3534 }, { "epoch": 0.51, "learning_rate": 1.5421004421144606e-05, "loss": 0.8644, "step": 3535 }, { "epoch": 0.51, "learning_rate": 1.541404745315361e-05, "loss": 0.7321, "step": 3536 }, { "epoch": 0.51, "learning_rate": 1.5407090396028445e-05, "loss": 0.8482, "step": 3537 }, { "epoch": 0.51, "learning_rate": 1.5400133251266783e-05, "loss": 0.8117, "step": 3538 }, { "epoch": 0.51, "learning_rate": 1.5393176020366343e-05, "loss": 0.6635, "step": 3539 }, { "epoch": 0.51, "learning_rate": 1.538621870482483e-05, "loss": 0.8058, "step": 3540 }, { "epoch": 0.51, "learning_rate": 1.5379261306139988e-05, "loss": 0.7958, "step": 3541 }, { "epoch": 0.51, "learning_rate": 1.5372303825809575e-05, "loss": 0.9626, "step": 3542 }, { "epoch": 0.51, "learning_rate": 1.5365346265331356e-05, "loss": 0.865, "step": 3543 }, { "epoch": 0.51, "learning_rate": 1.5358388626203132e-05, "loss": 0.8058, "step": 3544 }, { "epoch": 0.51, "learning_rate": 1.5351430909922703e-05, "loss": 0.7381, "step": 3545 }, { "epoch": 0.51, "learning_rate": 1.5344473117987896e-05, "loss": 0.8661, "step": 3546 }, { "epoch": 0.51, "learning_rate": 1.533751525189655e-05, "loss": 0.8362, "step": 3547 }, { "epoch": 0.51, "learning_rate": 1.5330557313146523e-05, "loss": 0.9581, "step": 3548 }, { "epoch": 0.51, "learning_rate": 1.5323599303235692e-05, "loss": 0.8075, "step": 3549 }, { "epoch": 0.51, "learning_rate": 1.531664122366193e-05, "loss": 1.0831, "step": 3550 }, { "epoch": 0.51, "learning_rate": 1.5309683075923155e-05, "loss": 0.9208, "step": 3551 }, { "epoch": 0.51, "learning_rate": 1.5302724861517272e-05, "loss": 0.7839, "step": 3552 }, { "epoch": 0.51, "learning_rate": 1.5295766581942227e-05, "loss": 0.8351, "step": 3553 }, { "epoch": 0.51, "learning_rate": 1.528880823869594e-05, "loss": 0.5964, "step": 3554 }, { "epoch": 0.51, "learning_rate": 1.52818498332764e-05, "loss": 0.8884, "step": 3555 }, { "epoch": 0.51, "learning_rate": 1.527489136718156e-05, "loss": 0.7193, "step": 3556 }, { "epoch": 0.51, "learning_rate": 1.5267932841909412e-05, "loss": 0.7386, "step": 3557 }, { "epoch": 0.51, "learning_rate": 1.5260974258957956e-05, "loss": 0.7606, "step": 3558 }, { "epoch": 0.51, "learning_rate": 1.5254015619825196e-05, "loss": 0.7757, "step": 3559 }, { "epoch": 0.51, "learning_rate": 1.5247056926009162e-05, "loss": 0.8973, "step": 3560 }, { "epoch": 0.51, "learning_rate": 1.5240098179007884e-05, "loss": 0.8488, "step": 3561 }, { "epoch": 0.51, "learning_rate": 1.5233139380319411e-05, "loss": 0.8331, "step": 3562 }, { "epoch": 0.51, "learning_rate": 1.5226180531441795e-05, "loss": 0.8387, "step": 3563 }, { "epoch": 0.51, "learning_rate": 1.521922163387311e-05, "loss": 0.6752, "step": 3564 }, { "epoch": 0.51, "learning_rate": 1.5212262689111433e-05, "loss": 0.793, "step": 3565 }, { "epoch": 0.51, "learning_rate": 1.5205303698654852e-05, "loss": 0.976, "step": 3566 }, { "epoch": 0.51, "learning_rate": 1.519834466400146e-05, "loss": 0.8884, "step": 3567 }, { "epoch": 0.51, "learning_rate": 1.5191385586649375e-05, "loss": 0.8739, "step": 3568 }, { "epoch": 0.51, "learning_rate": 1.518442646809671e-05, "loss": 1.0145, "step": 3569 }, { "epoch": 0.51, "learning_rate": 1.517746730984159e-05, "loss": 0.7511, "step": 3570 }, { "epoch": 0.51, "learning_rate": 1.5170508113382148e-05, "loss": 0.8929, "step": 3571 }, { "epoch": 0.51, "learning_rate": 1.516354888021653e-05, "loss": 0.7128, "step": 3572 }, { "epoch": 0.51, "learning_rate": 1.5156589611842885e-05, "loss": 0.8945, "step": 3573 }, { "epoch": 0.51, "learning_rate": 1.514963030975937e-05, "loss": 0.8415, "step": 3574 }, { "epoch": 0.51, "learning_rate": 1.5142670975464155e-05, "loss": 0.7718, "step": 3575 }, { "epoch": 0.51, "learning_rate": 1.5135711610455407e-05, "loss": 0.6283, "step": 3576 }, { "epoch": 0.51, "learning_rate": 1.5128752216231306e-05, "loss": 0.8973, "step": 3577 }, { "epoch": 0.51, "learning_rate": 1.5121792794290043e-05, "loss": 0.7877, "step": 3578 }, { "epoch": 0.51, "learning_rate": 1.51148333461298e-05, "loss": 1.0006, "step": 3579 }, { "epoch": 0.51, "learning_rate": 1.5107873873248786e-05, "loss": 0.8482, "step": 3580 }, { "epoch": 0.51, "learning_rate": 1.5100914377145194e-05, "loss": 0.7991, "step": 3581 }, { "epoch": 0.51, "learning_rate": 1.5093954859317231e-05, "loss": 0.6855, "step": 3582 }, { "epoch": 0.51, "learning_rate": 1.5086995321263116e-05, "loss": 0.9565, "step": 3583 }, { "epoch": 0.51, "learning_rate": 1.5080035764481056e-05, "loss": 0.88, "step": 3584 }, { "epoch": 0.51, "learning_rate": 1.5073076190469282e-05, "loss": 0.7547, "step": 3585 }, { "epoch": 0.51, "learning_rate": 1.506611660072601e-05, "loss": 0.8111, "step": 3586 }, { "epoch": 0.51, "learning_rate": 1.5059156996749474e-05, "loss": 0.8532, "step": 3587 }, { "epoch": 0.51, "learning_rate": 1.5052197380037893e-05, "loss": 0.6459, "step": 3588 }, { "epoch": 0.51, "learning_rate": 1.5045237752089511e-05, "loss": 0.9191, "step": 3589 }, { "epoch": 0.51, "learning_rate": 1.503827811440256e-05, "loss": 0.8281, "step": 3590 }, { "epoch": 0.51, "learning_rate": 1.5031318468475282e-05, "loss": 0.7648, "step": 3591 }, { "epoch": 0.51, "learning_rate": 1.5024358815805909e-05, "loss": 1.0809, "step": 3592 }, { "epoch": 0.51, "learning_rate": 1.5017399157892684e-05, "loss": 0.8181, "step": 3593 }, { "epoch": 0.51, "learning_rate": 1.5010439496233855e-05, "loss": 0.6671, "step": 3594 }, { "epoch": 0.51, "learning_rate": 1.5003479832327661e-05, "loss": 0.6696, "step": 3595 }, { "epoch": 0.52, "learning_rate": 1.4996520167672343e-05, "loss": 0.9269, "step": 3596 }, { "epoch": 0.52, "learning_rate": 1.4989560503766149e-05, "loss": 0.6426, "step": 3597 }, { "epoch": 0.52, "learning_rate": 1.4982600842107318e-05, "loss": 0.7829, "step": 3598 }, { "epoch": 0.52, "learning_rate": 1.4975641184194092e-05, "loss": 0.8488, "step": 3599 }, { "epoch": 0.52, "learning_rate": 1.4968681531524722e-05, "loss": 0.7748, "step": 3600 }, { "epoch": 0.52, "learning_rate": 1.4961721885597441e-05, "loss": 0.6724, "step": 3601 }, { "epoch": 0.52, "learning_rate": 1.495476224791049e-05, "loss": 0.8845, "step": 3602 }, { "epoch": 0.52, "learning_rate": 1.494780261996211e-05, "loss": 0.8343, "step": 3603 }, { "epoch": 0.52, "learning_rate": 1.494084300325053e-05, "loss": 0.8454, "step": 3604 }, { "epoch": 0.52, "learning_rate": 1.4933883399273997e-05, "loss": 0.7958, "step": 3605 }, { "epoch": 0.52, "learning_rate": 1.492692380953072e-05, "loss": 0.9029, "step": 3606 }, { "epoch": 0.52, "learning_rate": 1.4919964235518942e-05, "loss": 0.731, "step": 3607 }, { "epoch": 0.52, "learning_rate": 1.491300467873689e-05, "loss": 0.8198, "step": 3608 }, { "epoch": 0.52, "learning_rate": 1.4906045140682771e-05, "loss": 0.7963, "step": 3609 }, { "epoch": 0.52, "learning_rate": 1.4899085622854812e-05, "loss": 1.0446, "step": 3610 }, { "epoch": 0.52, "learning_rate": 1.4892126126751217e-05, "loss": 0.8666, "step": 3611 }, { "epoch": 0.52, "learning_rate": 1.4885166653870199e-05, "loss": 0.8069, "step": 3612 }, { "epoch": 0.52, "learning_rate": 1.4878207205709963e-05, "loss": 0.8002, "step": 3613 }, { "epoch": 0.52, "learning_rate": 1.4871247783768696e-05, "loss": 0.8605, "step": 3614 }, { "epoch": 0.52, "learning_rate": 1.4864288389544594e-05, "loss": 0.7294, "step": 3615 }, { "epoch": 0.52, "learning_rate": 1.485732902453585e-05, "loss": 0.8376, "step": 3616 }, { "epoch": 0.52, "learning_rate": 1.4850369690240633e-05, "loss": 0.8217, "step": 3617 }, { "epoch": 0.52, "learning_rate": 1.484341038815712e-05, "loss": 0.7656, "step": 3618 }, { "epoch": 0.52, "learning_rate": 1.4836451119783475e-05, "loss": 0.7628, "step": 3619 }, { "epoch": 0.52, "learning_rate": 1.4829491886617851e-05, "loss": 0.9593, "step": 3620 }, { "epoch": 0.52, "learning_rate": 1.4822532690158415e-05, "loss": 0.7414, "step": 3621 }, { "epoch": 0.52, "learning_rate": 1.4815573531903291e-05, "loss": 0.779, "step": 3622 }, { "epoch": 0.52, "learning_rate": 1.4808614413350624e-05, "loss": 0.8465, "step": 3623 }, { "epoch": 0.52, "learning_rate": 1.480165533599854e-05, "loss": 0.6579, "step": 3624 }, { "epoch": 0.52, "learning_rate": 1.4794696301345152e-05, "loss": 0.8312, "step": 3625 }, { "epoch": 0.52, "learning_rate": 1.4787737310888571e-05, "loss": 0.8521, "step": 3626 }, { "epoch": 0.52, "learning_rate": 1.478077836612689e-05, "loss": 0.8848, "step": 3627 }, { "epoch": 0.52, "learning_rate": 1.4773819468558206e-05, "loss": 0.6808, "step": 3628 }, { "epoch": 0.52, "learning_rate": 1.4766860619680595e-05, "loss": 0.9065, "step": 3629 }, { "epoch": 0.52, "learning_rate": 1.475990182099212e-05, "loss": 0.7045, "step": 3630 }, { "epoch": 0.52, "learning_rate": 1.4752943073990838e-05, "loss": 0.7985, "step": 3631 }, { "epoch": 0.52, "learning_rate": 1.4745984380174808e-05, "loss": 1.01, "step": 3632 }, { "epoch": 0.52, "learning_rate": 1.4739025741042045e-05, "loss": 0.9037, "step": 3633 }, { "epoch": 0.52, "learning_rate": 1.473206715809059e-05, "loss": 0.9576, "step": 3634 }, { "epoch": 0.52, "learning_rate": 1.4725108632818441e-05, "loss": 0.865, "step": 3635 }, { "epoch": 0.52, "learning_rate": 1.4718150166723603e-05, "loss": 0.8828, "step": 3636 }, { "epoch": 0.52, "learning_rate": 1.471119176130406e-05, "loss": 0.8036, "step": 3637 }, { "epoch": 0.52, "learning_rate": 1.4704233418057779e-05, "loss": 0.6791, "step": 3638 }, { "epoch": 0.52, "learning_rate": 1.4697275138482732e-05, "loss": 0.8471, "step": 3639 }, { "epoch": 0.52, "learning_rate": 1.4690316924076848e-05, "loss": 0.9029, "step": 3640 }, { "epoch": 0.52, "learning_rate": 1.468335877633807e-05, "loss": 0.9174, "step": 3641 }, { "epoch": 0.52, "learning_rate": 1.4676400696764316e-05, "loss": 0.7891, "step": 3642 }, { "epoch": 0.52, "learning_rate": 1.4669442686853478e-05, "loss": 0.6761, "step": 3643 }, { "epoch": 0.52, "learning_rate": 1.4662484748103448e-05, "loss": 0.8036, "step": 3644 }, { "epoch": 0.52, "learning_rate": 1.4655526882012106e-05, "loss": 0.8398, "step": 3645 }, { "epoch": 0.52, "learning_rate": 1.4648569090077298e-05, "loss": 0.6599, "step": 3646 }, { "epoch": 0.52, "learning_rate": 1.4641611373796874e-05, "loss": 0.8454, "step": 3647 }, { "epoch": 0.52, "learning_rate": 1.4634653734668645e-05, "loss": 0.8917, "step": 3648 }, { "epoch": 0.52, "learning_rate": 1.4627696174190426e-05, "loss": 0.8507, "step": 3649 }, { "epoch": 0.52, "learning_rate": 1.4620738693860015e-05, "loss": 0.7405, "step": 3650 }, { "epoch": 0.52, "learning_rate": 1.4613781295175173e-05, "loss": 0.7757, "step": 3651 }, { "epoch": 0.52, "learning_rate": 1.460682397963366e-05, "loss": 0.8013, "step": 3652 }, { "epoch": 0.52, "learning_rate": 1.459986674873322e-05, "loss": 0.8945, "step": 3653 }, { "epoch": 0.52, "learning_rate": 1.4592909603971558e-05, "loss": 0.7126, "step": 3654 }, { "epoch": 0.52, "learning_rate": 1.4585952546846396e-05, "loss": 0.8315, "step": 3655 }, { "epoch": 0.52, "learning_rate": 1.4578995578855393e-05, "loss": 0.8036, "step": 3656 }, { "epoch": 0.52, "learning_rate": 1.4572038701496228e-05, "loss": 0.7687, "step": 3657 }, { "epoch": 0.52, "learning_rate": 1.4565081916266546e-05, "loss": 0.8331, "step": 3658 }, { "epoch": 0.52, "learning_rate": 1.4558125224663957e-05, "loss": 1.0469, "step": 3659 }, { "epoch": 0.52, "learning_rate": 1.4551168628186078e-05, "loss": 0.8652, "step": 3660 }, { "epoch": 0.52, "learning_rate": 1.454421212833048e-05, "loss": 0.7852, "step": 3661 }, { "epoch": 0.52, "learning_rate": 1.4537255726594734e-05, "loss": 0.6724, "step": 3662 }, { "epoch": 0.52, "learning_rate": 1.4530299424476383e-05, "loss": 0.8912, "step": 3663 }, { "epoch": 0.52, "learning_rate": 1.4523343223472941e-05, "loss": 0.6652, "step": 3664 }, { "epoch": 0.52, "learning_rate": 1.4516387125081902e-05, "loss": 0.7941, "step": 3665 }, { "epoch": 0.53, "learning_rate": 1.4509431130800759e-05, "loss": 0.6136, "step": 3666 }, { "epoch": 0.53, "learning_rate": 1.4502475242126942e-05, "loss": 0.8142, "step": 3667 }, { "epoch": 0.53, "learning_rate": 1.4495519460557901e-05, "loss": 0.8027, "step": 3668 }, { "epoch": 0.53, "learning_rate": 1.4488563787591034e-05, "loss": 0.7132, "step": 3669 }, { "epoch": 0.53, "learning_rate": 1.4481608224723725e-05, "loss": 0.8304, "step": 3670 }, { "epoch": 0.53, "learning_rate": 1.4474652773453342e-05, "loss": 0.8616, "step": 3671 }, { "epoch": 0.53, "learning_rate": 1.446769743527721e-05, "loss": 0.6627, "step": 3672 }, { "epoch": 0.53, "learning_rate": 1.4460742211692651e-05, "loss": 0.8764, "step": 3673 }, { "epoch": 0.53, "learning_rate": 1.4453787104196951e-05, "loss": 0.81, "step": 3674 }, { "epoch": 0.53, "learning_rate": 1.444683211428737e-05, "loss": 0.9369, "step": 3675 }, { "epoch": 0.53, "learning_rate": 1.4439877243461148e-05, "loss": 0.8728, "step": 3676 }, { "epoch": 0.53, "learning_rate": 1.4432922493215488e-05, "loss": 0.7188, "step": 3677 }, { "epoch": 0.53, "learning_rate": 1.4425967865047587e-05, "loss": 0.7436, "step": 3678 }, { "epoch": 0.53, "learning_rate": 1.44190133604546e-05, "loss": 0.9752, "step": 3679 }, { "epoch": 0.53, "learning_rate": 1.441205898093366e-05, "loss": 0.9637, "step": 3680 }, { "epoch": 0.53, "learning_rate": 1.4405104727981865e-05, "loss": 0.7991, "step": 3681 }, { "epoch": 0.53, "learning_rate": 1.439815060309631e-05, "loss": 0.9442, "step": 3682 }, { "epoch": 0.53, "learning_rate": 1.439119660777403e-05, "loss": 0.8658, "step": 3683 }, { "epoch": 0.53, "learning_rate": 1.4384242743512059e-05, "loss": 0.7301, "step": 3684 }, { "epoch": 0.53, "learning_rate": 1.4377289011807387e-05, "loss": 0.8638, "step": 3685 }, { "epoch": 0.53, "learning_rate": 1.4370335414156979e-05, "loss": 0.5084, "step": 3686 }, { "epoch": 0.53, "learning_rate": 1.4363381952057779e-05, "loss": 0.7829, "step": 3687 }, { "epoch": 0.53, "learning_rate": 1.4356428627006683e-05, "loss": 0.8806, "step": 3688 }, { "epoch": 0.53, "learning_rate": 1.4349475440500587e-05, "loss": 0.6589, "step": 3689 }, { "epoch": 0.53, "learning_rate": 1.4342522394036322e-05, "loss": 0.8281, "step": 3690 }, { "epoch": 0.53, "learning_rate": 1.4335569489110721e-05, "loss": 0.7347, "step": 3691 }, { "epoch": 0.53, "learning_rate": 1.4328616727220566e-05, "loss": 0.6426, "step": 3692 }, { "epoch": 0.53, "learning_rate": 1.4321664109862616e-05, "loss": 0.8454, "step": 3693 }, { "epoch": 0.53, "learning_rate": 1.431471163853359e-05, "loss": 0.8806, "step": 3694 }, { "epoch": 0.53, "learning_rate": 1.4307759314730201e-05, "loss": 0.75, "step": 3695 }, { "epoch": 0.53, "learning_rate": 1.4300807139949095e-05, "loss": 0.8772, "step": 3696 }, { "epoch": 0.53, "learning_rate": 1.4293855115686914e-05, "loss": 0.9777, "step": 3697 }, { "epoch": 0.53, "learning_rate": 1.4286903243440246e-05, "loss": 0.8789, "step": 3698 }, { "epoch": 0.53, "learning_rate": 1.4279951524705664e-05, "loss": 0.7394, "step": 3699 }, { "epoch": 0.53, "learning_rate": 1.4272999960979707e-05, "loss": 0.6482, "step": 3700 }, { "epoch": 0.53, "learning_rate": 1.426604855375886e-05, "loss": 0.8622, "step": 3701 }, { "epoch": 0.53, "learning_rate": 1.4259097304539597e-05, "loss": 0.8778, "step": 3702 }, { "epoch": 0.53, "learning_rate": 1.4252146214818358e-05, "loss": 0.9124, "step": 3703 }, { "epoch": 0.53, "learning_rate": 1.4245195286091526e-05, "loss": 0.7327, "step": 3704 }, { "epoch": 0.53, "learning_rate": 1.4238244519855474e-05, "loss": 0.745, "step": 3705 }, { "epoch": 0.53, "learning_rate": 1.4231293917606519e-05, "loss": 0.695, "step": 3706 }, { "epoch": 0.53, "learning_rate": 1.4224343480840968e-05, "loss": 0.6649, "step": 3707 }, { "epoch": 0.53, "learning_rate": 1.4217393211055072e-05, "loss": 0.8836, "step": 3708 }, { "epoch": 0.53, "learning_rate": 1.4210443109745048e-05, "loss": 0.8195, "step": 3709 }, { "epoch": 0.53, "learning_rate": 1.420349317840708e-05, "loss": 0.8008, "step": 3710 }, { "epoch": 0.53, "learning_rate": 1.4196543418537326e-05, "loss": 0.9492, "step": 3711 }, { "epoch": 0.53, "learning_rate": 1.4189593831631889e-05, "loss": 0.8443, "step": 3712 }, { "epoch": 0.53, "learning_rate": 1.418264441918685e-05, "loss": 0.933, "step": 3713 }, { "epoch": 0.53, "learning_rate": 1.4175695182698237e-05, "loss": 0.7358, "step": 3714 }, { "epoch": 0.53, "learning_rate": 1.416874612366205e-05, "loss": 0.8761, "step": 3715 }, { "epoch": 0.53, "learning_rate": 1.416179724357426e-05, "loss": 0.9063, "step": 3716 }, { "epoch": 0.53, "learning_rate": 1.4154848543930774e-05, "loss": 0.8432, "step": 3717 }, { "epoch": 0.53, "learning_rate": 1.4147900026227488e-05, "loss": 0.7684, "step": 3718 }, { "epoch": 0.53, "learning_rate": 1.4140951691960237e-05, "loss": 0.7628, "step": 3719 }, { "epoch": 0.53, "learning_rate": 1.4134003542624827e-05, "loss": 0.8811, "step": 3720 }, { "epoch": 0.53, "learning_rate": 1.4127055579717031e-05, "loss": 0.9102, "step": 3721 }, { "epoch": 0.53, "learning_rate": 1.4120107804732557e-05, "loss": 0.6576, "step": 3722 }, { "epoch": 0.53, "learning_rate": 1.4113160219167103e-05, "loss": 0.7603, "step": 3723 }, { "epoch": 0.53, "learning_rate": 1.4106212824516311e-05, "loss": 0.7801, "step": 3724 }, { "epoch": 0.53, "learning_rate": 1.4099265622275775e-05, "loss": 0.8571, "step": 3725 }, { "epoch": 0.53, "learning_rate": 1.4092318613941067e-05, "loss": 0.8795, "step": 3726 }, { "epoch": 0.53, "learning_rate": 1.4085371801007696e-05, "loss": 0.7534, "step": 3727 }, { "epoch": 0.53, "learning_rate": 1.4078425184971139e-05, "loss": 0.7807, "step": 3728 }, { "epoch": 0.53, "learning_rate": 1.407147876732684e-05, "loss": 0.9453, "step": 3729 }, { "epoch": 0.53, "learning_rate": 1.4064532549570182e-05, "loss": 0.8901, "step": 3730 }, { "epoch": 0.53, "learning_rate": 1.405758653319652e-05, "loss": 0.9012, "step": 3731 }, { "epoch": 0.53, "learning_rate": 1.4050640719701154e-05, "loss": 0.7416, "step": 3732 }, { "epoch": 0.53, "learning_rate": 1.4043695110579345e-05, "loss": 0.9487, "step": 3733 }, { "epoch": 0.53, "learning_rate": 1.4036749707326323e-05, "loss": 0.8298, "step": 3734 }, { "epoch": 0.54, "learning_rate": 1.4029804511437243e-05, "loss": 0.7497, "step": 3735 }, { "epoch": 0.54, "learning_rate": 1.4022859524407246e-05, "loss": 0.8474, "step": 3736 }, { "epoch": 0.54, "learning_rate": 1.4015914747731417e-05, "loss": 0.8703, "step": 3737 }, { "epoch": 0.54, "learning_rate": 1.4008970182904785e-05, "loss": 0.8371, "step": 3738 }, { "epoch": 0.54, "learning_rate": 1.4002025831422353e-05, "loss": 0.7693, "step": 3739 }, { "epoch": 0.54, "learning_rate": 1.3995081694779058e-05, "loss": 0.6819, "step": 3740 }, { "epoch": 0.54, "learning_rate": 1.3988137774469808e-05, "loss": 0.7863, "step": 3741 }, { "epoch": 0.54, "learning_rate": 1.3981194071989458e-05, "loss": 0.8744, "step": 3742 }, { "epoch": 0.54, "learning_rate": 1.3974250588832811e-05, "loss": 0.8622, "step": 3743 }, { "epoch": 0.54, "learning_rate": 1.3967307326494625e-05, "loss": 0.8036, "step": 3744 }, { "epoch": 0.54, "learning_rate": 1.3960364286469625e-05, "loss": 0.8267, "step": 3745 }, { "epoch": 0.54, "learning_rate": 1.395342147025246e-05, "loss": 0.8273, "step": 3746 }, { "epoch": 0.54, "learning_rate": 1.394647887933776e-05, "loss": 0.87, "step": 3747 }, { "epoch": 0.54, "learning_rate": 1.3939536515220082e-05, "loss": 0.889, "step": 3748 }, { "epoch": 0.54, "learning_rate": 1.3932594379393948e-05, "loss": 0.8025, "step": 3749 }, { "epoch": 0.54, "learning_rate": 1.3925652473353839e-05, "loss": 0.7171, "step": 3750 }, { "epoch": 0.54, "learning_rate": 1.3918710798594155e-05, "loss": 0.8175, "step": 3751 }, { "epoch": 0.54, "learning_rate": 1.3911769356609283e-05, "loss": 0.899, "step": 3752 }, { "epoch": 0.54, "learning_rate": 1.3904828148893543e-05, "loss": 0.661, "step": 3753 }, { "epoch": 0.54, "learning_rate": 1.3897887176941197e-05, "loss": 0.7907, "step": 3754 }, { "epoch": 0.54, "learning_rate": 1.3890946442246471e-05, "loss": 0.8887, "step": 3755 }, { "epoch": 0.54, "learning_rate": 1.3884005946303528e-05, "loss": 0.882, "step": 3756 }, { "epoch": 0.54, "learning_rate": 1.3877065690606488e-05, "loss": 0.6239, "step": 3757 }, { "epoch": 0.54, "learning_rate": 1.3870125676649422e-05, "loss": 0.8242, "step": 3758 }, { "epoch": 0.54, "learning_rate": 1.3863185905926334e-05, "loss": 0.7813, "step": 3759 }, { "epoch": 0.54, "learning_rate": 1.3856246379931186e-05, "loss": 0.9046, "step": 3760 }, { "epoch": 0.54, "learning_rate": 1.3849307100157896e-05, "loss": 0.8064, "step": 3761 }, { "epoch": 0.54, "learning_rate": 1.3842368068100304e-05, "loss": 0.8036, "step": 3762 }, { "epoch": 0.54, "learning_rate": 1.3835429285252227e-05, "loss": 0.8873, "step": 3763 }, { "epoch": 0.54, "learning_rate": 1.3828490753107403e-05, "loss": 0.7813, "step": 3764 }, { "epoch": 0.54, "learning_rate": 1.3821552473159528e-05, "loss": 0.7712, "step": 3765 }, { "epoch": 0.54, "learning_rate": 1.3814614446902248e-05, "loss": 0.803, "step": 3766 }, { "epoch": 0.54, "learning_rate": 1.3807676675829136e-05, "loss": 0.8566, "step": 3767 }, { "epoch": 0.54, "learning_rate": 1.380073916143374e-05, "loss": 0.8354, "step": 3768 }, { "epoch": 0.54, "learning_rate": 1.3793801905209519e-05, "loss": 0.8616, "step": 3769 }, { "epoch": 0.54, "learning_rate": 1.3786864908649902e-05, "loss": 0.8728, "step": 3770 }, { "epoch": 0.54, "learning_rate": 1.377992817324825e-05, "loss": 0.7068, "step": 3771 }, { "epoch": 0.54, "learning_rate": 1.3772991700497866e-05, "loss": 0.6847, "step": 3772 }, { "epoch": 0.54, "learning_rate": 1.3766055491892006e-05, "loss": 0.6964, "step": 3773 }, { "epoch": 0.54, "learning_rate": 1.3759119548923868e-05, "loss": 0.952, "step": 3774 }, { "epoch": 0.54, "learning_rate": 1.375218387308658e-05, "loss": 0.7656, "step": 3775 }, { "epoch": 0.54, "learning_rate": 1.374524846587323e-05, "loss": 0.8368, "step": 3776 }, { "epoch": 0.54, "learning_rate": 1.373831332877683e-05, "loss": 0.9216, "step": 3777 }, { "epoch": 0.54, "learning_rate": 1.3731378463290347e-05, "loss": 0.7891, "step": 3778 }, { "epoch": 0.54, "learning_rate": 1.3724443870906692e-05, "loss": 0.8019, "step": 3779 }, { "epoch": 0.54, "learning_rate": 1.3717509553118705e-05, "loss": 0.9947, "step": 3780 }, { "epoch": 0.54, "learning_rate": 1.3710575511419173e-05, "loss": 0.7969, "step": 3781 }, { "epoch": 0.54, "learning_rate": 1.370364174730083e-05, "loss": 0.8468, "step": 3782 }, { "epoch": 0.54, "learning_rate": 1.3696708262256331e-05, "loss": 0.7932, "step": 3783 }, { "epoch": 0.54, "learning_rate": 1.3689775057778302e-05, "loss": 0.9492, "step": 3784 }, { "epoch": 0.54, "learning_rate": 1.368284213535927e-05, "loss": 0.9565, "step": 3785 }, { "epoch": 0.54, "learning_rate": 1.3675909496491735e-05, "loss": 0.6122, "step": 3786 }, { "epoch": 0.54, "learning_rate": 1.3668977142668124e-05, "loss": 0.9012, "step": 3787 }, { "epoch": 0.54, "learning_rate": 1.3662045075380791e-05, "loss": 0.7589, "step": 3788 }, { "epoch": 0.54, "learning_rate": 1.3655113296122047e-05, "loss": 0.9146, "step": 3789 }, { "epoch": 0.54, "learning_rate": 1.3648181806384125e-05, "loss": 0.9403, "step": 3790 }, { "epoch": 0.54, "learning_rate": 1.3641250607659211e-05, "loss": 0.7327, "step": 3791 }, { "epoch": 0.54, "learning_rate": 1.363431970143942e-05, "loss": 0.8281, "step": 3792 }, { "epoch": 0.54, "learning_rate": 1.3627389089216797e-05, "loss": 0.8432, "step": 3793 }, { "epoch": 0.54, "learning_rate": 1.3620458772483336e-05, "loss": 0.7857, "step": 3794 }, { "epoch": 0.54, "learning_rate": 1.3613528752730971e-05, "loss": 0.805, "step": 3795 }, { "epoch": 0.54, "learning_rate": 1.3606599031451546e-05, "loss": 0.5936, "step": 3796 }, { "epoch": 0.54, "learning_rate": 1.3599669610136875e-05, "loss": 0.7598, "step": 3797 }, { "epoch": 0.54, "learning_rate": 1.359274049027868e-05, "loss": 0.7419, "step": 3798 }, { "epoch": 0.54, "learning_rate": 1.3585811673368635e-05, "loss": 0.6822, "step": 3799 }, { "epoch": 0.54, "learning_rate": 1.3578883160898343e-05, "loss": 0.627, "step": 3800 }, { "epoch": 0.54, "learning_rate": 1.3571954954359332e-05, "loss": 0.764, "step": 3801 }, { "epoch": 0.54, "learning_rate": 1.3565027055243086e-05, "loss": 0.7935, "step": 3802 }, { "epoch": 0.54, "learning_rate": 1.3558099465041006e-05, "loss": 0.7009, "step": 3803 }, { "epoch": 0.54, "learning_rate": 1.3551172185244427e-05, "loss": 0.6219, "step": 3804 }, { "epoch": 0.55, "learning_rate": 1.3544245217344629e-05, "loss": 0.9325, "step": 3805 }, { "epoch": 0.55, "learning_rate": 1.3537318562832803e-05, "loss": 0.8089, "step": 3806 }, { "epoch": 0.55, "learning_rate": 1.3530392223200098e-05, "loss": 0.9676, "step": 3807 }, { "epoch": 0.55, "learning_rate": 1.3523466199937584e-05, "loss": 0.8549, "step": 3808 }, { "epoch": 0.55, "learning_rate": 1.3516540494536256e-05, "loss": 0.9537, "step": 3809 }, { "epoch": 0.55, "learning_rate": 1.3509615108487043e-05, "loss": 0.7768, "step": 3810 }, { "epoch": 0.55, "learning_rate": 1.3502690043280829e-05, "loss": 0.8404, "step": 3811 }, { "epoch": 0.55, "learning_rate": 1.3495765300408385e-05, "loss": 1.0095, "step": 3812 }, { "epoch": 0.55, "learning_rate": 1.3488840881360455e-05, "loss": 0.7561, "step": 3813 }, { "epoch": 0.55, "learning_rate": 1.3481916787627683e-05, "loss": 0.7793, "step": 3814 }, { "epoch": 0.55, "learning_rate": 1.3474993020700662e-05, "loss": 0.8633, "step": 3815 }, { "epoch": 0.55, "learning_rate": 1.3468069582069907e-05, "loss": 0.9347, "step": 3816 }, { "epoch": 0.55, "learning_rate": 1.3461146473225857e-05, "loss": 0.6797, "step": 3817 }, { "epoch": 0.55, "learning_rate": 1.3454223695658898e-05, "loss": 0.8359, "step": 3818 }, { "epoch": 0.55, "learning_rate": 1.3447301250859316e-05, "loss": 0.7754, "step": 3819 }, { "epoch": 0.55, "learning_rate": 1.3440379140317357e-05, "loss": 0.827, "step": 3820 }, { "epoch": 0.55, "learning_rate": 1.3433457365523175e-05, "loss": 0.875, "step": 3821 }, { "epoch": 0.55, "learning_rate": 1.3426535927966856e-05, "loss": 0.9581, "step": 3822 }, { "epoch": 0.55, "learning_rate": 1.3419614829138408e-05, "loss": 0.8075, "step": 3823 }, { "epoch": 0.55, "learning_rate": 1.3412694070527786e-05, "loss": 0.9353, "step": 3824 }, { "epoch": 0.55, "learning_rate": 1.340577365362485e-05, "loss": 0.7726, "step": 3825 }, { "epoch": 0.55, "learning_rate": 1.3398853579919398e-05, "loss": 0.8672, "step": 3826 }, { "epoch": 0.55, "learning_rate": 1.3391933850901142e-05, "loss": 0.6222, "step": 3827 }, { "epoch": 0.55, "learning_rate": 1.3385014468059734e-05, "loss": 0.8357, "step": 3828 }, { "epoch": 0.55, "learning_rate": 1.3378095432884754e-05, "loss": 0.704, "step": 3829 }, { "epoch": 0.55, "learning_rate": 1.3371176746865683e-05, "loss": 0.7084, "step": 3830 }, { "epoch": 0.55, "learning_rate": 1.3364258411491955e-05, "loss": 0.8036, "step": 3831 }, { "epoch": 0.55, "learning_rate": 1.3357340428252915e-05, "loss": 0.9833, "step": 3832 }, { "epoch": 0.55, "learning_rate": 1.3350422798637827e-05, "loss": 0.8728, "step": 3833 }, { "epoch": 0.55, "learning_rate": 1.3343505524135896e-05, "loss": 0.7796, "step": 3834 }, { "epoch": 0.55, "learning_rate": 1.3336588606236225e-05, "loss": 0.8929, "step": 3835 }, { "epoch": 0.55, "learning_rate": 1.3329672046427867e-05, "loss": 0.9559, "step": 3836 }, { "epoch": 0.55, "learning_rate": 1.3322755846199788e-05, "loss": 0.7734, "step": 3837 }, { "epoch": 0.55, "learning_rate": 1.3315840007040866e-05, "loss": 0.7958, "step": 3838 }, { "epoch": 0.55, "learning_rate": 1.330892453043991e-05, "loss": 0.8231, "step": 3839 }, { "epoch": 0.55, "learning_rate": 1.3302009417885662e-05, "loss": 0.8195, "step": 3840 }, { "epoch": 0.55, "learning_rate": 1.3295094670866767e-05, "loss": 0.9727, "step": 3841 }, { "epoch": 0.55, "learning_rate": 1.32881802908718e-05, "loss": 0.752, "step": 3842 }, { "epoch": 0.55, "learning_rate": 1.3281266279389252e-05, "loss": 0.6387, "step": 3843 }, { "epoch": 0.55, "learning_rate": 1.327435263790754e-05, "loss": 0.7422, "step": 3844 }, { "epoch": 0.55, "learning_rate": 1.3267439367915008e-05, "loss": 0.805, "step": 3845 }, { "epoch": 0.55, "learning_rate": 1.32605264708999e-05, "loss": 0.8823, "step": 3846 }, { "epoch": 0.55, "learning_rate": 1.3253613948350404e-05, "loss": 0.8008, "step": 3847 }, { "epoch": 0.55, "learning_rate": 1.3246701801754604e-05, "loss": 0.9978, "step": 3848 }, { "epoch": 0.55, "learning_rate": 1.3239790032600519e-05, "loss": 0.7196, "step": 3849 }, { "epoch": 0.55, "learning_rate": 1.3232878642376085e-05, "loss": 0.7985, "step": 3850 }, { "epoch": 0.55, "learning_rate": 1.3225967632569143e-05, "loss": 0.8795, "step": 3851 }, { "epoch": 0.55, "learning_rate": 1.3219057004667473e-05, "loss": 0.767, "step": 3852 }, { "epoch": 0.55, "learning_rate": 1.3212146760158762e-05, "loss": 0.8438, "step": 3853 }, { "epoch": 0.55, "learning_rate": 1.3205236900530605e-05, "loss": 0.7628, "step": 3854 }, { "epoch": 0.55, "learning_rate": 1.3198327427270536e-05, "loss": 0.7204, "step": 3855 }, { "epoch": 0.55, "learning_rate": 1.3191418341865984e-05, "loss": 0.8387, "step": 3856 }, { "epoch": 0.55, "learning_rate": 1.3184509645804303e-05, "loss": 0.7575, "step": 3857 }, { "epoch": 0.55, "learning_rate": 1.3177601340572776e-05, "loss": 0.8345, "step": 3858 }, { "epoch": 0.55, "learning_rate": 1.317069342765858e-05, "loss": 0.791, "step": 3859 }, { "epoch": 0.55, "learning_rate": 1.316378590854882e-05, "loss": 0.828, "step": 3860 }, { "epoch": 0.55, "learning_rate": 1.3156878784730519e-05, "loss": 0.7531, "step": 3861 }, { "epoch": 0.55, "learning_rate": 1.3149972057690599e-05, "loss": 0.9029, "step": 3862 }, { "epoch": 0.55, "learning_rate": 1.3143065728915918e-05, "loss": 0.8962, "step": 3863 }, { "epoch": 0.55, "learning_rate": 1.3136159799893234e-05, "loss": 0.8058, "step": 3864 }, { "epoch": 0.55, "learning_rate": 1.312925427210922e-05, "loss": 0.7347, "step": 3865 }, { "epoch": 0.55, "learning_rate": 1.312234914705047e-05, "loss": 0.8613, "step": 3866 }, { "epoch": 0.55, "learning_rate": 1.311544442620348e-05, "loss": 0.8789, "step": 3867 }, { "epoch": 0.55, "learning_rate": 1.3108540111054677e-05, "loss": 0.6914, "step": 3868 }, { "epoch": 0.55, "learning_rate": 1.3101636203090373e-05, "loss": 0.7821, "step": 3869 }, { "epoch": 0.55, "learning_rate": 1.3094732703796819e-05, "loss": 0.8147, "step": 3870 }, { "epoch": 0.55, "learning_rate": 1.3087829614660171e-05, "loss": 0.8343, "step": 3871 }, { "epoch": 0.55, "learning_rate": 1.3080926937166484e-05, "loss": 0.731, "step": 3872 }, { "epoch": 0.55, "learning_rate": 1.3074024672801734e-05, "loss": 0.8493, "step": 3873 }, { "epoch": 0.55, "learning_rate": 1.3067122823051819e-05, "loss": 0.6164, "step": 3874 }, { "epoch": 0.56, "learning_rate": 1.3060221389402525e-05, "loss": 1.0368, "step": 3875 }, { "epoch": 0.56, "learning_rate": 1.3053320373339567e-05, "loss": 0.8867, "step": 3876 }, { "epoch": 0.56, "learning_rate": 1.3046419776348557e-05, "loss": 0.7341, "step": 3877 }, { "epoch": 0.56, "learning_rate": 1.3039519599915022e-05, "loss": 0.7319, "step": 3878 }, { "epoch": 0.56, "learning_rate": 1.3032619845524414e-05, "loss": 0.8086, "step": 3879 }, { "epoch": 0.56, "learning_rate": 1.3025720514662055e-05, "loss": 0.637, "step": 3880 }, { "epoch": 0.56, "learning_rate": 1.3018821608813219e-05, "loss": 0.6003, "step": 3881 }, { "epoch": 0.56, "learning_rate": 1.3011923129463066e-05, "loss": 0.7344, "step": 3882 }, { "epoch": 0.56, "learning_rate": 1.3005025078096666e-05, "loss": 0.899, "step": 3883 }, { "epoch": 0.56, "learning_rate": 1.2998127456199e-05, "loss": 0.6844, "step": 3884 }, { "epoch": 0.56, "learning_rate": 1.2991230265254946e-05, "loss": 0.9355, "step": 3885 }, { "epoch": 0.56, "learning_rate": 1.2984333506749311e-05, "loss": 0.7073, "step": 3886 }, { "epoch": 0.56, "learning_rate": 1.2977437182166795e-05, "loss": 0.8027, "step": 3887 }, { "epoch": 0.56, "learning_rate": 1.2970541292992e-05, "loss": 0.7388, "step": 3888 }, { "epoch": 0.56, "learning_rate": 1.296364584070944e-05, "loss": 0.769, "step": 3889 }, { "epoch": 0.56, "learning_rate": 1.295675082680355e-05, "loss": 0.74, "step": 3890 }, { "epoch": 0.56, "learning_rate": 1.2949856252758634e-05, "loss": 0.7712, "step": 3891 }, { "epoch": 0.56, "learning_rate": 1.2942962120058937e-05, "loss": 0.885, "step": 3892 }, { "epoch": 0.56, "learning_rate": 1.293606843018859e-05, "loss": 0.7578, "step": 3893 }, { "epoch": 0.56, "learning_rate": 1.2929175184631637e-05, "loss": 0.7347, "step": 3894 }, { "epoch": 0.56, "learning_rate": 1.2922282384872026e-05, "loss": 0.8315, "step": 3895 }, { "epoch": 0.56, "learning_rate": 1.2915390032393592e-05, "loss": 0.7969, "step": 3896 }, { "epoch": 0.56, "learning_rate": 1.2908498128680107e-05, "loss": 0.7224, "step": 3897 }, { "epoch": 0.56, "learning_rate": 1.2901606675215214e-05, "loss": 0.7737, "step": 3898 }, { "epoch": 0.56, "learning_rate": 1.2894715673482478e-05, "loss": 0.7252, "step": 3899 }, { "epoch": 0.56, "learning_rate": 1.288782512496536e-05, "loss": 0.9051, "step": 3900 }, { "epoch": 0.56, "learning_rate": 1.288093503114722e-05, "loss": 0.7246, "step": 3901 }, { "epoch": 0.56, "learning_rate": 1.2874045393511333e-05, "loss": 0.7946, "step": 3902 }, { "epoch": 0.56, "learning_rate": 1.2867156213540866e-05, "loss": 0.7606, "step": 3903 }, { "epoch": 0.56, "learning_rate": 1.2860267492718883e-05, "loss": 0.7879, "step": 3904 }, { "epoch": 0.56, "learning_rate": 1.2853379232528363e-05, "loss": 0.7486, "step": 3905 }, { "epoch": 0.56, "learning_rate": 1.284649143445217e-05, "loss": 0.7042, "step": 3906 }, { "epoch": 0.56, "learning_rate": 1.2839604099973075e-05, "loss": 0.8817, "step": 3907 }, { "epoch": 0.56, "learning_rate": 1.2832717230573766e-05, "loss": 0.8789, "step": 3908 }, { "epoch": 0.56, "learning_rate": 1.2825830827736799e-05, "loss": 0.8761, "step": 3909 }, { "epoch": 0.56, "learning_rate": 1.2818944892944651e-05, "loss": 0.7274, "step": 3910 }, { "epoch": 0.56, "learning_rate": 1.2812059427679702e-05, "loss": 0.8047, "step": 3911 }, { "epoch": 0.56, "learning_rate": 1.2805174433424207e-05, "loss": 0.639, "step": 3912 }, { "epoch": 0.56, "learning_rate": 1.279828991166035e-05, "loss": 0.822, "step": 3913 }, { "epoch": 0.56, "learning_rate": 1.2791405863870186e-05, "loss": 0.7581, "step": 3914 }, { "epoch": 0.56, "learning_rate": 1.2784522291535687e-05, "loss": 0.6579, "step": 3915 }, { "epoch": 0.56, "learning_rate": 1.2777639196138717e-05, "loss": 0.6865, "step": 3916 }, { "epoch": 0.56, "learning_rate": 1.277075657916103e-05, "loss": 0.8689, "step": 3917 }, { "epoch": 0.56, "learning_rate": 1.2763874442084288e-05, "loss": 0.7813, "step": 3918 }, { "epoch": 0.56, "learning_rate": 1.2756992786390038e-05, "loss": 0.6498, "step": 3919 }, { "epoch": 0.56, "learning_rate": 1.275011161355974e-05, "loss": 0.9113, "step": 3920 }, { "epoch": 0.56, "learning_rate": 1.2743230925074737e-05, "loss": 0.7617, "step": 3921 }, { "epoch": 0.56, "learning_rate": 1.2736350722416266e-05, "loss": 0.8968, "step": 3922 }, { "epoch": 0.56, "learning_rate": 1.2729471007065466e-05, "loss": 0.8184, "step": 3923 }, { "epoch": 0.56, "learning_rate": 1.272259178050338e-05, "loss": 0.6311, "step": 3924 }, { "epoch": 0.56, "learning_rate": 1.2715713044210917e-05, "loss": 0.6956, "step": 3925 }, { "epoch": 0.56, "learning_rate": 1.2708834799668915e-05, "loss": 0.9609, "step": 3926 }, { "epoch": 0.56, "learning_rate": 1.2701957048358079e-05, "loss": 0.9749, "step": 3927 }, { "epoch": 0.56, "learning_rate": 1.2695079791759024e-05, "loss": 0.7927, "step": 3928 }, { "epoch": 0.56, "learning_rate": 1.2688203031352253e-05, "loss": 0.8636, "step": 3929 }, { "epoch": 0.56, "learning_rate": 1.2681326768618157e-05, "loss": 0.6509, "step": 3930 }, { "epoch": 0.56, "learning_rate": 1.2674451005037032e-05, "loss": 0.8996, "step": 3931 }, { "epoch": 0.56, "learning_rate": 1.266757574208906e-05, "loss": 0.9877, "step": 3932 }, { "epoch": 0.56, "learning_rate": 1.266070098125431e-05, "loss": 0.8253, "step": 3933 }, { "epoch": 0.56, "learning_rate": 1.2653826724012752e-05, "loss": 0.841, "step": 3934 }, { "epoch": 0.56, "learning_rate": 1.2646952971844236e-05, "loss": 0.8119, "step": 3935 }, { "epoch": 0.56, "learning_rate": 1.264007972622852e-05, "loss": 0.8884, "step": 3936 }, { "epoch": 0.56, "learning_rate": 1.2633206988645243e-05, "loss": 0.8122, "step": 3937 }, { "epoch": 0.56, "learning_rate": 1.2626334760573927e-05, "loss": 0.8382, "step": 3938 }, { "epoch": 0.56, "learning_rate": 1.2619463043493997e-05, "loss": 0.7946, "step": 3939 }, { "epoch": 0.56, "learning_rate": 1.2612591838884774e-05, "loss": 0.9358, "step": 3940 }, { "epoch": 0.56, "learning_rate": 1.2605721148225437e-05, "loss": 0.8027, "step": 3941 }, { "epoch": 0.56, "learning_rate": 1.2598850972995097e-05, "loss": 0.8145, "step": 3942 }, { "epoch": 0.56, "learning_rate": 1.2591981314672718e-05, "loss": 0.7921, "step": 3943 }, { "epoch": 0.56, "learning_rate": 1.2585112174737173e-05, "loss": 0.7807, "step": 3944 }, { "epoch": 0.57, "learning_rate": 1.2578243554667223e-05, "loss": 0.9113, "step": 3945 }, { "epoch": 0.57, "learning_rate": 1.25713754559415e-05, "loss": 0.6747, "step": 3946 }, { "epoch": 0.57, "learning_rate": 1.2564507880038549e-05, "loss": 0.8211, "step": 3947 }, { "epoch": 0.57, "learning_rate": 1.2557640828436774e-05, "loss": 0.8633, "step": 3948 }, { "epoch": 0.57, "learning_rate": 1.2550774302614493e-05, "loss": 0.8683, "step": 3949 }, { "epoch": 0.57, "learning_rate": 1.2543908304049899e-05, "loss": 0.7383, "step": 3950 }, { "epoch": 0.57, "learning_rate": 1.2537042834221066e-05, "loss": 0.764, "step": 3951 }, { "epoch": 0.57, "learning_rate": 1.2530177894605956e-05, "loss": 0.8114, "step": 3952 }, { "epoch": 0.57, "learning_rate": 1.2523313486682436e-05, "loss": 0.7723, "step": 3953 }, { "epoch": 0.57, "learning_rate": 1.251644961192823e-05, "loss": 0.8686, "step": 3954 }, { "epoch": 0.57, "learning_rate": 1.2509586271820965e-05, "loss": 0.7179, "step": 3955 }, { "epoch": 0.57, "learning_rate": 1.2502723467838146e-05, "loss": 0.5689, "step": 3956 }, { "epoch": 0.57, "learning_rate": 1.2495861201457164e-05, "loss": 0.7796, "step": 3957 }, { "epoch": 0.57, "learning_rate": 1.2488999474155304e-05, "loss": 0.7221, "step": 3958 }, { "epoch": 0.57, "learning_rate": 1.2482138287409715e-05, "loss": 0.8248, "step": 3959 }, { "epoch": 0.57, "learning_rate": 1.2475277642697446e-05, "loss": 0.9632, "step": 3960 }, { "epoch": 0.57, "learning_rate": 1.2468417541495428e-05, "loss": 0.7522, "step": 3961 }, { "epoch": 0.57, "learning_rate": 1.2461557985280457e-05, "loss": 0.6557, "step": 3962 }, { "epoch": 0.57, "learning_rate": 1.2454698975529247e-05, "loss": 0.6853, "step": 3963 }, { "epoch": 0.57, "learning_rate": 1.2447840513718351e-05, "loss": 0.7171, "step": 3964 }, { "epoch": 0.57, "learning_rate": 1.2440982601324238e-05, "loss": 0.7712, "step": 3965 }, { "epoch": 0.57, "learning_rate": 1.2434125239823252e-05, "loss": 0.7472, "step": 3966 }, { "epoch": 0.57, "learning_rate": 1.24272684306916e-05, "loss": 0.8225, "step": 3967 }, { "epoch": 0.57, "learning_rate": 1.2420412175405387e-05, "loss": 0.8147, "step": 3968 }, { "epoch": 0.57, "learning_rate": 1.2413556475440605e-05, "loss": 0.7305, "step": 3969 }, { "epoch": 0.57, "learning_rate": 1.2406701332273104e-05, "loss": 0.8756, "step": 3970 }, { "epoch": 0.57, "learning_rate": 1.2399846747378637e-05, "loss": 0.714, "step": 3971 }, { "epoch": 0.57, "learning_rate": 1.2392992722232818e-05, "loss": 0.8789, "step": 3972 }, { "epoch": 0.57, "learning_rate": 1.2386139258311149e-05, "loss": 0.9213, "step": 3973 }, { "epoch": 0.57, "learning_rate": 1.2379286357089023e-05, "loss": 0.6434, "step": 3974 }, { "epoch": 0.57, "learning_rate": 1.2372434020041684e-05, "loss": 0.7673, "step": 3975 }, { "epoch": 0.57, "learning_rate": 1.2365582248644285e-05, "loss": 0.709, "step": 3976 }, { "epoch": 0.57, "learning_rate": 1.2358731044371833e-05, "loss": 0.8454, "step": 3977 }, { "epoch": 0.57, "learning_rate": 1.2351880408699224e-05, "loss": 0.5776, "step": 3978 }, { "epoch": 0.57, "learning_rate": 1.2345030343101237e-05, "loss": 0.8499, "step": 3979 }, { "epoch": 0.57, "learning_rate": 1.2338180849052511e-05, "loss": 0.7365, "step": 3980 }, { "epoch": 0.57, "learning_rate": 1.233133192802758e-05, "loss": 0.8002, "step": 3981 }, { "epoch": 0.57, "learning_rate": 1.2324483581500848e-05, "loss": 0.726, "step": 3982 }, { "epoch": 0.57, "learning_rate": 1.2317635810946592e-05, "loss": 0.9448, "step": 3983 }, { "epoch": 0.57, "learning_rate": 1.2310788617838969e-05, "loss": 0.7924, "step": 3984 }, { "epoch": 0.57, "learning_rate": 1.2303942003652007e-05, "loss": 0.779, "step": 3985 }, { "epoch": 0.57, "learning_rate": 1.2297095969859608e-05, "loss": 0.9844, "step": 3986 }, { "epoch": 0.57, "learning_rate": 1.229025051793557e-05, "loss": 0.8694, "step": 3987 }, { "epoch": 0.57, "learning_rate": 1.2283405649353535e-05, "loss": 0.7919, "step": 3988 }, { "epoch": 0.57, "learning_rate": 1.2276561365587039e-05, "loss": 0.7732, "step": 3989 }, { "epoch": 0.57, "learning_rate": 1.2269717668109489e-05, "loss": 0.8382, "step": 3990 }, { "epoch": 0.57, "learning_rate": 1.2262874558394156e-05, "loss": 0.8867, "step": 3991 }, { "epoch": 0.57, "learning_rate": 1.2256032037914201e-05, "loss": 0.9403, "step": 3992 }, { "epoch": 0.57, "learning_rate": 1.2249190108142645e-05, "loss": 0.8164, "step": 3993 }, { "epoch": 0.57, "learning_rate": 1.2242348770552387e-05, "loss": 0.8901, "step": 3994 }, { "epoch": 0.57, "learning_rate": 1.2235508026616196e-05, "loss": 0.9623, "step": 3995 }, { "epoch": 0.57, "learning_rate": 1.2228667877806715e-05, "loss": 0.88, "step": 3996 }, { "epoch": 0.57, "learning_rate": 1.2221828325596467e-05, "loss": 0.8147, "step": 3997 }, { "epoch": 0.57, "learning_rate": 1.2214989371457824e-05, "loss": 0.7402, "step": 3998 }, { "epoch": 0.57, "learning_rate": 1.2208151016863052e-05, "loss": 0.8488, "step": 3999 }, { "epoch": 0.57, "learning_rate": 1.2201313263284283e-05, "loss": 0.8516, "step": 4000 }, { "epoch": 0.57, "learning_rate": 1.2194476112193508e-05, "loss": 0.7372, "step": 4001 }, { "epoch": 0.57, "learning_rate": 1.2187639565062598e-05, "loss": 0.7137, "step": 4002 }, { "epoch": 0.57, "learning_rate": 1.21808036233633e-05, "loss": 0.7427, "step": 4003 }, { "epoch": 0.57, "learning_rate": 1.2173968288567213e-05, "loss": 0.784, "step": 4004 }, { "epoch": 0.57, "learning_rate": 1.2167133562145826e-05, "loss": 0.7091, "step": 4005 }, { "epoch": 0.57, "learning_rate": 1.2160299445570474e-05, "loss": 0.738, "step": 4006 }, { "epoch": 0.57, "learning_rate": 1.215346594031238e-05, "loss": 0.9129, "step": 4007 }, { "epoch": 0.57, "learning_rate": 1.2146633047842637e-05, "loss": 0.6749, "step": 4008 }, { "epoch": 0.57, "learning_rate": 1.2139800769632178e-05, "loss": 0.7243, "step": 4009 }, { "epoch": 0.57, "learning_rate": 1.2132969107151839e-05, "loss": 0.6169, "step": 4010 }, { "epoch": 0.57, "learning_rate": 1.2126138061872306e-05, "loss": 0.7673, "step": 4011 }, { "epoch": 0.57, "learning_rate": 1.2119307635264126e-05, "loss": 0.7754, "step": 4012 }, { "epoch": 0.57, "learning_rate": 1.211247782879773e-05, "loss": 0.6981, "step": 4013 }, { "epoch": 0.57, "learning_rate": 1.2105648643943397e-05, "loss": 0.8382, "step": 4014 }, { "epoch": 0.58, "learning_rate": 1.2098820082171291e-05, "loss": 0.6627, "step": 4015 }, { "epoch": 0.58, "learning_rate": 1.209199214495143e-05, "loss": 0.8906, "step": 4016 }, { "epoch": 0.58, "learning_rate": 1.2085164833753696e-05, "loss": 0.9146, "step": 4017 }, { "epoch": 0.58, "learning_rate": 1.207833815004784e-05, "loss": 0.8594, "step": 4018 }, { "epoch": 0.58, "learning_rate": 1.2071512095303488e-05, "loss": 0.7218, "step": 4019 }, { "epoch": 0.58, "learning_rate": 1.2064686670990102e-05, "loss": 0.8996, "step": 4020 }, { "epoch": 0.58, "learning_rate": 1.2057861878577047e-05, "loss": 0.7291, "step": 4021 }, { "epoch": 0.58, "learning_rate": 1.205103771953352e-05, "loss": 0.8145, "step": 4022 }, { "epoch": 0.58, "learning_rate": 1.2044214195328595e-05, "loss": 0.8504, "step": 4023 }, { "epoch": 0.58, "learning_rate": 1.2037391307431213e-05, "loss": 0.6359, "step": 4024 }, { "epoch": 0.58, "learning_rate": 1.2030569057310163e-05, "loss": 0.7824, "step": 4025 }, { "epoch": 0.58, "learning_rate": 1.2023747446434119e-05, "loss": 0.6387, "step": 4026 }, { "epoch": 0.58, "learning_rate": 1.2016926476271598e-05, "loss": 0.8672, "step": 4027 }, { "epoch": 0.58, "learning_rate": 1.2010106148290983e-05, "loss": 0.7667, "step": 4028 }, { "epoch": 0.58, "learning_rate": 1.2003286463960534e-05, "loss": 0.8136, "step": 4029 }, { "epoch": 0.58, "learning_rate": 1.1996467424748342e-05, "loss": 1.0184, "step": 4030 }, { "epoch": 0.58, "learning_rate": 1.1989649032122392e-05, "loss": 0.6892, "step": 4031 }, { "epoch": 0.58, "learning_rate": 1.1982831287550514e-05, "loss": 0.8359, "step": 4032 }, { "epoch": 0.58, "learning_rate": 1.1976014192500394e-05, "loss": 0.9308, "step": 4033 }, { "epoch": 0.58, "learning_rate": 1.1969197748439587e-05, "loss": 0.7148, "step": 4034 }, { "epoch": 0.58, "learning_rate": 1.1962381956835504e-05, "loss": 0.7963, "step": 4035 }, { "epoch": 0.58, "learning_rate": 1.195556681915541e-05, "loss": 0.8795, "step": 4036 }, { "epoch": 0.58, "learning_rate": 1.194875233686645e-05, "loss": 0.8225, "step": 4037 }, { "epoch": 0.58, "learning_rate": 1.1941938511435602e-05, "loss": 0.7006, "step": 4038 }, { "epoch": 0.58, "learning_rate": 1.1935125344329717e-05, "loss": 0.8449, "step": 4039 }, { "epoch": 0.58, "learning_rate": 1.1928312837015505e-05, "loss": 0.8705, "step": 4040 }, { "epoch": 0.58, "learning_rate": 1.1921500990959521e-05, "loss": 0.7882, "step": 4041 }, { "epoch": 0.58, "learning_rate": 1.1914689807628202e-05, "loss": 0.9037, "step": 4042 }, { "epoch": 0.58, "learning_rate": 1.1907879288487806e-05, "loss": 0.707, "step": 4043 }, { "epoch": 0.58, "learning_rate": 1.1901069435004486e-05, "loss": 0.9096, "step": 4044 }, { "epoch": 0.58, "learning_rate": 1.1894260248644234e-05, "loss": 0.8265, "step": 4045 }, { "epoch": 0.58, "learning_rate": 1.1887451730872892e-05, "loss": 0.7919, "step": 4046 }, { "epoch": 0.58, "learning_rate": 1.188064388315617e-05, "loss": 0.8298, "step": 4047 }, { "epoch": 0.58, "learning_rate": 1.1873836706959622e-05, "loss": 0.6705, "step": 4048 }, { "epoch": 0.58, "learning_rate": 1.1867030203748675e-05, "loss": 0.8806, "step": 4049 }, { "epoch": 0.58, "learning_rate": 1.1860224374988598e-05, "loss": 0.7631, "step": 4050 }, { "epoch": 0.58, "learning_rate": 1.1853419222144512e-05, "loss": 0.8382, "step": 4051 }, { "epoch": 0.58, "learning_rate": 1.1846614746681401e-05, "loss": 0.8013, "step": 4052 }, { "epoch": 0.58, "learning_rate": 1.183981095006411e-05, "loss": 0.8351, "step": 4053 }, { "epoch": 0.58, "learning_rate": 1.1833007833757308e-05, "loss": 0.7553, "step": 4054 }, { "epoch": 0.58, "learning_rate": 1.1826205399225555e-05, "loss": 0.9035, "step": 4055 }, { "epoch": 0.58, "learning_rate": 1.1819403647933237e-05, "loss": 0.7818, "step": 4056 }, { "epoch": 0.58, "learning_rate": 1.1812602581344604e-05, "loss": 0.9927, "step": 4057 }, { "epoch": 0.58, "learning_rate": 1.1805802200923767e-05, "loss": 0.8142, "step": 4058 }, { "epoch": 0.58, "learning_rate": 1.1799002508134664e-05, "loss": 1.0508, "step": 4059 }, { "epoch": 0.58, "learning_rate": 1.1792203504441112e-05, "loss": 0.827, "step": 4060 }, { "epoch": 0.58, "learning_rate": 1.1785405191306768e-05, "loss": 0.8086, "step": 4061 }, { "epoch": 0.58, "learning_rate": 1.1778607570195133e-05, "loss": 0.9001, "step": 4062 }, { "epoch": 0.58, "learning_rate": 1.1771810642569575e-05, "loss": 0.8504, "step": 4063 }, { "epoch": 0.58, "learning_rate": 1.1765014409893295e-05, "loss": 0.8265, "step": 4064 }, { "epoch": 0.58, "learning_rate": 1.1758218873629361e-05, "loss": 0.7712, "step": 4065 }, { "epoch": 0.58, "learning_rate": 1.1751424035240685e-05, "loss": 0.6242, "step": 4066 }, { "epoch": 0.58, "learning_rate": 1.1744629896190022e-05, "loss": 0.8973, "step": 4067 }, { "epoch": 0.58, "learning_rate": 1.173783645793998e-05, "loss": 0.8019, "step": 4068 }, { "epoch": 0.58, "learning_rate": 1.1731043721953033e-05, "loss": 0.7723, "step": 4069 }, { "epoch": 0.58, "learning_rate": 1.172425168969147e-05, "loss": 0.6903, "step": 4070 }, { "epoch": 0.58, "learning_rate": 1.1717460362617462e-05, "loss": 0.8828, "step": 4071 }, { "epoch": 0.58, "learning_rate": 1.1710669742193004e-05, "loss": 0.8089, "step": 4072 }, { "epoch": 0.58, "learning_rate": 1.1703879829879951e-05, "loss": 0.6964, "step": 4073 }, { "epoch": 0.58, "learning_rate": 1.169709062714001e-05, "loss": 0.7623, "step": 4074 }, { "epoch": 0.58, "learning_rate": 1.1690302135434715e-05, "loss": 0.8354, "step": 4075 }, { "epoch": 0.58, "learning_rate": 1.1683514356225477e-05, "loss": 0.8538, "step": 4076 }, { "epoch": 0.58, "learning_rate": 1.1676727290973519e-05, "loss": 0.755, "step": 4077 }, { "epoch": 0.58, "learning_rate": 1.166994094113994e-05, "loss": 0.6908, "step": 4078 }, { "epoch": 0.58, "learning_rate": 1.1663155308185673e-05, "loss": 0.7416, "step": 4079 }, { "epoch": 0.58, "learning_rate": 1.1656370393571491e-05, "loss": 0.7939, "step": 4080 }, { "epoch": 0.58, "learning_rate": 1.1649586198758017e-05, "loss": 0.7302, "step": 4081 }, { "epoch": 0.58, "learning_rate": 1.1642802725205733e-05, "loss": 0.9185, "step": 4082 }, { "epoch": 0.58, "learning_rate": 1.1636019974374942e-05, "loss": 0.8867, "step": 4083 }, { "epoch": 0.59, "learning_rate": 1.1629237947725805e-05, "loss": 0.8186, "step": 4084 }, { "epoch": 0.59, "learning_rate": 1.1622456646718324e-05, "loss": 0.8516, "step": 4085 }, { "epoch": 0.59, "learning_rate": 1.1615676072812342e-05, "loss": 0.8096, "step": 4086 }, { "epoch": 0.59, "learning_rate": 1.1608896227467556e-05, "loss": 0.8387, "step": 4087 }, { "epoch": 0.59, "learning_rate": 1.1602117112143496e-05, "loss": 0.702, "step": 4088 }, { "epoch": 0.59, "learning_rate": 1.1595338728299536e-05, "loss": 0.8477, "step": 4089 }, { "epoch": 0.59, "learning_rate": 1.1588561077394897e-05, "loss": 0.8828, "step": 4090 }, { "epoch": 0.59, "learning_rate": 1.1581784160888634e-05, "loss": 0.9615, "step": 4091 }, { "epoch": 0.59, "learning_rate": 1.1575007980239664e-05, "loss": 0.7801, "step": 4092 }, { "epoch": 0.59, "learning_rate": 1.1568232536906709e-05, "loss": 0.8901, "step": 4093 }, { "epoch": 0.59, "learning_rate": 1.1561457832348369e-05, "loss": 0.6987, "step": 4094 }, { "epoch": 0.59, "learning_rate": 1.1554683868023069e-05, "loss": 0.7958, "step": 4095 }, { "epoch": 0.59, "learning_rate": 1.1547910645389072e-05, "loss": 0.8318, "step": 4096 }, { "epoch": 0.59, "learning_rate": 1.1541138165904484e-05, "loss": 0.8119, "step": 4097 }, { "epoch": 0.59, "learning_rate": 1.1534366431027262e-05, "loss": 0.8471, "step": 4098 }, { "epoch": 0.59, "learning_rate": 1.1527595442215183e-05, "loss": 0.7916, "step": 4099 }, { "epoch": 0.59, "learning_rate": 1.1520825200925878e-05, "loss": 0.7377, "step": 4100 }, { "epoch": 0.59, "learning_rate": 1.151405570861681e-05, "loss": 0.7388, "step": 4101 }, { "epoch": 0.59, "learning_rate": 1.150728696674528e-05, "loss": 0.7734, "step": 4102 }, { "epoch": 0.59, "learning_rate": 1.1500518976768444e-05, "loss": 0.6007, "step": 4103 }, { "epoch": 0.59, "learning_rate": 1.1493751740143265e-05, "loss": 0.8265, "step": 4104 }, { "epoch": 0.59, "learning_rate": 1.1486985258326578e-05, "loss": 0.8633, "step": 4105 }, { "epoch": 0.59, "learning_rate": 1.1480219532775026e-05, "loss": 0.8655, "step": 4106 }, { "epoch": 0.59, "learning_rate": 1.1473454564945108e-05, "loss": 0.9727, "step": 4107 }, { "epoch": 0.59, "learning_rate": 1.1466690356293156e-05, "loss": 0.8571, "step": 4108 }, { "epoch": 0.59, "learning_rate": 1.1459926908275328e-05, "loss": 0.8984, "step": 4109 }, { "epoch": 0.59, "learning_rate": 1.1453164222347638e-05, "loss": 0.8097, "step": 4110 }, { "epoch": 0.59, "learning_rate": 1.1446402299965922e-05, "loss": 0.9766, "step": 4111 }, { "epoch": 0.59, "learning_rate": 1.1439641142585852e-05, "loss": 0.9007, "step": 4112 }, { "epoch": 0.59, "learning_rate": 1.143288075166294e-05, "loss": 0.5723, "step": 4113 }, { "epoch": 0.59, "learning_rate": 1.1426121128652527e-05, "loss": 0.8301, "step": 4114 }, { "epoch": 0.59, "learning_rate": 1.1419362275009792e-05, "loss": 0.7907, "step": 4115 }, { "epoch": 0.59, "learning_rate": 1.141260419218976e-05, "loss": 0.8443, "step": 4116 }, { "epoch": 0.59, "learning_rate": 1.1405846881647266e-05, "loss": 0.7472, "step": 4117 }, { "epoch": 0.59, "learning_rate": 1.1399090344837e-05, "loss": 0.7729, "step": 4118 }, { "epoch": 0.59, "learning_rate": 1.1392334583213473e-05, "loss": 0.8811, "step": 4119 }, { "epoch": 0.59, "learning_rate": 1.138557959823103e-05, "loss": 0.719, "step": 4120 }, { "epoch": 0.59, "learning_rate": 1.1378825391343864e-05, "loss": 0.8393, "step": 4121 }, { "epoch": 0.59, "learning_rate": 1.1372071964005976e-05, "loss": 0.9286, "step": 4122 }, { "epoch": 0.59, "learning_rate": 1.1365319317671218e-05, "loss": 0.7383, "step": 4123 }, { "epoch": 0.59, "learning_rate": 1.135856745379327e-05, "loss": 0.8044, "step": 4124 }, { "epoch": 0.59, "learning_rate": 1.135181637382563e-05, "loss": 0.8287, "step": 4125 }, { "epoch": 0.59, "learning_rate": 1.1345066079221656e-05, "loss": 0.8259, "step": 4126 }, { "epoch": 0.59, "learning_rate": 1.1338316571434501e-05, "loss": 0.793, "step": 4127 }, { "epoch": 0.59, "learning_rate": 1.1331567851917178e-05, "loss": 0.7257, "step": 4128 }, { "epoch": 0.59, "learning_rate": 1.132481992212252e-05, "loss": 0.9286, "step": 4129 }, { "epoch": 0.59, "learning_rate": 1.1318072783503183e-05, "loss": 0.6879, "step": 4130 }, { "epoch": 0.59, "learning_rate": 1.131132643751166e-05, "loss": 0.7779, "step": 4131 }, { "epoch": 0.59, "learning_rate": 1.130458088560028e-05, "loss": 0.7372, "step": 4132 }, { "epoch": 0.59, "learning_rate": 1.1297836129221185e-05, "loss": 0.7154, "step": 4133 }, { "epoch": 0.59, "learning_rate": 1.129109216982636e-05, "loss": 0.6521, "step": 4134 }, { "epoch": 0.59, "learning_rate": 1.1284349008867605e-05, "loss": 0.7679, "step": 4135 }, { "epoch": 0.59, "learning_rate": 1.127760664779656e-05, "loss": 0.6306, "step": 4136 }, { "epoch": 0.59, "learning_rate": 1.1270865088064696e-05, "loss": 0.7418, "step": 4137 }, { "epoch": 0.59, "learning_rate": 1.1264124331123289e-05, "loss": 0.8733, "step": 4138 }, { "epoch": 0.59, "learning_rate": 1.1257384378423466e-05, "loss": 0.7009, "step": 4139 }, { "epoch": 0.59, "learning_rate": 1.1250645231416178e-05, "loss": 0.8968, "step": 4140 }, { "epoch": 0.59, "learning_rate": 1.1243906891552183e-05, "loss": 0.6975, "step": 4141 }, { "epoch": 0.59, "learning_rate": 1.123716936028209e-05, "loss": 0.6645, "step": 4142 }, { "epoch": 0.59, "learning_rate": 1.1230432639056309e-05, "loss": 0.9559, "step": 4143 }, { "epoch": 0.59, "learning_rate": 1.1223696729325105e-05, "loss": 0.8627, "step": 4144 }, { "epoch": 0.59, "learning_rate": 1.121696163253855e-05, "loss": 0.7916, "step": 4145 }, { "epoch": 0.59, "learning_rate": 1.1210227350146534e-05, "loss": 0.764, "step": 4146 }, { "epoch": 0.59, "learning_rate": 1.1203493883598783e-05, "loss": 0.8069, "step": 4147 }, { "epoch": 0.59, "learning_rate": 1.1196761234344863e-05, "loss": 0.6956, "step": 4148 }, { "epoch": 0.59, "learning_rate": 1.119002940383412e-05, "loss": 0.8371, "step": 4149 }, { "epoch": 0.59, "learning_rate": 1.1183298393515774e-05, "loss": 0.8778, "step": 4150 }, { "epoch": 0.59, "learning_rate": 1.1176568204838831e-05, "loss": 0.9124, "step": 4151 }, { "epoch": 0.59, "learning_rate": 1.1169838839252135e-05, "loss": 0.875, "step": 4152 }, { "epoch": 0.59, "learning_rate": 1.1163110298204367e-05, "loss": 0.817, "step": 4153 }, { "epoch": 0.6, "learning_rate": 1.1156382583143991e-05, "loss": 0.7885, "step": 4154 }, { "epoch": 0.6, "learning_rate": 1.1149655695519337e-05, "loss": 0.7148, "step": 4155 }, { "epoch": 0.6, "learning_rate": 1.1142929636778528e-05, "loss": 0.7895, "step": 4156 }, { "epoch": 0.6, "learning_rate": 1.113620440836952e-05, "loss": 0.8315, "step": 4157 }, { "epoch": 0.6, "learning_rate": 1.1129480011740093e-05, "loss": 0.697, "step": 4158 }, { "epoch": 0.6, "learning_rate": 1.112275644833783e-05, "loss": 0.8086, "step": 4159 }, { "epoch": 0.6, "learning_rate": 1.1116033719610163e-05, "loss": 0.779, "step": 4160 }, { "epoch": 0.6, "learning_rate": 1.1109311827004324e-05, "loss": 0.8438, "step": 4161 }, { "epoch": 0.6, "learning_rate": 1.1102590771967367e-05, "loss": 0.8281, "step": 4162 }, { "epoch": 0.6, "learning_rate": 1.1095870555946173e-05, "loss": 0.8689, "step": 4163 }, { "epoch": 0.6, "learning_rate": 1.1089151180387433e-05, "loss": 0.8387, "step": 4164 }, { "epoch": 0.6, "learning_rate": 1.1082432646737665e-05, "loss": 0.6716, "step": 4165 }, { "epoch": 0.6, "learning_rate": 1.1075714956443207e-05, "loss": 0.7709, "step": 4166 }, { "epoch": 0.6, "learning_rate": 1.1068998110950204e-05, "loss": 0.7472, "step": 4167 }, { "epoch": 0.6, "learning_rate": 1.1062282111704633e-05, "loss": 0.7974, "step": 4168 }, { "epoch": 0.6, "learning_rate": 1.1055566960152281e-05, "loss": 0.9291, "step": 4169 }, { "epoch": 0.6, "learning_rate": 1.1048852657738748e-05, "loss": 0.8756, "step": 4170 }, { "epoch": 0.6, "learning_rate": 1.1042139205909473e-05, "loss": 0.7372, "step": 4171 }, { "epoch": 0.6, "learning_rate": 1.1035426606109673e-05, "loss": 0.6828, "step": 4172 }, { "epoch": 0.6, "learning_rate": 1.102871485978442e-05, "loss": 0.9386, "step": 4173 }, { "epoch": 0.6, "learning_rate": 1.1022003968378588e-05, "loss": 0.8521, "step": 4174 }, { "epoch": 0.6, "learning_rate": 1.1015293933336858e-05, "loss": 0.8047, "step": 4175 }, { "epoch": 0.6, "learning_rate": 1.1008584756103741e-05, "loss": 0.7154, "step": 4176 }, { "epoch": 0.6, "learning_rate": 1.1001876438123546e-05, "loss": 0.7807, "step": 4177 }, { "epoch": 0.6, "learning_rate": 1.0995168980840423e-05, "loss": 0.7712, "step": 4178 }, { "epoch": 0.6, "learning_rate": 1.0988462385698312e-05, "loss": 0.7054, "step": 4179 }, { "epoch": 0.6, "learning_rate": 1.098175665414098e-05, "loss": 0.7461, "step": 4180 }, { "epoch": 0.6, "learning_rate": 1.0975051787611999e-05, "loss": 1.0073, "step": 4181 }, { "epoch": 0.6, "learning_rate": 1.0968347787554773e-05, "loss": 0.8627, "step": 4182 }, { "epoch": 0.6, "learning_rate": 1.09616446554125e-05, "loss": 0.8309, "step": 4183 }, { "epoch": 0.6, "learning_rate": 1.0954942392628198e-05, "loss": 0.8677, "step": 4184 }, { "epoch": 0.6, "learning_rate": 1.0948241000644697e-05, "loss": 0.8013, "step": 4185 }, { "epoch": 0.6, "learning_rate": 1.0941540480904643e-05, "loss": 0.7762, "step": 4186 }, { "epoch": 0.6, "learning_rate": 1.0934840834850498e-05, "loss": 1.0173, "step": 4187 }, { "epoch": 0.6, "learning_rate": 1.0928142063924517e-05, "loss": 0.7383, "step": 4188 }, { "epoch": 0.6, "learning_rate": 1.0921444169568788e-05, "loss": 0.7372, "step": 4189 }, { "epoch": 0.6, "learning_rate": 1.0914747153225207e-05, "loss": 0.8424, "step": 4190 }, { "epoch": 0.6, "learning_rate": 1.0908051016335463e-05, "loss": 0.745, "step": 4191 }, { "epoch": 0.6, "learning_rate": 1.0901355760341079e-05, "loss": 0.7034, "step": 4192 }, { "epoch": 0.6, "learning_rate": 1.0894661386683369e-05, "loss": 0.9269, "step": 4193 }, { "epoch": 0.6, "learning_rate": 1.0887967896803473e-05, "loss": 0.6699, "step": 4194 }, { "epoch": 0.6, "learning_rate": 1.0881275292142336e-05, "loss": 0.6565, "step": 4195 }, { "epoch": 0.6, "learning_rate": 1.0874583574140703e-05, "loss": 0.8694, "step": 4196 }, { "epoch": 0.6, "learning_rate": 1.0867892744239132e-05, "loss": 0.9375, "step": 4197 }, { "epoch": 0.6, "learning_rate": 1.0861202803878014e-05, "loss": 0.7285, "step": 4198 }, { "epoch": 0.6, "learning_rate": 1.0854513754497501e-05, "loss": 0.923, "step": 4199 }, { "epoch": 0.6, "learning_rate": 1.0847825597537598e-05, "loss": 0.8013, "step": 4200 }, { "epoch": 0.6, "learning_rate": 1.084113833443809e-05, "loss": 0.8011, "step": 4201 }, { "epoch": 0.6, "learning_rate": 1.0834451966638582e-05, "loss": 0.745, "step": 4202 }, { "epoch": 0.6, "learning_rate": 1.082776649557849e-05, "loss": 0.9012, "step": 4203 }, { "epoch": 0.6, "learning_rate": 1.0821081922697017e-05, "loss": 0.8103, "step": 4204 }, { "epoch": 0.6, "learning_rate": 1.0814398249433202e-05, "loss": 0.851, "step": 4205 }, { "epoch": 0.6, "learning_rate": 1.080771547722586e-05, "loss": 0.7391, "step": 4206 }, { "epoch": 0.6, "learning_rate": 1.0801033607513633e-05, "loss": 0.7467, "step": 4207 }, { "epoch": 0.6, "learning_rate": 1.0794352641734966e-05, "loss": 0.9498, "step": 4208 }, { "epoch": 0.6, "learning_rate": 1.07876725813281e-05, "loss": 0.7556, "step": 4209 }, { "epoch": 0.6, "learning_rate": 1.0780993427731082e-05, "loss": 0.8287, "step": 4210 }, { "epoch": 0.6, "learning_rate": 1.0774315182381786e-05, "loss": 0.8577, "step": 4211 }, { "epoch": 0.6, "learning_rate": 1.0767637846717854e-05, "loss": 0.8248, "step": 4212 }, { "epoch": 0.6, "learning_rate": 1.0760961422176765e-05, "loss": 0.7941, "step": 4213 }, { "epoch": 0.6, "learning_rate": 1.0754285910195778e-05, "loss": 0.8449, "step": 4214 }, { "epoch": 0.6, "learning_rate": 1.0747611312211966e-05, "loss": 0.9877, "step": 4215 }, { "epoch": 0.6, "learning_rate": 1.0740937629662213e-05, "loss": 0.798, "step": 4216 }, { "epoch": 0.6, "learning_rate": 1.0734264863983191e-05, "loss": 0.7757, "step": 4217 }, { "epoch": 0.6, "learning_rate": 1.0727593016611385e-05, "loss": 0.8521, "step": 4218 }, { "epoch": 0.6, "learning_rate": 1.0720922088983077e-05, "loss": 0.8175, "step": 4219 }, { "epoch": 0.6, "learning_rate": 1.0714252082534347e-05, "loss": 0.8583, "step": 4220 }, { "epoch": 0.6, "learning_rate": 1.07075829987011e-05, "loss": 0.6258, "step": 4221 }, { "epoch": 0.6, "learning_rate": 1.0700914838919e-05, "loss": 0.7391, "step": 4222 }, { "epoch": 0.6, "learning_rate": 1.0694247604623553e-05, "loss": 0.7567, "step": 4223 }, { "epoch": 0.61, "learning_rate": 1.068758129725005e-05, "loss": 0.8929, "step": 4224 }, { "epoch": 0.61, "learning_rate": 1.0680915918233574e-05, "loss": 0.5854, "step": 4225 }, { "epoch": 0.61, "learning_rate": 1.0674251469009017e-05, "loss": 0.8767, "step": 4226 }, { "epoch": 0.61, "learning_rate": 1.0667587951011079e-05, "loss": 0.8086, "step": 4227 }, { "epoch": 0.61, "learning_rate": 1.0660925365674242e-05, "loss": 0.7759, "step": 4228 }, { "epoch": 0.61, "learning_rate": 1.0654263714432802e-05, "loss": 0.6556, "step": 4229 }, { "epoch": 0.61, "learning_rate": 1.0647602998720839e-05, "loss": 0.834, "step": 4230 }, { "epoch": 0.61, "learning_rate": 1.0640943219972245e-05, "loss": 0.7433, "step": 4231 }, { "epoch": 0.61, "learning_rate": 1.0634284379620713e-05, "loss": 0.6543, "step": 4232 }, { "epoch": 0.61, "learning_rate": 1.0627626479099712e-05, "loss": 0.7087, "step": 4233 }, { "epoch": 0.61, "learning_rate": 1.0620969519842535e-05, "loss": 1.0223, "step": 4234 }, { "epoch": 0.61, "learning_rate": 1.0614313503282252e-05, "loss": 0.7863, "step": 4235 }, { "epoch": 0.61, "learning_rate": 1.0607658430851745e-05, "loss": 0.6412, "step": 4236 }, { "epoch": 0.61, "learning_rate": 1.0601004303983687e-05, "loss": 0.7801, "step": 4237 }, { "epoch": 0.61, "learning_rate": 1.0594351124110538e-05, "loss": 0.8577, "step": 4238 }, { "epoch": 0.61, "learning_rate": 1.0587698892664572e-05, "loss": 0.7879, "step": 4239 }, { "epoch": 0.61, "learning_rate": 1.0581047611077852e-05, "loss": 0.6925, "step": 4240 }, { "epoch": 0.61, "learning_rate": 1.0574397280782223e-05, "loss": 0.8708, "step": 4241 }, { "epoch": 0.61, "learning_rate": 1.0567747903209345e-05, "loss": 0.639, "step": 4242 }, { "epoch": 0.61, "learning_rate": 1.0561099479790661e-05, "loss": 0.6565, "step": 4243 }, { "epoch": 0.61, "learning_rate": 1.055445201195741e-05, "loss": 0.7974, "step": 4244 }, { "epoch": 0.61, "learning_rate": 1.054780550114064e-05, "loss": 0.822, "step": 4245 }, { "epoch": 0.61, "learning_rate": 1.0541159948771163e-05, "loss": 0.8549, "step": 4246 }, { "epoch": 0.61, "learning_rate": 1.0534515356279608e-05, "loss": 0.8686, "step": 4247 }, { "epoch": 0.61, "learning_rate": 1.0527871725096404e-05, "loss": 0.8856, "step": 4248 }, { "epoch": 0.61, "learning_rate": 1.052122905665174e-05, "loss": 0.6892, "step": 4249 }, { "epoch": 0.61, "learning_rate": 1.0514587352375632e-05, "loss": 0.7589, "step": 4250 }, { "epoch": 0.61, "learning_rate": 1.050794661369787e-05, "loss": 0.7701, "step": 4251 }, { "epoch": 0.61, "learning_rate": 1.050130684204804e-05, "loss": 0.8661, "step": 4252 }, { "epoch": 0.61, "learning_rate": 1.0494668038855524e-05, "loss": 0.8845, "step": 4253 }, { "epoch": 0.61, "learning_rate": 1.0488030205549483e-05, "loss": 0.7963, "step": 4254 }, { "epoch": 0.61, "learning_rate": 1.0481393343558895e-05, "loss": 0.7606, "step": 4255 }, { "epoch": 0.61, "learning_rate": 1.0474757454312492e-05, "loss": 0.808, "step": 4256 }, { "epoch": 0.61, "learning_rate": 1.0468122539238829e-05, "loss": 0.772, "step": 4257 }, { "epoch": 0.61, "learning_rate": 1.0461488599766242e-05, "loss": 0.9369, "step": 4258 }, { "epoch": 0.61, "learning_rate": 1.0454855637322845e-05, "loss": 0.8359, "step": 4259 }, { "epoch": 0.61, "learning_rate": 1.0448223653336549e-05, "loss": 0.8198, "step": 4260 }, { "epoch": 0.61, "learning_rate": 1.0441592649235072e-05, "loss": 0.8348, "step": 4261 }, { "epoch": 0.61, "learning_rate": 1.0434962626445888e-05, "loss": 0.791, "step": 4262 }, { "epoch": 0.61, "learning_rate": 1.0428333586396286e-05, "loss": 0.8867, "step": 4263 }, { "epoch": 0.61, "learning_rate": 1.0421705530513333e-05, "loss": 0.6984, "step": 4264 }, { "epoch": 0.61, "learning_rate": 1.0415078460223877e-05, "loss": 0.7818, "step": 4265 }, { "epoch": 0.61, "learning_rate": 1.0408452376954579e-05, "loss": 0.7289, "step": 4266 }, { "epoch": 0.61, "learning_rate": 1.0401827282131853e-05, "loss": 0.7188, "step": 4267 }, { "epoch": 0.61, "learning_rate": 1.039520317718193e-05, "loss": 0.8555, "step": 4268 }, { "epoch": 0.61, "learning_rate": 1.0388580063530815e-05, "loss": 0.63, "step": 4269 }, { "epoch": 0.61, "learning_rate": 1.0381957942604292e-05, "loss": 0.8248, "step": 4270 }, { "epoch": 0.61, "learning_rate": 1.0375336815827947e-05, "loss": 0.8527, "step": 4271 }, { "epoch": 0.61, "learning_rate": 1.0368716684627141e-05, "loss": 0.7902, "step": 4272 }, { "epoch": 0.61, "learning_rate": 1.0362097550427028e-05, "loss": 0.6084, "step": 4273 }, { "epoch": 0.61, "learning_rate": 1.0355479414652543e-05, "loss": 0.9269, "step": 4274 }, { "epoch": 0.61, "learning_rate": 1.0348862278728407e-05, "loss": 0.8588, "step": 4275 }, { "epoch": 0.61, "learning_rate": 1.0342246144079117e-05, "loss": 0.8457, "step": 4276 }, { "epoch": 0.61, "learning_rate": 1.033563101212898e-05, "loss": 0.6724, "step": 4277 }, { "epoch": 0.61, "learning_rate": 1.0329016884302056e-05, "loss": 0.7757, "step": 4278 }, { "epoch": 0.61, "learning_rate": 1.0322403762022213e-05, "loss": 0.889, "step": 4279 }, { "epoch": 0.61, "learning_rate": 1.0315791646713081e-05, "loss": 0.6501, "step": 4280 }, { "epoch": 0.61, "learning_rate": 1.0309180539798092e-05, "loss": 0.7991, "step": 4281 }, { "epoch": 0.61, "learning_rate": 1.030257044270046e-05, "loss": 0.6417, "step": 4282 }, { "epoch": 0.61, "learning_rate": 1.0295961356843159e-05, "loss": 0.7171, "step": 4283 }, { "epoch": 0.61, "learning_rate": 1.0289353283648977e-05, "loss": 0.6685, "step": 4284 }, { "epoch": 0.61, "learning_rate": 1.0282746224540462e-05, "loss": 0.8717, "step": 4285 }, { "epoch": 0.61, "learning_rate": 1.027614018093995e-05, "loss": 0.7757, "step": 4286 }, { "epoch": 0.61, "learning_rate": 1.026953515426956e-05, "loss": 0.8979, "step": 4287 }, { "epoch": 0.61, "learning_rate": 1.0262931145951186e-05, "loss": 0.6551, "step": 4288 }, { "epoch": 0.61, "learning_rate": 1.0256328157406518e-05, "loss": 0.8005, "step": 4289 }, { "epoch": 0.61, "learning_rate": 1.024972619005701e-05, "loss": 0.8689, "step": 4290 }, { "epoch": 0.61, "learning_rate": 1.02431252453239e-05, "loss": 0.909, "step": 4291 }, { "epoch": 0.61, "learning_rate": 1.0236525324628212e-05, "loss": 0.5826, "step": 4292 }, { "epoch": 0.61, "learning_rate": 1.0229926429390743e-05, "loss": 0.8638, "step": 4293 }, { "epoch": 0.62, "learning_rate": 1.0223328561032068e-05, "loss": 0.7257, "step": 4294 }, { "epoch": 0.62, "learning_rate": 1.0216731720972557e-05, "loss": 0.8449, "step": 4295 }, { "epoch": 0.62, "learning_rate": 1.0210135910632335e-05, "loss": 0.8058, "step": 4296 }, { "epoch": 0.62, "learning_rate": 1.0203541131431321e-05, "loss": 0.7924, "step": 4297 }, { "epoch": 0.62, "learning_rate": 1.0196947384789212e-05, "loss": 0.8097, "step": 4298 }, { "epoch": 0.62, "learning_rate": 1.0190354672125466e-05, "loss": 0.8075, "step": 4299 }, { "epoch": 0.62, "learning_rate": 1.0183762994859348e-05, "loss": 0.6473, "step": 4300 }, { "epoch": 0.62, "learning_rate": 1.0177172354409864e-05, "loss": 0.7489, "step": 4301 }, { "epoch": 0.62, "learning_rate": 1.0170582752195831e-05, "loss": 0.7132, "step": 4302 }, { "epoch": 0.62, "learning_rate": 1.0163994189635823e-05, "loss": 0.7115, "step": 4303 }, { "epoch": 0.62, "learning_rate": 1.015740666814819e-05, "loss": 0.6967, "step": 4304 }, { "epoch": 0.62, "learning_rate": 1.015082018915107e-05, "loss": 0.6953, "step": 4305 }, { "epoch": 0.62, "learning_rate": 1.014423475406236e-05, "loss": 0.6892, "step": 4306 }, { "epoch": 0.62, "learning_rate": 1.0137650364299747e-05, "loss": 0.9247, "step": 4307 }, { "epoch": 0.62, "learning_rate": 1.013106702128069e-05, "loss": 0.8262, "step": 4308 }, { "epoch": 0.62, "learning_rate": 1.0124484726422412e-05, "loss": 0.8211, "step": 4309 }, { "epoch": 0.62, "learning_rate": 1.0117903481141919e-05, "loss": 0.7813, "step": 4310 }, { "epoch": 0.62, "learning_rate": 1.0111323286856e-05, "loss": 0.7547, "step": 4311 }, { "epoch": 0.62, "learning_rate": 1.01047441449812e-05, "loss": 0.6987, "step": 4312 }, { "epoch": 0.62, "learning_rate": 1.0098166056933848e-05, "loss": 0.8426, "step": 4313 }, { "epoch": 0.62, "learning_rate": 1.0091589024130039e-05, "loss": 0.9615, "step": 4314 }, { "epoch": 0.62, "learning_rate": 1.0085013047985648e-05, "loss": 0.7123, "step": 4315 }, { "epoch": 0.62, "learning_rate": 1.0078438129916327e-05, "loss": 0.5617, "step": 4316 }, { "epoch": 0.62, "learning_rate": 1.007186427133748e-05, "loss": 0.8142, "step": 4317 }, { "epoch": 0.62, "learning_rate": 1.0065291473664306e-05, "loss": 0.8198, "step": 4318 }, { "epoch": 0.62, "learning_rate": 1.0058719738311763e-05, "loss": 0.6627, "step": 4319 }, { "epoch": 0.62, "learning_rate": 1.005214906669458e-05, "loss": 0.8267, "step": 4320 }, { "epoch": 0.62, "learning_rate": 1.0045579460227268e-05, "loss": 0.822, "step": 4321 }, { "epoch": 0.62, "learning_rate": 1.0039010920324088e-05, "loss": 0.9124, "step": 4322 }, { "epoch": 0.62, "learning_rate": 1.0032443448399096e-05, "loss": 0.7411, "step": 4323 }, { "epoch": 0.62, "learning_rate": 1.0025877045866103e-05, "loss": 0.7634, "step": 4324 }, { "epoch": 0.62, "learning_rate": 1.0019311714138688e-05, "loss": 0.6978, "step": 4325 }, { "epoch": 0.62, "learning_rate": 1.0012747454630206e-05, "loss": 0.7545, "step": 4326 }, { "epoch": 0.62, "learning_rate": 1.0006184268753792e-05, "loss": 0.7266, "step": 4327 }, { "epoch": 0.62, "learning_rate": 9.999622157922315e-06, "loss": 0.8156, "step": 4328 }, { "epoch": 0.62, "learning_rate": 9.993061123548455e-06, "loss": 0.9805, "step": 4329 }, { "epoch": 0.62, "learning_rate": 9.98650116704463e-06, "loss": 0.8817, "step": 4330 }, { "epoch": 0.62, "learning_rate": 9.97994228982304e-06, "loss": 0.784, "step": 4331 }, { "epoch": 0.62, "learning_rate": 9.97338449329565e-06, "loss": 0.7327, "step": 4332 }, { "epoch": 0.62, "learning_rate": 9.966827778874181e-06, "loss": 0.9453, "step": 4333 }, { "epoch": 0.62, "learning_rate": 9.960272147970151e-06, "loss": 0.8945, "step": 4334 }, { "epoch": 0.62, "learning_rate": 9.953717601994804e-06, "loss": 0.7835, "step": 4335 }, { "epoch": 0.62, "learning_rate": 9.947164142359183e-06, "loss": 0.9972, "step": 4336 }, { "epoch": 0.62, "learning_rate": 9.940611770474087e-06, "loss": 0.8672, "step": 4337 }, { "epoch": 0.62, "learning_rate": 9.934060487750075e-06, "loss": 0.6572, "step": 4338 }, { "epoch": 0.62, "learning_rate": 9.927510295597473e-06, "loss": 0.803, "step": 4339 }, { "epoch": 0.62, "learning_rate": 9.920961195426387e-06, "loss": 0.7294, "step": 4340 }, { "epoch": 0.62, "learning_rate": 9.914413188646665e-06, "loss": 0.76, "step": 4341 }, { "epoch": 0.62, "learning_rate": 9.90786627666794e-06, "loss": 0.7863, "step": 4342 }, { "epoch": 0.62, "learning_rate": 9.90132046089959e-06, "loss": 0.5925, "step": 4343 }, { "epoch": 0.62, "learning_rate": 9.894775742750768e-06, "loss": 0.865, "step": 4344 }, { "epoch": 0.62, "learning_rate": 9.8882321236304e-06, "loss": 0.7743, "step": 4345 }, { "epoch": 0.62, "learning_rate": 9.881689604947157e-06, "loss": 0.9247, "step": 4346 }, { "epoch": 0.62, "learning_rate": 9.875148188109485e-06, "loss": 0.5544, "step": 4347 }, { "epoch": 0.62, "learning_rate": 9.868607874525588e-06, "loss": 0.8482, "step": 4348 }, { "epoch": 0.62, "learning_rate": 9.862068665603429e-06, "loss": 0.8834, "step": 4349 }, { "epoch": 0.62, "learning_rate": 9.855530562750751e-06, "loss": 0.7592, "step": 4350 }, { "epoch": 0.62, "learning_rate": 9.848993567375029e-06, "loss": 0.8136, "step": 4351 }, { "epoch": 0.62, "learning_rate": 9.842457680883525e-06, "loss": 0.8019, "step": 4352 }, { "epoch": 0.62, "learning_rate": 9.835922904683258e-06, "loss": 0.9023, "step": 4353 }, { "epoch": 0.62, "learning_rate": 9.829389240180994e-06, "loss": 0.7991, "step": 4354 }, { "epoch": 0.62, "learning_rate": 9.822856688783278e-06, "loss": 0.7285, "step": 4355 }, { "epoch": 0.62, "learning_rate": 9.816325251896395e-06, "loss": 0.8493, "step": 4356 }, { "epoch": 0.62, "learning_rate": 9.809794930926413e-06, "loss": 0.7176, "step": 4357 }, { "epoch": 0.62, "learning_rate": 9.80326572727915e-06, "loss": 0.5301, "step": 4358 }, { "epoch": 0.62, "learning_rate": 9.79673764236017e-06, "loss": 0.817, "step": 4359 }, { "epoch": 0.62, "learning_rate": 9.790210677574815e-06, "loss": 0.7447, "step": 4360 }, { "epoch": 0.62, "learning_rate": 9.783684834328187e-06, "loss": 0.7868, "step": 4361 }, { "epoch": 0.62, "learning_rate": 9.777160114025121e-06, "loss": 0.6325, "step": 4362 }, { "epoch": 0.62, "learning_rate": 9.770636518070243e-06, "loss": 0.7447, "step": 4363 }, { "epoch": 0.63, "learning_rate": 9.764114047867914e-06, "loss": 0.7701, "step": 4364 }, { "epoch": 0.63, "learning_rate": 9.75759270482226e-06, "loss": 0.9866, "step": 4365 }, { "epoch": 0.63, "learning_rate": 9.751072490337172e-06, "loss": 0.947, "step": 4366 }, { "epoch": 0.63, "learning_rate": 9.744553405816278e-06, "loss": 0.8281, "step": 4367 }, { "epoch": 0.63, "learning_rate": 9.73803545266299e-06, "loss": 0.7037, "step": 4368 }, { "epoch": 0.63, "learning_rate": 9.731518632280458e-06, "loss": 0.8756, "step": 4369 }, { "epoch": 0.63, "learning_rate": 9.725002946071585e-06, "loss": 0.764, "step": 4370 }, { "epoch": 0.63, "learning_rate": 9.718488395439047e-06, "loss": 0.8968, "step": 4371 }, { "epoch": 0.63, "learning_rate": 9.711974981785252e-06, "loss": 0.7447, "step": 4372 }, { "epoch": 0.63, "learning_rate": 9.705462706512389e-06, "loss": 0.8069, "step": 4373 }, { "epoch": 0.63, "learning_rate": 9.69895157102239e-06, "loss": 0.7606, "step": 4374 }, { "epoch": 0.63, "learning_rate": 9.692441576716932e-06, "loss": 0.8862, "step": 4375 }, { "epoch": 0.63, "learning_rate": 9.685932724997459e-06, "loss": 0.7302, "step": 4376 }, { "epoch": 0.63, "learning_rate": 9.679425017265177e-06, "loss": 0.752, "step": 4377 }, { "epoch": 0.63, "learning_rate": 9.672918454921014e-06, "loss": 1.0045, "step": 4378 }, { "epoch": 0.63, "learning_rate": 9.666413039365692e-06, "loss": 0.851, "step": 4379 }, { "epoch": 0.63, "learning_rate": 9.65990877199965e-06, "loss": 0.8376, "step": 4380 }, { "epoch": 0.63, "learning_rate": 9.653405654223107e-06, "loss": 0.8011, "step": 4381 }, { "epoch": 0.63, "learning_rate": 9.64690368743602e-06, "loss": 0.9347, "step": 4382 }, { "epoch": 0.63, "learning_rate": 9.640402873038094e-06, "loss": 0.6604, "step": 4383 }, { "epoch": 0.63, "learning_rate": 9.63390321242881e-06, "loss": 0.8538, "step": 4384 }, { "epoch": 0.63, "learning_rate": 9.627404707007364e-06, "loss": 0.7902, "step": 4385 }, { "epoch": 0.63, "learning_rate": 9.620907358172737e-06, "loss": 0.7199, "step": 4386 }, { "epoch": 0.63, "learning_rate": 9.614411167323646e-06, "loss": 0.9118, "step": 4387 }, { "epoch": 0.63, "learning_rate": 9.607916135858557e-06, "loss": 0.6512, "step": 4388 }, { "epoch": 0.63, "learning_rate": 9.601422265175686e-06, "loss": 0.7193, "step": 4389 }, { "epoch": 0.63, "learning_rate": 9.594929556673014e-06, "loss": 0.7997, "step": 4390 }, { "epoch": 0.63, "learning_rate": 9.588438011748252e-06, "loss": 0.9076, "step": 4391 }, { "epoch": 0.63, "learning_rate": 9.581947631798873e-06, "loss": 0.7383, "step": 4392 }, { "epoch": 0.63, "learning_rate": 9.575458418222088e-06, "loss": 0.6744, "step": 4393 }, { "epoch": 0.63, "learning_rate": 9.568970372414866e-06, "loss": 0.7612, "step": 4394 }, { "epoch": 0.63, "learning_rate": 9.562483495773936e-06, "loss": 0.8281, "step": 4395 }, { "epoch": 0.63, "learning_rate": 9.555997789695742e-06, "loss": 0.7478, "step": 4396 }, { "epoch": 0.63, "learning_rate": 9.54951325557651e-06, "loss": 0.8873, "step": 4397 }, { "epoch": 0.63, "learning_rate": 9.543029894812198e-06, "loss": 0.8069, "step": 4398 }, { "epoch": 0.63, "learning_rate": 9.536547708798508e-06, "loss": 0.8421, "step": 4399 }, { "epoch": 0.63, "learning_rate": 9.530066698930897e-06, "loss": 0.6456, "step": 4400 }, { "epoch": 0.63, "learning_rate": 9.523586866604563e-06, "loss": 0.7411, "step": 4401 }, { "epoch": 0.63, "learning_rate": 9.517108213214457e-06, "loss": 0.596, "step": 4402 }, { "epoch": 0.63, "learning_rate": 9.510630740155277e-06, "loss": 0.7154, "step": 4403 }, { "epoch": 0.63, "learning_rate": 9.504154448821455e-06, "loss": 0.7849, "step": 4404 }, { "epoch": 0.63, "learning_rate": 9.497679340607174e-06, "loss": 0.7712, "step": 4405 }, { "epoch": 0.63, "learning_rate": 9.49120541690638e-06, "loss": 0.7907, "step": 4406 }, { "epoch": 0.63, "learning_rate": 9.484732679112731e-06, "loss": 0.8225, "step": 4407 }, { "epoch": 0.63, "learning_rate": 9.47826112861966e-06, "loss": 0.6892, "step": 4408 }, { "epoch": 0.63, "learning_rate": 9.471790766820322e-06, "loss": 0.642, "step": 4409 }, { "epoch": 0.63, "learning_rate": 9.465321595107629e-06, "loss": 0.6629, "step": 4410 }, { "epoch": 0.63, "learning_rate": 9.45885361487424e-06, "loss": 0.7969, "step": 4411 }, { "epoch": 0.63, "learning_rate": 9.452386827512539e-06, "loss": 0.745, "step": 4412 }, { "epoch": 0.63, "learning_rate": 9.445921234414677e-06, "loss": 0.8616, "step": 4413 }, { "epoch": 0.63, "learning_rate": 9.439456836972528e-06, "loss": 0.8493, "step": 4414 }, { "epoch": 0.63, "learning_rate": 9.432993636577722e-06, "loss": 0.7695, "step": 4415 }, { "epoch": 0.63, "learning_rate": 9.426531634621623e-06, "loss": 0.6906, "step": 4416 }, { "epoch": 0.63, "learning_rate": 9.420070832495337e-06, "loss": 0.9007, "step": 4417 }, { "epoch": 0.63, "learning_rate": 9.413611231589725e-06, "loss": 0.9439, "step": 4418 }, { "epoch": 0.63, "learning_rate": 9.407152833295373e-06, "loss": 0.8415, "step": 4419 }, { "epoch": 0.63, "learning_rate": 9.400695639002612e-06, "loss": 0.7285, "step": 4420 }, { "epoch": 0.63, "learning_rate": 9.394239650101521e-06, "loss": 0.803, "step": 4421 }, { "epoch": 0.63, "learning_rate": 9.38778486798191e-06, "loss": 0.9241, "step": 4422 }, { "epoch": 0.63, "learning_rate": 9.381331294033333e-06, "loss": 0.7296, "step": 4423 }, { "epoch": 0.63, "learning_rate": 9.374878929645096e-06, "loss": 0.6929, "step": 4424 }, { "epoch": 0.63, "learning_rate": 9.36842777620622e-06, "loss": 0.7553, "step": 4425 }, { "epoch": 0.63, "learning_rate": 9.361977835105485e-06, "loss": 0.6802, "step": 4426 }, { "epoch": 0.63, "learning_rate": 9.355529107731405e-06, "loss": 0.8532, "step": 4427 }, { "epoch": 0.63, "learning_rate": 9.349081595472222e-06, "loss": 0.8214, "step": 4428 }, { "epoch": 0.63, "learning_rate": 9.342635299715944e-06, "loss": 0.8733, "step": 4429 }, { "epoch": 0.63, "learning_rate": 9.336190221850278e-06, "loss": 0.8097, "step": 4430 }, { "epoch": 0.63, "learning_rate": 9.329746363262703e-06, "loss": 0.909, "step": 4431 }, { "epoch": 0.63, "learning_rate": 9.323303725340423e-06, "loss": 0.6858, "step": 4432 }, { "epoch": 0.64, "learning_rate": 9.316862309470368e-06, "loss": 0.7807, "step": 4433 }, { "epoch": 0.64, "learning_rate": 9.31042211703923e-06, "loss": 0.9227, "step": 4434 }, { "epoch": 0.64, "learning_rate": 9.303983149433406e-06, "loss": 1.0067, "step": 4435 }, { "epoch": 0.64, "learning_rate": 9.297545408039057e-06, "loss": 0.8105, "step": 4436 }, { "epoch": 0.64, "learning_rate": 9.291108894242075e-06, "loss": 0.7494, "step": 4437 }, { "epoch": 0.64, "learning_rate": 9.284673609428068e-06, "loss": 0.74, "step": 4438 }, { "epoch": 0.64, "learning_rate": 9.2782395549824e-06, "loss": 0.827, "step": 4439 }, { "epoch": 0.64, "learning_rate": 9.271806732290169e-06, "loss": 0.9007, "step": 4440 }, { "epoch": 0.64, "learning_rate": 9.265375142736195e-06, "loss": 0.7288, "step": 4441 }, { "epoch": 0.64, "learning_rate": 9.258944787705048e-06, "loss": 0.8435, "step": 4442 }, { "epoch": 0.64, "learning_rate": 9.252515668581013e-06, "loss": 0.7483, "step": 4443 }, { "epoch": 0.64, "learning_rate": 9.246087786748124e-06, "loss": 0.822, "step": 4444 }, { "epoch": 0.64, "learning_rate": 9.239661143590157e-06, "loss": 0.9475, "step": 4445 }, { "epoch": 0.64, "learning_rate": 9.233235740490591e-06, "loss": 0.8086, "step": 4446 }, { "epoch": 0.64, "learning_rate": 9.226811578832666e-06, "loss": 0.7542, "step": 4447 }, { "epoch": 0.64, "learning_rate": 9.220388659999348e-06, "loss": 0.7419, "step": 4448 }, { "epoch": 0.64, "learning_rate": 9.213966985373325e-06, "loss": 0.7626, "step": 4449 }, { "epoch": 0.64, "learning_rate": 9.20754655633703e-06, "loss": 0.6113, "step": 4450 }, { "epoch": 0.64, "learning_rate": 9.201127374272619e-06, "loss": 0.7723, "step": 4451 }, { "epoch": 0.64, "learning_rate": 9.194709440561983e-06, "loss": 0.6936, "step": 4452 }, { "epoch": 0.64, "learning_rate": 9.18829275658675e-06, "loss": 0.781, "step": 4453 }, { "epoch": 0.64, "learning_rate": 9.181877323728267e-06, "loss": 0.8114, "step": 4454 }, { "epoch": 0.64, "learning_rate": 9.175463143367617e-06, "loss": 0.6766, "step": 4455 }, { "epoch": 0.64, "learning_rate": 9.169050216885622e-06, "loss": 0.7205, "step": 4456 }, { "epoch": 0.64, "learning_rate": 9.162638545662817e-06, "loss": 0.7254, "step": 4457 }, { "epoch": 0.64, "learning_rate": 9.156228131079486e-06, "loss": 0.8265, "step": 4458 }, { "epoch": 0.64, "learning_rate": 9.149818974515625e-06, "loss": 0.5798, "step": 4459 }, { "epoch": 0.64, "learning_rate": 9.143411077350969e-06, "loss": 0.6585, "step": 4460 }, { "epoch": 0.64, "learning_rate": 9.137004440964981e-06, "loss": 0.6931, "step": 4461 }, { "epoch": 0.64, "learning_rate": 9.130599066736848e-06, "loss": 0.7935, "step": 4462 }, { "epoch": 0.64, "learning_rate": 9.124194956045498e-06, "loss": 0.8161, "step": 4463 }, { "epoch": 0.64, "learning_rate": 9.117792110269562e-06, "loss": 0.6233, "step": 4464 }, { "epoch": 0.64, "learning_rate": 9.111390530787425e-06, "loss": 0.6858, "step": 4465 }, { "epoch": 0.64, "learning_rate": 9.104990218977188e-06, "loss": 0.7673, "step": 4466 }, { "epoch": 0.64, "learning_rate": 9.098591176216672e-06, "loss": 0.7739, "step": 4467 }, { "epoch": 0.64, "learning_rate": 9.092193403883443e-06, "loss": 0.8371, "step": 4468 }, { "epoch": 0.64, "learning_rate": 9.085796903354781e-06, "loss": 0.6685, "step": 4469 }, { "epoch": 0.64, "learning_rate": 9.079401676007689e-06, "loss": 0.8315, "step": 4470 }, { "epoch": 0.64, "learning_rate": 9.073007723218908e-06, "loss": 0.798, "step": 4471 }, { "epoch": 0.64, "learning_rate": 9.06661504636489e-06, "loss": 0.9766, "step": 4472 }, { "epoch": 0.64, "learning_rate": 9.06022364682182e-06, "loss": 0.7188, "step": 4473 }, { "epoch": 0.64, "learning_rate": 9.053833525965622e-06, "loss": 0.889, "step": 4474 }, { "epoch": 0.64, "learning_rate": 9.047444685171915e-06, "loss": 0.7243, "step": 4475 }, { "epoch": 0.64, "learning_rate": 9.041057125816065e-06, "loss": 0.7997, "step": 4476 }, { "epoch": 0.64, "learning_rate": 9.034670849273156e-06, "loss": 0.8806, "step": 4477 }, { "epoch": 0.64, "learning_rate": 9.028285856917991e-06, "loss": 0.9381, "step": 4478 }, { "epoch": 0.64, "learning_rate": 9.021902150125112e-06, "loss": 0.7824, "step": 4479 }, { "epoch": 0.64, "learning_rate": 9.015519730268755e-06, "loss": 0.7963, "step": 4480 }, { "epoch": 0.64, "learning_rate": 9.009138598722913e-06, "loss": 0.7949, "step": 4481 }, { "epoch": 0.64, "learning_rate": 9.002758756861279e-06, "loss": 0.8103, "step": 4482 }, { "epoch": 0.64, "learning_rate": 8.996380206057276e-06, "loss": 0.7776, "step": 4483 }, { "epoch": 0.64, "learning_rate": 8.990002947684049e-06, "loss": 0.772, "step": 4484 }, { "epoch": 0.64, "learning_rate": 8.98362698311446e-06, "loss": 0.8789, "step": 4485 }, { "epoch": 0.64, "learning_rate": 8.9772523137211e-06, "loss": 0.6931, "step": 4486 }, { "epoch": 0.64, "learning_rate": 8.97087894087628e-06, "loss": 0.8306, "step": 4487 }, { "epoch": 0.64, "learning_rate": 8.964506865952026e-06, "loss": 0.7148, "step": 4488 }, { "epoch": 0.64, "learning_rate": 8.958136090320082e-06, "loss": 0.8619, "step": 4489 }, { "epoch": 0.64, "learning_rate": 8.951766615351936e-06, "loss": 0.9704, "step": 4490 }, { "epoch": 0.64, "learning_rate": 8.945398442418758e-06, "loss": 0.7634, "step": 4491 }, { "epoch": 0.64, "learning_rate": 8.939031572891473e-06, "loss": 0.9202, "step": 4492 }, { "epoch": 0.64, "learning_rate": 8.932666008140702e-06, "loss": 0.7134, "step": 4493 }, { "epoch": 0.64, "learning_rate": 8.926301749536794e-06, "loss": 0.8198, "step": 4494 }, { "epoch": 0.64, "learning_rate": 8.919938798449824e-06, "loss": 0.7782, "step": 4495 }, { "epoch": 0.64, "learning_rate": 8.913577156249567e-06, "loss": 0.8086, "step": 4496 }, { "epoch": 0.64, "learning_rate": 8.907216824305534e-06, "loss": 0.6823, "step": 4497 }, { "epoch": 0.64, "learning_rate": 8.900857803986949e-06, "loss": 0.9414, "step": 4498 }, { "epoch": 0.64, "learning_rate": 8.894500096662745e-06, "loss": 0.74, "step": 4499 }, { "epoch": 0.64, "learning_rate": 8.888143703701588e-06, "loss": 0.7439, "step": 4500 }, { "epoch": 0.64, "learning_rate": 8.881788626471837e-06, "loss": 0.8304, "step": 4501 }, { "epoch": 0.64, "learning_rate": 8.875434866341598e-06, "loss": 0.7545, "step": 4502 }, { "epoch": 0.65, "learning_rate": 8.869082424678674e-06, "loss": 0.9012, "step": 4503 }, { "epoch": 0.65, "learning_rate": 8.862731302850588e-06, "loss": 0.8544, "step": 4504 }, { "epoch": 0.65, "learning_rate": 8.856381502224573e-06, "loss": 0.7408, "step": 4505 }, { "epoch": 0.65, "learning_rate": 8.850033024167599e-06, "loss": 0.7645, "step": 4506 }, { "epoch": 0.65, "learning_rate": 8.843685870046318e-06, "loss": 0.8119, "step": 4507 }, { "epoch": 0.65, "learning_rate": 8.837340041227128e-06, "loss": 0.8912, "step": 4508 }, { "epoch": 0.65, "learning_rate": 8.830995539076127e-06, "loss": 0.7324, "step": 4509 }, { "epoch": 0.65, "learning_rate": 8.824652364959122e-06, "loss": 0.4413, "step": 4510 }, { "epoch": 0.65, "learning_rate": 8.818310520241652e-06, "loss": 0.7344, "step": 4511 }, { "epoch": 0.65, "learning_rate": 8.811970006288948e-06, "loss": 1.0413, "step": 4512 }, { "epoch": 0.65, "learning_rate": 8.80563082446598e-06, "loss": 0.6775, "step": 4513 }, { "epoch": 0.65, "learning_rate": 8.799292976137401e-06, "loss": 0.827, "step": 4514 }, { "epoch": 0.65, "learning_rate": 8.792956462667602e-06, "loss": 0.8711, "step": 4515 }, { "epoch": 0.65, "learning_rate": 8.786621285420678e-06, "loss": 0.9838, "step": 4516 }, { "epoch": 0.65, "learning_rate": 8.780287445760434e-06, "loss": 0.7963, "step": 4517 }, { "epoch": 0.65, "learning_rate": 8.773954945050385e-06, "loss": 0.8164, "step": 4518 }, { "epoch": 0.65, "learning_rate": 8.767623784653772e-06, "loss": 0.8019, "step": 4519 }, { "epoch": 0.65, "learning_rate": 8.761293965933526e-06, "loss": 0.7709, "step": 4520 }, { "epoch": 0.65, "learning_rate": 8.754965490252312e-06, "loss": 0.7907, "step": 4521 }, { "epoch": 0.65, "learning_rate": 8.748638358972475e-06, "loss": 0.6708, "step": 4522 }, { "epoch": 0.65, "learning_rate": 8.742312573456109e-06, "loss": 0.7757, "step": 4523 }, { "epoch": 0.65, "learning_rate": 8.735988135065e-06, "loss": 0.798, "step": 4524 }, { "epoch": 0.65, "learning_rate": 8.729665045160627e-06, "loss": 0.7193, "step": 4525 }, { "epoch": 0.65, "learning_rate": 8.723343305104196e-06, "loss": 0.9364, "step": 4526 }, { "epoch": 0.65, "learning_rate": 8.717022916256642e-06, "loss": 0.7494, "step": 4527 }, { "epoch": 0.65, "learning_rate": 8.710703879978566e-06, "loss": 0.7489, "step": 4528 }, { "epoch": 0.65, "learning_rate": 8.704386197630313e-06, "loss": 0.6893, "step": 4529 }, { "epoch": 0.65, "learning_rate": 8.698069870571916e-06, "loss": 0.827, "step": 4530 }, { "epoch": 0.65, "learning_rate": 8.691754900163129e-06, "loss": 0.7531, "step": 4531 }, { "epoch": 0.65, "learning_rate": 8.685441287763413e-06, "loss": 0.745, "step": 4532 }, { "epoch": 0.65, "learning_rate": 8.679129034731912e-06, "loss": 0.6844, "step": 4533 }, { "epoch": 0.65, "learning_rate": 8.672818142427523e-06, "loss": 0.9007, "step": 4534 }, { "epoch": 0.65, "learning_rate": 8.666508612208818e-06, "loss": 0.7634, "step": 4535 }, { "epoch": 0.65, "learning_rate": 8.660200445434073e-06, "loss": 0.7349, "step": 4536 }, { "epoch": 0.65, "learning_rate": 8.653893643461287e-06, "loss": 0.7645, "step": 4537 }, { "epoch": 0.65, "learning_rate": 8.647588207648159e-06, "loss": 0.8181, "step": 4538 }, { "epoch": 0.65, "learning_rate": 8.641284139352091e-06, "loss": 0.8421, "step": 4539 }, { "epoch": 0.65, "learning_rate": 8.634981439930194e-06, "loss": 0.7213, "step": 4540 }, { "epoch": 0.65, "learning_rate": 8.628680110739283e-06, "loss": 0.8111, "step": 4541 }, { "epoch": 0.65, "learning_rate": 8.622380153135884e-06, "loss": 0.8973, "step": 4542 }, { "epoch": 0.65, "learning_rate": 8.61608156847621e-06, "loss": 0.6964, "step": 4543 }, { "epoch": 0.65, "learning_rate": 8.609784358116194e-06, "loss": 0.9782, "step": 4544 }, { "epoch": 0.65, "learning_rate": 8.603488523411483e-06, "loss": 0.8211, "step": 4545 }, { "epoch": 0.65, "learning_rate": 8.597194065717395e-06, "loss": 0.7349, "step": 4546 }, { "epoch": 0.65, "learning_rate": 8.590900986388977e-06, "loss": 0.7606, "step": 4547 }, { "epoch": 0.65, "learning_rate": 8.584609286780987e-06, "loss": 0.7154, "step": 4548 }, { "epoch": 0.65, "learning_rate": 8.578318968247856e-06, "loss": 0.5889, "step": 4549 }, { "epoch": 0.65, "learning_rate": 8.572030032143737e-06, "loss": 0.7762, "step": 4550 }, { "epoch": 0.65, "learning_rate": 8.565742479822487e-06, "loss": 0.7679, "step": 4551 }, { "epoch": 0.65, "learning_rate": 8.559456312637658e-06, "loss": 0.6741, "step": 4552 }, { "epoch": 0.65, "learning_rate": 8.55317153194251e-06, "loss": 0.7093, "step": 4553 }, { "epoch": 0.65, "learning_rate": 8.546888139089986e-06, "loss": 0.7126, "step": 4554 }, { "epoch": 0.65, "learning_rate": 8.540606135432763e-06, "loss": 0.87, "step": 4555 }, { "epoch": 0.65, "learning_rate": 8.5343255223232e-06, "loss": 0.7414, "step": 4556 }, { "epoch": 0.65, "learning_rate": 8.528046301113343e-06, "loss": 0.7444, "step": 4557 }, { "epoch": 0.65, "learning_rate": 8.521768473154964e-06, "loss": 0.7372, "step": 4558 }, { "epoch": 0.65, "learning_rate": 8.51549203979952e-06, "loss": 0.8136, "step": 4559 }, { "epoch": 0.65, "learning_rate": 8.509217002398174e-06, "loss": 0.6763, "step": 4560 }, { "epoch": 0.65, "learning_rate": 8.502943362301782e-06, "loss": 0.8566, "step": 4561 }, { "epoch": 0.65, "learning_rate": 8.496671120860912e-06, "loss": 0.8105, "step": 4562 }, { "epoch": 0.65, "learning_rate": 8.49040027942582e-06, "loss": 0.6724, "step": 4563 }, { "epoch": 0.65, "learning_rate": 8.484130839346456e-06, "loss": 0.7952, "step": 4564 }, { "epoch": 0.65, "learning_rate": 8.477862801972472e-06, "loss": 0.7023, "step": 4565 }, { "epoch": 0.65, "learning_rate": 8.471596168653241e-06, "loss": 0.6886, "step": 4566 }, { "epoch": 0.65, "learning_rate": 8.465330940737797e-06, "loss": 0.7771, "step": 4567 }, { "epoch": 0.65, "learning_rate": 8.459067119574892e-06, "loss": 0.7818, "step": 4568 }, { "epoch": 0.65, "learning_rate": 8.452804706512977e-06, "loss": 0.7344, "step": 4569 }, { "epoch": 0.65, "learning_rate": 8.446543702900187e-06, "loss": 0.8834, "step": 4570 }, { "epoch": 0.65, "learning_rate": 8.440284110084372e-06, "loss": 0.678, "step": 4571 }, { "epoch": 0.65, "learning_rate": 8.434025929413047e-06, "loss": 0.6875, "step": 4572 }, { "epoch": 0.66, "learning_rate": 8.427769162233467e-06, "loss": 0.841, "step": 4573 }, { "epoch": 0.66, "learning_rate": 8.42151380989255e-06, "loss": 0.6568, "step": 4574 }, { "epoch": 0.66, "learning_rate": 8.415259873736913e-06, "loss": 1.0251, "step": 4575 }, { "epoch": 0.66, "learning_rate": 8.409007355112872e-06, "loss": 0.7687, "step": 4576 }, { "epoch": 0.66, "learning_rate": 8.40275625536646e-06, "loss": 0.9782, "step": 4577 }, { "epoch": 0.66, "learning_rate": 8.396506575843362e-06, "loss": 0.7896, "step": 4578 }, { "epoch": 0.66, "learning_rate": 8.390258317888988e-06, "loss": 0.7712, "step": 4579 }, { "epoch": 0.66, "learning_rate": 8.384011482848431e-06, "loss": 0.8488, "step": 4580 }, { "epoch": 0.66, "learning_rate": 8.377766072066483e-06, "loss": 0.6853, "step": 4581 }, { "epoch": 0.66, "learning_rate": 8.37152208688763e-06, "loss": 0.8276, "step": 4582 }, { "epoch": 0.66, "learning_rate": 8.365279528656031e-06, "loss": 0.935, "step": 4583 }, { "epoch": 0.66, "learning_rate": 8.359038398715569e-06, "loss": 0.7129, "step": 4584 }, { "epoch": 0.66, "learning_rate": 8.352798698409809e-06, "loss": 0.689, "step": 4585 }, { "epoch": 0.66, "learning_rate": 8.346560429081991e-06, "loss": 0.7048, "step": 4586 }, { "epoch": 0.66, "learning_rate": 8.340323592075066e-06, "loss": 0.6353, "step": 4587 }, { "epoch": 0.66, "learning_rate": 8.334088188731669e-06, "loss": 0.8817, "step": 4588 }, { "epoch": 0.66, "learning_rate": 8.32785422039413e-06, "loss": 0.9023, "step": 4589 }, { "epoch": 0.66, "learning_rate": 8.32162168840447e-06, "loss": 0.7179, "step": 4590 }, { "epoch": 0.66, "learning_rate": 8.31539059410439e-06, "loss": 0.8292, "step": 4591 }, { "epoch": 0.66, "learning_rate": 8.309160938835309e-06, "loss": 0.9364, "step": 4592 }, { "epoch": 0.66, "learning_rate": 8.302932723938296e-06, "loss": 0.6225, "step": 4593 }, { "epoch": 0.66, "learning_rate": 8.296705950754136e-06, "loss": 0.7266, "step": 4594 }, { "epoch": 0.66, "learning_rate": 8.290480620623316e-06, "loss": 0.8605, "step": 4595 }, { "epoch": 0.66, "learning_rate": 8.284256734885976e-06, "loss": 0.7575, "step": 4596 }, { "epoch": 0.66, "learning_rate": 8.278034294881972e-06, "loss": 0.7846, "step": 4597 }, { "epoch": 0.66, "learning_rate": 8.271813301950845e-06, "loss": 0.7913, "step": 4598 }, { "epoch": 0.66, "learning_rate": 8.265593757431813e-06, "loss": 0.7432, "step": 4599 }, { "epoch": 0.66, "learning_rate": 8.259375662663803e-06, "loss": 0.7896, "step": 4600 }, { "epoch": 0.66, "learning_rate": 8.253159018985397e-06, "loss": 0.6881, "step": 4601 }, { "epoch": 0.66, "learning_rate": 8.246943827734899e-06, "loss": 0.791, "step": 4602 }, { "epoch": 0.66, "learning_rate": 8.240730090250291e-06, "loss": 0.8047, "step": 4603 }, { "epoch": 0.66, "learning_rate": 8.234517807869222e-06, "loss": 0.6521, "step": 4604 }, { "epoch": 0.66, "learning_rate": 8.228306981929043e-06, "loss": 0.9169, "step": 4605 }, { "epoch": 0.66, "learning_rate": 8.22209761376681e-06, "loss": 0.6989, "step": 4606 }, { "epoch": 0.66, "learning_rate": 8.215889704719225e-06, "loss": 0.7991, "step": 4607 }, { "epoch": 0.66, "learning_rate": 8.20968325612271e-06, "loss": 0.7838, "step": 4608 }, { "epoch": 0.66, "learning_rate": 8.203478269313353e-06, "loss": 0.6903, "step": 4609 }, { "epoch": 0.66, "learning_rate": 8.197274745626938e-06, "loss": 0.8711, "step": 4610 }, { "epoch": 0.66, "learning_rate": 8.19107268639893e-06, "loss": 0.7129, "step": 4611 }, { "epoch": 0.66, "learning_rate": 8.184872092964475e-06, "loss": 0.9079, "step": 4612 }, { "epoch": 0.66, "learning_rate": 8.178672966658418e-06, "loss": 0.822, "step": 4613 }, { "epoch": 0.66, "learning_rate": 8.172475308815263e-06, "loss": 0.8638, "step": 4614 }, { "epoch": 0.66, "learning_rate": 8.166279120769213e-06, "loss": 0.7645, "step": 4615 }, { "epoch": 0.66, "learning_rate": 8.160084403854172e-06, "loss": 0.8465, "step": 4616 }, { "epoch": 0.66, "learning_rate": 8.153891159403693e-06, "loss": 0.8811, "step": 4617 }, { "epoch": 0.66, "learning_rate": 8.147699388751028e-06, "loss": 0.8011, "step": 4618 }, { "epoch": 0.66, "learning_rate": 8.141509093229121e-06, "loss": 0.7165, "step": 4619 }, { "epoch": 0.66, "learning_rate": 8.135320274170583e-06, "loss": 0.8357, "step": 4620 }, { "epoch": 0.66, "learning_rate": 8.129132932907723e-06, "loss": 0.8139, "step": 4621 }, { "epoch": 0.66, "learning_rate": 8.122947070772501e-06, "loss": 0.8086, "step": 4622 }, { "epoch": 0.66, "learning_rate": 8.116762689096601e-06, "loss": 0.8131, "step": 4623 }, { "epoch": 0.66, "learning_rate": 8.110579789211364e-06, "loss": 0.9001, "step": 4624 }, { "epoch": 0.66, "learning_rate": 8.10439837244781e-06, "loss": 0.695, "step": 4625 }, { "epoch": 0.66, "learning_rate": 8.098218440136638e-06, "loss": 0.6367, "step": 4626 }, { "epoch": 0.66, "learning_rate": 8.092039993608256e-06, "loss": 0.7882, "step": 4627 }, { "epoch": 0.66, "learning_rate": 8.085863034192711e-06, "loss": 0.726, "step": 4628 }, { "epoch": 0.66, "learning_rate": 8.079687563219758e-06, "loss": 0.8108, "step": 4629 }, { "epoch": 0.66, "learning_rate": 8.07351358201882e-06, "loss": 0.9872, "step": 4630 }, { "epoch": 0.66, "learning_rate": 8.067341091919006e-06, "loss": 0.7573, "step": 4631 }, { "epoch": 0.66, "learning_rate": 8.061170094249102e-06, "loss": 0.8326, "step": 4632 }, { "epoch": 0.66, "learning_rate": 8.055000590337556e-06, "loss": 0.7941, "step": 4633 }, { "epoch": 0.66, "learning_rate": 8.048832581512525e-06, "loss": 0.8047, "step": 4634 }, { "epoch": 0.66, "learning_rate": 8.042666069101834e-06, "loss": 0.7997, "step": 4635 }, { "epoch": 0.66, "learning_rate": 8.036501054432966e-06, "loss": 0.7877, "step": 4636 }, { "epoch": 0.66, "learning_rate": 8.030337538833097e-06, "loss": 0.7508, "step": 4637 }, { "epoch": 0.66, "learning_rate": 8.024175523629085e-06, "loss": 0.8767, "step": 4638 }, { "epoch": 0.66, "learning_rate": 8.018015010147458e-06, "loss": 0.7539, "step": 4639 }, { "epoch": 0.66, "learning_rate": 8.011855999714421e-06, "loss": 0.7573, "step": 4640 }, { "epoch": 0.66, "learning_rate": 8.00569849365586e-06, "loss": 0.8131, "step": 4641 }, { "epoch": 0.66, "learning_rate": 7.999542493297337e-06, "loss": 0.6906, "step": 4642 }, { "epoch": 0.67, "learning_rate": 7.993387999964073e-06, "loss": 0.798, "step": 4643 }, { "epoch": 0.67, "learning_rate": 7.987235014980982e-06, "loss": 0.7617, "step": 4644 }, { "epoch": 0.67, "learning_rate": 7.981083539672664e-06, "loss": 0.6401, "step": 4645 }, { "epoch": 0.67, "learning_rate": 7.97493357536336e-06, "loss": 0.7885, "step": 4646 }, { "epoch": 0.67, "learning_rate": 7.968785123377016e-06, "loss": 0.752, "step": 4647 }, { "epoch": 0.67, "learning_rate": 7.962638185037242e-06, "loss": 0.9208, "step": 4648 }, { "epoch": 0.67, "learning_rate": 7.956492761667317e-06, "loss": 0.8253, "step": 4649 }, { "epoch": 0.67, "learning_rate": 7.950348854590206e-06, "loss": 0.9247, "step": 4650 }, { "epoch": 0.67, "learning_rate": 7.944206465128521e-06, "loss": 0.7734, "step": 4651 }, { "epoch": 0.67, "learning_rate": 7.93806559460459e-06, "loss": 0.8666, "step": 4652 }, { "epoch": 0.67, "learning_rate": 7.931926244340386e-06, "loss": 0.618, "step": 4653 }, { "epoch": 0.67, "learning_rate": 7.925788415657546e-06, "loss": 0.7835, "step": 4654 }, { "epoch": 0.67, "learning_rate": 7.919652109877397e-06, "loss": 0.8867, "step": 4655 }, { "epoch": 0.67, "learning_rate": 7.913517328320947e-06, "loss": 0.851, "step": 4656 }, { "epoch": 0.67, "learning_rate": 7.907384072308849e-06, "loss": 0.7902, "step": 4657 }, { "epoch": 0.67, "learning_rate": 7.901252343161445e-06, "loss": 0.875, "step": 4658 }, { "epoch": 0.67, "learning_rate": 7.895122142198743e-06, "loss": 0.6666, "step": 4659 }, { "epoch": 0.67, "learning_rate": 7.888993470740428e-06, "loss": 0.7352, "step": 4660 }, { "epoch": 0.67, "learning_rate": 7.882866330105852e-06, "loss": 0.7902, "step": 4661 }, { "epoch": 0.67, "learning_rate": 7.876740721614024e-06, "loss": 0.7132, "step": 4662 }, { "epoch": 0.67, "learning_rate": 7.870616646583648e-06, "loss": 0.8011, "step": 4663 }, { "epoch": 0.67, "learning_rate": 7.864494106333092e-06, "loss": 0.6858, "step": 4664 }, { "epoch": 0.67, "learning_rate": 7.858373102180371e-06, "loss": 0.7997, "step": 4665 }, { "epoch": 0.67, "learning_rate": 7.852253635443195e-06, "loss": 0.7327, "step": 4666 }, { "epoch": 0.67, "learning_rate": 7.846135707438931e-06, "loss": 0.7182, "step": 4667 }, { "epoch": 0.67, "learning_rate": 7.840019319484619e-06, "loss": 0.5762, "step": 4668 }, { "epoch": 0.67, "learning_rate": 7.833904472896964e-06, "loss": 0.9336, "step": 4669 }, { "epoch": 0.67, "learning_rate": 7.827791168992345e-06, "loss": 0.7218, "step": 4670 }, { "epoch": 0.67, "learning_rate": 7.821679409086804e-06, "loss": 0.904, "step": 4671 }, { "epoch": 0.67, "learning_rate": 7.815569194496045e-06, "loss": 0.6786, "step": 4672 }, { "epoch": 0.67, "learning_rate": 7.809460526535452e-06, "loss": 0.8747, "step": 4673 }, { "epoch": 0.67, "learning_rate": 7.803353406520079e-06, "loss": 0.8214, "step": 4674 }, { "epoch": 0.67, "learning_rate": 7.79724783576462e-06, "loss": 0.8937, "step": 4675 }, { "epoch": 0.67, "learning_rate": 7.791143815583458e-06, "loss": 0.7081, "step": 4676 }, { "epoch": 0.67, "learning_rate": 7.78504134729065e-06, "loss": 0.7062, "step": 4677 }, { "epoch": 0.67, "learning_rate": 7.778940432199892e-06, "loss": 0.6853, "step": 4678 }, { "epoch": 0.67, "learning_rate": 7.772841071624567e-06, "loss": 0.7907, "step": 4679 }, { "epoch": 0.67, "learning_rate": 7.766743266877712e-06, "loss": 0.7645, "step": 4680 }, { "epoch": 0.67, "learning_rate": 7.76064701927204e-06, "loss": 0.6222, "step": 4681 }, { "epoch": 0.67, "learning_rate": 7.754552330119923e-06, "loss": 0.8371, "step": 4682 }, { "epoch": 0.67, "learning_rate": 7.748459200733381e-06, "loss": 0.8789, "step": 4683 }, { "epoch": 0.67, "learning_rate": 7.74236763242413e-06, "loss": 0.8929, "step": 4684 }, { "epoch": 0.67, "learning_rate": 7.736277626503535e-06, "loss": 0.9406, "step": 4685 }, { "epoch": 0.67, "learning_rate": 7.730189184282613e-06, "loss": 0.8516, "step": 4686 }, { "epoch": 0.67, "learning_rate": 7.724102307072059e-06, "loss": 0.7422, "step": 4687 }, { "epoch": 0.67, "learning_rate": 7.718016996182227e-06, "loss": 0.7974, "step": 4688 }, { "epoch": 0.67, "learning_rate": 7.711933252923139e-06, "loss": 0.639, "step": 4689 }, { "epoch": 0.67, "learning_rate": 7.705851078604466e-06, "loss": 0.6998, "step": 4690 }, { "epoch": 0.67, "learning_rate": 7.699770474535553e-06, "loss": 0.957, "step": 4691 }, { "epoch": 0.67, "learning_rate": 7.693691442025412e-06, "loss": 0.8912, "step": 4692 }, { "epoch": 0.67, "learning_rate": 7.687613982382691e-06, "loss": 0.8309, "step": 4693 }, { "epoch": 0.67, "learning_rate": 7.681538096915726e-06, "loss": 0.7221, "step": 4694 }, { "epoch": 0.67, "learning_rate": 7.675463786932512e-06, "loss": 0.8167, "step": 4695 }, { "epoch": 0.67, "learning_rate": 7.669391053740688e-06, "loss": 0.6518, "step": 4696 }, { "epoch": 0.67, "learning_rate": 7.663319898647564e-06, "loss": 0.7896, "step": 4697 }, { "epoch": 0.67, "learning_rate": 7.657250322960112e-06, "loss": 0.7444, "step": 4698 }, { "epoch": 0.67, "learning_rate": 7.65118232798496e-06, "loss": 0.6412, "step": 4699 }, { "epoch": 0.67, "learning_rate": 7.645115915028404e-06, "loss": 0.7397, "step": 4700 }, { "epoch": 0.67, "learning_rate": 7.639051085396375e-06, "loss": 0.7787, "step": 4701 }, { "epoch": 0.67, "learning_rate": 7.632987840394497e-06, "loss": 0.8078, "step": 4702 }, { "epoch": 0.67, "learning_rate": 7.626926181328038e-06, "loss": 0.7441, "step": 4703 }, { "epoch": 0.67, "learning_rate": 7.62086610950191e-06, "loss": 0.7366, "step": 4704 }, { "epoch": 0.67, "learning_rate": 7.614807626220698e-06, "loss": 0.7539, "step": 4705 }, { "epoch": 0.67, "learning_rate": 7.6087507327886625e-06, "loss": 0.774, "step": 4706 }, { "epoch": 0.67, "learning_rate": 7.602695430509681e-06, "loss": 0.6496, "step": 4707 }, { "epoch": 0.67, "learning_rate": 7.59664172068732e-06, "loss": 0.7623, "step": 4708 }, { "epoch": 0.67, "learning_rate": 7.59058960462479e-06, "loss": 0.9275, "step": 4709 }, { "epoch": 0.67, "learning_rate": 7.5845390836249676e-06, "loss": 0.8228, "step": 4710 }, { "epoch": 0.67, "learning_rate": 7.578490158990378e-06, "loss": 0.7305, "step": 4711 }, { "epoch": 0.67, "learning_rate": 7.572442832023193e-06, "loss": 0.7422, "step": 4712 }, { "epoch": 0.68, "learning_rate": 7.5663971040252684e-06, "loss": 0.7335, "step": 4713 }, { "epoch": 0.68, "learning_rate": 7.560352976298101e-06, "loss": 0.7277, "step": 4714 }, { "epoch": 0.68, "learning_rate": 7.55431045014283e-06, "loss": 0.7302, "step": 4715 }, { "epoch": 0.68, "learning_rate": 7.548269526860265e-06, "loss": 0.6858, "step": 4716 }, { "epoch": 0.68, "learning_rate": 7.5422302077508705e-06, "loss": 0.7405, "step": 4717 }, { "epoch": 0.68, "learning_rate": 7.536192494114761e-06, "loss": 0.9598, "step": 4718 }, { "epoch": 0.68, "learning_rate": 7.530156387251706e-06, "loss": 0.7386, "step": 4719 }, { "epoch": 0.68, "learning_rate": 7.524121888461132e-06, "loss": 0.726, "step": 4720 }, { "epoch": 0.68, "learning_rate": 7.518088999042121e-06, "loss": 0.8917, "step": 4721 }, { "epoch": 0.68, "learning_rate": 7.512057720293394e-06, "loss": 0.851, "step": 4722 }, { "epoch": 0.68, "learning_rate": 7.506028053513335e-06, "loss": 0.7902, "step": 4723 }, { "epoch": 0.68, "learning_rate": 7.500000000000004e-06, "loss": 0.8273, "step": 4724 }, { "epoch": 0.68, "learning_rate": 7.493973561051069e-06, "loss": 0.6384, "step": 4725 }, { "epoch": 0.68, "learning_rate": 7.48794873796388e-06, "loss": 0.6278, "step": 4726 }, { "epoch": 0.68, "learning_rate": 7.481925532035436e-06, "loss": 0.8644, "step": 4727 }, { "epoch": 0.68, "learning_rate": 7.475903944562379e-06, "loss": 0.7561, "step": 4728 }, { "epoch": 0.68, "learning_rate": 7.469883976841018e-06, "loss": 0.865, "step": 4729 }, { "epoch": 0.68, "learning_rate": 7.463865630167282e-06, "loss": 0.9548, "step": 4730 }, { "epoch": 0.68, "learning_rate": 7.457848905836792e-06, "loss": 0.8343, "step": 4731 }, { "epoch": 0.68, "learning_rate": 7.4518338051447995e-06, "loss": 0.8733, "step": 4732 }, { "epoch": 0.68, "learning_rate": 7.445820329386196e-06, "loss": 0.8929, "step": 4733 }, { "epoch": 0.68, "learning_rate": 7.43980847985553e-06, "loss": 0.6325, "step": 4734 }, { "epoch": 0.68, "learning_rate": 7.433798257847026e-06, "loss": 0.7673, "step": 4735 }, { "epoch": 0.68, "learning_rate": 7.427789664654518e-06, "loss": 0.8544, "step": 4736 }, { "epoch": 0.68, "learning_rate": 7.42178270157151e-06, "loss": 0.6461, "step": 4737 }, { "epoch": 0.68, "learning_rate": 7.415777369891155e-06, "loss": 0.7958, "step": 4738 }, { "epoch": 0.68, "learning_rate": 7.409773670906252e-06, "loss": 0.6928, "step": 4739 }, { "epoch": 0.68, "learning_rate": 7.403771605909249e-06, "loss": 0.8092, "step": 4740 }, { "epoch": 0.68, "learning_rate": 7.397771176192241e-06, "loss": 0.7673, "step": 4741 }, { "epoch": 0.68, "learning_rate": 7.391772383046979e-06, "loss": 0.8315, "step": 4742 }, { "epoch": 0.68, "learning_rate": 7.385775227764843e-06, "loss": 0.7673, "step": 4743 }, { "epoch": 0.68, "learning_rate": 7.379779711636873e-06, "loss": 0.8025, "step": 4744 }, { "epoch": 0.68, "learning_rate": 7.37378583595377e-06, "loss": 0.639, "step": 4745 }, { "epoch": 0.68, "learning_rate": 7.367793602005851e-06, "loss": 1.0145, "step": 4746 }, { "epoch": 0.68, "learning_rate": 7.361803011083102e-06, "loss": 0.8315, "step": 4747 }, { "epoch": 0.68, "learning_rate": 7.355814064475149e-06, "loss": 0.7718, "step": 4748 }, { "epoch": 0.68, "learning_rate": 7.349826763471265e-06, "loss": 0.7098, "step": 4749 }, { "epoch": 0.68, "learning_rate": 7.34384110936037e-06, "loss": 0.7042, "step": 4750 }, { "epoch": 0.68, "learning_rate": 7.337857103431012e-06, "loss": 0.7561, "step": 4751 }, { "epoch": 0.68, "learning_rate": 7.331874746971418e-06, "loss": 0.7946, "step": 4752 }, { "epoch": 0.68, "learning_rate": 7.32589404126944e-06, "loss": 0.8429, "step": 4753 }, { "epoch": 0.68, "learning_rate": 7.3199149876125635e-06, "loss": 0.8504, "step": 4754 }, { "epoch": 0.68, "learning_rate": 7.313937587287934e-06, "loss": 0.6948, "step": 4755 }, { "epoch": 0.68, "learning_rate": 7.3079618415823505e-06, "loss": 0.6989, "step": 4756 }, { "epoch": 0.68, "learning_rate": 7.3019877517822305e-06, "loss": 0.7743, "step": 4757 }, { "epoch": 0.68, "learning_rate": 7.2960153191736505e-06, "loss": 0.7991, "step": 4758 }, { "epoch": 0.68, "learning_rate": 7.290044545042328e-06, "loss": 0.7807, "step": 4759 }, { "epoch": 0.68, "learning_rate": 7.284075430673623e-06, "loss": 0.7271, "step": 4760 }, { "epoch": 0.68, "learning_rate": 7.278107977352544e-06, "loss": 0.784, "step": 4761 }, { "epoch": 0.68, "learning_rate": 7.272142186363718e-06, "loss": 0.6641, "step": 4762 }, { "epoch": 0.68, "learning_rate": 7.266178058991452e-06, "loss": 0.7701, "step": 4763 }, { "epoch": 0.68, "learning_rate": 7.260215596519672e-06, "loss": 0.7669, "step": 4764 }, { "epoch": 0.68, "learning_rate": 7.254254800231939e-06, "loss": 0.7863, "step": 4765 }, { "epoch": 0.68, "learning_rate": 7.248295671411471e-06, "loss": 0.6936, "step": 4766 }, { "epoch": 0.68, "learning_rate": 7.242338211341119e-06, "loss": 0.8253, "step": 4767 }, { "epoch": 0.68, "learning_rate": 7.236382421303377e-06, "loss": 0.6783, "step": 4768 }, { "epoch": 0.68, "learning_rate": 7.230428302580382e-06, "loss": 0.8845, "step": 4769 }, { "epoch": 0.68, "learning_rate": 7.224475856453905e-06, "loss": 0.8304, "step": 4770 }, { "epoch": 0.68, "learning_rate": 7.218525084205368e-06, "loss": 0.7843, "step": 4771 }, { "epoch": 0.68, "learning_rate": 7.212575987115813e-06, "loss": 0.7567, "step": 4772 }, { "epoch": 0.68, "learning_rate": 7.206628566465933e-06, "loss": 0.6881, "step": 4773 }, { "epoch": 0.68, "learning_rate": 7.200682823536078e-06, "loss": 0.6635, "step": 4774 }, { "epoch": 0.68, "learning_rate": 7.194738759606203e-06, "loss": 0.8108, "step": 4775 }, { "epoch": 0.68, "learning_rate": 7.188796375955921e-06, "loss": 0.8248, "step": 4776 }, { "epoch": 0.68, "learning_rate": 7.182855673864484e-06, "loss": 0.8761, "step": 4777 }, { "epoch": 0.68, "learning_rate": 7.1769166546107755e-06, "loss": 0.6855, "step": 4778 }, { "epoch": 0.68, "learning_rate": 7.170979319473325e-06, "loss": 0.7607, "step": 4779 }, { "epoch": 0.68, "learning_rate": 7.165043669730279e-06, "loss": 0.6401, "step": 4780 }, { "epoch": 0.68, "learning_rate": 7.15910970665945e-06, "loss": 0.8103, "step": 4781 }, { "epoch": 0.69, "learning_rate": 7.153177431538275e-06, "loss": 0.786, "step": 4782 }, { "epoch": 0.69, "learning_rate": 7.1472468456438166e-06, "loss": 0.8206, "step": 4783 }, { "epoch": 0.69, "learning_rate": 7.141317950252779e-06, "loss": 0.8842, "step": 4784 }, { "epoch": 0.69, "learning_rate": 7.135390746641527e-06, "loss": 0.7854, "step": 4785 }, { "epoch": 0.69, "learning_rate": 7.129465236086022e-06, "loss": 0.7729, "step": 4786 }, { "epoch": 0.69, "learning_rate": 7.123541419861887e-06, "loss": 0.8521, "step": 4787 }, { "epoch": 0.69, "learning_rate": 7.1176192992443725e-06, "loss": 0.7935, "step": 4788 }, { "epoch": 0.69, "learning_rate": 7.111698875508363e-06, "loss": 0.7048, "step": 4789 }, { "epoch": 0.69, "learning_rate": 7.105780149928386e-06, "loss": 0.9777, "step": 4790 }, { "epoch": 0.69, "learning_rate": 7.0998631237785806e-06, "loss": 0.7902, "step": 4791 }, { "epoch": 0.69, "learning_rate": 7.093947798332753e-06, "loss": 0.7773, "step": 4792 }, { "epoch": 0.69, "learning_rate": 7.088034174864324e-06, "loss": 0.7695, "step": 4793 }, { "epoch": 0.69, "learning_rate": 7.082122254646341e-06, "loss": 0.9152, "step": 4794 }, { "epoch": 0.69, "learning_rate": 7.076212038951501e-06, "loss": 0.8058, "step": 4795 }, { "epoch": 0.69, "learning_rate": 7.070303529052123e-06, "loss": 0.7302, "step": 4796 }, { "epoch": 0.69, "learning_rate": 7.064396726220168e-06, "loss": 0.8499, "step": 4797 }, { "epoch": 0.69, "learning_rate": 7.058491631727223e-06, "loss": 0.7204, "step": 4798 }, { "epoch": 0.69, "learning_rate": 7.0525882468445045e-06, "loss": 0.7695, "step": 4799 }, { "epoch": 0.69, "learning_rate": 7.046686572842877e-06, "loss": 0.8627, "step": 4800 }, { "epoch": 0.69, "learning_rate": 7.040786610992803e-06, "loss": 0.7394, "step": 4801 }, { "epoch": 0.69, "learning_rate": 7.034888362564419e-06, "loss": 0.8884, "step": 4802 }, { "epoch": 0.69, "learning_rate": 7.02899182882747e-06, "loss": 0.7274, "step": 4803 }, { "epoch": 0.69, "learning_rate": 7.023097011051323e-06, "loss": 0.7528, "step": 4804 }, { "epoch": 0.69, "learning_rate": 7.017203910504986e-06, "loss": 0.7227, "step": 4805 }, { "epoch": 0.69, "learning_rate": 7.011312528457115e-06, "loss": 0.7274, "step": 4806 }, { "epoch": 0.69, "learning_rate": 7.00542286617596e-06, "loss": 0.8421, "step": 4807 }, { "epoch": 0.69, "learning_rate": 6.999534924929429e-06, "loss": 0.798, "step": 4808 }, { "epoch": 0.69, "learning_rate": 6.993648705985047e-06, "loss": 0.7188, "step": 4809 }, { "epoch": 0.69, "learning_rate": 6.987764210609972e-06, "loss": 0.7494, "step": 4810 }, { "epoch": 0.69, "learning_rate": 6.981881440070996e-06, "loss": 0.635, "step": 4811 }, { "epoch": 0.69, "learning_rate": 6.976000395634518e-06, "loss": 0.733, "step": 4812 }, { "epoch": 0.69, "learning_rate": 6.9701210785665964e-06, "loss": 0.7932, "step": 4813 }, { "epoch": 0.69, "learning_rate": 6.964243490132904e-06, "loss": 0.6864, "step": 4814 }, { "epoch": 0.69, "learning_rate": 6.958367631598728e-06, "loss": 0.6219, "step": 4815 }, { "epoch": 0.69, "learning_rate": 6.952493504229001e-06, "loss": 0.6384, "step": 4816 }, { "epoch": 0.69, "learning_rate": 6.9466211092882774e-06, "loss": 0.6749, "step": 4817 }, { "epoch": 0.69, "learning_rate": 6.940750448040741e-06, "loss": 0.774, "step": 4818 }, { "epoch": 0.69, "learning_rate": 6.934881521750196e-06, "loss": 0.7241, "step": 4819 }, { "epoch": 0.69, "learning_rate": 6.929014331680079e-06, "loss": 0.9492, "step": 4820 }, { "epoch": 0.69, "learning_rate": 6.9231488790934555e-06, "loss": 0.7182, "step": 4821 }, { "epoch": 0.69, "learning_rate": 6.917285165253002e-06, "loss": 0.808, "step": 4822 }, { "epoch": 0.69, "learning_rate": 6.911423191421032e-06, "loss": 0.8345, "step": 4823 }, { "epoch": 0.69, "learning_rate": 6.905562958859498e-06, "loss": 0.8465, "step": 4824 }, { "epoch": 0.69, "learning_rate": 6.899704468829947e-06, "loss": 0.8066, "step": 4825 }, { "epoch": 0.69, "learning_rate": 6.893847722593573e-06, "loss": 0.8315, "step": 4826 }, { "epoch": 0.69, "learning_rate": 6.887992721411188e-06, "loss": 0.8744, "step": 4827 }, { "epoch": 0.69, "learning_rate": 6.882139466543227e-06, "loss": 0.889, "step": 4828 }, { "epoch": 0.69, "learning_rate": 6.876287959249763e-06, "loss": 0.8549, "step": 4829 }, { "epoch": 0.69, "learning_rate": 6.8704382007904545e-06, "loss": 0.7628, "step": 4830 }, { "epoch": 0.69, "learning_rate": 6.864590192424636e-06, "loss": 0.6772, "step": 4831 }, { "epoch": 0.69, "learning_rate": 6.858743935411231e-06, "loss": 0.5875, "step": 4832 }, { "epoch": 0.69, "learning_rate": 6.852899431008788e-06, "loss": 0.7511, "step": 4833 }, { "epoch": 0.69, "learning_rate": 6.847056680475483e-06, "loss": 0.8016, "step": 4834 }, { "epoch": 0.69, "learning_rate": 6.841215685069131e-06, "loss": 0.632, "step": 4835 }, { "epoch": 0.69, "learning_rate": 6.835376446047138e-06, "loss": 0.6978, "step": 4836 }, { "epoch": 0.69, "learning_rate": 6.829538964666554e-06, "loss": 0.7165, "step": 4837 }, { "epoch": 0.69, "learning_rate": 6.8237032421840426e-06, "loss": 0.8421, "step": 4838 }, { "epoch": 0.69, "learning_rate": 6.817869279855891e-06, "loss": 0.7319, "step": 4839 }, { "epoch": 0.69, "learning_rate": 6.812037078938013e-06, "loss": 0.7564, "step": 4840 }, { "epoch": 0.69, "learning_rate": 6.806206640685921e-06, "loss": 0.8064, "step": 4841 }, { "epoch": 0.69, "learning_rate": 6.800377966354778e-06, "loss": 0.7112, "step": 4842 }, { "epoch": 0.69, "learning_rate": 6.794551057199356e-06, "loss": 0.7857, "step": 4843 }, { "epoch": 0.69, "learning_rate": 6.788725914474033e-06, "loss": 0.6819, "step": 4844 }, { "epoch": 0.69, "learning_rate": 6.782902539432823e-06, "loss": 0.6842, "step": 4845 }, { "epoch": 0.69, "learning_rate": 6.777080933329355e-06, "loss": 0.5678, "step": 4846 }, { "epoch": 0.69, "learning_rate": 6.771261097416876e-06, "loss": 0.7042, "step": 4847 }, { "epoch": 0.69, "learning_rate": 6.7654430329482516e-06, "loss": 0.871, "step": 4848 }, { "epoch": 0.69, "learning_rate": 6.759626741175971e-06, "loss": 0.8337, "step": 4849 }, { "epoch": 0.69, "learning_rate": 6.75381222335214e-06, "loss": 0.7561, "step": 4850 }, { "epoch": 0.69, "learning_rate": 6.747999480728471e-06, "loss": 0.7779, "step": 4851 }, { "epoch": 0.7, "learning_rate": 6.7421885145563025e-06, "loss": 0.6423, "step": 4852 }, { "epoch": 0.7, "learning_rate": 6.736379326086608e-06, "loss": 0.6169, "step": 4853 }, { "epoch": 0.7, "learning_rate": 6.730571916569946e-06, "loss": 0.7321, "step": 4854 }, { "epoch": 0.7, "learning_rate": 6.724766287256514e-06, "loss": 0.7907, "step": 4855 }, { "epoch": 0.7, "learning_rate": 6.718962439396121e-06, "loss": 0.7578, "step": 4856 }, { "epoch": 0.7, "learning_rate": 6.713160374238192e-06, "loss": 0.8292, "step": 4857 }, { "epoch": 0.7, "learning_rate": 6.70736009303177e-06, "loss": 0.7921, "step": 4858 }, { "epoch": 0.7, "learning_rate": 6.701561597025498e-06, "loss": 0.7938, "step": 4859 }, { "epoch": 0.7, "learning_rate": 6.695764887467668e-06, "loss": 0.8281, "step": 4860 }, { "epoch": 0.7, "learning_rate": 6.689969965606163e-06, "loss": 0.649, "step": 4861 }, { "epoch": 0.7, "learning_rate": 6.684176832688472e-06, "loss": 0.736, "step": 4862 }, { "epoch": 0.7, "learning_rate": 6.678385489961731e-06, "loss": 0.7048, "step": 4863 }, { "epoch": 0.7, "learning_rate": 6.672595938672671e-06, "loss": 0.8348, "step": 4864 }, { "epoch": 0.7, "learning_rate": 6.666808180067629e-06, "loss": 0.7271, "step": 4865 }, { "epoch": 0.7, "learning_rate": 6.66102221539257e-06, "loss": 0.8186, "step": 4866 }, { "epoch": 0.7, "learning_rate": 6.655238045893071e-06, "loss": 0.8075, "step": 4867 }, { "epoch": 0.7, "learning_rate": 6.64945567281432e-06, "loss": 0.8912, "step": 4868 }, { "epoch": 0.7, "learning_rate": 6.6436750974011194e-06, "loss": 0.7913, "step": 4869 }, { "epoch": 0.7, "learning_rate": 6.6378963208978835e-06, "loss": 0.7235, "step": 4870 }, { "epoch": 0.7, "learning_rate": 6.632119344548645e-06, "loss": 0.8103, "step": 4871 }, { "epoch": 0.7, "learning_rate": 6.626344169597032e-06, "loss": 0.7606, "step": 4872 }, { "epoch": 0.7, "learning_rate": 6.620570797286299e-06, "loss": 0.6166, "step": 4873 }, { "epoch": 0.7, "learning_rate": 6.614799228859325e-06, "loss": 0.7333, "step": 4874 }, { "epoch": 0.7, "learning_rate": 6.609029465558572e-06, "loss": 0.7182, "step": 4875 }, { "epoch": 0.7, "learning_rate": 6.60326150862613e-06, "loss": 0.8061, "step": 4876 }, { "epoch": 0.7, "learning_rate": 6.597495359303696e-06, "loss": 0.6434, "step": 4877 }, { "epoch": 0.7, "learning_rate": 6.591731018832582e-06, "loss": 0.7667, "step": 4878 }, { "epoch": 0.7, "learning_rate": 6.585968488453715e-06, "loss": 0.6942, "step": 4879 }, { "epoch": 0.7, "learning_rate": 6.580207769407605e-06, "loss": 0.6532, "step": 4880 }, { "epoch": 0.7, "learning_rate": 6.57444886293441e-06, "loss": 0.7963, "step": 4881 }, { "epoch": 0.7, "learning_rate": 6.568691770273882e-06, "loss": 0.9102, "step": 4882 }, { "epoch": 0.7, "learning_rate": 6.562936492665366e-06, "loss": 0.8527, "step": 4883 }, { "epoch": 0.7, "learning_rate": 6.557183031347834e-06, "loss": 0.7584, "step": 4884 }, { "epoch": 0.7, "learning_rate": 6.551431387559879e-06, "loss": 0.7372, "step": 4885 }, { "epoch": 0.7, "learning_rate": 6.54568156253967e-06, "loss": 0.7199, "step": 4886 }, { "epoch": 0.7, "learning_rate": 6.539933557525009e-06, "loss": 0.8666, "step": 4887 }, { "epoch": 0.7, "learning_rate": 6.5341873737533e-06, "loss": 0.6166, "step": 4888 }, { "epoch": 0.7, "learning_rate": 6.528443012461552e-06, "loss": 0.8231, "step": 4889 }, { "epoch": 0.7, "learning_rate": 6.522700474886391e-06, "loss": 0.7985, "step": 4890 }, { "epoch": 0.7, "learning_rate": 6.516959762264025e-06, "loss": 0.8672, "step": 4891 }, { "epoch": 0.7, "learning_rate": 6.511220875830307e-06, "loss": 0.9029, "step": 4892 }, { "epoch": 0.7, "learning_rate": 6.505483816820673e-06, "loss": 0.6472, "step": 4893 }, { "epoch": 0.7, "learning_rate": 6.499748586470162e-06, "loss": 0.5801, "step": 4894 }, { "epoch": 0.7, "learning_rate": 6.494015186013433e-06, "loss": 0.7394, "step": 4895 }, { "epoch": 0.7, "learning_rate": 6.488283616684742e-06, "loss": 0.74, "step": 4896 }, { "epoch": 0.7, "learning_rate": 6.4825538797179575e-06, "loss": 0.9386, "step": 4897 }, { "epoch": 0.7, "learning_rate": 6.476825976346549e-06, "loss": 0.6272, "step": 4898 }, { "epoch": 0.7, "learning_rate": 6.471099907803591e-06, "loss": 0.7547, "step": 4899 }, { "epoch": 0.7, "learning_rate": 6.465375675321772e-06, "loss": 0.9023, "step": 4900 }, { "epoch": 0.7, "learning_rate": 6.459653280133364e-06, "loss": 0.6579, "step": 4901 }, { "epoch": 0.7, "learning_rate": 6.453932723470259e-06, "loss": 0.8058, "step": 4902 }, { "epoch": 0.7, "learning_rate": 6.448214006563968e-06, "loss": 0.798, "step": 4903 }, { "epoch": 0.7, "learning_rate": 6.442497130645571e-06, "loss": 0.8097, "step": 4904 }, { "epoch": 0.7, "learning_rate": 6.436782096945777e-06, "loss": 0.8242, "step": 4905 }, { "epoch": 0.7, "learning_rate": 6.4310689066948895e-06, "loss": 0.7137, "step": 4906 }, { "epoch": 0.7, "learning_rate": 6.425357561122821e-06, "loss": 0.8253, "step": 4907 }, { "epoch": 0.7, "learning_rate": 6.419648061459083e-06, "loss": 0.8449, "step": 4908 }, { "epoch": 0.7, "learning_rate": 6.413940408932778e-06, "loss": 0.7427, "step": 4909 }, { "epoch": 0.7, "learning_rate": 6.408234604772634e-06, "loss": 0.8365, "step": 4910 }, { "epoch": 0.7, "learning_rate": 6.4025306502069735e-06, "loss": 0.8917, "step": 4911 }, { "epoch": 0.7, "learning_rate": 6.396828546463703e-06, "loss": 0.7729, "step": 4912 }, { "epoch": 0.7, "learning_rate": 6.391128294770347e-06, "loss": 0.7193, "step": 4913 }, { "epoch": 0.7, "learning_rate": 6.385429896354043e-06, "loss": 0.8365, "step": 4914 }, { "epoch": 0.7, "learning_rate": 6.379733352441499e-06, "loss": 0.8641, "step": 4915 }, { "epoch": 0.7, "learning_rate": 6.3740386642590455e-06, "loss": 0.7511, "step": 4916 }, { "epoch": 0.7, "learning_rate": 6.368345833032608e-06, "loss": 0.7525, "step": 4917 }, { "epoch": 0.7, "learning_rate": 6.362654859987714e-06, "loss": 0.7327, "step": 4918 }, { "epoch": 0.7, "learning_rate": 6.356965746349492e-06, "loss": 0.8733, "step": 4919 }, { "epoch": 0.7, "learning_rate": 6.351278493342652e-06, "loss": 0.8298, "step": 4920 }, { "epoch": 0.7, "learning_rate": 6.345593102191535e-06, "loss": 0.8814, "step": 4921 }, { "epoch": 0.71, "learning_rate": 6.3399095741200655e-06, "loss": 0.7419, "step": 4922 }, { "epoch": 0.71, "learning_rate": 6.334227910351755e-06, "loss": 0.8359, "step": 4923 }, { "epoch": 0.71, "learning_rate": 6.32854811210973e-06, "loss": 0.6097, "step": 4924 }, { "epoch": 0.71, "learning_rate": 6.3228701806167136e-06, "loss": 0.6342, "step": 4925 }, { "epoch": 0.71, "learning_rate": 6.31719411709502e-06, "loss": 0.6657, "step": 4926 }, { "epoch": 0.71, "learning_rate": 6.311519922766568e-06, "loss": 0.8304, "step": 4927 }, { "epoch": 0.71, "learning_rate": 6.30584759885287e-06, "loss": 0.7768, "step": 4928 }, { "epoch": 0.71, "learning_rate": 6.3001771465750415e-06, "loss": 0.8426, "step": 4929 }, { "epoch": 0.71, "learning_rate": 6.294508567153776e-06, "loss": 0.7483, "step": 4930 }, { "epoch": 0.71, "learning_rate": 6.288841861809393e-06, "loss": 0.764, "step": 4931 }, { "epoch": 0.71, "learning_rate": 6.283177031761796e-06, "loss": 0.7852, "step": 4932 }, { "epoch": 0.71, "learning_rate": 6.277514078230469e-06, "loss": 0.6822, "step": 4933 }, { "epoch": 0.71, "learning_rate": 6.2718530024345106e-06, "loss": 0.8482, "step": 4934 }, { "epoch": 0.71, "learning_rate": 6.266193805592622e-06, "loss": 0.9805, "step": 4935 }, { "epoch": 0.71, "learning_rate": 6.2605364889230735e-06, "loss": 0.7871, "step": 4936 }, { "epoch": 0.71, "learning_rate": 6.25488105364375e-06, "loss": 0.7154, "step": 4937 }, { "epoch": 0.71, "learning_rate": 6.2492275009721275e-06, "loss": 0.8064, "step": 4938 }, { "epoch": 0.71, "learning_rate": 6.243575832125278e-06, "loss": 0.9116, "step": 4939 }, { "epoch": 0.71, "learning_rate": 6.237926048319868e-06, "loss": 0.8237, "step": 4940 }, { "epoch": 0.71, "learning_rate": 6.232278150772142e-06, "loss": 0.7405, "step": 4941 }, { "epoch": 0.71, "learning_rate": 6.226632140697966e-06, "loss": 0.7919, "step": 4942 }, { "epoch": 0.71, "learning_rate": 6.220988019312791e-06, "loss": 0.7874, "step": 4943 }, { "epoch": 0.71, "learning_rate": 6.215345787831641e-06, "loss": 0.7316, "step": 4944 }, { "epoch": 0.71, "learning_rate": 6.209705447469158e-06, "loss": 0.7938, "step": 4945 }, { "epoch": 0.71, "learning_rate": 6.204066999439566e-06, "loss": 0.6535, "step": 4946 }, { "epoch": 0.71, "learning_rate": 6.198430444956684e-06, "loss": 0.7896, "step": 4947 }, { "epoch": 0.71, "learning_rate": 6.19279578523392e-06, "loss": 0.6682, "step": 4948 }, { "epoch": 0.71, "learning_rate": 6.18716302148428e-06, "loss": 0.5776, "step": 4949 }, { "epoch": 0.71, "learning_rate": 6.1815321549203635e-06, "loss": 0.8884, "step": 4950 }, { "epoch": 0.71, "learning_rate": 6.1759031867543455e-06, "loss": 0.5382, "step": 4951 }, { "epoch": 0.71, "learning_rate": 6.170276118198004e-06, "loss": 0.7946, "step": 4952 }, { "epoch": 0.71, "learning_rate": 6.164650950462722e-06, "loss": 0.7455, "step": 4953 }, { "epoch": 0.71, "learning_rate": 6.1590276847594445e-06, "loss": 0.7893, "step": 4954 }, { "epoch": 0.71, "learning_rate": 6.1534063222987274e-06, "loss": 1.0, "step": 4955 }, { "epoch": 0.71, "learning_rate": 6.147786864290709e-06, "loss": 0.7958, "step": 4956 }, { "epoch": 0.71, "learning_rate": 6.142169311945121e-06, "loss": 0.8454, "step": 4957 }, { "epoch": 0.71, "learning_rate": 6.1365536664712875e-06, "loss": 0.9855, "step": 4958 }, { "epoch": 0.71, "learning_rate": 6.130939929078103e-06, "loss": 0.7121, "step": 4959 }, { "epoch": 0.71, "learning_rate": 6.1253281009740814e-06, "loss": 0.8605, "step": 4960 }, { "epoch": 0.71, "learning_rate": 6.11971818336731e-06, "loss": 0.7868, "step": 4961 }, { "epoch": 0.71, "learning_rate": 6.114110177465459e-06, "loss": 0.6892, "step": 4962 }, { "epoch": 0.71, "learning_rate": 6.108504084475786e-06, "loss": 0.7946, "step": 4963 }, { "epoch": 0.71, "learning_rate": 6.1028999056051665e-06, "loss": 0.9403, "step": 4964 }, { "epoch": 0.71, "learning_rate": 6.097297642060022e-06, "loss": 0.8465, "step": 4965 }, { "epoch": 0.71, "learning_rate": 6.0916972950463874e-06, "loss": 0.8365, "step": 4966 }, { "epoch": 0.71, "learning_rate": 6.08609886576988e-06, "loss": 0.8594, "step": 4967 }, { "epoch": 0.71, "learning_rate": 6.080502355435702e-06, "loss": 0.8566, "step": 4968 }, { "epoch": 0.71, "learning_rate": 6.0749077652486475e-06, "loss": 0.9174, "step": 4969 }, { "epoch": 0.71, "learning_rate": 6.069315096413078e-06, "loss": 0.7414, "step": 4970 }, { "epoch": 0.71, "learning_rate": 6.0637243501329745e-06, "loss": 0.6161, "step": 4971 }, { "epoch": 0.71, "learning_rate": 6.058135527611886e-06, "loss": 0.8783, "step": 4972 }, { "epoch": 0.71, "learning_rate": 6.052548630052936e-06, "loss": 0.8538, "step": 4973 }, { "epoch": 0.71, "learning_rate": 6.046963658658851e-06, "loss": 0.8354, "step": 4974 }, { "epoch": 0.71, "learning_rate": 6.041380614631936e-06, "loss": 0.6975, "step": 4975 }, { "epoch": 0.71, "learning_rate": 6.035799499174085e-06, "loss": 0.9386, "step": 4976 }, { "epoch": 0.71, "learning_rate": 6.030220313486769e-06, "loss": 0.6657, "step": 4977 }, { "epoch": 0.71, "learning_rate": 6.024643058771056e-06, "loss": 0.8488, "step": 4978 }, { "epoch": 0.71, "learning_rate": 6.0190677362275905e-06, "loss": 0.8549, "step": 4979 }, { "epoch": 0.71, "learning_rate": 6.013494347056592e-06, "loss": 0.8036, "step": 4980 }, { "epoch": 0.71, "learning_rate": 6.007922892457873e-06, "loss": 0.6702, "step": 4981 }, { "epoch": 0.71, "learning_rate": 6.002353373630849e-06, "loss": 0.6618, "step": 4982 }, { "epoch": 0.71, "learning_rate": 5.996785791774478e-06, "loss": 0.7478, "step": 4983 }, { "epoch": 0.71, "learning_rate": 5.991220148087334e-06, "loss": 0.6984, "step": 4984 }, { "epoch": 0.71, "learning_rate": 5.985656443767557e-06, "loss": 0.8186, "step": 4985 }, { "epoch": 0.71, "learning_rate": 5.980094680012879e-06, "loss": 0.7383, "step": 4986 }, { "epoch": 0.71, "learning_rate": 5.9745348580206085e-06, "loss": 0.7506, "step": 4987 }, { "epoch": 0.71, "learning_rate": 5.968976978987637e-06, "loss": 0.7464, "step": 4988 }, { "epoch": 0.71, "learning_rate": 5.96342104411044e-06, "loss": 0.8566, "step": 4989 }, { "epoch": 0.71, "learning_rate": 5.957867054585075e-06, "loss": 0.6493, "step": 4990 }, { "epoch": 0.71, "learning_rate": 5.952315011607165e-06, "loss": 0.6754, "step": 4991 }, { "epoch": 0.72, "learning_rate": 5.946764916371942e-06, "loss": 0.6596, "step": 4992 }, { "epoch": 0.72, "learning_rate": 5.941216770074207e-06, "loss": 0.9844, "step": 4993 }, { "epoch": 0.72, "learning_rate": 5.935670573908326e-06, "loss": 0.7467, "step": 4994 }, { "epoch": 0.72, "learning_rate": 5.930126329068261e-06, "loss": 0.865, "step": 4995 }, { "epoch": 0.72, "learning_rate": 5.924584036747555e-06, "loss": 0.7729, "step": 4996 }, { "epoch": 0.72, "learning_rate": 5.919043698139323e-06, "loss": 0.9414, "step": 4997 }, { "epoch": 0.72, "learning_rate": 5.913505314436264e-06, "loss": 0.7227, "step": 4998 }, { "epoch": 0.72, "learning_rate": 5.907968886830655e-06, "loss": 0.6889, "step": 4999 }, { "epoch": 0.72, "learning_rate": 5.902434416514357e-06, "loss": 0.8504, "step": 5000 }, { "epoch": 0.72, "learning_rate": 5.896901904678794e-06, "loss": 0.7793, "step": 5001 }, { "epoch": 0.72, "learning_rate": 5.891371352514979e-06, "loss": 0.7453, "step": 5002 }, { "epoch": 0.72, "learning_rate": 5.88584276121352e-06, "loss": 0.6861, "step": 5003 }, { "epoch": 0.72, "learning_rate": 5.8803161319645685e-06, "loss": 0.7282, "step": 5004 }, { "epoch": 0.72, "learning_rate": 5.874791465957875e-06, "loss": 0.786, "step": 5005 }, { "epoch": 0.72, "learning_rate": 5.869268764382766e-06, "loss": 0.6724, "step": 5006 }, { "epoch": 0.72, "learning_rate": 5.863748028428143e-06, "loss": 0.7506, "step": 5007 }, { "epoch": 0.72, "learning_rate": 5.858229259282486e-06, "loss": 0.8387, "step": 5008 }, { "epoch": 0.72, "learning_rate": 5.8527124581338346e-06, "loss": 0.6928, "step": 5009 }, { "epoch": 0.72, "learning_rate": 5.847197626169838e-06, "loss": 0.5501, "step": 5010 }, { "epoch": 0.72, "learning_rate": 5.841684764577699e-06, "loss": 0.7051, "step": 5011 }, { "epoch": 0.72, "learning_rate": 5.836173874544192e-06, "loss": 0.7575, "step": 5012 }, { "epoch": 0.72, "learning_rate": 5.8306649572556775e-06, "loss": 0.8521, "step": 5013 }, { "epoch": 0.72, "learning_rate": 5.825158013898101e-06, "loss": 0.6959, "step": 5014 }, { "epoch": 0.72, "learning_rate": 5.819653045656956e-06, "loss": 0.7257, "step": 5015 }, { "epoch": 0.72, "learning_rate": 5.814150053717333e-06, "loss": 0.7204, "step": 5016 }, { "epoch": 0.72, "learning_rate": 5.808649039263887e-06, "loss": 0.721, "step": 5017 }, { "epoch": 0.72, "learning_rate": 5.803150003480854e-06, "loss": 0.757, "step": 5018 }, { "epoch": 0.72, "learning_rate": 5.797652947552043e-06, "loss": 0.779, "step": 5019 }, { "epoch": 0.72, "learning_rate": 5.792157872660819e-06, "loss": 0.9436, "step": 5020 }, { "epoch": 0.72, "learning_rate": 5.786664779990154e-06, "loss": 0.808, "step": 5021 }, { "epoch": 0.72, "learning_rate": 5.781173670722569e-06, "loss": 0.7074, "step": 5022 }, { "epoch": 0.72, "learning_rate": 5.77568454604016e-06, "loss": 0.7282, "step": 5023 }, { "epoch": 0.72, "learning_rate": 5.770197407124599e-06, "loss": 0.627, "step": 5024 }, { "epoch": 0.72, "learning_rate": 5.764712255157137e-06, "loss": 0.6094, "step": 5025 }, { "epoch": 0.72, "learning_rate": 5.759229091318586e-06, "loss": 0.7734, "step": 5026 }, { "epoch": 0.72, "learning_rate": 5.753747916789338e-06, "loss": 0.7338, "step": 5027 }, { "epoch": 0.72, "learning_rate": 5.748268732749357e-06, "loss": 0.8214, "step": 5028 }, { "epoch": 0.72, "learning_rate": 5.742791540378176e-06, "loss": 0.5968, "step": 5029 }, { "epoch": 0.72, "learning_rate": 5.737316340854892e-06, "loss": 0.7207, "step": 5030 }, { "epoch": 0.72, "learning_rate": 5.731843135358177e-06, "loss": 0.8094, "step": 5031 }, { "epoch": 0.72, "learning_rate": 5.726371925066297e-06, "loss": 0.7284, "step": 5032 }, { "epoch": 0.72, "learning_rate": 5.7209027111570475e-06, "loss": 0.6956, "step": 5033 }, { "epoch": 0.72, "learning_rate": 5.715435494807823e-06, "loss": 0.6883, "step": 5034 }, { "epoch": 0.72, "learning_rate": 5.709970277195578e-06, "loss": 0.8343, "step": 5035 }, { "epoch": 0.72, "learning_rate": 5.704507059496842e-06, "loss": 0.5336, "step": 5036 }, { "epoch": 0.72, "learning_rate": 5.699045842887711e-06, "loss": 0.7866, "step": 5037 }, { "epoch": 0.72, "learning_rate": 5.693586628543839e-06, "loss": 0.8376, "step": 5038 }, { "epoch": 0.72, "learning_rate": 5.688129417640472e-06, "loss": 0.7581, "step": 5039 }, { "epoch": 0.72, "learning_rate": 5.682674211352414e-06, "loss": 0.7218, "step": 5040 }, { "epoch": 0.72, "learning_rate": 5.677221010854024e-06, "loss": 0.8666, "step": 5041 }, { "epoch": 0.72, "learning_rate": 5.671769817319244e-06, "loss": 0.6931, "step": 5042 }, { "epoch": 0.72, "learning_rate": 5.666320631921594e-06, "loss": 0.827, "step": 5043 }, { "epoch": 0.72, "learning_rate": 5.660873455834134e-06, "loss": 0.6275, "step": 5044 }, { "epoch": 0.72, "learning_rate": 5.65542829022951e-06, "loss": 0.911, "step": 5045 }, { "epoch": 0.72, "learning_rate": 5.649985136279935e-06, "loss": 0.7863, "step": 5046 }, { "epoch": 0.72, "learning_rate": 5.644543995157182e-06, "loss": 0.8474, "step": 5047 }, { "epoch": 0.72, "learning_rate": 5.639104868032602e-06, "loss": 0.7162, "step": 5048 }, { "epoch": 0.72, "learning_rate": 5.633667756077085e-06, "loss": 0.8343, "step": 5049 }, { "epoch": 0.72, "learning_rate": 5.6282326604611265e-06, "loss": 0.7896, "step": 5050 }, { "epoch": 0.72, "learning_rate": 5.6227995823547655e-06, "loss": 0.7531, "step": 5051 }, { "epoch": 0.72, "learning_rate": 5.617368522927598e-06, "loss": 0.7547, "step": 5052 }, { "epoch": 0.72, "learning_rate": 5.611939483348806e-06, "loss": 0.5871, "step": 5053 }, { "epoch": 0.72, "learning_rate": 5.606512464787123e-06, "loss": 0.9693, "step": 5054 }, { "epoch": 0.72, "learning_rate": 5.601087468410855e-06, "loss": 0.8055, "step": 5055 }, { "epoch": 0.72, "learning_rate": 5.5956644953878675e-06, "loss": 0.6992, "step": 5056 }, { "epoch": 0.72, "learning_rate": 5.590243546885593e-06, "loss": 0.8438, "step": 5057 }, { "epoch": 0.72, "learning_rate": 5.584824624071032e-06, "loss": 0.8259, "step": 5058 }, { "epoch": 0.72, "learning_rate": 5.5794077281107305e-06, "loss": 0.8926, "step": 5059 }, { "epoch": 0.72, "learning_rate": 5.5739928601708265e-06, "loss": 0.6908, "step": 5060 }, { "epoch": 0.72, "learning_rate": 5.568580021417008e-06, "loss": 0.7087, "step": 5061 }, { "epoch": 0.73, "learning_rate": 5.563169213014514e-06, "loss": 0.5879, "step": 5062 }, { "epoch": 0.73, "learning_rate": 5.557760436128157e-06, "loss": 0.6097, "step": 5063 }, { "epoch": 0.73, "learning_rate": 5.55235369192233e-06, "loss": 0.8616, "step": 5064 }, { "epoch": 0.73, "learning_rate": 5.546948981560955e-06, "loss": 0.8167, "step": 5065 }, { "epoch": 0.73, "learning_rate": 5.541546306207538e-06, "loss": 0.6646, "step": 5066 }, { "epoch": 0.73, "learning_rate": 5.536145667025141e-06, "loss": 0.6526, "step": 5067 }, { "epoch": 0.73, "learning_rate": 5.530747065176388e-06, "loss": 0.5855, "step": 5068 }, { "epoch": 0.73, "learning_rate": 5.525350501823468e-06, "loss": 0.7807, "step": 5069 }, { "epoch": 0.73, "learning_rate": 5.519955978128114e-06, "loss": 0.728, "step": 5070 }, { "epoch": 0.73, "learning_rate": 5.514563495251649e-06, "loss": 0.7344, "step": 5071 }, { "epoch": 0.73, "learning_rate": 5.50917305435494e-06, "loss": 0.8041, "step": 5072 }, { "epoch": 0.73, "learning_rate": 5.503784656598406e-06, "loss": 0.9113, "step": 5073 }, { "epoch": 0.73, "learning_rate": 5.498398303142041e-06, "loss": 0.9515, "step": 5074 }, { "epoch": 0.73, "learning_rate": 5.493013995145392e-06, "loss": 0.8068, "step": 5075 }, { "epoch": 0.73, "learning_rate": 5.48763173376757e-06, "loss": 0.7305, "step": 5076 }, { "epoch": 0.73, "learning_rate": 5.482251520167241e-06, "loss": 0.8742, "step": 5077 }, { "epoch": 0.73, "learning_rate": 5.476873355502631e-06, "loss": 0.8103, "step": 5078 }, { "epoch": 0.73, "learning_rate": 5.4714972409315315e-06, "loss": 0.7453, "step": 5079 }, { "epoch": 0.73, "learning_rate": 5.466123177611279e-06, "loss": 0.603, "step": 5080 }, { "epoch": 0.73, "learning_rate": 5.460751166698772e-06, "loss": 0.6822, "step": 5081 }, { "epoch": 0.73, "learning_rate": 5.4553812093504915e-06, "loss": 0.6948, "step": 5082 }, { "epoch": 0.73, "learning_rate": 5.450013306722439e-06, "loss": 0.7785, "step": 5083 }, { "epoch": 0.73, "learning_rate": 5.444647459970195e-06, "loss": 0.7695, "step": 5084 }, { "epoch": 0.73, "learning_rate": 5.439283670248893e-06, "loss": 0.8348, "step": 5085 }, { "epoch": 0.73, "learning_rate": 5.433921938713228e-06, "loss": 0.7243, "step": 5086 }, { "epoch": 0.73, "learning_rate": 5.428562266517451e-06, "loss": 0.8041, "step": 5087 }, { "epoch": 0.73, "learning_rate": 5.42320465481535e-06, "loss": 0.5857, "step": 5088 }, { "epoch": 0.73, "learning_rate": 5.417849104760305e-06, "loss": 0.7849, "step": 5089 }, { "epoch": 0.73, "learning_rate": 5.41249561750523e-06, "loss": 0.8705, "step": 5090 }, { "epoch": 0.73, "learning_rate": 5.407144194202589e-06, "loss": 0.5991, "step": 5091 }, { "epoch": 0.73, "learning_rate": 5.401794836004412e-06, "loss": 0.7679, "step": 5092 }, { "epoch": 0.73, "learning_rate": 5.3964475440623e-06, "loss": 0.7567, "step": 5093 }, { "epoch": 0.73, "learning_rate": 5.391102319527373e-06, "loss": 0.7059, "step": 5094 }, { "epoch": 0.73, "learning_rate": 5.385759163550336e-06, "loss": 0.7651, "step": 5095 }, { "epoch": 0.73, "learning_rate": 5.380418077281435e-06, "loss": 0.7533, "step": 5096 }, { "epoch": 0.73, "learning_rate": 5.3750790618704735e-06, "loss": 0.5579, "step": 5097 }, { "epoch": 0.73, "learning_rate": 5.369742118466815e-06, "loss": 0.7734, "step": 5098 }, { "epoch": 0.73, "learning_rate": 5.364407248219356e-06, "loss": 0.7109, "step": 5099 }, { "epoch": 0.73, "learning_rate": 5.3590744522765784e-06, "loss": 0.7341, "step": 5100 }, { "epoch": 0.73, "learning_rate": 5.3537437317865e-06, "loss": 0.7684, "step": 5101 }, { "epoch": 0.73, "learning_rate": 5.348415087896684e-06, "loss": 0.6942, "step": 5102 }, { "epoch": 0.73, "learning_rate": 5.34308852175426e-06, "loss": 0.6794, "step": 5103 }, { "epoch": 0.73, "learning_rate": 5.337764034505906e-06, "loss": 0.7031, "step": 5104 }, { "epoch": 0.73, "learning_rate": 5.33244162729785e-06, "loss": 0.7006, "step": 5105 }, { "epoch": 0.73, "learning_rate": 5.327121301275878e-06, "loss": 0.6814, "step": 5106 }, { "epoch": 0.73, "learning_rate": 5.3218030575853235e-06, "loss": 0.7963, "step": 5107 }, { "epoch": 0.73, "learning_rate": 5.316486897371079e-06, "loss": 0.8237, "step": 5108 }, { "epoch": 0.73, "learning_rate": 5.311172821777568e-06, "loss": 0.7416, "step": 5109 }, { "epoch": 0.73, "learning_rate": 5.305860831948784e-06, "loss": 0.6699, "step": 5110 }, { "epoch": 0.73, "learning_rate": 5.300550929028281e-06, "loss": 0.8182, "step": 5111 }, { "epoch": 0.73, "learning_rate": 5.295243114159132e-06, "loss": 0.8605, "step": 5112 }, { "epoch": 0.73, "learning_rate": 5.289937388483985e-06, "loss": 0.7056, "step": 5113 }, { "epoch": 0.73, "learning_rate": 5.2846337531450325e-06, "loss": 0.8382, "step": 5114 }, { "epoch": 0.73, "learning_rate": 5.279332209284015e-06, "loss": 0.8181, "step": 5115 }, { "epoch": 0.73, "learning_rate": 5.274032758042222e-06, "loss": 0.7771, "step": 5116 }, { "epoch": 0.73, "learning_rate": 5.268735400560498e-06, "loss": 0.6886, "step": 5117 }, { "epoch": 0.73, "learning_rate": 5.26344013797923e-06, "loss": 0.7511, "step": 5118 }, { "epoch": 0.73, "learning_rate": 5.258146971438363e-06, "loss": 0.8172, "step": 5119 }, { "epoch": 0.73, "learning_rate": 5.25285590207737e-06, "loss": 0.6723, "step": 5120 }, { "epoch": 0.73, "learning_rate": 5.247566931035301e-06, "loss": 0.9213, "step": 5121 }, { "epoch": 0.73, "learning_rate": 5.242280059450742e-06, "loss": 0.5366, "step": 5122 }, { "epoch": 0.73, "learning_rate": 5.236995288461815e-06, "loss": 0.6426, "step": 5123 }, { "epoch": 0.73, "learning_rate": 5.231712619206205e-06, "loss": 0.75, "step": 5124 }, { "epoch": 0.73, "learning_rate": 5.226432052821142e-06, "loss": 0.6713, "step": 5125 }, { "epoch": 0.73, "learning_rate": 5.2211535904433985e-06, "loss": 0.7508, "step": 5126 }, { "epoch": 0.73, "learning_rate": 5.215877233209299e-06, "loss": 0.9593, "step": 5127 }, { "epoch": 0.73, "learning_rate": 5.210602982254709e-06, "loss": 0.7815, "step": 5128 }, { "epoch": 0.73, "learning_rate": 5.205330838715055e-06, "loss": 0.714, "step": 5129 }, { "epoch": 0.73, "learning_rate": 5.200060803725285e-06, "loss": 0.7606, "step": 5130 }, { "epoch": 0.73, "learning_rate": 5.1947928784199085e-06, "loss": 0.7031, "step": 5131 }, { "epoch": 0.74, "learning_rate": 5.1895270639329946e-06, "loss": 0.7938, "step": 5132 }, { "epoch": 0.74, "learning_rate": 5.184263361398127e-06, "loss": 0.6936, "step": 5133 }, { "epoch": 0.74, "learning_rate": 5.179001771948454e-06, "loss": 0.8465, "step": 5134 }, { "epoch": 0.74, "learning_rate": 5.1737422967166706e-06, "loss": 0.7896, "step": 5135 }, { "epoch": 0.74, "learning_rate": 5.168484936835007e-06, "loss": 0.6833, "step": 5136 }, { "epoch": 0.74, "learning_rate": 5.163229693435248e-06, "loss": 0.6984, "step": 5137 }, { "epoch": 0.74, "learning_rate": 5.157976567648706e-06, "loss": 0.553, "step": 5138 }, { "epoch": 0.74, "learning_rate": 5.15272556060626e-06, "loss": 0.9068, "step": 5139 }, { "epoch": 0.74, "learning_rate": 5.147476673438324e-06, "loss": 0.7762, "step": 5140 }, { "epoch": 0.74, "learning_rate": 5.142229907274843e-06, "loss": 0.8242, "step": 5141 }, { "epoch": 0.74, "learning_rate": 5.136985263245317e-06, "loss": 0.7882, "step": 5142 }, { "epoch": 0.74, "learning_rate": 5.131742742478802e-06, "loss": 0.7221, "step": 5143 }, { "epoch": 0.74, "learning_rate": 5.126502346103868e-06, "loss": 0.7109, "step": 5144 }, { "epoch": 0.74, "learning_rate": 5.121264075248647e-06, "loss": 0.7919, "step": 5145 }, { "epoch": 0.74, "learning_rate": 5.11602793104081e-06, "loss": 0.8153, "step": 5146 }, { "epoch": 0.74, "learning_rate": 5.110793914607569e-06, "loss": 0.7478, "step": 5147 }, { "epoch": 0.74, "learning_rate": 5.105562027075684e-06, "loss": 0.9202, "step": 5148 }, { "epoch": 0.74, "learning_rate": 5.100332269571435e-06, "loss": 0.6713, "step": 5149 }, { "epoch": 0.74, "learning_rate": 5.095104643220674e-06, "loss": 0.6995, "step": 5150 }, { "epoch": 0.74, "learning_rate": 5.089879149148781e-06, "loss": 0.8482, "step": 5151 }, { "epoch": 0.74, "learning_rate": 5.084655788480666e-06, "loss": 0.7695, "step": 5152 }, { "epoch": 0.74, "learning_rate": 5.079434562340791e-06, "loss": 0.6978, "step": 5153 }, { "epoch": 0.74, "learning_rate": 5.074215471853159e-06, "loss": 0.8242, "step": 5154 }, { "epoch": 0.74, "learning_rate": 5.0689985181413105e-06, "loss": 0.6867, "step": 5155 }, { "epoch": 0.74, "learning_rate": 5.063783702328329e-06, "loss": 0.7924, "step": 5156 }, { "epoch": 0.74, "learning_rate": 5.058571025536831e-06, "loss": 0.8594, "step": 5157 }, { "epoch": 0.74, "learning_rate": 5.053360488888984e-06, "loss": 0.853, "step": 5158 }, { "epoch": 0.74, "learning_rate": 5.04815209350648e-06, "loss": 0.6875, "step": 5159 }, { "epoch": 0.74, "learning_rate": 5.042945840510554e-06, "loss": 0.7667, "step": 5160 }, { "epoch": 0.74, "learning_rate": 5.037741731021999e-06, "loss": 0.7188, "step": 5161 }, { "epoch": 0.74, "learning_rate": 5.032539766161118e-06, "loss": 0.899, "step": 5162 }, { "epoch": 0.74, "learning_rate": 5.027339947047768e-06, "loss": 0.7492, "step": 5163 }, { "epoch": 0.74, "learning_rate": 5.022142274801342e-06, "loss": 0.7595, "step": 5164 }, { "epoch": 0.74, "learning_rate": 5.016946750540772e-06, "loss": 0.7785, "step": 5165 }, { "epoch": 0.74, "learning_rate": 5.0117533753845275e-06, "loss": 0.8956, "step": 5166 }, { "epoch": 0.74, "learning_rate": 5.006562150450603e-06, "loss": 0.6738, "step": 5167 }, { "epoch": 0.74, "learning_rate": 5.001373076856552e-06, "loss": 0.6423, "step": 5168 }, { "epoch": 0.74, "learning_rate": 4.996186155719454e-06, "loss": 0.6353, "step": 5169 }, { "epoch": 0.74, "learning_rate": 4.991001388155917e-06, "loss": 0.8443, "step": 5170 }, { "epoch": 0.74, "learning_rate": 4.985818775282091e-06, "loss": 0.909, "step": 5171 }, { "epoch": 0.74, "learning_rate": 4.980638318213681e-06, "loss": 0.6805, "step": 5172 }, { "epoch": 0.74, "learning_rate": 4.9754600180658946e-06, "loss": 0.7824, "step": 5173 }, { "epoch": 0.74, "learning_rate": 4.9702838759535e-06, "loss": 0.6978, "step": 5174 }, { "epoch": 0.74, "learning_rate": 4.965109892990787e-06, "loss": 0.8393, "step": 5175 }, { "epoch": 0.74, "learning_rate": 4.95993807029159e-06, "loss": 0.9035, "step": 5176 }, { "epoch": 0.74, "learning_rate": 4.954768408969273e-06, "loss": 0.7494, "step": 5177 }, { "epoch": 0.74, "learning_rate": 4.9496009101367375e-06, "loss": 0.7349, "step": 5178 }, { "epoch": 0.74, "learning_rate": 4.944435574906422e-06, "loss": 0.6984, "step": 5179 }, { "epoch": 0.74, "learning_rate": 4.939272404390285e-06, "loss": 0.8092, "step": 5180 }, { "epoch": 0.74, "learning_rate": 4.934111399699831e-06, "loss": 0.5519, "step": 5181 }, { "epoch": 0.74, "learning_rate": 4.92895256194611e-06, "loss": 0.7193, "step": 5182 }, { "epoch": 0.74, "learning_rate": 4.923795892239677e-06, "loss": 0.8917, "step": 5183 }, { "epoch": 0.74, "learning_rate": 4.918641391690643e-06, "loss": 0.7352, "step": 5184 }, { "epoch": 0.74, "learning_rate": 4.9134890614086405e-06, "loss": 0.7614, "step": 5185 }, { "epoch": 0.74, "learning_rate": 4.908338902502842e-06, "loss": 0.7907, "step": 5186 }, { "epoch": 0.74, "learning_rate": 4.903190916081953e-06, "loss": 0.8025, "step": 5187 }, { "epoch": 0.74, "learning_rate": 4.898045103254191e-06, "loss": 0.796, "step": 5188 }, { "epoch": 0.74, "learning_rate": 4.892901465127341e-06, "loss": 0.798, "step": 5189 }, { "epoch": 0.74, "learning_rate": 4.8877600028086975e-06, "loss": 0.6438, "step": 5190 }, { "epoch": 0.74, "learning_rate": 4.882620717405081e-06, "loss": 0.7143, "step": 5191 }, { "epoch": 0.74, "learning_rate": 4.877483610022854e-06, "loss": 0.738, "step": 5192 }, { "epoch": 0.74, "learning_rate": 4.87234868176792e-06, "loss": 0.8304, "step": 5193 }, { "epoch": 0.74, "learning_rate": 4.867215933745691e-06, "loss": 0.7037, "step": 5194 }, { "epoch": 0.74, "learning_rate": 4.862085367061124e-06, "loss": 0.5896, "step": 5195 }, { "epoch": 0.74, "learning_rate": 4.8569569828187025e-06, "loss": 0.779, "step": 5196 }, { "epoch": 0.74, "learning_rate": 4.851830782122442e-06, "loss": 0.8365, "step": 5197 }, { "epoch": 0.74, "learning_rate": 4.84670676607589e-06, "loss": 0.8164, "step": 5198 }, { "epoch": 0.74, "learning_rate": 4.8415849357821075e-06, "loss": 0.6981, "step": 5199 }, { "epoch": 0.74, "learning_rate": 4.83646529234371e-06, "loss": 0.9358, "step": 5200 }, { "epoch": 0.75, "learning_rate": 4.831347836862832e-06, "loss": 0.7037, "step": 5201 }, { "epoch": 0.75, "learning_rate": 4.826232570441125e-06, "loss": 0.8387, "step": 5202 }, { "epoch": 0.75, "learning_rate": 4.8211194941797845e-06, "loss": 0.8956, "step": 5203 }, { "epoch": 0.75, "learning_rate": 4.816008609179529e-06, "loss": 0.8711, "step": 5204 }, { "epoch": 0.75, "learning_rate": 4.810899916540603e-06, "loss": 0.7874, "step": 5205 }, { "epoch": 0.75, "learning_rate": 4.805793417362786e-06, "loss": 0.7804, "step": 5206 }, { "epoch": 0.75, "learning_rate": 4.800689112745378e-06, "loss": 0.7433, "step": 5207 }, { "epoch": 0.75, "learning_rate": 4.795587003787216e-06, "loss": 0.7146, "step": 5208 }, { "epoch": 0.75, "learning_rate": 4.790487091586646e-06, "loss": 0.7768, "step": 5209 }, { "epoch": 0.75, "learning_rate": 4.785389377241556e-06, "loss": 0.6833, "step": 5210 }, { "epoch": 0.75, "learning_rate": 4.780293861849368e-06, "loss": 0.7299, "step": 5211 }, { "epoch": 0.75, "learning_rate": 4.77520054650701e-06, "loss": 0.7475, "step": 5212 }, { "epoch": 0.75, "learning_rate": 4.770109432310949e-06, "loss": 0.8136, "step": 5213 }, { "epoch": 0.75, "learning_rate": 4.765020520357177e-06, "loss": 0.7383, "step": 5214 }, { "epoch": 0.75, "learning_rate": 4.75993381174121e-06, "loss": 0.9581, "step": 5215 }, { "epoch": 0.75, "learning_rate": 4.7548493075580965e-06, "loss": 0.8783, "step": 5216 }, { "epoch": 0.75, "learning_rate": 4.749767008902388e-06, "loss": 0.7617, "step": 5217 }, { "epoch": 0.75, "learning_rate": 4.744686916868195e-06, "loss": 0.6855, "step": 5218 }, { "epoch": 0.75, "learning_rate": 4.739609032549134e-06, "loss": 0.8778, "step": 5219 }, { "epoch": 0.75, "learning_rate": 4.734533357038338e-06, "loss": 0.7553, "step": 5220 }, { "epoch": 0.75, "learning_rate": 4.729459891428473e-06, "loss": 0.668, "step": 5221 }, { "epoch": 0.75, "learning_rate": 4.724388636811748e-06, "loss": 0.7294, "step": 5222 }, { "epoch": 0.75, "learning_rate": 4.719319594279864e-06, "loss": 0.6136, "step": 5223 }, { "epoch": 0.75, "learning_rate": 4.714252764924062e-06, "loss": 0.8153, "step": 5224 }, { "epoch": 0.75, "learning_rate": 4.709188149835108e-06, "loss": 0.7785, "step": 5225 }, { "epoch": 0.75, "learning_rate": 4.704125750103287e-06, "loss": 0.9185, "step": 5226 }, { "epoch": 0.75, "learning_rate": 4.699065566818414e-06, "loss": 0.6724, "step": 5227 }, { "epoch": 0.75, "learning_rate": 4.694007601069805e-06, "loss": 0.7065, "step": 5228 }, { "epoch": 0.75, "learning_rate": 4.688951853946331e-06, "loss": 0.7143, "step": 5229 }, { "epoch": 0.75, "learning_rate": 4.68389832653637e-06, "loss": 0.9481, "step": 5230 }, { "epoch": 0.75, "learning_rate": 4.67884701992781e-06, "loss": 0.8058, "step": 5231 }, { "epoch": 0.75, "learning_rate": 4.673797935208078e-06, "loss": 0.8605, "step": 5232 }, { "epoch": 0.75, "learning_rate": 4.668751073464115e-06, "loss": 0.6267, "step": 5233 }, { "epoch": 0.75, "learning_rate": 4.66370643578239e-06, "loss": 0.8772, "step": 5234 }, { "epoch": 0.75, "learning_rate": 4.658664023248883e-06, "loss": 0.644, "step": 5235 }, { "epoch": 0.75, "learning_rate": 4.653623836949104e-06, "loss": 0.7243, "step": 5236 }, { "epoch": 0.75, "learning_rate": 4.648585877968086e-06, "loss": 0.6511, "step": 5237 }, { "epoch": 0.75, "learning_rate": 4.643550147390365e-06, "loss": 0.7528, "step": 5238 }, { "epoch": 0.75, "learning_rate": 4.638516646300011e-06, "loss": 0.7522, "step": 5239 }, { "epoch": 0.75, "learning_rate": 4.633485375780625e-06, "loss": 0.8343, "step": 5240 }, { "epoch": 0.75, "learning_rate": 4.628456336915302e-06, "loss": 0.5558, "step": 5241 }, { "epoch": 0.75, "learning_rate": 4.623429530786672e-06, "loss": 0.7556, "step": 5242 }, { "epoch": 0.75, "learning_rate": 4.6184049584768864e-06, "loss": 0.6055, "step": 5243 }, { "epoch": 0.75, "learning_rate": 4.613382621067606e-06, "loss": 0.7243, "step": 5244 }, { "epoch": 0.75, "learning_rate": 4.6083625196400205e-06, "loss": 0.6842, "step": 5245 }, { "epoch": 0.75, "learning_rate": 4.603344655274831e-06, "loss": 0.6496, "step": 5246 }, { "epoch": 0.75, "learning_rate": 4.598329029052259e-06, "loss": 0.7673, "step": 5247 }, { "epoch": 0.75, "learning_rate": 4.593315642052051e-06, "loss": 0.7896, "step": 5248 }, { "epoch": 0.75, "learning_rate": 4.588304495353451e-06, "loss": 0.7706, "step": 5249 }, { "epoch": 0.75, "learning_rate": 4.583295590035247e-06, "loss": 0.6172, "step": 5250 }, { "epoch": 0.75, "learning_rate": 4.578288927175735e-06, "loss": 0.644, "step": 5251 }, { "epoch": 0.75, "learning_rate": 4.5732845078527146e-06, "loss": 0.8619, "step": 5252 }, { "epoch": 0.75, "learning_rate": 4.568282333143519e-06, "loss": 0.6934, "step": 5253 }, { "epoch": 0.75, "learning_rate": 4.563282404124991e-06, "loss": 0.7204, "step": 5254 }, { "epoch": 0.75, "learning_rate": 4.558284721873494e-06, "loss": 0.6956, "step": 5255 }, { "epoch": 0.75, "learning_rate": 4.553289287464903e-06, "loss": 0.8934, "step": 5256 }, { "epoch": 0.75, "learning_rate": 4.548296101974613e-06, "loss": 0.5474, "step": 5257 }, { "epoch": 0.75, "learning_rate": 4.543305166477537e-06, "loss": 0.7779, "step": 5258 }, { "epoch": 0.75, "learning_rate": 4.538316482048092e-06, "loss": 0.8092, "step": 5259 }, { "epoch": 0.75, "learning_rate": 4.533330049760216e-06, "loss": 0.8145, "step": 5260 }, { "epoch": 0.75, "learning_rate": 4.52834587068738e-06, "loss": 0.6165, "step": 5261 }, { "epoch": 0.75, "learning_rate": 4.52336394590254e-06, "loss": 0.7697, "step": 5262 }, { "epoch": 0.75, "learning_rate": 4.5183842764781865e-06, "loss": 0.8019, "step": 5263 }, { "epoch": 0.75, "learning_rate": 4.513406863486318e-06, "loss": 0.8728, "step": 5264 }, { "epoch": 0.75, "learning_rate": 4.508431707998449e-06, "loss": 0.6607, "step": 5265 }, { "epoch": 0.75, "learning_rate": 4.503458811085613e-06, "loss": 0.8664, "step": 5266 }, { "epoch": 0.75, "learning_rate": 4.498488173818335e-06, "loss": 0.8516, "step": 5267 }, { "epoch": 0.75, "learning_rate": 4.4935197972666874e-06, "loss": 0.8047, "step": 5268 }, { "epoch": 0.75, "learning_rate": 4.488553682500236e-06, "loss": 0.7681, "step": 5269 }, { "epoch": 0.75, "learning_rate": 4.483589830588054e-06, "loss": 0.698, "step": 5270 }, { "epoch": 0.76, "learning_rate": 4.478628242598737e-06, "loss": 0.6869, "step": 5271 }, { "epoch": 0.76, "learning_rate": 4.4736689196004045e-06, "loss": 0.7254, "step": 5272 }, { "epoch": 0.76, "learning_rate": 4.468711862660663e-06, "loss": 0.5622, "step": 5273 }, { "epoch": 0.76, "learning_rate": 4.463757072846648e-06, "loss": 0.6624, "step": 5274 }, { "epoch": 0.76, "learning_rate": 4.458804551225004e-06, "loss": 0.9319, "step": 5275 }, { "epoch": 0.76, "learning_rate": 4.453854298861886e-06, "loss": 0.8337, "step": 5276 }, { "epoch": 0.76, "learning_rate": 4.448906316822967e-06, "loss": 0.7433, "step": 5277 }, { "epoch": 0.76, "learning_rate": 4.443960606173408e-06, "loss": 0.7249, "step": 5278 }, { "epoch": 0.76, "learning_rate": 4.439017167977913e-06, "loss": 0.6952, "step": 5279 }, { "epoch": 0.76, "learning_rate": 4.434076003300685e-06, "loss": 0.779, "step": 5280 }, { "epoch": 0.76, "learning_rate": 4.4291371132054215e-06, "loss": 0.9269, "step": 5281 }, { "epoch": 0.76, "learning_rate": 4.424200498755349e-06, "loss": 0.7508, "step": 5282 }, { "epoch": 0.76, "learning_rate": 4.419266161013199e-06, "loss": 0.7327, "step": 5283 }, { "epoch": 0.76, "learning_rate": 4.414334101041212e-06, "loss": 0.8421, "step": 5284 }, { "epoch": 0.76, "learning_rate": 4.409404319901139e-06, "loss": 0.7266, "step": 5285 }, { "epoch": 0.76, "learning_rate": 4.404476818654238e-06, "loss": 0.8594, "step": 5286 }, { "epoch": 0.76, "learning_rate": 4.399551598361285e-06, "loss": 0.8094, "step": 5287 }, { "epoch": 0.76, "learning_rate": 4.394628660082549e-06, "loss": 0.8025, "step": 5288 }, { "epoch": 0.76, "learning_rate": 4.389708004877814e-06, "loss": 0.6936, "step": 5289 }, { "epoch": 0.76, "learning_rate": 4.384789633806392e-06, "loss": 0.7098, "step": 5290 }, { "epoch": 0.76, "learning_rate": 4.37987354792707e-06, "loss": 0.7316, "step": 5291 }, { "epoch": 0.76, "learning_rate": 4.374959748298166e-06, "loss": 0.6921, "step": 5292 }, { "epoch": 0.76, "learning_rate": 4.370048235977501e-06, "loss": 0.82, "step": 5293 }, { "epoch": 0.76, "learning_rate": 4.3651390120223985e-06, "loss": 0.7796, "step": 5294 }, { "epoch": 0.76, "learning_rate": 4.360232077489701e-06, "loss": 0.7026, "step": 5295 }, { "epoch": 0.76, "learning_rate": 4.355327433435734e-06, "loss": 0.8365, "step": 5296 }, { "epoch": 0.76, "learning_rate": 4.35042508091636e-06, "loss": 0.8477, "step": 5297 }, { "epoch": 0.76, "learning_rate": 4.3455250209869355e-06, "loss": 0.8156, "step": 5298 }, { "epoch": 0.76, "learning_rate": 4.340627254702311e-06, "loss": 0.6822, "step": 5299 }, { "epoch": 0.76, "learning_rate": 4.3357317831168554e-06, "loss": 0.752, "step": 5300 }, { "epoch": 0.76, "learning_rate": 4.3308386072844575e-06, "loss": 0.7087, "step": 5301 }, { "epoch": 0.76, "learning_rate": 4.325947728258481e-06, "loss": 0.721, "step": 5302 }, { "epoch": 0.76, "learning_rate": 4.321059147091817e-06, "loss": 0.851, "step": 5303 }, { "epoch": 0.76, "learning_rate": 4.316172864836856e-06, "loss": 0.8605, "step": 5304 }, { "epoch": 0.76, "learning_rate": 4.311288882545493e-06, "loss": 0.6032, "step": 5305 }, { "epoch": 0.76, "learning_rate": 4.3064072012691305e-06, "loss": 0.6331, "step": 5306 }, { "epoch": 0.76, "learning_rate": 4.301527822058671e-06, "loss": 0.6678, "step": 5307 }, { "epoch": 0.76, "learning_rate": 4.296650745964531e-06, "loss": 0.6795, "step": 5308 }, { "epoch": 0.76, "learning_rate": 4.2917759740366134e-06, "loss": 0.8733, "step": 5309 }, { "epoch": 0.76, "learning_rate": 4.286903507324336e-06, "loss": 0.7589, "step": 5310 }, { "epoch": 0.76, "learning_rate": 4.282033346876636e-06, "loss": 0.873, "step": 5311 }, { "epoch": 0.76, "learning_rate": 4.2771654937419225e-06, "loss": 0.5851, "step": 5312 }, { "epoch": 0.76, "learning_rate": 4.272299948968129e-06, "loss": 0.7589, "step": 5313 }, { "epoch": 0.76, "learning_rate": 4.267436713602689e-06, "loss": 0.6629, "step": 5314 }, { "epoch": 0.76, "learning_rate": 4.262575788692532e-06, "loss": 0.719, "step": 5315 }, { "epoch": 0.76, "learning_rate": 4.257717175284103e-06, "loss": 0.7422, "step": 5316 }, { "epoch": 0.76, "learning_rate": 4.252860874423329e-06, "loss": 0.7453, "step": 5317 }, { "epoch": 0.76, "learning_rate": 4.248006887155661e-06, "loss": 0.6618, "step": 5318 }, { "epoch": 0.76, "learning_rate": 4.2431552145260446e-06, "loss": 0.8566, "step": 5319 }, { "epoch": 0.76, "learning_rate": 4.238305857578915e-06, "loss": 0.5943, "step": 5320 }, { "epoch": 0.76, "learning_rate": 4.23345881735822e-06, "loss": 0.6814, "step": 5321 }, { "epoch": 0.76, "learning_rate": 4.228614094907419e-06, "loss": 0.7924, "step": 5322 }, { "epoch": 0.76, "learning_rate": 4.2237716912694505e-06, "loss": 0.8064, "step": 5323 }, { "epoch": 0.76, "learning_rate": 4.218931607486764e-06, "loss": 0.8281, "step": 5324 }, { "epoch": 0.76, "learning_rate": 4.214093844601314e-06, "loss": 0.7673, "step": 5325 }, { "epoch": 0.76, "learning_rate": 4.20925840365455e-06, "loss": 0.7472, "step": 5326 }, { "epoch": 0.76, "learning_rate": 4.204425285687425e-06, "loss": 0.9621, "step": 5327 }, { "epoch": 0.76, "learning_rate": 4.19959449174038e-06, "loss": 0.7249, "step": 5328 }, { "epoch": 0.76, "learning_rate": 4.194766022853375e-06, "loss": 0.8619, "step": 5329 }, { "epoch": 0.76, "learning_rate": 4.189939880065862e-06, "loss": 0.7182, "step": 5330 }, { "epoch": 0.76, "learning_rate": 4.1851160644167815e-06, "loss": 0.8013, "step": 5331 }, { "epoch": 0.76, "learning_rate": 4.180294576944587e-06, "loss": 0.7084, "step": 5332 }, { "epoch": 0.76, "learning_rate": 4.175475418687221e-06, "loss": 0.7718, "step": 5333 }, { "epoch": 0.76, "learning_rate": 4.170658590682134e-06, "loss": 0.6655, "step": 5334 }, { "epoch": 0.76, "learning_rate": 4.165844093966266e-06, "loss": 0.8013, "step": 5335 }, { "epoch": 0.76, "learning_rate": 4.161031929576063e-06, "loss": 0.7573, "step": 5336 }, { "epoch": 0.76, "learning_rate": 4.156222098547467e-06, "loss": 0.7087, "step": 5337 }, { "epoch": 0.76, "learning_rate": 4.151414601915905e-06, "loss": 0.8577, "step": 5338 }, { "epoch": 0.76, "learning_rate": 4.146609440716316e-06, "loss": 0.755, "step": 5339 }, { "epoch": 0.76, "learning_rate": 4.141806615983142e-06, "loss": 0.7556, "step": 5340 }, { "epoch": 0.77, "learning_rate": 4.1370061287503015e-06, "loss": 0.8253, "step": 5341 }, { "epoch": 0.77, "learning_rate": 4.132207980051224e-06, "loss": 0.8477, "step": 5342 }, { "epoch": 0.77, "learning_rate": 4.127412170918832e-06, "loss": 0.7765, "step": 5343 }, { "epoch": 0.77, "learning_rate": 4.122618702385545e-06, "loss": 0.6225, "step": 5344 }, { "epoch": 0.77, "learning_rate": 4.117827575483282e-06, "loss": 0.7414, "step": 5345 }, { "epoch": 0.77, "learning_rate": 4.113038791243441e-06, "loss": 0.7115, "step": 5346 }, { "epoch": 0.77, "learning_rate": 4.108252350696942e-06, "loss": 0.8605, "step": 5347 }, { "epoch": 0.77, "learning_rate": 4.10346825487419e-06, "loss": 0.6948, "step": 5348 }, { "epoch": 0.77, "learning_rate": 4.098686504805068e-06, "loss": 0.909, "step": 5349 }, { "epoch": 0.77, "learning_rate": 4.093907101518973e-06, "loss": 0.713, "step": 5350 }, { "epoch": 0.77, "learning_rate": 4.089130046044804e-06, "loss": 0.6451, "step": 5351 }, { "epoch": 0.77, "learning_rate": 4.084355339410929e-06, "loss": 1.0194, "step": 5352 }, { "epoch": 0.77, "learning_rate": 4.079582982645229e-06, "loss": 0.7467, "step": 5353 }, { "epoch": 0.77, "learning_rate": 4.074812976775076e-06, "loss": 0.6808, "step": 5354 }, { "epoch": 0.77, "learning_rate": 4.070045322827331e-06, "loss": 0.7662, "step": 5355 }, { "epoch": 0.77, "learning_rate": 4.0652800218283585e-06, "loss": 0.7843, "step": 5356 }, { "epoch": 0.77, "learning_rate": 4.060517074803996e-06, "loss": 0.8242, "step": 5357 }, { "epoch": 0.77, "learning_rate": 4.055756482779601e-06, "loss": 0.7997, "step": 5358 }, { "epoch": 0.77, "learning_rate": 4.050998246780013e-06, "loss": 0.6964, "step": 5359 }, { "epoch": 0.77, "learning_rate": 4.046242367829551e-06, "loss": 0.7427, "step": 5360 }, { "epoch": 0.77, "learning_rate": 4.041488846952046e-06, "loss": 0.7274, "step": 5361 }, { "epoch": 0.77, "learning_rate": 4.0367376851708085e-06, "loss": 0.8923, "step": 5362 }, { "epoch": 0.77, "learning_rate": 4.031988883508652e-06, "loss": 0.7199, "step": 5363 }, { "epoch": 0.77, "learning_rate": 4.0272424429878706e-06, "loss": 0.8002, "step": 5364 }, { "epoch": 0.77, "learning_rate": 4.022498364630257e-06, "loss": 0.7126, "step": 5365 }, { "epoch": 0.77, "learning_rate": 4.0177566494571e-06, "loss": 0.7796, "step": 5366 }, { "epoch": 0.77, "learning_rate": 4.013017298489159e-06, "loss": 0.9403, "step": 5367 }, { "epoch": 0.77, "learning_rate": 4.008280312746713e-06, "loss": 0.7768, "step": 5368 }, { "epoch": 0.77, "learning_rate": 4.003545693249516e-06, "loss": 0.7299, "step": 5369 }, { "epoch": 0.77, "learning_rate": 3.998813441016807e-06, "loss": 0.8128, "step": 5370 }, { "epoch": 0.77, "learning_rate": 3.994083557067323e-06, "loss": 0.7885, "step": 5371 }, { "epoch": 0.77, "learning_rate": 3.989356042419304e-06, "loss": 0.7439, "step": 5372 }, { "epoch": 0.77, "learning_rate": 3.984630898090455e-06, "loss": 0.7081, "step": 5373 }, { "epoch": 0.77, "learning_rate": 3.9799081250979834e-06, "loss": 0.9554, "step": 5374 }, { "epoch": 0.77, "learning_rate": 3.975187724458588e-06, "loss": 0.7564, "step": 5375 }, { "epoch": 0.77, "learning_rate": 3.970469697188454e-06, "loss": 0.9068, "step": 5376 }, { "epoch": 0.77, "learning_rate": 3.965754044303261e-06, "loss": 0.7232, "step": 5377 }, { "epoch": 0.77, "learning_rate": 3.9610407668181565e-06, "loss": 0.8781, "step": 5378 }, { "epoch": 0.77, "learning_rate": 3.956329865747807e-06, "loss": 0.779, "step": 5379 }, { "epoch": 0.77, "learning_rate": 3.951621342106356e-06, "loss": 0.9643, "step": 5380 }, { "epoch": 0.77, "learning_rate": 3.946915196907419e-06, "loss": 0.6998, "step": 5381 }, { "epoch": 0.77, "learning_rate": 3.942211431164119e-06, "loss": 0.861, "step": 5382 }, { "epoch": 0.77, "learning_rate": 3.93751004588906e-06, "loss": 0.8276, "step": 5383 }, { "epoch": 0.77, "learning_rate": 3.932811042094334e-06, "loss": 0.8761, "step": 5384 }, { "epoch": 0.77, "learning_rate": 3.92811442079152e-06, "loss": 0.7595, "step": 5385 }, { "epoch": 0.77, "learning_rate": 3.923420182991687e-06, "loss": 0.9043, "step": 5386 }, { "epoch": 0.77, "learning_rate": 3.918728329705388e-06, "loss": 0.7422, "step": 5387 }, { "epoch": 0.77, "learning_rate": 3.914038861942656e-06, "loss": 0.8315, "step": 5388 }, { "epoch": 0.77, "learning_rate": 3.909351780713019e-06, "loss": 0.7363, "step": 5389 }, { "epoch": 0.77, "learning_rate": 3.9046670870255006e-06, "loss": 0.6758, "step": 5390 }, { "epoch": 0.77, "learning_rate": 3.8999847818885855e-06, "loss": 0.7698, "step": 5391 }, { "epoch": 0.77, "learning_rate": 3.895304866310268e-06, "loss": 0.9113, "step": 5392 }, { "epoch": 0.77, "learning_rate": 3.890627341298011e-06, "loss": 0.7148, "step": 5393 }, { "epoch": 0.77, "learning_rate": 3.885952207858774e-06, "loss": 0.7997, "step": 5394 }, { "epoch": 0.77, "learning_rate": 3.881279466999001e-06, "loss": 0.6908, "step": 5395 }, { "epoch": 0.77, "learning_rate": 3.876609119724605e-06, "loss": 0.8203, "step": 5396 }, { "epoch": 0.77, "learning_rate": 3.871941167041008e-06, "loss": 0.755, "step": 5397 }, { "epoch": 0.77, "learning_rate": 3.867275609953107e-06, "loss": 0.5826, "step": 5398 }, { "epoch": 0.77, "learning_rate": 3.862612449465269e-06, "loss": 0.8549, "step": 5399 }, { "epoch": 0.77, "learning_rate": 3.857951686581359e-06, "loss": 0.7863, "step": 5400 }, { "epoch": 0.77, "learning_rate": 3.853293322304737e-06, "loss": 0.8644, "step": 5401 }, { "epoch": 0.77, "learning_rate": 3.84863735763822e-06, "loss": 0.7874, "step": 5402 }, { "epoch": 0.77, "learning_rate": 3.843983793584127e-06, "loss": 0.767, "step": 5403 }, { "epoch": 0.77, "learning_rate": 3.839332631144254e-06, "loss": 0.8382, "step": 5404 }, { "epoch": 0.77, "learning_rate": 3.834683871319882e-06, "loss": 0.7154, "step": 5405 }, { "epoch": 0.77, "learning_rate": 3.830037515111778e-06, "loss": 0.6967, "step": 5406 }, { "epoch": 0.77, "learning_rate": 3.825393563520175e-06, "loss": 0.6289, "step": 5407 }, { "epoch": 0.77, "learning_rate": 3.820752017544813e-06, "loss": 0.7746, "step": 5408 }, { "epoch": 0.77, "learning_rate": 3.816112878184904e-06, "loss": 0.7065, "step": 5409 }, { "epoch": 0.77, "learning_rate": 3.8114761464391285e-06, "loss": 0.6847, "step": 5410 }, { "epoch": 0.78, "learning_rate": 3.806841823305666e-06, "loss": 0.9961, "step": 5411 }, { "epoch": 0.78, "learning_rate": 3.8022099097821723e-06, "loss": 0.6417, "step": 5412 }, { "epoch": 0.78, "learning_rate": 3.7975804068657805e-06, "loss": 0.8025, "step": 5413 }, { "epoch": 0.78, "learning_rate": 3.792953315553112e-06, "loss": 0.8371, "step": 5414 }, { "epoch": 0.78, "learning_rate": 3.788328636840262e-06, "loss": 0.9127, "step": 5415 }, { "epoch": 0.78, "learning_rate": 3.783706371722817e-06, "loss": 0.7252, "step": 5416 }, { "epoch": 0.78, "learning_rate": 3.7790865211958248e-06, "loss": 0.8128, "step": 5417 }, { "epoch": 0.78, "learning_rate": 3.7744690862538243e-06, "loss": 0.7573, "step": 5418 }, { "epoch": 0.78, "learning_rate": 3.7698540678908513e-06, "loss": 0.7712, "step": 5419 }, { "epoch": 0.78, "learning_rate": 3.76524146710039e-06, "loss": 0.7017, "step": 5420 }, { "epoch": 0.78, "learning_rate": 3.760631284875423e-06, "loss": 0.6484, "step": 5421 }, { "epoch": 0.78, "learning_rate": 3.7560235222084088e-06, "loss": 0.8225, "step": 5422 }, { "epoch": 0.78, "learning_rate": 3.751418180091285e-06, "loss": 0.8722, "step": 5423 }, { "epoch": 0.78, "learning_rate": 3.7468152595154736e-06, "loss": 0.7972, "step": 5424 }, { "epoch": 0.78, "learning_rate": 3.7422147614718537e-06, "loss": 0.6222, "step": 5425 }, { "epoch": 0.78, "learning_rate": 3.7376166869508133e-06, "loss": 0.7268, "step": 5426 }, { "epoch": 0.78, "learning_rate": 3.7330210369422053e-06, "loss": 0.6992, "step": 5427 }, { "epoch": 0.78, "learning_rate": 3.728427812435348e-06, "loss": 0.8181, "step": 5428 }, { "epoch": 0.78, "learning_rate": 3.7238370144190497e-06, "loss": 0.7475, "step": 5429 }, { "epoch": 0.78, "learning_rate": 3.719248643881611e-06, "loss": 0.6562, "step": 5430 }, { "epoch": 0.78, "learning_rate": 3.714662701810778e-06, "loss": 0.7003, "step": 5431 }, { "epoch": 0.78, "learning_rate": 3.710079189193797e-06, "loss": 0.8862, "step": 5432 }, { "epoch": 0.78, "learning_rate": 3.7054981070173844e-06, "loss": 0.7204, "step": 5433 }, { "epoch": 0.78, "learning_rate": 3.7009194562677334e-06, "loss": 0.642, "step": 5434 }, { "epoch": 0.78, "learning_rate": 3.6963432379305158e-06, "loss": 0.7826, "step": 5435 }, { "epoch": 0.78, "learning_rate": 3.691769452990876e-06, "loss": 0.6574, "step": 5436 }, { "epoch": 0.78, "learning_rate": 3.687198102433443e-06, "loss": 0.8192, "step": 5437 }, { "epoch": 0.78, "learning_rate": 3.6826291872423046e-06, "loss": 0.7162, "step": 5438 }, { "epoch": 0.78, "learning_rate": 3.6780627084010365e-06, "loss": 0.6853, "step": 5439 }, { "epoch": 0.78, "learning_rate": 3.673498666892702e-06, "loss": 0.7885, "step": 5440 }, { "epoch": 0.78, "learning_rate": 3.6689370636998127e-06, "loss": 0.7617, "step": 5441 }, { "epoch": 0.78, "learning_rate": 3.664377899804374e-06, "loss": 0.8114, "step": 5442 }, { "epoch": 0.78, "learning_rate": 3.6598211761878603e-06, "loss": 0.7628, "step": 5443 }, { "epoch": 0.78, "learning_rate": 3.655266893831221e-06, "loss": 0.7829, "step": 5444 }, { "epoch": 0.78, "learning_rate": 3.650715053714885e-06, "loss": 0.8672, "step": 5445 }, { "epoch": 0.78, "learning_rate": 3.646165656818739e-06, "loss": 0.7436, "step": 5446 }, { "epoch": 0.78, "learning_rate": 3.6416187041221655e-06, "loss": 0.8387, "step": 5447 }, { "epoch": 0.78, "learning_rate": 3.637074196604015e-06, "loss": 0.5706, "step": 5448 }, { "epoch": 0.78, "learning_rate": 3.6325321352425967e-06, "loss": 0.894, "step": 5449 }, { "epoch": 0.78, "learning_rate": 3.6279925210157033e-06, "loss": 0.757, "step": 5450 }, { "epoch": 0.78, "learning_rate": 3.623455354900617e-06, "loss": 0.8555, "step": 5451 }, { "epoch": 0.78, "learning_rate": 3.61892063787406e-06, "loss": 0.7773, "step": 5452 }, { "epoch": 0.78, "learning_rate": 3.614388370912255e-06, "loss": 0.947, "step": 5453 }, { "epoch": 0.78, "learning_rate": 3.60985855499088e-06, "loss": 0.7879, "step": 5454 }, { "epoch": 0.78, "learning_rate": 3.6053311910850995e-06, "loss": 0.7835, "step": 5455 }, { "epoch": 0.78, "learning_rate": 3.600806280169541e-06, "loss": 0.6897, "step": 5456 }, { "epoch": 0.78, "learning_rate": 3.5962838232182965e-06, "loss": 0.7249, "step": 5457 }, { "epoch": 0.78, "learning_rate": 3.5917638212049515e-06, "loss": 0.7084, "step": 5458 }, { "epoch": 0.78, "learning_rate": 3.5872462751025494e-06, "loss": 0.6403, "step": 5459 }, { "epoch": 0.78, "learning_rate": 3.582731185883597e-06, "loss": 0.803, "step": 5460 }, { "epoch": 0.78, "learning_rate": 3.578218554520089e-06, "loss": 0.5073, "step": 5461 }, { "epoch": 0.78, "learning_rate": 3.5737083819834798e-06, "loss": 0.7536, "step": 5462 }, { "epoch": 0.78, "learning_rate": 3.569200669244699e-06, "loss": 0.7902, "step": 5463 }, { "epoch": 0.78, "learning_rate": 3.564695417274146e-06, "loss": 0.7913, "step": 5464 }, { "epoch": 0.78, "learning_rate": 3.56019262704169e-06, "loss": 0.6975, "step": 5465 }, { "epoch": 0.78, "learning_rate": 3.5556922995166734e-06, "loss": 0.6674, "step": 5466 }, { "epoch": 0.78, "learning_rate": 3.5511944356679e-06, "loss": 0.6451, "step": 5467 }, { "epoch": 0.78, "learning_rate": 3.546699036463645e-06, "loss": 0.7952, "step": 5468 }, { "epoch": 0.78, "learning_rate": 3.5422061028716706e-06, "loss": 0.6364, "step": 5469 }, { "epoch": 0.78, "learning_rate": 3.5377156358591808e-06, "loss": 0.8806, "step": 5470 }, { "epoch": 0.78, "learning_rate": 3.5332276363928676e-06, "loss": 0.6482, "step": 5471 }, { "epoch": 0.78, "learning_rate": 3.5287421054388862e-06, "loss": 0.8064, "step": 5472 }, { "epoch": 0.78, "learning_rate": 3.52425904396286e-06, "loss": 0.8119, "step": 5473 }, { "epoch": 0.78, "learning_rate": 3.519778452929886e-06, "loss": 0.5377, "step": 5474 }, { "epoch": 0.78, "learning_rate": 3.5153003333045103e-06, "loss": 0.704, "step": 5475 }, { "epoch": 0.78, "learning_rate": 3.5108246860507753e-06, "loss": 0.6903, "step": 5476 }, { "epoch": 0.78, "learning_rate": 3.5063515121321772e-06, "loss": 0.4706, "step": 5477 }, { "epoch": 0.78, "learning_rate": 3.5018808125116703e-06, "loss": 0.6836, "step": 5478 }, { "epoch": 0.78, "learning_rate": 3.497412588151685e-06, "loss": 0.861, "step": 5479 }, { "epoch": 0.78, "learning_rate": 3.4929468400141333e-06, "loss": 0.736, "step": 5480 }, { "epoch": 0.79, "learning_rate": 3.488483569060368e-06, "loss": 0.6783, "step": 5481 }, { "epoch": 0.79, "learning_rate": 3.4840227762512245e-06, "loss": 0.6409, "step": 5482 }, { "epoch": 0.79, "learning_rate": 3.4795644625470018e-06, "loss": 0.7935, "step": 5483 }, { "epoch": 0.79, "learning_rate": 3.4751086289074628e-06, "loss": 0.8426, "step": 5484 }, { "epoch": 0.79, "learning_rate": 3.4706552762918436e-06, "loss": 0.6434, "step": 5485 }, { "epoch": 0.79, "learning_rate": 3.466204405658827e-06, "loss": 0.755, "step": 5486 }, { "epoch": 0.79, "learning_rate": 3.4617560179665904e-06, "loss": 0.9358, "step": 5487 }, { "epoch": 0.79, "learning_rate": 3.4573101141727587e-06, "loss": 0.719, "step": 5488 }, { "epoch": 0.79, "learning_rate": 3.45286669523442e-06, "loss": 0.733, "step": 5489 }, { "epoch": 0.79, "learning_rate": 3.4484257621081326e-06, "loss": 0.793, "step": 5490 }, { "epoch": 0.79, "learning_rate": 3.4439873157499234e-06, "loss": 0.7439, "step": 5491 }, { "epoch": 0.79, "learning_rate": 3.4395513571152766e-06, "loss": 0.8454, "step": 5492 }, { "epoch": 0.79, "learning_rate": 3.435117887159146e-06, "loss": 0.8541, "step": 5493 }, { "epoch": 0.79, "learning_rate": 3.430686906835949e-06, "loss": 0.7076, "step": 5494 }, { "epoch": 0.79, "learning_rate": 3.4262584170995683e-06, "loss": 0.6925, "step": 5495 }, { "epoch": 0.79, "learning_rate": 3.4218324189033366e-06, "loss": 0.8571, "step": 5496 }, { "epoch": 0.79, "learning_rate": 3.417408913200074e-06, "loss": 0.6571, "step": 5497 }, { "epoch": 0.79, "learning_rate": 3.4129879009420534e-06, "loss": 0.7748, "step": 5498 }, { "epoch": 0.79, "learning_rate": 3.4085693830809982e-06, "loss": 0.767, "step": 5499 }, { "epoch": 0.79, "learning_rate": 3.4041533605681064e-06, "loss": 0.803, "step": 5500 }, { "epoch": 0.79, "learning_rate": 3.3997398343540535e-06, "loss": 0.7165, "step": 5501 }, { "epoch": 0.79, "learning_rate": 3.395328805388947e-06, "loss": 0.7958, "step": 5502 }, { "epoch": 0.79, "learning_rate": 3.3909202746223783e-06, "loss": 0.7746, "step": 5503 }, { "epoch": 0.79, "learning_rate": 3.3865142430033934e-06, "loss": 0.7299, "step": 5504 }, { "epoch": 0.79, "learning_rate": 3.3821107114805038e-06, "loss": 0.8783, "step": 5505 }, { "epoch": 0.79, "learning_rate": 3.3777096810016845e-06, "loss": 0.5997, "step": 5506 }, { "epoch": 0.79, "learning_rate": 3.373311152514354e-06, "loss": 0.7199, "step": 5507 }, { "epoch": 0.79, "learning_rate": 3.3689151269654215e-06, "loss": 0.7201, "step": 5508 }, { "epoch": 0.79, "learning_rate": 3.3645216053012422e-06, "loss": 0.7614, "step": 5509 }, { "epoch": 0.79, "learning_rate": 3.3601305884676226e-06, "loss": 0.7846, "step": 5510 }, { "epoch": 0.79, "learning_rate": 3.355742077409846e-06, "loss": 0.8873, "step": 5511 }, { "epoch": 0.79, "learning_rate": 3.3513560730726496e-06, "loss": 0.606, "step": 5512 }, { "epoch": 0.79, "learning_rate": 3.346972576400232e-06, "loss": 0.6523, "step": 5513 }, { "epoch": 0.79, "learning_rate": 3.34259158833625e-06, "loss": 0.721, "step": 5514 }, { "epoch": 0.79, "learning_rate": 3.3382131098238235e-06, "loss": 0.7746, "step": 5515 }, { "epoch": 0.79, "learning_rate": 3.333837141805534e-06, "loss": 0.5999, "step": 5516 }, { "epoch": 0.79, "learning_rate": 3.329463685223411e-06, "loss": 0.6143, "step": 5517 }, { "epoch": 0.79, "learning_rate": 3.3250927410189515e-06, "loss": 0.6797, "step": 5518 }, { "epoch": 0.79, "learning_rate": 3.3207243101331236e-06, "loss": 0.6242, "step": 5519 }, { "epoch": 0.79, "learning_rate": 3.3163583935063303e-06, "loss": 0.9269, "step": 5520 }, { "epoch": 0.79, "learning_rate": 3.3119949920784508e-06, "loss": 0.8756, "step": 5521 }, { "epoch": 0.79, "learning_rate": 3.3076341067888144e-06, "loss": 0.7411, "step": 5522 }, { "epoch": 0.79, "learning_rate": 3.3032757385762146e-06, "loss": 0.8705, "step": 5523 }, { "epoch": 0.79, "learning_rate": 3.2989198883789045e-06, "loss": 0.7034, "step": 5524 }, { "epoch": 0.79, "learning_rate": 3.2945665571345773e-06, "loss": 0.767, "step": 5525 }, { "epoch": 0.79, "learning_rate": 3.2902157457804084e-06, "loss": 0.7296, "step": 5526 }, { "epoch": 0.79, "learning_rate": 3.285867455253023e-06, "loss": 0.714, "step": 5527 }, { "epoch": 0.79, "learning_rate": 3.28152168648849e-06, "loss": 0.6883, "step": 5528 }, { "epoch": 0.79, "learning_rate": 3.2771784404223453e-06, "loss": 0.6362, "step": 5529 }, { "epoch": 0.79, "learning_rate": 3.2728377179895964e-06, "loss": 0.6554, "step": 5530 }, { "epoch": 0.79, "learning_rate": 3.268499520124682e-06, "loss": 0.6855, "step": 5531 }, { "epoch": 0.79, "learning_rate": 3.2641638477615104e-06, "loss": 0.909, "step": 5532 }, { "epoch": 0.79, "learning_rate": 3.259830701833445e-06, "loss": 0.8222, "step": 5533 }, { "epoch": 0.79, "learning_rate": 3.255500083273306e-06, "loss": 0.8203, "step": 5534 }, { "epoch": 0.79, "learning_rate": 3.2511719930133715e-06, "loss": 0.7729, "step": 5535 }, { "epoch": 0.79, "learning_rate": 3.246846431985361e-06, "loss": 0.661, "step": 5536 }, { "epoch": 0.79, "learning_rate": 3.242523401120471e-06, "loss": 0.6847, "step": 5537 }, { "epoch": 0.79, "learning_rate": 3.238202901349345e-06, "loss": 0.7109, "step": 5538 }, { "epoch": 0.79, "learning_rate": 3.2338849336020722e-06, "loss": 0.8253, "step": 5539 }, { "epoch": 0.79, "learning_rate": 3.229569498808206e-06, "loss": 0.6236, "step": 5540 }, { "epoch": 0.79, "learning_rate": 3.2252565978967553e-06, "loss": 0.6883, "step": 5541 }, { "epoch": 0.79, "learning_rate": 3.2209462317961776e-06, "loss": 0.6646, "step": 5542 }, { "epoch": 0.79, "learning_rate": 3.2166384014343904e-06, "loss": 0.6925, "step": 5543 }, { "epoch": 0.79, "learning_rate": 3.212333107738762e-06, "loss": 0.7969, "step": 5544 }, { "epoch": 0.79, "learning_rate": 3.208030351636121e-06, "loss": 0.7218, "step": 5545 }, { "epoch": 0.79, "learning_rate": 3.2037301340527348e-06, "loss": 0.6493, "step": 5546 }, { "epoch": 0.79, "learning_rate": 3.1994324559143347e-06, "loss": 0.7785, "step": 5547 }, { "epoch": 0.79, "learning_rate": 3.195137318146115e-06, "loss": 0.8371, "step": 5548 }, { "epoch": 0.79, "learning_rate": 3.190844721672702e-06, "loss": 0.6254, "step": 5549 }, { "epoch": 0.8, "learning_rate": 3.1865546674181913e-06, "loss": 0.7054, "step": 5550 }, { "epoch": 0.8, "learning_rate": 3.182267156306122e-06, "loss": 0.7268, "step": 5551 }, { "epoch": 0.8, "learning_rate": 3.177982189259489e-06, "loss": 0.7796, "step": 5552 }, { "epoch": 0.8, "learning_rate": 3.1736997672007467e-06, "loss": 0.8867, "step": 5553 }, { "epoch": 0.8, "learning_rate": 3.1694198910517808e-06, "loss": 0.6936, "step": 5554 }, { "epoch": 0.8, "learning_rate": 3.165142561733956e-06, "loss": 0.7031, "step": 5555 }, { "epoch": 0.8, "learning_rate": 3.1608677801680718e-06, "loss": 0.728, "step": 5556 }, { "epoch": 0.8, "learning_rate": 3.1565955472743794e-06, "loss": 0.6487, "step": 5557 }, { "epoch": 0.8, "learning_rate": 3.1523258639725833e-06, "loss": 0.7076, "step": 5558 }, { "epoch": 0.8, "learning_rate": 3.148058731181851e-06, "loss": 0.865, "step": 5559 }, { "epoch": 0.8, "learning_rate": 3.1437941498207793e-06, "loss": 0.7589, "step": 5560 }, { "epoch": 0.8, "learning_rate": 3.139532120807433e-06, "loss": 0.7684, "step": 5561 }, { "epoch": 0.8, "learning_rate": 3.1352726450593207e-06, "loss": 0.6141, "step": 5562 }, { "epoch": 0.8, "learning_rate": 3.1310157234934006e-06, "loss": 0.697, "step": 5563 }, { "epoch": 0.8, "learning_rate": 3.1267613570260833e-06, "loss": 0.6749, "step": 5564 }, { "epoch": 0.8, "learning_rate": 3.1225095465732277e-06, "loss": 0.8432, "step": 5565 }, { "epoch": 0.8, "learning_rate": 3.1182602930501504e-06, "loss": 0.7835, "step": 5566 }, { "epoch": 0.8, "learning_rate": 3.1140135973716004e-06, "loss": 0.716, "step": 5567 }, { "epoch": 0.8, "learning_rate": 3.109769460451786e-06, "loss": 0.7126, "step": 5568 }, { "epoch": 0.8, "learning_rate": 3.1055278832043765e-06, "loss": 0.7888, "step": 5569 }, { "epoch": 0.8, "learning_rate": 3.1012888665424678e-06, "loss": 0.6964, "step": 5570 }, { "epoch": 0.8, "learning_rate": 3.0970524113786193e-06, "loss": 0.6627, "step": 5571 }, { "epoch": 0.8, "learning_rate": 3.0928185186248332e-06, "loss": 0.7174, "step": 5572 }, { "epoch": 0.8, "learning_rate": 3.0885871891925636e-06, "loss": 0.6643, "step": 5573 }, { "epoch": 0.8, "learning_rate": 3.0843584239927143e-06, "loss": 0.865, "step": 5574 }, { "epoch": 0.8, "learning_rate": 3.080132223935623e-06, "loss": 0.6052, "step": 5575 }, { "epoch": 0.8, "learning_rate": 3.075908589931097e-06, "loss": 0.7626, "step": 5576 }, { "epoch": 0.8, "learning_rate": 3.0716875228883788e-06, "loss": 0.8945, "step": 5577 }, { "epoch": 0.8, "learning_rate": 3.067469023716154e-06, "loss": 0.7517, "step": 5578 }, { "epoch": 0.8, "learning_rate": 3.063253093322559e-06, "loss": 0.9749, "step": 5579 }, { "epoch": 0.8, "learning_rate": 3.0590397326151924e-06, "loss": 0.7701, "step": 5580 }, { "epoch": 0.8, "learning_rate": 3.054828942501076e-06, "loss": 0.6217, "step": 5581 }, { "epoch": 0.8, "learning_rate": 3.0506207238866897e-06, "loss": 0.885, "step": 5582 }, { "epoch": 0.8, "learning_rate": 3.0464150776779615e-06, "loss": 0.6694, "step": 5583 }, { "epoch": 0.8, "learning_rate": 3.042212004780261e-06, "loss": 0.8432, "step": 5584 }, { "epoch": 0.8, "learning_rate": 3.038011506098411e-06, "loss": 0.7673, "step": 5585 }, { "epoch": 0.8, "learning_rate": 3.033813582536663e-06, "loss": 0.6281, "step": 5586 }, { "epoch": 0.8, "learning_rate": 3.029618234998736e-06, "loss": 0.6897, "step": 5587 }, { "epoch": 0.8, "learning_rate": 3.0254254643877877e-06, "loss": 0.6727, "step": 5588 }, { "epoch": 0.8, "learning_rate": 3.0212352716064075e-06, "loss": 0.6535, "step": 5589 }, { "epoch": 0.8, "learning_rate": 3.0170476575566434e-06, "loss": 0.8343, "step": 5590 }, { "epoch": 0.8, "learning_rate": 3.0128626231399887e-06, "loss": 0.7327, "step": 5591 }, { "epoch": 0.8, "learning_rate": 3.0086801692573752e-06, "loss": 0.6401, "step": 5592 }, { "epoch": 0.8, "learning_rate": 3.004500296809181e-06, "loss": 0.8884, "step": 5593 }, { "epoch": 0.8, "learning_rate": 3.000323006695232e-06, "loss": 0.6482, "step": 5594 }, { "epoch": 0.8, "learning_rate": 2.9961482998147965e-06, "loss": 0.659, "step": 5595 }, { "epoch": 0.8, "learning_rate": 2.9919761770665795e-06, "loss": 0.9927, "step": 5596 }, { "epoch": 0.8, "learning_rate": 2.9878066393487354e-06, "loss": 0.9102, "step": 5597 }, { "epoch": 0.8, "learning_rate": 2.9836396875588746e-06, "loss": 0.5943, "step": 5598 }, { "epoch": 0.8, "learning_rate": 2.9794753225940247e-06, "loss": 0.7751, "step": 5599 }, { "epoch": 0.8, "learning_rate": 2.9753135453506767e-06, "loss": 0.638, "step": 5600 }, { "epoch": 0.8, "learning_rate": 2.971154356724758e-06, "loss": 0.8371, "step": 5601 }, { "epoch": 0.8, "learning_rate": 2.966997757611638e-06, "loss": 0.793, "step": 5602 }, { "epoch": 0.8, "learning_rate": 2.962843748906135e-06, "loss": 0.8075, "step": 5603 }, { "epoch": 0.8, "learning_rate": 2.9586923315024917e-06, "loss": 0.8756, "step": 5604 }, { "epoch": 0.8, "learning_rate": 2.9545435062944167e-06, "loss": 0.8499, "step": 5605 }, { "epoch": 0.8, "learning_rate": 2.95039727417505e-06, "loss": 0.7634, "step": 5606 }, { "epoch": 0.8, "learning_rate": 2.9462536360369637e-06, "loss": 0.7506, "step": 5607 }, { "epoch": 0.8, "learning_rate": 2.9421125927721833e-06, "loss": 0.8242, "step": 5608 }, { "epoch": 0.8, "learning_rate": 2.9379741452721824e-06, "loss": 0.7924, "step": 5609 }, { "epoch": 0.8, "learning_rate": 2.933838294427856e-06, "loss": 0.8181, "step": 5610 }, { "epoch": 0.8, "learning_rate": 2.9297050411295537e-06, "loss": 0.9074, "step": 5611 }, { "epoch": 0.8, "learning_rate": 2.925574386267062e-06, "loss": 0.7405, "step": 5612 }, { "epoch": 0.8, "learning_rate": 2.9214463307296086e-06, "loss": 0.6657, "step": 5613 }, { "epoch": 0.8, "learning_rate": 2.9173208754058677e-06, "loss": 0.7712, "step": 5614 }, { "epoch": 0.8, "learning_rate": 2.9131980211839343e-06, "loss": 0.6635, "step": 5615 }, { "epoch": 0.8, "learning_rate": 2.909077768951368e-06, "loss": 0.8443, "step": 5616 }, { "epoch": 0.8, "learning_rate": 2.9049601195951596e-06, "loss": 0.6602, "step": 5617 }, { "epoch": 0.8, "learning_rate": 2.9008450740017267e-06, "loss": 0.74, "step": 5618 }, { "epoch": 0.8, "learning_rate": 2.896732633056942e-06, "loss": 0.8292, "step": 5619 }, { "epoch": 0.81, "learning_rate": 2.892622797646113e-06, "loss": 0.7076, "step": 5620 }, { "epoch": 0.81, "learning_rate": 2.888515568653983e-06, "loss": 0.7681, "step": 5621 }, { "epoch": 0.81, "learning_rate": 2.8844109469647396e-06, "loss": 0.7648, "step": 5622 }, { "epoch": 0.81, "learning_rate": 2.8803089334620047e-06, "loss": 0.7179, "step": 5623 }, { "epoch": 0.81, "learning_rate": 2.876209529028844e-06, "loss": 0.7785, "step": 5624 }, { "epoch": 0.81, "learning_rate": 2.8721127345477472e-06, "loss": 0.8532, "step": 5625 }, { "epoch": 0.81, "learning_rate": 2.868018550900663e-06, "loss": 0.8225, "step": 5626 }, { "epoch": 0.81, "learning_rate": 2.8639269789689693e-06, "loss": 0.8181, "step": 5627 }, { "epoch": 0.81, "learning_rate": 2.859838019633473e-06, "loss": 0.7148, "step": 5628 }, { "epoch": 0.81, "learning_rate": 2.855751673774424e-06, "loss": 0.6744, "step": 5629 }, { "epoch": 0.81, "learning_rate": 2.8516679422715237e-06, "loss": 0.8052, "step": 5630 }, { "epoch": 0.81, "learning_rate": 2.847586826003886e-06, "loss": 0.8086, "step": 5631 }, { "epoch": 0.81, "learning_rate": 2.843508325850081e-06, "loss": 0.7846, "step": 5632 }, { "epoch": 0.81, "learning_rate": 2.8394324426881076e-06, "loss": 0.817, "step": 5633 }, { "epoch": 0.81, "learning_rate": 2.8353591773954e-06, "loss": 0.7483, "step": 5634 }, { "epoch": 0.81, "learning_rate": 2.831288530848838e-06, "loss": 0.5734, "step": 5635 }, { "epoch": 0.81, "learning_rate": 2.827220503924718e-06, "loss": 0.7453, "step": 5636 }, { "epoch": 0.81, "learning_rate": 2.8231550974987995e-06, "loss": 0.8192, "step": 5637 }, { "epoch": 0.81, "learning_rate": 2.8190923124462613e-06, "loss": 0.6604, "step": 5638 }, { "epoch": 0.81, "learning_rate": 2.8150321496417136e-06, "loss": 0.8147, "step": 5639 }, { "epoch": 0.81, "learning_rate": 2.8109746099592123e-06, "loss": 0.921, "step": 5640 }, { "epoch": 0.81, "learning_rate": 2.8069196942722464e-06, "loss": 0.5566, "step": 5641 }, { "epoch": 0.81, "learning_rate": 2.802867403453738e-06, "loss": 0.709, "step": 5642 }, { "epoch": 0.81, "learning_rate": 2.7988177383760466e-06, "loss": 0.7779, "step": 5643 }, { "epoch": 0.81, "learning_rate": 2.794770699910964e-06, "loss": 0.7665, "step": 5644 }, { "epoch": 0.81, "learning_rate": 2.79072628892972e-06, "loss": 0.7693, "step": 5645 }, { "epoch": 0.81, "learning_rate": 2.7866845063029704e-06, "loss": 0.6995, "step": 5646 }, { "epoch": 0.81, "learning_rate": 2.782645352900809e-06, "loss": 0.7012, "step": 5647 }, { "epoch": 0.81, "learning_rate": 2.778608829592778e-06, "loss": 0.6613, "step": 5648 }, { "epoch": 0.81, "learning_rate": 2.7745749372478314e-06, "loss": 0.6113, "step": 5649 }, { "epoch": 0.81, "learning_rate": 2.7705436767343674e-06, "loss": 0.5926, "step": 5650 }, { "epoch": 0.81, "learning_rate": 2.7665150489202184e-06, "loss": 0.7489, "step": 5651 }, { "epoch": 0.81, "learning_rate": 2.762489054672646e-06, "loss": 0.707, "step": 5652 }, { "epoch": 0.81, "learning_rate": 2.7584656948583538e-06, "loss": 0.7849, "step": 5653 }, { "epoch": 0.81, "learning_rate": 2.7544449703434597e-06, "loss": 0.6802, "step": 5654 }, { "epoch": 0.81, "learning_rate": 2.7504268819935357e-06, "loss": 0.7648, "step": 5655 }, { "epoch": 0.81, "learning_rate": 2.746411430673578e-06, "loss": 0.5615, "step": 5656 }, { "epoch": 0.81, "learning_rate": 2.742398617248006e-06, "loss": 0.6077, "step": 5657 }, { "epoch": 0.81, "learning_rate": 2.738388442580681e-06, "loss": 0.6189, "step": 5658 }, { "epoch": 0.81, "learning_rate": 2.7343809075349025e-06, "loss": 0.63, "step": 5659 }, { "epoch": 0.81, "learning_rate": 2.7303760129733835e-06, "loss": 0.7285, "step": 5660 }, { "epoch": 0.81, "learning_rate": 2.7263737597582843e-06, "loss": 0.9258, "step": 5661 }, { "epoch": 0.81, "learning_rate": 2.7223741487511893e-06, "loss": 0.6412, "step": 5662 }, { "epoch": 0.81, "learning_rate": 2.7183771808131147e-06, "loss": 0.7617, "step": 5663 }, { "epoch": 0.81, "learning_rate": 2.7143828568045156e-06, "loss": 0.5851, "step": 5664 }, { "epoch": 0.81, "learning_rate": 2.710391177585257e-06, "loss": 0.9001, "step": 5665 }, { "epoch": 0.81, "learning_rate": 2.7064021440146602e-06, "loss": 0.7093, "step": 5666 }, { "epoch": 0.81, "learning_rate": 2.7024157569514684e-06, "loss": 0.8783, "step": 5667 }, { "epoch": 0.81, "learning_rate": 2.6984320172538417e-06, "loss": 0.8326, "step": 5668 }, { "epoch": 0.81, "learning_rate": 2.694450925779384e-06, "loss": 0.8167, "step": 5669 }, { "epoch": 0.81, "learning_rate": 2.6904724833851287e-06, "loss": 0.7801, "step": 5670 }, { "epoch": 0.81, "learning_rate": 2.686496690927533e-06, "loss": 0.6616, "step": 5671 }, { "epoch": 0.81, "learning_rate": 2.682523549262488e-06, "loss": 0.7263, "step": 5672 }, { "epoch": 0.81, "learning_rate": 2.6785530592453123e-06, "loss": 0.7807, "step": 5673 }, { "epoch": 0.81, "learning_rate": 2.6745852217307576e-06, "loss": 0.6931, "step": 5674 }, { "epoch": 0.81, "learning_rate": 2.6706200375729954e-06, "loss": 0.5731, "step": 5675 }, { "epoch": 0.81, "learning_rate": 2.6666575076256316e-06, "loss": 0.8574, "step": 5676 }, { "epoch": 0.81, "learning_rate": 2.6626976327417085e-06, "loss": 0.7238, "step": 5677 }, { "epoch": 0.81, "learning_rate": 2.6587404137736824e-06, "loss": 0.7701, "step": 5678 }, { "epoch": 0.81, "learning_rate": 2.654785851573446e-06, "loss": 0.7695, "step": 5679 }, { "epoch": 0.81, "learning_rate": 2.6508339469923188e-06, "loss": 0.7282, "step": 5680 }, { "epoch": 0.81, "learning_rate": 2.6468847008810494e-06, "loss": 0.6814, "step": 5681 }, { "epoch": 0.81, "learning_rate": 2.6429381140898106e-06, "loss": 0.6002, "step": 5682 }, { "epoch": 0.81, "learning_rate": 2.6389941874682066e-06, "loss": 0.647, "step": 5683 }, { "epoch": 0.81, "learning_rate": 2.6350529218652664e-06, "loss": 0.709, "step": 5684 }, { "epoch": 0.81, "learning_rate": 2.631114318129451e-06, "loss": 0.8334, "step": 5685 }, { "epoch": 0.81, "learning_rate": 2.6271783771086326e-06, "loss": 0.7511, "step": 5686 }, { "epoch": 0.81, "learning_rate": 2.623245099650135e-06, "loss": 0.7866, "step": 5687 }, { "epoch": 0.81, "learning_rate": 2.6193144866006924e-06, "loss": 0.8108, "step": 5688 }, { "epoch": 0.81, "learning_rate": 2.615386538806463e-06, "loss": 0.6574, "step": 5689 }, { "epoch": 0.82, "learning_rate": 2.611461257113042e-06, "loss": 0.7567, "step": 5690 }, { "epoch": 0.82, "learning_rate": 2.607538642365442e-06, "loss": 0.6147, "step": 5691 }, { "epoch": 0.82, "learning_rate": 2.6036186954081052e-06, "loss": 0.7846, "step": 5692 }, { "epoch": 0.82, "learning_rate": 2.5997014170849026e-06, "loss": 0.827, "step": 5693 }, { "epoch": 0.82, "learning_rate": 2.595786808239124e-06, "loss": 0.6263, "step": 5694 }, { "epoch": 0.82, "learning_rate": 2.5918748697134935e-06, "loss": 0.9063, "step": 5695 }, { "epoch": 0.82, "learning_rate": 2.5879656023501465e-06, "loss": 0.6978, "step": 5696 }, { "epoch": 0.82, "learning_rate": 2.5840590069906505e-06, "loss": 0.6172, "step": 5697 }, { "epoch": 0.82, "learning_rate": 2.5801550844760118e-06, "loss": 0.9453, "step": 5698 }, { "epoch": 0.82, "learning_rate": 2.5762538356466365e-06, "loss": 0.7316, "step": 5699 }, { "epoch": 0.82, "learning_rate": 2.572355261342369e-06, "loss": 0.9146, "step": 5700 }, { "epoch": 0.82, "learning_rate": 2.568459362402478e-06, "loss": 0.7263, "step": 5701 }, { "epoch": 0.82, "learning_rate": 2.564566139665653e-06, "loss": 0.7257, "step": 5702 }, { "epoch": 0.82, "learning_rate": 2.560675593970014e-06, "loss": 0.6883, "step": 5703 }, { "epoch": 0.82, "learning_rate": 2.5567877261530847e-06, "loss": 0.7282, "step": 5704 }, { "epoch": 0.82, "learning_rate": 2.5529025370518417e-06, "loss": 0.6138, "step": 5705 }, { "epoch": 0.82, "learning_rate": 2.549020027502667e-06, "loss": 0.8008, "step": 5706 }, { "epoch": 0.82, "learning_rate": 2.545140198341364e-06, "loss": 0.7958, "step": 5707 }, { "epoch": 0.82, "learning_rate": 2.5412630504031633e-06, "loss": 0.7991, "step": 5708 }, { "epoch": 0.82, "learning_rate": 2.537388584522728e-06, "loss": 0.5733, "step": 5709 }, { "epoch": 0.82, "learning_rate": 2.533516801534125e-06, "loss": 0.803, "step": 5710 }, { "epoch": 0.82, "learning_rate": 2.529647702270857e-06, "loss": 0.719, "step": 5711 }, { "epoch": 0.82, "learning_rate": 2.5257812875658447e-06, "loss": 0.6677, "step": 5712 }, { "epoch": 0.82, "learning_rate": 2.5219175582514344e-06, "loss": 0.6956, "step": 5713 }, { "epoch": 0.82, "learning_rate": 2.5180565151593917e-06, "loss": 0.6936, "step": 5714 }, { "epoch": 0.82, "learning_rate": 2.5141981591208934e-06, "loss": 0.8013, "step": 5715 }, { "epoch": 0.82, "learning_rate": 2.510342490966559e-06, "loss": 0.8309, "step": 5716 }, { "epoch": 0.82, "learning_rate": 2.5064895115264206e-06, "loss": 0.613, "step": 5717 }, { "epoch": 0.82, "learning_rate": 2.5026392216299188e-06, "loss": 0.7706, "step": 5718 }, { "epoch": 0.82, "learning_rate": 2.4987916221059316e-06, "loss": 0.7257, "step": 5719 }, { "epoch": 0.82, "learning_rate": 2.4949467137827516e-06, "loss": 0.6099, "step": 5720 }, { "epoch": 0.82, "learning_rate": 2.4911044974880916e-06, "loss": 0.8404, "step": 5721 }, { "epoch": 0.82, "learning_rate": 2.4872649740490865e-06, "loss": 0.6747, "step": 5722 }, { "epoch": 0.82, "learning_rate": 2.4834281442922906e-06, "loss": 0.6197, "step": 5723 }, { "epoch": 0.82, "learning_rate": 2.479594009043681e-06, "loss": 0.8482, "step": 5724 }, { "epoch": 0.82, "learning_rate": 2.4757625691286467e-06, "loss": 0.8834, "step": 5725 }, { "epoch": 0.82, "learning_rate": 2.471933825372e-06, "loss": 0.7355, "step": 5726 }, { "epoch": 0.82, "learning_rate": 2.468107778597986e-06, "loss": 0.6828, "step": 5727 }, { "epoch": 0.82, "learning_rate": 2.4642844296302484e-06, "loss": 0.6264, "step": 5728 }, { "epoch": 0.82, "learning_rate": 2.4604637792918626e-06, "loss": 0.738, "step": 5729 }, { "epoch": 0.82, "learning_rate": 2.456645828405319e-06, "loss": 0.7282, "step": 5730 }, { "epoch": 0.82, "learning_rate": 2.4528305777925297e-06, "loss": 0.8871, "step": 5731 }, { "epoch": 0.82, "learning_rate": 2.449018028274825e-06, "loss": 0.7879, "step": 5732 }, { "epoch": 0.82, "learning_rate": 2.4452081806729443e-06, "loss": 0.7517, "step": 5733 }, { "epoch": 0.82, "learning_rate": 2.441401035807062e-06, "loss": 0.7746, "step": 5734 }, { "epoch": 0.82, "learning_rate": 2.4375965944967644e-06, "loss": 0.6908, "step": 5735 }, { "epoch": 0.82, "learning_rate": 2.4337948575610443e-06, "loss": 0.6922, "step": 5736 }, { "epoch": 0.82, "learning_rate": 2.429995825818322e-06, "loss": 0.7355, "step": 5737 }, { "epoch": 0.82, "learning_rate": 2.426199500086447e-06, "loss": 0.8136, "step": 5738 }, { "epoch": 0.82, "learning_rate": 2.4224058811826634e-06, "loss": 0.7972, "step": 5739 }, { "epoch": 0.82, "learning_rate": 2.418614969923645e-06, "loss": 0.8189, "step": 5740 }, { "epoch": 0.82, "learning_rate": 2.4148267671254836e-06, "loss": 0.8516, "step": 5741 }, { "epoch": 0.82, "learning_rate": 2.411041273603684e-06, "loss": 0.8404, "step": 5742 }, { "epoch": 0.82, "learning_rate": 2.4072584901731744e-06, "loss": 0.6875, "step": 5743 }, { "epoch": 0.82, "learning_rate": 2.4034784176482825e-06, "loss": 0.7316, "step": 5744 }, { "epoch": 0.82, "learning_rate": 2.399701056842775e-06, "loss": 0.7199, "step": 5745 }, { "epoch": 0.82, "learning_rate": 2.3959264085698248e-06, "loss": 0.8797, "step": 5746 }, { "epoch": 0.82, "learning_rate": 2.3921544736420125e-06, "loss": 0.7296, "step": 5747 }, { "epoch": 0.82, "learning_rate": 2.3883852528713463e-06, "loss": 0.7701, "step": 5748 }, { "epoch": 0.82, "learning_rate": 2.384618747069245e-06, "loss": 0.5437, "step": 5749 }, { "epoch": 0.82, "learning_rate": 2.3808549570465465e-06, "loss": 0.7835, "step": 5750 }, { "epoch": 0.82, "learning_rate": 2.3770938836135013e-06, "loss": 0.7821, "step": 5751 }, { "epoch": 0.82, "learning_rate": 2.3733355275797737e-06, "loss": 0.6805, "step": 5752 }, { "epoch": 0.82, "learning_rate": 2.3695798897544492e-06, "loss": 0.6013, "step": 5753 }, { "epoch": 0.82, "learning_rate": 2.3658269709460114e-06, "loss": 0.6666, "step": 5754 }, { "epoch": 0.82, "learning_rate": 2.362076771962385e-06, "loss": 0.8281, "step": 5755 }, { "epoch": 0.82, "learning_rate": 2.3583292936108927e-06, "loss": 0.7846, "step": 5756 }, { "epoch": 0.82, "learning_rate": 2.354584536698267e-06, "loss": 0.8443, "step": 5757 }, { "epoch": 0.82, "learning_rate": 2.350842502030661e-06, "loss": 0.8064, "step": 5758 }, { "epoch": 0.82, "learning_rate": 2.3471031904136535e-06, "loss": 0.7866, "step": 5759 }, { "epoch": 0.83, "learning_rate": 2.3433666026522154e-06, "loss": 0.6138, "step": 5760 }, { "epoch": 0.83, "learning_rate": 2.339632739550745e-06, "loss": 0.6657, "step": 5761 }, { "epoch": 0.83, "learning_rate": 2.335901601913051e-06, "loss": 0.9057, "step": 5762 }, { "epoch": 0.83, "learning_rate": 2.332173190542352e-06, "loss": 0.8571, "step": 5763 }, { "epoch": 0.83, "learning_rate": 2.3284475062412902e-06, "loss": 0.9012, "step": 5764 }, { "epoch": 0.83, "learning_rate": 2.324724549811902e-06, "loss": 0.7997, "step": 5765 }, { "epoch": 0.83, "learning_rate": 2.321004322055656e-06, "loss": 0.6074, "step": 5766 }, { "epoch": 0.83, "learning_rate": 2.3172868237734253e-06, "loss": 0.7461, "step": 5767 }, { "epoch": 0.83, "learning_rate": 2.313572055765491e-06, "loss": 0.885, "step": 5768 }, { "epoch": 0.83, "learning_rate": 2.3098600188315523e-06, "loss": 0.7419, "step": 5769 }, { "epoch": 0.83, "learning_rate": 2.306150713770721e-06, "loss": 0.8259, "step": 5770 }, { "epoch": 0.83, "learning_rate": 2.3024441413815156e-06, "loss": 0.6846, "step": 5771 }, { "epoch": 0.83, "learning_rate": 2.298740302461871e-06, "loss": 0.6582, "step": 5772 }, { "epoch": 0.83, "learning_rate": 2.2950391978091306e-06, "loss": 0.9208, "step": 5773 }, { "epoch": 0.83, "learning_rate": 2.2913408282200577e-06, "loss": 0.7472, "step": 5774 }, { "epoch": 0.83, "learning_rate": 2.2876451944908083e-06, "loss": 0.8086, "step": 5775 }, { "epoch": 0.83, "learning_rate": 2.283952297416965e-06, "loss": 0.6112, "step": 5776 }, { "epoch": 0.83, "learning_rate": 2.280262137793524e-06, "loss": 0.8362, "step": 5777 }, { "epoch": 0.83, "learning_rate": 2.2765747164148777e-06, "loss": 0.7757, "step": 5778 }, { "epoch": 0.83, "learning_rate": 2.2728900340748387e-06, "loss": 0.8114, "step": 5779 }, { "epoch": 0.83, "learning_rate": 2.2692080915666295e-06, "loss": 0.7545, "step": 5780 }, { "epoch": 0.83, "learning_rate": 2.2655288896828778e-06, "loss": 0.8426, "step": 5781 }, { "epoch": 0.83, "learning_rate": 2.2618524292156334e-06, "loss": 0.832, "step": 5782 }, { "epoch": 0.83, "learning_rate": 2.2581787109563314e-06, "loss": 0.7662, "step": 5783 }, { "epoch": 0.83, "learning_rate": 2.2545077356958456e-06, "loss": 0.8393, "step": 5784 }, { "epoch": 0.83, "learning_rate": 2.250839504224446e-06, "loss": 0.757, "step": 5785 }, { "epoch": 0.83, "learning_rate": 2.247174017331805e-06, "loss": 0.7316, "step": 5786 }, { "epoch": 0.83, "learning_rate": 2.2435112758070108e-06, "loss": 0.7628, "step": 5787 }, { "epoch": 0.83, "learning_rate": 2.2398512804385723e-06, "loss": 0.7813, "step": 5788 }, { "epoch": 0.83, "learning_rate": 2.236194032014386e-06, "loss": 0.6515, "step": 5789 }, { "epoch": 0.83, "learning_rate": 2.2325395313217685e-06, "loss": 0.9503, "step": 5790 }, { "epoch": 0.83, "learning_rate": 2.2288877791474436e-06, "loss": 0.8186, "step": 5791 }, { "epoch": 0.83, "learning_rate": 2.225238776277545e-06, "loss": 0.7288, "step": 5792 }, { "epoch": 0.83, "learning_rate": 2.2215925234976138e-06, "loss": 0.6875, "step": 5793 }, { "epoch": 0.83, "learning_rate": 2.21794902159259e-06, "loss": 0.6671, "step": 5794 }, { "epoch": 0.83, "learning_rate": 2.214308271346839e-06, "loss": 0.6948, "step": 5795 }, { "epoch": 0.83, "learning_rate": 2.2106702735441224e-06, "loss": 0.7254, "step": 5796 }, { "epoch": 0.83, "learning_rate": 2.2070350289676066e-06, "loss": 0.7573, "step": 5797 }, { "epoch": 0.83, "learning_rate": 2.203402538399872e-06, "loss": 0.7559, "step": 5798 }, { "epoch": 0.83, "learning_rate": 2.199772802622904e-06, "loss": 0.6914, "step": 5799 }, { "epoch": 0.83, "learning_rate": 2.196145822418095e-06, "loss": 0.7623, "step": 5800 }, { "epoch": 0.83, "learning_rate": 2.1925215985662457e-06, "loss": 0.6885, "step": 5801 }, { "epoch": 0.83, "learning_rate": 2.1889001318475594e-06, "loss": 0.6705, "step": 5802 }, { "epoch": 0.83, "learning_rate": 2.185281423041653e-06, "loss": 0.6733, "step": 5803 }, { "epoch": 0.83, "learning_rate": 2.1816654729275366e-06, "loss": 0.7706, "step": 5804 }, { "epoch": 0.83, "learning_rate": 2.1780522822836373e-06, "loss": 0.7913, "step": 5805 }, { "epoch": 0.83, "learning_rate": 2.1744418518877936e-06, "loss": 0.656, "step": 5806 }, { "epoch": 0.83, "learning_rate": 2.1708341825172328e-06, "loss": 0.8041, "step": 5807 }, { "epoch": 0.83, "learning_rate": 2.167229274948601e-06, "loss": 0.9342, "step": 5808 }, { "epoch": 0.83, "learning_rate": 2.1636271299579426e-06, "loss": 0.8331, "step": 5809 }, { "epoch": 0.83, "learning_rate": 2.1600277483207133e-06, "loss": 0.7341, "step": 5810 }, { "epoch": 0.83, "learning_rate": 2.15643113081177e-06, "loss": 0.7171, "step": 5811 }, { "epoch": 0.83, "learning_rate": 2.152837278205374e-06, "loss": 0.9213, "step": 5812 }, { "epoch": 0.83, "learning_rate": 2.149246191275196e-06, "loss": 0.7282, "step": 5813 }, { "epoch": 0.83, "learning_rate": 2.1456578707943076e-06, "loss": 0.6155, "step": 5814 }, { "epoch": 0.83, "learning_rate": 2.142072317535179e-06, "loss": 0.7093, "step": 5815 }, { "epoch": 0.83, "learning_rate": 2.1384895322696983e-06, "loss": 0.8253, "step": 5816 }, { "epoch": 0.83, "learning_rate": 2.1349095157691533e-06, "loss": 0.5419, "step": 5817 }, { "epoch": 0.83, "learning_rate": 2.1313322688042226e-06, "loss": 0.6928, "step": 5818 }, { "epoch": 0.83, "learning_rate": 2.1277577921450065e-06, "loss": 0.6906, "step": 5819 }, { "epoch": 0.83, "learning_rate": 2.1241860865609963e-06, "loss": 0.7852, "step": 5820 }, { "epoch": 0.83, "learning_rate": 2.1206171528210973e-06, "loss": 0.7271, "step": 5821 }, { "epoch": 0.83, "learning_rate": 2.1170509916936093e-06, "loss": 0.7185, "step": 5822 }, { "epoch": 0.83, "learning_rate": 2.1134876039462385e-06, "loss": 0.736, "step": 5823 }, { "epoch": 0.83, "learning_rate": 2.1099269903461e-06, "loss": 0.6515, "step": 5824 }, { "epoch": 0.83, "learning_rate": 2.106369151659695e-06, "loss": 0.904, "step": 5825 }, { "epoch": 0.83, "learning_rate": 2.102814088652943e-06, "loss": 0.7796, "step": 5826 }, { "epoch": 0.83, "learning_rate": 2.0992618020911663e-06, "loss": 0.7366, "step": 5827 }, { "epoch": 0.83, "learning_rate": 2.095712292739079e-06, "loss": 0.7737, "step": 5828 }, { "epoch": 0.83, "learning_rate": 2.092165561360802e-06, "loss": 0.7245, "step": 5829 }, { "epoch": 0.84, "learning_rate": 2.0886216087198605e-06, "loss": 0.9347, "step": 5830 }, { "epoch": 0.84, "learning_rate": 2.085080435579179e-06, "loss": 0.7436, "step": 5831 }, { "epoch": 0.84, "learning_rate": 2.0815420427010893e-06, "loss": 0.7076, "step": 5832 }, { "epoch": 0.84, "learning_rate": 2.078006430847309e-06, "loss": 0.7561, "step": 5833 }, { "epoch": 0.84, "learning_rate": 2.074473600778977e-06, "loss": 0.6657, "step": 5834 }, { "epoch": 0.84, "learning_rate": 2.070943553256626e-06, "loss": 0.8705, "step": 5835 }, { "epoch": 0.84, "learning_rate": 2.067416289040178e-06, "loss": 0.7263, "step": 5836 }, { "epoch": 0.84, "learning_rate": 2.0638918088889706e-06, "loss": 0.686, "step": 5837 }, { "epoch": 0.84, "learning_rate": 2.060370113561743e-06, "loss": 0.7584, "step": 5838 }, { "epoch": 0.84, "learning_rate": 2.0568512038166205e-06, "loss": 0.7935, "step": 5839 }, { "epoch": 0.84, "learning_rate": 2.053335080411142e-06, "loss": 1.0268, "step": 5840 }, { "epoch": 0.84, "learning_rate": 2.049821744102238e-06, "loss": 0.7617, "step": 5841 }, { "epoch": 0.84, "learning_rate": 2.0463111956462466e-06, "loss": 0.8348, "step": 5842 }, { "epoch": 0.84, "learning_rate": 2.0428034357989054e-06, "loss": 0.817, "step": 5843 }, { "epoch": 0.84, "learning_rate": 2.0392984653153346e-06, "loss": 0.7461, "step": 5844 }, { "epoch": 0.84, "learning_rate": 2.035796284950082e-06, "loss": 0.8371, "step": 5845 }, { "epoch": 0.84, "learning_rate": 2.032296895457077e-06, "loss": 0.8142, "step": 5846 }, { "epoch": 0.84, "learning_rate": 2.028800297589648e-06, "loss": 0.7813, "step": 5847 }, { "epoch": 0.84, "learning_rate": 2.025306492100526e-06, "loss": 0.7093, "step": 5848 }, { "epoch": 0.84, "learning_rate": 2.0218154797418427e-06, "loss": 0.62, "step": 5849 }, { "epoch": 0.84, "learning_rate": 2.0183272612651267e-06, "loss": 0.728, "step": 5850 }, { "epoch": 0.84, "learning_rate": 2.014841837421304e-06, "loss": 0.6854, "step": 5851 }, { "epoch": 0.84, "learning_rate": 2.0113592089607016e-06, "loss": 0.7533, "step": 5852 }, { "epoch": 0.84, "learning_rate": 2.0078793766330458e-06, "loss": 0.6314, "step": 5853 }, { "epoch": 0.84, "learning_rate": 2.0044023411874524e-06, "loss": 0.8044, "step": 5854 }, { "epoch": 0.84, "learning_rate": 2.0009281033724397e-06, "loss": 0.7174, "step": 5855 }, { "epoch": 0.84, "learning_rate": 1.997456663935936e-06, "loss": 0.8052, "step": 5856 }, { "epoch": 0.84, "learning_rate": 1.993988023625247e-06, "loss": 0.6705, "step": 5857 }, { "epoch": 0.84, "learning_rate": 1.9905221831870862e-06, "loss": 0.8086, "step": 5858 }, { "epoch": 0.84, "learning_rate": 1.9870591433675663e-06, "loss": 0.8158, "step": 5859 }, { "epoch": 0.84, "learning_rate": 1.9835989049121916e-06, "loss": 0.7656, "step": 5860 }, { "epoch": 0.84, "learning_rate": 1.9801414685658705e-06, "loss": 0.7648, "step": 5861 }, { "epoch": 0.84, "learning_rate": 1.9766868350728934e-06, "loss": 0.8566, "step": 5862 }, { "epoch": 0.84, "learning_rate": 1.9732350051769658e-06, "loss": 0.7009, "step": 5863 }, { "epoch": 0.84, "learning_rate": 1.9697859796211837e-06, "loss": 0.7285, "step": 5864 }, { "epoch": 0.84, "learning_rate": 1.9663397591480292e-06, "loss": 0.6816, "step": 5865 }, { "epoch": 0.84, "learning_rate": 1.9628963444993875e-06, "loss": 0.5865, "step": 5866 }, { "epoch": 0.84, "learning_rate": 1.959455736416552e-06, "loss": 0.8778, "step": 5867 }, { "epoch": 0.84, "learning_rate": 1.956017935640189e-06, "loss": 0.6814, "step": 5868 }, { "epoch": 0.84, "learning_rate": 1.9525829429103765e-06, "loss": 0.6144, "step": 5869 }, { "epoch": 0.84, "learning_rate": 1.9491507589665825e-06, "loss": 0.7628, "step": 5870 }, { "epoch": 0.84, "learning_rate": 1.945721384547671e-06, "loss": 0.8521, "step": 5871 }, { "epoch": 0.84, "learning_rate": 1.942294820391907e-06, "loss": 0.5363, "step": 5872 }, { "epoch": 0.84, "learning_rate": 1.9388710672369337e-06, "loss": 0.8114, "step": 5873 }, { "epoch": 0.84, "learning_rate": 1.9354501258198124e-06, "loss": 0.7634, "step": 5874 }, { "epoch": 0.84, "learning_rate": 1.9320319968769784e-06, "loss": 0.7974, "step": 5875 }, { "epoch": 0.84, "learning_rate": 1.9286166811442708e-06, "loss": 0.7553, "step": 5876 }, { "epoch": 0.84, "learning_rate": 1.9252041793569315e-06, "loss": 0.7419, "step": 5877 }, { "epoch": 0.84, "learning_rate": 1.9217944922495777e-06, "loss": 0.7606, "step": 5878 }, { "epoch": 0.84, "learning_rate": 1.9183876205562344e-06, "loss": 0.7985, "step": 5879 }, { "epoch": 0.84, "learning_rate": 1.914983565010318e-06, "loss": 0.8214, "step": 5880 }, { "epoch": 0.84, "learning_rate": 1.9115823263446358e-06, "loss": 0.7143, "step": 5881 }, { "epoch": 0.84, "learning_rate": 1.9081839052913935e-06, "loss": 0.7333, "step": 5882 }, { "epoch": 0.84, "learning_rate": 1.9047883025821777e-06, "loss": 0.7327, "step": 5883 }, { "epoch": 0.84, "learning_rate": 1.901395518947987e-06, "loss": 0.7852, "step": 5884 }, { "epoch": 0.84, "learning_rate": 1.8980055551192038e-06, "loss": 0.7517, "step": 5885 }, { "epoch": 0.84, "learning_rate": 1.8946184118255994e-06, "loss": 0.8052, "step": 5886 }, { "epoch": 0.84, "learning_rate": 1.8912340897963388e-06, "loss": 0.7199, "step": 5887 }, { "epoch": 0.84, "learning_rate": 1.8878525897599925e-06, "loss": 0.7985, "step": 5888 }, { "epoch": 0.84, "learning_rate": 1.8844739124445065e-06, "loss": 0.7324, "step": 5889 }, { "epoch": 0.84, "learning_rate": 1.8810980585772276e-06, "loss": 0.6627, "step": 5890 }, { "epoch": 0.84, "learning_rate": 1.877725028884894e-06, "loss": 0.7241, "step": 5891 }, { "epoch": 0.84, "learning_rate": 1.8743548240936353e-06, "loss": 0.659, "step": 5892 }, { "epoch": 0.84, "learning_rate": 1.8709874449289793e-06, "loss": 0.7467, "step": 5893 }, { "epoch": 0.84, "learning_rate": 1.8676228921158245e-06, "loss": 0.7433, "step": 5894 }, { "epoch": 0.84, "learning_rate": 1.8642611663784897e-06, "loss": 0.7595, "step": 5895 }, { "epoch": 0.84, "learning_rate": 1.8609022684406712e-06, "loss": 0.6261, "step": 5896 }, { "epoch": 0.84, "learning_rate": 1.8575461990254472e-06, "loss": 0.8119, "step": 5897 }, { "epoch": 0.84, "learning_rate": 1.8541929588553013e-06, "loss": 0.7712, "step": 5898 }, { "epoch": 0.85, "learning_rate": 1.850842548652103e-06, "loss": 0.6998, "step": 5899 }, { "epoch": 0.85, "learning_rate": 1.8474949691371134e-06, "loss": 0.6942, "step": 5900 }, { "epoch": 0.85, "learning_rate": 1.8441502210309847e-06, "loss": 0.87, "step": 5901 }, { "epoch": 0.85, "learning_rate": 1.840808305053754e-06, "loss": 0.7782, "step": 5902 }, { "epoch": 0.85, "learning_rate": 1.8374692219248607e-06, "loss": 0.7746, "step": 5903 }, { "epoch": 0.85, "learning_rate": 1.8341329723631168e-06, "loss": 0.8465, "step": 5904 }, { "epoch": 0.85, "learning_rate": 1.8307995570867375e-06, "loss": 0.7098, "step": 5905 }, { "epoch": 0.85, "learning_rate": 1.8274689768133325e-06, "loss": 0.6599, "step": 5906 }, { "epoch": 0.85, "learning_rate": 1.8241412322598844e-06, "loss": 0.7199, "step": 5907 }, { "epoch": 0.85, "learning_rate": 1.8208163241427773e-06, "loss": 0.7974, "step": 5908 }, { "epoch": 0.85, "learning_rate": 1.8174942531777816e-06, "loss": 0.6288, "step": 5909 }, { "epoch": 0.85, "learning_rate": 1.8141750200800556e-06, "loss": 0.7821, "step": 5910 }, { "epoch": 0.85, "learning_rate": 1.8108586255641524e-06, "loss": 0.7757, "step": 5911 }, { "epoch": 0.85, "learning_rate": 1.8075450703440011e-06, "loss": 0.7818, "step": 5912 }, { "epoch": 0.85, "learning_rate": 1.8042343551329376e-06, "loss": 0.8895, "step": 5913 }, { "epoch": 0.85, "learning_rate": 1.8009264806436738e-06, "loss": 0.5762, "step": 5914 }, { "epoch": 0.85, "learning_rate": 1.7976214475883113e-06, "loss": 0.8209, "step": 5915 }, { "epoch": 0.85, "learning_rate": 1.7943192566783394e-06, "loss": 0.7118, "step": 5916 }, { "epoch": 0.85, "learning_rate": 1.7910199086246464e-06, "loss": 0.7952, "step": 5917 }, { "epoch": 0.85, "learning_rate": 1.787723404137494e-06, "loss": 0.6892, "step": 5918 }, { "epoch": 0.85, "learning_rate": 1.7844297439265395e-06, "loss": 0.5737, "step": 5919 }, { "epoch": 0.85, "learning_rate": 1.7811389287008257e-06, "loss": 0.7472, "step": 5920 }, { "epoch": 0.85, "learning_rate": 1.7778509591687858e-06, "loss": 0.6518, "step": 5921 }, { "epoch": 0.85, "learning_rate": 1.774565836038239e-06, "loss": 0.7879, "step": 5922 }, { "epoch": 0.85, "learning_rate": 1.7712835600163824e-06, "loss": 0.8242, "step": 5923 }, { "epoch": 0.85, "learning_rate": 1.7680041318098195e-06, "loss": 0.6816, "step": 5924 }, { "epoch": 0.85, "learning_rate": 1.7647275521245287e-06, "loss": 0.6077, "step": 5925 }, { "epoch": 0.85, "learning_rate": 1.761453821665871e-06, "loss": 0.8644, "step": 5926 }, { "epoch": 0.85, "learning_rate": 1.7581829411386026e-06, "loss": 0.6632, "step": 5927 }, { "epoch": 0.85, "learning_rate": 1.7549149112468632e-06, "loss": 0.7305, "step": 5928 }, { "epoch": 0.85, "learning_rate": 1.7516497326941772e-06, "loss": 0.5564, "step": 5929 }, { "epoch": 0.85, "learning_rate": 1.7483874061834588e-06, "loss": 0.9068, "step": 5930 }, { "epoch": 0.85, "learning_rate": 1.7451279324170044e-06, "loss": 0.7522, "step": 5931 }, { "epoch": 0.85, "learning_rate": 1.741871312096503e-06, "loss": 0.6535, "step": 5932 }, { "epoch": 0.85, "learning_rate": 1.7386175459230169e-06, "loss": 0.8011, "step": 5933 }, { "epoch": 0.85, "learning_rate": 1.7353666345970026e-06, "loss": 0.8175, "step": 5934 }, { "epoch": 0.85, "learning_rate": 1.7321185788183085e-06, "loss": 0.6842, "step": 5935 }, { "epoch": 0.85, "learning_rate": 1.7288733792861523e-06, "loss": 0.8019, "step": 5936 }, { "epoch": 0.85, "learning_rate": 1.725631036699148e-06, "loss": 0.6629, "step": 5937 }, { "epoch": 0.85, "learning_rate": 1.7223915517552925e-06, "loss": 0.8343, "step": 5938 }, { "epoch": 0.85, "learning_rate": 1.7191549251519646e-06, "loss": 0.8432, "step": 5939 }, { "epoch": 0.85, "learning_rate": 1.7159211575859335e-06, "loss": 0.8923, "step": 5940 }, { "epoch": 0.85, "learning_rate": 1.7126902497533458e-06, "loss": 0.7796, "step": 5941 }, { "epoch": 0.85, "learning_rate": 1.7094622023497386e-06, "loss": 0.6655, "step": 5942 }, { "epoch": 0.85, "learning_rate": 1.7062370160700309e-06, "loss": 0.613, "step": 5943 }, { "epoch": 0.85, "learning_rate": 1.7030146916085187e-06, "loss": 0.7966, "step": 5944 }, { "epoch": 0.85, "learning_rate": 1.6997952296588976e-06, "loss": 0.7182, "step": 5945 }, { "epoch": 0.85, "learning_rate": 1.6965786309142378e-06, "loss": 0.8047, "step": 5946 }, { "epoch": 0.85, "learning_rate": 1.6933648960669873e-06, "loss": 0.631, "step": 5947 }, { "epoch": 0.85, "learning_rate": 1.6901540258089858e-06, "loss": 0.8348, "step": 5948 }, { "epoch": 0.85, "learning_rate": 1.6869460208314558e-06, "loss": 0.8019, "step": 5949 }, { "epoch": 0.85, "learning_rate": 1.6837408818249993e-06, "loss": 0.6554, "step": 5950 }, { "epoch": 0.85, "learning_rate": 1.680538609479605e-06, "loss": 0.7062, "step": 5951 }, { "epoch": 0.85, "learning_rate": 1.6773392044846442e-06, "loss": 0.7567, "step": 5952 }, { "epoch": 0.85, "learning_rate": 1.6741426675288697e-06, "loss": 0.7578, "step": 5953 }, { "epoch": 0.85, "learning_rate": 1.6709489993004122e-06, "loss": 0.8103, "step": 5954 }, { "epoch": 0.85, "learning_rate": 1.6677582004867891e-06, "loss": 0.7218, "step": 5955 }, { "epoch": 0.85, "learning_rate": 1.6645702717749106e-06, "loss": 0.8513, "step": 5956 }, { "epoch": 0.85, "learning_rate": 1.6613852138510476e-06, "loss": 0.7291, "step": 5957 }, { "epoch": 0.85, "learning_rate": 1.6582030274008686e-06, "loss": 0.7121, "step": 5958 }, { "epoch": 0.85, "learning_rate": 1.6550237131094203e-06, "loss": 0.564, "step": 5959 }, { "epoch": 0.85, "learning_rate": 1.6518472716611305e-06, "loss": 0.7963, "step": 5960 }, { "epoch": 0.85, "learning_rate": 1.6486737037398102e-06, "loss": 0.875, "step": 5961 }, { "epoch": 0.85, "learning_rate": 1.64550301002864e-06, "loss": 0.647, "step": 5962 }, { "epoch": 0.85, "learning_rate": 1.6423351912102035e-06, "loss": 0.493, "step": 5963 }, { "epoch": 0.85, "learning_rate": 1.6391702479664522e-06, "loss": 0.7352, "step": 5964 }, { "epoch": 0.85, "learning_rate": 1.6360081809787143e-06, "loss": 0.7879, "step": 5965 }, { "epoch": 0.85, "learning_rate": 1.6328489909277051e-06, "loss": 0.7073, "step": 5966 }, { "epoch": 0.85, "learning_rate": 1.6296926784935273e-06, "loss": 0.6839, "step": 5967 }, { "epoch": 0.85, "learning_rate": 1.626539244355651e-06, "loss": 0.7941, "step": 5968 }, { "epoch": 0.86, "learning_rate": 1.6233886891929312e-06, "loss": 0.5898, "step": 5969 }, { "epoch": 0.86, "learning_rate": 1.6202410136836087e-06, "loss": 0.9688, "step": 5970 }, { "epoch": 0.86, "learning_rate": 1.6170962185052983e-06, "loss": 0.76, "step": 5971 }, { "epoch": 0.86, "learning_rate": 1.6139543043349985e-06, "loss": 0.7506, "step": 5972 }, { "epoch": 0.86, "learning_rate": 1.6108152718490782e-06, "loss": 0.6934, "step": 5973 }, { "epoch": 0.86, "learning_rate": 1.6076791217233033e-06, "loss": 0.7109, "step": 5974 }, { "epoch": 0.86, "learning_rate": 1.6045458546328058e-06, "loss": 0.7316, "step": 5975 }, { "epoch": 0.86, "learning_rate": 1.6014154712520973e-06, "loss": 0.8117, "step": 5976 }, { "epoch": 0.86, "learning_rate": 1.598287972255076e-06, "loss": 0.7254, "step": 5977 }, { "epoch": 0.86, "learning_rate": 1.5951633583150127e-06, "loss": 0.7148, "step": 5978 }, { "epoch": 0.86, "learning_rate": 1.5920416301045582e-06, "loss": 0.7422, "step": 5979 }, { "epoch": 0.86, "learning_rate": 1.588922788295748e-06, "loss": 0.8627, "step": 5980 }, { "epoch": 0.86, "learning_rate": 1.5858068335599869e-06, "loss": 0.7712, "step": 5981 }, { "epoch": 0.86, "learning_rate": 1.5826937665680696e-06, "loss": 0.5725, "step": 5982 }, { "epoch": 0.86, "learning_rate": 1.579583587990152e-06, "loss": 0.8142, "step": 5983 }, { "epoch": 0.86, "learning_rate": 1.5764762984957837e-06, "loss": 0.6722, "step": 5984 }, { "epoch": 0.86, "learning_rate": 1.5733718987538925e-06, "loss": 0.9146, "step": 5985 }, { "epoch": 0.86, "learning_rate": 1.5702703894327708e-06, "loss": 0.7249, "step": 5986 }, { "epoch": 0.86, "learning_rate": 1.567171771200101e-06, "loss": 0.6208, "step": 5987 }, { "epoch": 0.86, "learning_rate": 1.5640760447229379e-06, "loss": 0.625, "step": 5988 }, { "epoch": 0.86, "learning_rate": 1.5609832106677136e-06, "loss": 0.8064, "step": 5989 }, { "epoch": 0.86, "learning_rate": 1.5578932697002434e-06, "loss": 0.6629, "step": 5990 }, { "epoch": 0.86, "learning_rate": 1.5548062224857074e-06, "loss": 0.7972, "step": 5991 }, { "epoch": 0.86, "learning_rate": 1.5517220696886753e-06, "loss": 0.6115, "step": 5992 }, { "epoch": 0.86, "learning_rate": 1.5486408119730926e-06, "loss": 0.6825, "step": 5993 }, { "epoch": 0.86, "learning_rate": 1.5455624500022707e-06, "loss": 0.8214, "step": 5994 }, { "epoch": 0.86, "learning_rate": 1.542486984438906e-06, "loss": 0.9492, "step": 5995 }, { "epoch": 0.86, "learning_rate": 1.5394144159450757e-06, "loss": 0.9261, "step": 5996 }, { "epoch": 0.86, "learning_rate": 1.5363447451822222e-06, "loss": 0.8823, "step": 5997 }, { "epoch": 0.86, "learning_rate": 1.5332779728111701e-06, "loss": 0.6242, "step": 5998 }, { "epoch": 0.86, "learning_rate": 1.5302140994921215e-06, "loss": 0.8181, "step": 5999 }, { "epoch": 0.86, "learning_rate": 1.527153125884652e-06, "loss": 0.6515, "step": 6000 }, { "epoch": 0.86, "learning_rate": 1.5240950526477132e-06, "loss": 0.9096, "step": 6001 }, { "epoch": 0.86, "learning_rate": 1.5210398804396331e-06, "loss": 0.7681, "step": 6002 }, { "epoch": 0.86, "learning_rate": 1.5179876099181155e-06, "loss": 0.6811, "step": 6003 }, { "epoch": 0.86, "learning_rate": 1.514938241740237e-06, "loss": 0.7478, "step": 6004 }, { "epoch": 0.86, "learning_rate": 1.5118917765624467e-06, "loss": 0.6194, "step": 6005 }, { "epoch": 0.86, "learning_rate": 1.508848215040584e-06, "loss": 0.6862, "step": 6006 }, { "epoch": 0.86, "learning_rate": 1.5058075578298448e-06, "loss": 0.6087, "step": 6007 }, { "epoch": 0.86, "learning_rate": 1.5027698055848082e-06, "loss": 0.7305, "step": 6008 }, { "epoch": 0.86, "learning_rate": 1.4997349589594283e-06, "loss": 0.8404, "step": 6009 }, { "epoch": 0.86, "learning_rate": 1.4967030186070325e-06, "loss": 0.8404, "step": 6010 }, { "epoch": 0.86, "learning_rate": 1.4936739851803233e-06, "loss": 0.8421, "step": 6011 }, { "epoch": 0.86, "learning_rate": 1.490647859331371e-06, "loss": 0.7656, "step": 6012 }, { "epoch": 0.86, "learning_rate": 1.4876246417116312e-06, "loss": 0.7243, "step": 6013 }, { "epoch": 0.86, "learning_rate": 1.4846043329719318e-06, "loss": 0.8504, "step": 6014 }, { "epoch": 0.86, "learning_rate": 1.481586933762462e-06, "loss": 0.6183, "step": 6015 }, { "epoch": 0.86, "learning_rate": 1.478572444732793e-06, "loss": 0.726, "step": 6016 }, { "epoch": 0.86, "learning_rate": 1.4755608665318787e-06, "loss": 0.7846, "step": 6017 }, { "epoch": 0.86, "learning_rate": 1.4725521998080305e-06, "loss": 0.6482, "step": 6018 }, { "epoch": 0.86, "learning_rate": 1.4695464452089403e-06, "loss": 0.7065, "step": 6019 }, { "epoch": 0.86, "learning_rate": 1.4665436033816736e-06, "loss": 0.7432, "step": 6020 }, { "epoch": 0.86, "learning_rate": 1.463543674972667e-06, "loss": 0.8538, "step": 6021 }, { "epoch": 0.86, "learning_rate": 1.4605466606277373e-06, "loss": 0.9074, "step": 6022 }, { "epoch": 0.86, "learning_rate": 1.4575525609920548e-06, "loss": 0.74, "step": 6023 }, { "epoch": 0.86, "learning_rate": 1.4545613767101855e-06, "loss": 0.6571, "step": 6024 }, { "epoch": 0.86, "learning_rate": 1.4515731084260564e-06, "loss": 0.772, "step": 6025 }, { "epoch": 0.86, "learning_rate": 1.4485877567829643e-06, "loss": 0.8276, "step": 6026 }, { "epoch": 0.86, "learning_rate": 1.4456053224235815e-06, "loss": 0.7441, "step": 6027 }, { "epoch": 0.86, "learning_rate": 1.4426258059899533e-06, "loss": 0.7522, "step": 6028 }, { "epoch": 0.86, "learning_rate": 1.439649208123498e-06, "loss": 0.7623, "step": 6029 }, { "epoch": 0.86, "learning_rate": 1.4366755294649997e-06, "loss": 0.7121, "step": 6030 }, { "epoch": 0.86, "learning_rate": 1.4337047706546214e-06, "loss": 0.7134, "step": 6031 }, { "epoch": 0.86, "learning_rate": 1.430736932331893e-06, "loss": 0.7148, "step": 6032 }, { "epoch": 0.86, "learning_rate": 1.4277720151357154e-06, "loss": 0.75, "step": 6033 }, { "epoch": 0.86, "learning_rate": 1.4248100197043578e-06, "loss": 0.748, "step": 6034 }, { "epoch": 0.86, "learning_rate": 1.4218509466754754e-06, "loss": 0.6429, "step": 6035 }, { "epoch": 0.86, "learning_rate": 1.418894796686075e-06, "loss": 0.5578, "step": 6036 }, { "epoch": 0.86, "learning_rate": 1.4159415703725442e-06, "loss": 0.7383, "step": 6037 }, { "epoch": 0.86, "learning_rate": 1.4129912683706397e-06, "loss": 0.8365, "step": 6038 }, { "epoch": 0.87, "learning_rate": 1.41004389131549e-06, "loss": 0.6183, "step": 6039 }, { "epoch": 0.87, "learning_rate": 1.4070994398415927e-06, "loss": 0.8345, "step": 6040 }, { "epoch": 0.87, "learning_rate": 1.4041579145828087e-06, "loss": 0.7718, "step": 6041 }, { "epoch": 0.87, "learning_rate": 1.4012193161723835e-06, "loss": 0.7042, "step": 6042 }, { "epoch": 0.87, "learning_rate": 1.398283645242926e-06, "loss": 0.7757, "step": 6043 }, { "epoch": 0.87, "learning_rate": 1.3953509024264055e-06, "loss": 0.7494, "step": 6044 }, { "epoch": 0.87, "learning_rate": 1.3924210883541706e-06, "loss": 0.8549, "step": 6045 }, { "epoch": 0.87, "learning_rate": 1.3894942036569446e-06, "loss": 0.8287, "step": 6046 }, { "epoch": 0.87, "learning_rate": 1.3865702489648074e-06, "loss": 0.8172, "step": 6047 }, { "epoch": 0.87, "learning_rate": 1.3836492249072153e-06, "loss": 0.656, "step": 6048 }, { "epoch": 0.87, "learning_rate": 1.3807311321129917e-06, "loss": 0.7606, "step": 6049 }, { "epoch": 0.87, "learning_rate": 1.3778159712103327e-06, "loss": 0.7974, "step": 6050 }, { "epoch": 0.87, "learning_rate": 1.3749037428267996e-06, "loss": 0.7148, "step": 6051 }, { "epoch": 0.87, "learning_rate": 1.371994447589316e-06, "loss": 0.7907, "step": 6052 }, { "epoch": 0.87, "learning_rate": 1.3690880861241906e-06, "loss": 0.7299, "step": 6053 }, { "epoch": 0.87, "learning_rate": 1.3661846590570914e-06, "loss": 0.882, "step": 6054 }, { "epoch": 0.87, "learning_rate": 1.3632841670130454e-06, "loss": 0.6936, "step": 6055 }, { "epoch": 0.87, "learning_rate": 1.3603866106164643e-06, "loss": 0.4975, "step": 6056 }, { "epoch": 0.87, "learning_rate": 1.357491990491116e-06, "loss": 0.6914, "step": 6057 }, { "epoch": 0.87, "learning_rate": 1.3546003072601438e-06, "loss": 0.6691, "step": 6058 }, { "epoch": 0.87, "learning_rate": 1.351711561546053e-06, "loss": 0.6042, "step": 6059 }, { "epoch": 0.87, "learning_rate": 1.348825753970721e-06, "loss": 0.7274, "step": 6060 }, { "epoch": 0.87, "learning_rate": 1.3459428851553928e-06, "loss": 0.7232, "step": 6061 }, { "epoch": 0.87, "learning_rate": 1.3430629557206735e-06, "loss": 0.7564, "step": 6062 }, { "epoch": 0.87, "learning_rate": 1.3401859662865407e-06, "loss": 0.7913, "step": 6063 }, { "epoch": 0.87, "learning_rate": 1.3373119174723452e-06, "loss": 0.7313, "step": 6064 }, { "epoch": 0.87, "learning_rate": 1.3344408098967942e-06, "loss": 0.8398, "step": 6065 }, { "epoch": 0.87, "learning_rate": 1.331572644177963e-06, "loss": 0.7559, "step": 6066 }, { "epoch": 0.87, "learning_rate": 1.3287074209333061e-06, "loss": 0.8401, "step": 6067 }, { "epoch": 0.87, "learning_rate": 1.3258451407796268e-06, "loss": 0.7969, "step": 6068 }, { "epoch": 0.87, "learning_rate": 1.3229858043331055e-06, "loss": 0.6297, "step": 6069 }, { "epoch": 0.87, "learning_rate": 1.3201294122092866e-06, "loss": 0.6456, "step": 6070 }, { "epoch": 0.87, "learning_rate": 1.3172759650230815e-06, "loss": 0.7372, "step": 6071 }, { "epoch": 0.87, "learning_rate": 1.3144254633887703e-06, "loss": 0.7366, "step": 6072 }, { "epoch": 0.87, "learning_rate": 1.3115779079199842e-06, "loss": 0.6883, "step": 6073 }, { "epoch": 0.87, "learning_rate": 1.3087332992297425e-06, "loss": 0.7902, "step": 6074 }, { "epoch": 0.87, "learning_rate": 1.3058916379304169e-06, "loss": 0.7201, "step": 6075 }, { "epoch": 0.87, "learning_rate": 1.3030529246337437e-06, "loss": 0.7302, "step": 6076 }, { "epoch": 0.87, "learning_rate": 1.3002171599508266e-06, "loss": 0.8398, "step": 6077 }, { "epoch": 0.87, "learning_rate": 1.2973843444921395e-06, "loss": 0.7896, "step": 6078 }, { "epoch": 0.87, "learning_rate": 1.2945544788675162e-06, "loss": 0.8108, "step": 6079 }, { "epoch": 0.87, "learning_rate": 1.2917275636861574e-06, "loss": 0.7321, "step": 6080 }, { "epoch": 0.87, "learning_rate": 1.288903599556625e-06, "loss": 0.7285, "step": 6081 }, { "epoch": 0.87, "learning_rate": 1.2860825870868542e-06, "loss": 0.6652, "step": 6082 }, { "epoch": 0.87, "learning_rate": 1.2832645268841342e-06, "loss": 0.7447, "step": 6083 }, { "epoch": 0.87, "learning_rate": 1.2804494195551213e-06, "loss": 0.8198, "step": 6084 }, { "epoch": 0.87, "learning_rate": 1.2776372657058476e-06, "loss": 0.8078, "step": 6085 }, { "epoch": 0.87, "learning_rate": 1.2748280659416922e-06, "loss": 0.7416, "step": 6086 }, { "epoch": 0.87, "learning_rate": 1.272021820867408e-06, "loss": 0.8064, "step": 6087 }, { "epoch": 0.87, "learning_rate": 1.269218531087113e-06, "loss": 0.7592, "step": 6088 }, { "epoch": 0.87, "learning_rate": 1.2664181972042816e-06, "loss": 0.6607, "step": 6089 }, { "epoch": 0.87, "learning_rate": 1.263620819821763e-06, "loss": 0.8471, "step": 6090 }, { "epoch": 0.87, "learning_rate": 1.260826399541754e-06, "loss": 0.8281, "step": 6091 }, { "epoch": 0.87, "learning_rate": 1.2580349369658333e-06, "loss": 0.5818, "step": 6092 }, { "epoch": 0.87, "learning_rate": 1.2552464326949303e-06, "loss": 0.7628, "step": 6093 }, { "epoch": 0.87, "learning_rate": 1.2524608873293397e-06, "loss": 0.7299, "step": 6094 }, { "epoch": 0.87, "learning_rate": 1.249678301468717e-06, "loss": 0.7054, "step": 6095 }, { "epoch": 0.87, "learning_rate": 1.2468986757120943e-06, "loss": 0.885, "step": 6096 }, { "epoch": 0.87, "learning_rate": 1.2441220106578482e-06, "loss": 0.7706, "step": 6097 }, { "epoch": 0.87, "learning_rate": 1.2413483069037274e-06, "loss": 0.654, "step": 6098 }, { "epoch": 0.87, "learning_rate": 1.2385775650468423e-06, "loss": 0.4068, "step": 6099 }, { "epoch": 0.87, "learning_rate": 1.2358097856836658e-06, "loss": 0.8421, "step": 6100 }, { "epoch": 0.87, "learning_rate": 1.233044969410035e-06, "loss": 0.7143, "step": 6101 }, { "epoch": 0.87, "learning_rate": 1.2302831168211365e-06, "loss": 0.7213, "step": 6102 }, { "epoch": 0.87, "learning_rate": 1.2275242285115383e-06, "loss": 0.7533, "step": 6103 }, { "epoch": 0.87, "learning_rate": 1.224768305075159e-06, "loss": 0.5301, "step": 6104 }, { "epoch": 0.87, "learning_rate": 1.2220153471052786e-06, "loss": 0.6476, "step": 6105 }, { "epoch": 0.87, "learning_rate": 1.2192653551945416e-06, "loss": 0.7427, "step": 6106 }, { "epoch": 0.87, "learning_rate": 1.2165183299349541e-06, "loss": 0.7054, "step": 6107 }, { "epoch": 0.87, "learning_rate": 1.2137742719178801e-06, "loss": 0.7257, "step": 6108 }, { "epoch": 0.88, "learning_rate": 1.2110331817340503e-06, "loss": 0.7366, "step": 6109 }, { "epoch": 0.88, "learning_rate": 1.2082950599735527e-06, "loss": 0.6875, "step": 6110 }, { "epoch": 0.88, "learning_rate": 1.2055599072258388e-06, "loss": 0.6836, "step": 6111 }, { "epoch": 0.88, "learning_rate": 1.2028277240797163e-06, "loss": 0.6869, "step": 6112 }, { "epoch": 0.88, "learning_rate": 1.2000985111233548e-06, "loss": 0.8214, "step": 6113 }, { "epoch": 0.88, "learning_rate": 1.1973722689442939e-06, "loss": 0.8689, "step": 6114 }, { "epoch": 0.88, "learning_rate": 1.1946489981294178e-06, "loss": 0.8591, "step": 6115 }, { "epoch": 0.88, "learning_rate": 1.191928699264984e-06, "loss": 0.7254, "step": 6116 }, { "epoch": 0.88, "learning_rate": 1.189211372936606e-06, "loss": 0.7093, "step": 6117 }, { "epoch": 0.88, "learning_rate": 1.1864970197292542e-06, "loss": 0.7941, "step": 6118 }, { "epoch": 0.88, "learning_rate": 1.1837856402272657e-06, "loss": 1.0212, "step": 6119 }, { "epoch": 0.88, "learning_rate": 1.1810772350143239e-06, "loss": 0.7387, "step": 6120 }, { "epoch": 0.88, "learning_rate": 1.1783718046734926e-06, "loss": 0.7107, "step": 6121 }, { "epoch": 0.88, "learning_rate": 1.1756693497871824e-06, "loss": 0.7785, "step": 6122 }, { "epoch": 0.88, "learning_rate": 1.1729698709371579e-06, "loss": 0.733, "step": 6123 }, { "epoch": 0.88, "learning_rate": 1.1702733687045525e-06, "loss": 0.8164, "step": 6124 }, { "epoch": 0.88, "learning_rate": 1.1675798436698636e-06, "loss": 0.7221, "step": 6125 }, { "epoch": 0.88, "learning_rate": 1.1648892964129326e-06, "loss": 0.7422, "step": 6126 }, { "epoch": 0.88, "learning_rate": 1.1622017275129709e-06, "loss": 0.7863, "step": 6127 }, { "epoch": 0.88, "learning_rate": 1.1595171375485441e-06, "loss": 0.8945, "step": 6128 }, { "epoch": 0.88, "learning_rate": 1.1568355270975783e-06, "loss": 0.7991, "step": 6129 }, { "epoch": 0.88, "learning_rate": 1.15415689673736e-06, "loss": 0.9492, "step": 6130 }, { "epoch": 0.88, "learning_rate": 1.151481247044528e-06, "loss": 0.6655, "step": 6131 }, { "epoch": 0.88, "learning_rate": 1.1488085785950897e-06, "loss": 0.6948, "step": 6132 }, { "epoch": 0.88, "learning_rate": 1.1461388919643983e-06, "loss": 0.6448, "step": 6133 }, { "epoch": 0.88, "learning_rate": 1.143472187727171e-06, "loss": 0.7238, "step": 6134 }, { "epoch": 0.88, "learning_rate": 1.1408084664574913e-06, "loss": 0.7109, "step": 6135 }, { "epoch": 0.88, "learning_rate": 1.1381477287287845e-06, "loss": 0.8298, "step": 6136 }, { "epoch": 0.88, "learning_rate": 1.1354899751138437e-06, "loss": 0.7182, "step": 6137 }, { "epoch": 0.88, "learning_rate": 1.132835206184818e-06, "loss": 0.7372, "step": 6138 }, { "epoch": 0.88, "learning_rate": 1.1301834225132134e-06, "loss": 0.738, "step": 6139 }, { "epoch": 0.88, "learning_rate": 1.1275346246698953e-06, "loss": 0.731, "step": 6140 }, { "epoch": 0.88, "learning_rate": 1.1248888132250774e-06, "loss": 0.7165, "step": 6141 }, { "epoch": 0.88, "learning_rate": 1.1222459887483455e-06, "loss": 0.8393, "step": 6142 }, { "epoch": 0.88, "learning_rate": 1.1196061518086332e-06, "loss": 0.6948, "step": 6143 }, { "epoch": 0.88, "learning_rate": 1.1169693029742273e-06, "loss": 0.75, "step": 6144 }, { "epoch": 0.88, "learning_rate": 1.1143354428127756e-06, "loss": 0.715, "step": 6145 }, { "epoch": 0.88, "learning_rate": 1.1117045718912928e-06, "loss": 0.7511, "step": 6146 }, { "epoch": 0.88, "learning_rate": 1.1090766907761308e-06, "loss": 0.7935, "step": 6147 }, { "epoch": 0.88, "learning_rate": 1.1064518000330103e-06, "loss": 0.7974, "step": 6148 }, { "epoch": 0.88, "learning_rate": 1.1038299002270048e-06, "loss": 0.8069, "step": 6149 }, { "epoch": 0.88, "learning_rate": 1.1012109919225454e-06, "loss": 0.7176, "step": 6150 }, { "epoch": 0.88, "learning_rate": 1.09859507568342e-06, "loss": 0.6967, "step": 6151 }, { "epoch": 0.88, "learning_rate": 1.0959821520727631e-06, "loss": 0.9364, "step": 6152 }, { "epoch": 0.88, "learning_rate": 1.0933722216530812e-06, "loss": 0.7874, "step": 6153 }, { "epoch": 0.88, "learning_rate": 1.0907652849862266e-06, "loss": 0.7221, "step": 6154 }, { "epoch": 0.88, "learning_rate": 1.0881613426334036e-06, "loss": 0.6836, "step": 6155 }, { "epoch": 0.88, "learning_rate": 1.0855603951551773e-06, "loss": 0.7631, "step": 6156 }, { "epoch": 0.88, "learning_rate": 1.0829624431114715e-06, "loss": 0.9515, "step": 6157 }, { "epoch": 0.88, "learning_rate": 1.080367487061557e-06, "loss": 0.7015, "step": 6158 }, { "epoch": 0.88, "learning_rate": 1.0777755275640654e-06, "loss": 0.8315, "step": 6159 }, { "epoch": 0.88, "learning_rate": 1.0751865651769822e-06, "loss": 0.8052, "step": 6160 }, { "epoch": 0.88, "learning_rate": 1.0726006004576461e-06, "loss": 0.7561, "step": 6161 }, { "epoch": 0.88, "learning_rate": 1.0700176339627504e-06, "loss": 0.947, "step": 6162 }, { "epoch": 0.88, "learning_rate": 1.067437666248342e-06, "loss": 0.649, "step": 6163 }, { "epoch": 0.88, "learning_rate": 1.0648606978698317e-06, "loss": 0.6805, "step": 6164 }, { "epoch": 0.88, "learning_rate": 1.0622867293819689e-06, "loss": 0.805, "step": 6165 }, { "epoch": 0.88, "learning_rate": 1.0597157613388685e-06, "loss": 0.4604, "step": 6166 }, { "epoch": 0.88, "learning_rate": 1.0571477942939966e-06, "loss": 0.7612, "step": 6167 }, { "epoch": 0.88, "learning_rate": 1.0545828288001723e-06, "loss": 0.8265, "step": 6168 }, { "epoch": 0.88, "learning_rate": 1.0520208654095724e-06, "loss": 0.7759, "step": 6169 }, { "epoch": 0.88, "learning_rate": 1.0494619046737158e-06, "loss": 0.8728, "step": 6170 }, { "epoch": 0.88, "learning_rate": 1.0469059471434916e-06, "loss": 0.8376, "step": 6171 }, { "epoch": 0.88, "learning_rate": 1.044352993369133e-06, "loss": 0.7224, "step": 6172 }, { "epoch": 0.88, "learning_rate": 1.0418030439002241e-06, "loss": 0.755, "step": 6173 }, { "epoch": 0.88, "learning_rate": 1.039256099285707e-06, "loss": 0.7536, "step": 6174 }, { "epoch": 0.88, "learning_rate": 1.0367121600738816e-06, "loss": 0.6881, "step": 6175 }, { "epoch": 0.88, "learning_rate": 1.0341712268123865e-06, "loss": 0.7246, "step": 6176 }, { "epoch": 0.88, "learning_rate": 1.0316333000482276e-06, "loss": 0.8198, "step": 6177 }, { "epoch": 0.88, "learning_rate": 1.0290983803277542e-06, "loss": 0.6934, "step": 6178 }, { "epoch": 0.89, "learning_rate": 1.0265664681966736e-06, "loss": 0.7299, "step": 6179 }, { "epoch": 0.89, "learning_rate": 1.024037564200046e-06, "loss": 0.8544, "step": 6180 }, { "epoch": 0.89, "learning_rate": 1.0215116688822761e-06, "loss": 0.7294, "step": 6181 }, { "epoch": 0.89, "learning_rate": 1.0189887827871309e-06, "loss": 0.7383, "step": 6182 }, { "epoch": 0.89, "learning_rate": 1.016468906457727e-06, "loss": 0.8566, "step": 6183 }, { "epoch": 0.89, "learning_rate": 1.0139520404365276e-06, "loss": 0.8728, "step": 6184 }, { "epoch": 0.89, "learning_rate": 1.011438185265351e-06, "loss": 0.6733, "step": 6185 }, { "epoch": 0.89, "learning_rate": 1.0089273414853723e-06, "loss": 0.9576, "step": 6186 }, { "epoch": 0.89, "learning_rate": 1.0064195096371093e-06, "loss": 0.8019, "step": 6187 }, { "epoch": 0.89, "learning_rate": 1.0039146902604401e-06, "loss": 0.6568, "step": 6188 }, { "epoch": 0.89, "learning_rate": 1.0014128838945884e-06, "loss": 0.7422, "step": 6189 }, { "epoch": 0.89, "learning_rate": 9.989140910781336e-07, "loss": 0.8644, "step": 6190 }, { "epoch": 0.89, "learning_rate": 9.964183123489984e-07, "loss": 0.784, "step": 6191 }, { "epoch": 0.89, "learning_rate": 9.939255482444665e-07, "loss": 0.6219, "step": 6192 }, { "epoch": 0.89, "learning_rate": 9.914357993011703e-07, "loss": 0.8002, "step": 6193 }, { "epoch": 0.89, "learning_rate": 9.88949066055086e-07, "loss": 0.7871, "step": 6194 }, { "epoch": 0.89, "learning_rate": 9.864653490415455e-07, "loss": 0.6562, "step": 6195 }, { "epoch": 0.89, "learning_rate": 9.839846487952392e-07, "loss": 0.7531, "step": 6196 }, { "epoch": 0.89, "learning_rate": 9.815069658501934e-07, "loss": 0.6401, "step": 6197 }, { "epoch": 0.89, "learning_rate": 9.79032300739795e-07, "loss": 0.7556, "step": 6198 }, { "epoch": 0.89, "learning_rate": 9.765606539967759e-07, "loss": 0.692, "step": 6199 }, { "epoch": 0.89, "learning_rate": 9.740920261532221e-07, "loss": 0.666, "step": 6200 }, { "epoch": 0.89, "learning_rate": 9.716264177405687e-07, "loss": 0.7433, "step": 6201 }, { "epoch": 0.89, "learning_rate": 9.69163829289596e-07, "loss": 0.8477, "step": 6202 }, { "epoch": 0.89, "learning_rate": 9.667042613304433e-07, "loss": 0.817, "step": 6203 }, { "epoch": 0.89, "learning_rate": 9.642477143925971e-07, "loss": 0.6769, "step": 6204 }, { "epoch": 0.89, "learning_rate": 9.617941890048825e-07, "loss": 0.7115, "step": 6205 }, { "epoch": 0.89, "learning_rate": 9.59343685695489e-07, "loss": 0.6197, "step": 6206 }, { "epoch": 0.89, "learning_rate": 9.56896204991946e-07, "loss": 0.7676, "step": 6207 }, { "epoch": 0.89, "learning_rate": 9.544517474211373e-07, "loss": 0.707, "step": 6208 }, { "epoch": 0.89, "learning_rate": 9.520103135092951e-07, "loss": 0.764, "step": 6209 }, { "epoch": 0.89, "learning_rate": 9.495719037819988e-07, "loss": 0.6222, "step": 6210 }, { "epoch": 0.89, "learning_rate": 9.47136518764179e-07, "loss": 0.8287, "step": 6211 }, { "epoch": 0.89, "learning_rate": 9.447041589801109e-07, "loss": 0.7045, "step": 6212 }, { "epoch": 0.89, "learning_rate": 9.422748249534191e-07, "loss": 0.738, "step": 6213 }, { "epoch": 0.89, "learning_rate": 9.398485172070887e-07, "loss": 0.7799, "step": 6214 }, { "epoch": 0.89, "learning_rate": 9.374252362634351e-07, "loss": 0.6674, "step": 6215 }, { "epoch": 0.89, "learning_rate": 9.350049826441326e-07, "loss": 0.6055, "step": 6216 }, { "epoch": 0.89, "learning_rate": 9.325877568702046e-07, "loss": 0.649, "step": 6217 }, { "epoch": 0.89, "learning_rate": 9.301735594620165e-07, "loss": 0.6445, "step": 6218 }, { "epoch": 0.89, "learning_rate": 9.27762390939289e-07, "loss": 0.7416, "step": 6219 }, { "epoch": 0.89, "learning_rate": 9.253542518210822e-07, "loss": 0.8845, "step": 6220 }, { "epoch": 0.89, "learning_rate": 9.229491426258113e-07, "loss": 0.7779, "step": 6221 }, { "epoch": 0.89, "learning_rate": 9.205470638712421e-07, "loss": 0.6688, "step": 6222 }, { "epoch": 0.89, "learning_rate": 9.181480160744743e-07, "loss": 0.7497, "step": 6223 }, { "epoch": 0.89, "learning_rate": 9.157519997519631e-07, "loss": 0.6535, "step": 6224 }, { "epoch": 0.89, "learning_rate": 9.133590154195209e-07, "loss": 0.7411, "step": 6225 }, { "epoch": 0.89, "learning_rate": 9.109690635922907e-07, "loss": 0.6309, "step": 6226 }, { "epoch": 0.89, "learning_rate": 9.085821447847725e-07, "loss": 0.8058, "step": 6227 }, { "epoch": 0.89, "learning_rate": 9.061982595108087e-07, "loss": 0.7188, "step": 6228 }, { "epoch": 0.89, "learning_rate": 9.03817408283592e-07, "loss": 0.7478, "step": 6229 }, { "epoch": 0.89, "learning_rate": 9.014395916156642e-07, "loss": 0.74, "step": 6230 }, { "epoch": 0.89, "learning_rate": 8.990648100189025e-07, "loss": 0.6297, "step": 6231 }, { "epoch": 0.89, "learning_rate": 8.966930640045445e-07, "loss": 0.6833, "step": 6232 }, { "epoch": 0.89, "learning_rate": 8.943243540831703e-07, "loss": 0.5792, "step": 6233 }, { "epoch": 0.89, "learning_rate": 8.919586807646984e-07, "loss": 0.5633, "step": 6234 }, { "epoch": 0.89, "learning_rate": 8.895960445584034e-07, "loss": 0.7182, "step": 6235 }, { "epoch": 0.89, "learning_rate": 8.872364459729015e-07, "loss": 0.8622, "step": 6236 }, { "epoch": 0.89, "learning_rate": 8.848798855161567e-07, "loss": 0.775, "step": 6237 }, { "epoch": 0.89, "learning_rate": 8.825263636954761e-07, "loss": 0.6197, "step": 6238 }, { "epoch": 0.89, "learning_rate": 8.801758810175181e-07, "loss": 0.8371, "step": 6239 }, { "epoch": 0.89, "learning_rate": 8.778284379882828e-07, "loss": 0.7796, "step": 6240 }, { "epoch": 0.89, "learning_rate": 8.754840351131127e-07, "loss": 0.8315, "step": 6241 }, { "epoch": 0.89, "learning_rate": 8.731426728967007e-07, "loss": 0.7533, "step": 6242 }, { "epoch": 0.89, "learning_rate": 8.708043518430902e-07, "loss": 0.6763, "step": 6243 }, { "epoch": 0.89, "learning_rate": 8.684690724556554e-07, "loss": 0.8309, "step": 6244 }, { "epoch": 0.89, "learning_rate": 8.661368352371307e-07, "loss": 0.8248, "step": 6245 }, { "epoch": 0.89, "learning_rate": 8.638076406895862e-07, "loss": 0.8477, "step": 6246 }, { "epoch": 0.89, "learning_rate": 8.614814893144391e-07, "loss": 0.7388, "step": 6247 }, { "epoch": 0.9, "learning_rate": 8.591583816124554e-07, "loss": 0.6402, "step": 6248 }, { "epoch": 0.9, "learning_rate": 8.568383180837369e-07, "loss": 0.8477, "step": 6249 }, { "epoch": 0.9, "learning_rate": 8.545212992277424e-07, "loss": 0.7531, "step": 6250 }, { "epoch": 0.9, "learning_rate": 8.522073255432677e-07, "loss": 0.6646, "step": 6251 }, { "epoch": 0.9, "learning_rate": 8.498963975284496e-07, "loss": 0.654, "step": 6252 }, { "epoch": 0.9, "learning_rate": 8.47588515680775e-07, "loss": 0.726, "step": 6253 }, { "epoch": 0.9, "learning_rate": 8.452836804970782e-07, "loss": 0.7229, "step": 6254 }, { "epoch": 0.9, "learning_rate": 8.429818924735289e-07, "loss": 0.904, "step": 6255 }, { "epoch": 0.9, "learning_rate": 8.406831521056474e-07, "loss": 0.6942, "step": 6256 }, { "epoch": 0.9, "learning_rate": 8.383874598882929e-07, "loss": 0.7241, "step": 6257 }, { "epoch": 0.9, "learning_rate": 8.360948163156734e-07, "loss": 0.7031, "step": 6258 }, { "epoch": 0.9, "learning_rate": 8.338052218813374e-07, "loss": 0.8689, "step": 6259 }, { "epoch": 0.9, "learning_rate": 8.315186770781791e-07, "loss": 0.7333, "step": 6260 }, { "epoch": 0.9, "learning_rate": 8.292351823984362e-07, "loss": 0.6853, "step": 6261 }, { "epoch": 0.9, "learning_rate": 8.26954738333684e-07, "loss": 0.7047, "step": 6262 }, { "epoch": 0.9, "learning_rate": 8.246773453748446e-07, "loss": 0.6691, "step": 6263 }, { "epoch": 0.9, "learning_rate": 8.224030040121944e-07, "loss": 0.728, "step": 6264 }, { "epoch": 0.9, "learning_rate": 8.201317147353332e-07, "loss": 0.6261, "step": 6265 }, { "epoch": 0.9, "learning_rate": 8.178634780332167e-07, "loss": 0.6105, "step": 6266 }, { "epoch": 0.9, "learning_rate": 8.155982943941409e-07, "loss": 0.7506, "step": 6267 }, { "epoch": 0.9, "learning_rate": 8.133361643057408e-07, "loss": 0.584, "step": 6268 }, { "epoch": 0.9, "learning_rate": 8.110770882550034e-07, "loss": 0.7478, "step": 6269 }, { "epoch": 0.9, "learning_rate": 8.088210667282431e-07, "loss": 0.6917, "step": 6270 }, { "epoch": 0.9, "learning_rate": 8.065681002111314e-07, "loss": 0.7751, "step": 6271 }, { "epoch": 0.9, "learning_rate": 8.043181891886769e-07, "loss": 0.8256, "step": 6272 }, { "epoch": 0.9, "learning_rate": 8.020713341452257e-07, "loss": 0.7341, "step": 6273 }, { "epoch": 0.9, "learning_rate": 7.998275355644691e-07, "loss": 0.4925, "step": 6274 }, { "epoch": 0.9, "learning_rate": 7.97586793929449e-07, "loss": 0.7115, "step": 6275 }, { "epoch": 0.9, "learning_rate": 7.953491097225362e-07, "loss": 0.7734, "step": 6276 }, { "epoch": 0.9, "learning_rate": 7.931144834254489e-07, "loss": 0.7051, "step": 6277 }, { "epoch": 0.9, "learning_rate": 7.908829155192471e-07, "loss": 0.6431, "step": 6278 }, { "epoch": 0.9, "learning_rate": 7.886544064843332e-07, "loss": 0.6501, "step": 6279 }, { "epoch": 0.9, "learning_rate": 7.864289568004502e-07, "loss": 0.6911, "step": 6280 }, { "epoch": 0.9, "learning_rate": 7.84206566946678e-07, "loss": 0.7801, "step": 6281 }, { "epoch": 0.9, "learning_rate": 7.819872374014475e-07, "loss": 0.7656, "step": 6282 }, { "epoch": 0.9, "learning_rate": 7.797709686425264e-07, "loss": 0.7017, "step": 6283 }, { "epoch": 0.9, "learning_rate": 7.775577611470181e-07, "loss": 0.6744, "step": 6284 }, { "epoch": 0.9, "learning_rate": 7.753476153913713e-07, "loss": 0.7645, "step": 6285 }, { "epoch": 0.9, "learning_rate": 7.73140531851379e-07, "loss": 0.6971, "step": 6286 }, { "epoch": 0.9, "learning_rate": 7.709365110021693e-07, "loss": 0.7963, "step": 6287 }, { "epoch": 0.9, "learning_rate": 7.687355533182145e-07, "loss": 0.7651, "step": 6288 }, { "epoch": 0.9, "learning_rate": 7.665376592733253e-07, "loss": 0.745, "step": 6289 }, { "epoch": 0.9, "learning_rate": 7.643428293406568e-07, "loss": 0.7419, "step": 6290 }, { "epoch": 0.9, "learning_rate": 7.621510639926976e-07, "loss": 0.7628, "step": 6291 }, { "epoch": 0.9, "learning_rate": 7.599623637012804e-07, "loss": 0.7305, "step": 6292 }, { "epoch": 0.9, "learning_rate": 7.577767289375831e-07, "loss": 0.8454, "step": 6293 }, { "epoch": 0.9, "learning_rate": 7.555941601721145e-07, "loss": 0.7785, "step": 6294 }, { "epoch": 0.9, "learning_rate": 7.534146578747287e-07, "loss": 0.6309, "step": 6295 }, { "epoch": 0.9, "learning_rate": 7.512382225146186e-07, "loss": 0.9414, "step": 6296 }, { "epoch": 0.9, "learning_rate": 7.49064854560318e-07, "loss": 0.6649, "step": 6297 }, { "epoch": 0.9, "learning_rate": 7.468945544797006e-07, "loss": 0.803, "step": 6298 }, { "epoch": 0.9, "learning_rate": 7.447273227399698e-07, "loss": 0.6177, "step": 6299 }, { "epoch": 0.9, "learning_rate": 7.425631598076871e-07, "loss": 0.7349, "step": 6300 }, { "epoch": 0.9, "learning_rate": 7.404020661487415e-07, "loss": 0.6749, "step": 6301 }, { "epoch": 0.9, "learning_rate": 7.382440422283576e-07, "loss": 0.743, "step": 6302 }, { "epoch": 0.9, "learning_rate": 7.360890885111071e-07, "loss": 0.6504, "step": 6303 }, { "epoch": 0.9, "learning_rate": 7.339372054609006e-07, "loss": 0.7073, "step": 6304 }, { "epoch": 0.9, "learning_rate": 7.317883935409841e-07, "loss": 0.7215, "step": 6305 }, { "epoch": 0.9, "learning_rate": 7.29642653213941e-07, "loss": 0.6537, "step": 6306 }, { "epoch": 0.9, "learning_rate": 7.274999849416986e-07, "loss": 0.8449, "step": 6307 }, { "epoch": 0.9, "learning_rate": 7.253603891855209e-07, "loss": 0.7595, "step": 6308 }, { "epoch": 0.9, "learning_rate": 7.232238664060098e-07, "loss": 0.6214, "step": 6309 }, { "epoch": 0.9, "learning_rate": 7.210904170631022e-07, "loss": 0.7634, "step": 6310 }, { "epoch": 0.9, "learning_rate": 7.189600416160807e-07, "loss": 0.7824, "step": 6311 }, { "epoch": 0.9, "learning_rate": 7.168327405235636e-07, "loss": 0.7118, "step": 6312 }, { "epoch": 0.9, "learning_rate": 7.147085142435028e-07, "loss": 0.8075, "step": 6313 }, { "epoch": 0.9, "learning_rate": 7.125873632331909e-07, "loss": 0.7402, "step": 6314 }, { "epoch": 0.9, "learning_rate": 7.104692879492625e-07, "loss": 0.7227, "step": 6315 }, { "epoch": 0.9, "learning_rate": 7.083542888476846e-07, "loss": 0.7109, "step": 6316 }, { "epoch": 0.9, "learning_rate": 7.062423663837663e-07, "loss": 0.6945, "step": 6317 }, { "epoch": 0.91, "learning_rate": 7.041335210121491e-07, "loss": 0.8075, "step": 6318 }, { "epoch": 0.91, "learning_rate": 7.020277531868214e-07, "loss": 0.8019, "step": 6319 }, { "epoch": 0.91, "learning_rate": 6.999250633610921e-07, "loss": 0.7034, "step": 6320 }, { "epoch": 0.91, "learning_rate": 6.978254519876276e-07, "loss": 0.9593, "step": 6321 }, { "epoch": 0.91, "learning_rate": 6.95728919518423e-07, "loss": 0.8544, "step": 6322 }, { "epoch": 0.91, "learning_rate": 6.936354664048023e-07, "loss": 0.7667, "step": 6323 }, { "epoch": 0.91, "learning_rate": 6.915450930974365e-07, "loss": 0.9074, "step": 6324 }, { "epoch": 0.91, "learning_rate": 6.894578000463375e-07, "loss": 0.7154, "step": 6325 }, { "epoch": 0.91, "learning_rate": 6.873735877008408e-07, "loss": 0.683, "step": 6326 }, { "epoch": 0.91, "learning_rate": 6.852924565096291e-07, "loss": 0.7388, "step": 6327 }, { "epoch": 0.91, "learning_rate": 6.832144069207157e-07, "loss": 0.8145, "step": 6328 }, { "epoch": 0.91, "learning_rate": 6.811394393814563e-07, "loss": 0.6562, "step": 6329 }, { "epoch": 0.91, "learning_rate": 6.790675543385399e-07, "loss": 0.7852, "step": 6330 }, { "epoch": 0.91, "learning_rate": 6.769987522379884e-07, "loss": 0.8633, "step": 6331 }, { "epoch": 0.91, "learning_rate": 6.749330335251668e-07, "loss": 0.7584, "step": 6332 }, { "epoch": 0.91, "learning_rate": 6.728703986447765e-07, "loss": 0.7254, "step": 6333 }, { "epoch": 0.91, "learning_rate": 6.708108480408437e-07, "loss": 0.8789, "step": 6334 }, { "epoch": 0.91, "learning_rate": 6.687543821567438e-07, "loss": 0.9425, "step": 6335 }, { "epoch": 0.91, "learning_rate": 6.667010014351827e-07, "loss": 0.7386, "step": 6336 }, { "epoch": 0.91, "learning_rate": 6.646507063182017e-07, "loss": 0.8008, "step": 6337 }, { "epoch": 0.91, "learning_rate": 6.626034972471795e-07, "loss": 0.7026, "step": 6338 }, { "epoch": 0.91, "learning_rate": 6.605593746628286e-07, "loss": 0.5932, "step": 6339 }, { "epoch": 0.91, "learning_rate": 6.585183390051985e-07, "loss": 0.7483, "step": 6340 }, { "epoch": 0.91, "learning_rate": 6.564803907136729e-07, "loss": 0.7268, "step": 6341 }, { "epoch": 0.91, "learning_rate": 6.544455302269709e-07, "loss": 0.656, "step": 6342 }, { "epoch": 0.91, "learning_rate": 6.524137579831518e-07, "loss": 0.772, "step": 6343 }, { "epoch": 0.91, "learning_rate": 6.503850744196011e-07, "loss": 0.6862, "step": 6344 }, { "epoch": 0.91, "learning_rate": 6.483594799730458e-07, "loss": 0.7017, "step": 6345 }, { "epoch": 0.91, "learning_rate": 6.463369750795472e-07, "loss": 0.7469, "step": 6346 }, { "epoch": 0.91, "learning_rate": 6.443175601744988e-07, "loss": 0.5798, "step": 6347 }, { "epoch": 0.91, "learning_rate": 6.423012356926344e-07, "loss": 0.7028, "step": 6348 }, { "epoch": 0.91, "learning_rate": 6.402880020680135e-07, "loss": 0.8393, "step": 6349 }, { "epoch": 0.91, "learning_rate": 6.382778597340377e-07, "loss": 0.6825, "step": 6350 }, { "epoch": 0.91, "learning_rate": 6.362708091234443e-07, "loss": 0.7751, "step": 6351 }, { "epoch": 0.91, "learning_rate": 6.342668506682958e-07, "loss": 0.9286, "step": 6352 }, { "epoch": 0.91, "learning_rate": 6.322659847999973e-07, "loss": 0.7645, "step": 6353 }, { "epoch": 0.91, "learning_rate": 6.302682119492892e-07, "loss": 0.6306, "step": 6354 }, { "epoch": 0.91, "learning_rate": 6.282735325462375e-07, "loss": 0.7146, "step": 6355 }, { "epoch": 0.91, "learning_rate": 6.262819470202485e-07, "loss": 0.8161, "step": 6356 }, { "epoch": 0.91, "learning_rate": 6.242934558000629e-07, "loss": 0.7199, "step": 6357 }, { "epoch": 0.91, "learning_rate": 6.223080593137515e-07, "loss": 0.8058, "step": 6358 }, { "epoch": 0.91, "learning_rate": 6.203257579887256e-07, "loss": 0.6574, "step": 6359 }, { "epoch": 0.91, "learning_rate": 6.183465522517173e-07, "loss": 0.6049, "step": 6360 }, { "epoch": 0.91, "learning_rate": 6.163704425288091e-07, "loss": 0.8198, "step": 6361 }, { "epoch": 0.91, "learning_rate": 6.143974292454058e-07, "loss": 0.6568, "step": 6362 }, { "epoch": 0.91, "learning_rate": 6.124275128262457e-07, "loss": 0.8465, "step": 6363 }, { "epoch": 0.91, "learning_rate": 6.104606936954049e-07, "loss": 0.8259, "step": 6364 }, { "epoch": 0.91, "learning_rate": 6.084969722762928e-07, "loss": 0.68, "step": 6365 }, { "epoch": 0.91, "learning_rate": 6.065363489916464e-07, "loss": 0.7472, "step": 6366 }, { "epoch": 0.91, "learning_rate": 6.045788242635414e-07, "loss": 0.7701, "step": 6367 }, { "epoch": 0.91, "learning_rate": 6.026243985133839e-07, "loss": 0.8823, "step": 6368 }, { "epoch": 0.91, "learning_rate": 6.006730721619158e-07, "loss": 0.7241, "step": 6369 }, { "epoch": 0.91, "learning_rate": 5.98724845629206e-07, "loss": 0.7991, "step": 6370 }, { "epoch": 0.91, "learning_rate": 5.967797193346575e-07, "loss": 0.7952, "step": 6371 }, { "epoch": 0.91, "learning_rate": 5.948376936970151e-07, "loss": 0.8795, "step": 6372 }, { "epoch": 0.91, "learning_rate": 5.92898769134343e-07, "loss": 0.7408, "step": 6373 }, { "epoch": 0.91, "learning_rate": 5.90962946064047e-07, "loss": 0.692, "step": 6374 }, { "epoch": 0.91, "learning_rate": 5.890302249028589e-07, "loss": 0.808, "step": 6375 }, { "epoch": 0.91, "learning_rate": 5.871006060668477e-07, "loss": 0.5689, "step": 6376 }, { "epoch": 0.91, "learning_rate": 5.851740899714142e-07, "loss": 0.6719, "step": 6377 }, { "epoch": 0.91, "learning_rate": 5.832506770312835e-07, "loss": 0.6892, "step": 6378 }, { "epoch": 0.91, "learning_rate": 5.813303676605241e-07, "loss": 0.7511, "step": 6379 }, { "epoch": 0.91, "learning_rate": 5.794131622725324e-07, "loss": 0.7999, "step": 6380 }, { "epoch": 0.91, "learning_rate": 5.774990612800296e-07, "loss": 0.76, "step": 6381 }, { "epoch": 0.91, "learning_rate": 5.755880650950779e-07, "loss": 0.8237, "step": 6382 }, { "epoch": 0.91, "learning_rate": 5.736801741290698e-07, "loss": 0.6981, "step": 6383 }, { "epoch": 0.91, "learning_rate": 5.717753887927235e-07, "loss": 0.7176, "step": 6384 }, { "epoch": 0.91, "learning_rate": 5.69873709496091e-07, "loss": 0.7168, "step": 6385 }, { "epoch": 0.91, "learning_rate": 5.679751366485597e-07, "loss": 0.5639, "step": 6386 }, { "epoch": 0.91, "learning_rate": 5.660796706588445e-07, "loss": 0.7773, "step": 6387 }, { "epoch": 0.92, "learning_rate": 5.641873119349905e-07, "loss": 0.6194, "step": 6388 }, { "epoch": 0.92, "learning_rate": 5.622980608843786e-07, "loss": 0.8092, "step": 6389 }, { "epoch": 0.92, "learning_rate": 5.604119179137185e-07, "loss": 0.6071, "step": 6390 }, { "epoch": 0.92, "learning_rate": 5.585288834290453e-07, "loss": 0.6203, "step": 6391 }, { "epoch": 0.92, "learning_rate": 5.566489578357314e-07, "loss": 0.5982, "step": 6392 }, { "epoch": 0.92, "learning_rate": 5.547721415384832e-07, "loss": 0.8136, "step": 6393 }, { "epoch": 0.92, "learning_rate": 5.528984349413274e-07, "loss": 0.7154, "step": 6394 }, { "epoch": 0.92, "learning_rate": 5.510278384476281e-07, "loss": 0.8538, "step": 6395 }, { "epoch": 0.92, "learning_rate": 5.49160352460078e-07, "loss": 0.811, "step": 6396 }, { "epoch": 0.92, "learning_rate": 5.472959773807024e-07, "loss": 0.7879, "step": 6397 }, { "epoch": 0.92, "learning_rate": 5.454347136108551e-07, "loss": 0.6719, "step": 6398 }, { "epoch": 0.92, "learning_rate": 5.435765615512156e-07, "loss": 0.6775, "step": 6399 }, { "epoch": 0.92, "learning_rate": 5.41721521601804e-07, "loss": 0.6833, "step": 6400 }, { "epoch": 0.92, "learning_rate": 5.398695941619625e-07, "loss": 0.7589, "step": 6401 }, { "epoch": 0.92, "learning_rate": 5.380207796303638e-07, "loss": 0.6886, "step": 6402 }, { "epoch": 0.92, "learning_rate": 5.361750784050096e-07, "loss": 0.6677, "step": 6403 }, { "epoch": 0.92, "learning_rate": 5.343324908832419e-07, "loss": 0.6205, "step": 6404 }, { "epoch": 0.92, "learning_rate": 5.324930174617171e-07, "loss": 0.8722, "step": 6405 }, { "epoch": 0.92, "learning_rate": 5.306566585364297e-07, "loss": 0.7651, "step": 6406 }, { "epoch": 0.92, "learning_rate": 5.288234145027037e-07, "loss": 0.8867, "step": 6407 }, { "epoch": 0.92, "learning_rate": 5.269932857551901e-07, "loss": 0.7824, "step": 6408 }, { "epoch": 0.92, "learning_rate": 5.251662726878737e-07, "loss": 0.5776, "step": 6409 }, { "epoch": 0.92, "learning_rate": 5.233423756940581e-07, "loss": 0.8125, "step": 6410 }, { "epoch": 0.92, "learning_rate": 5.215215951663893e-07, "loss": 0.7656, "step": 6411 }, { "epoch": 0.92, "learning_rate": 5.197039314968355e-07, "loss": 0.743, "step": 6412 }, { "epoch": 0.92, "learning_rate": 5.178893850766919e-07, "loss": 0.6878, "step": 6413 }, { "epoch": 0.92, "learning_rate": 5.160779562965878e-07, "loss": 0.7634, "step": 6414 }, { "epoch": 0.92, "learning_rate": 5.142696455464779e-07, "loss": 0.8145, "step": 6415 }, { "epoch": 0.92, "learning_rate": 5.124644532156459e-07, "loss": 0.9294, "step": 6416 }, { "epoch": 0.92, "learning_rate": 5.10662379692709e-07, "loss": 0.7472, "step": 6417 }, { "epoch": 0.92, "learning_rate": 5.088634253656038e-07, "loss": 0.841, "step": 6418 }, { "epoch": 0.92, "learning_rate": 5.070675906216054e-07, "loss": 0.5921, "step": 6419 }, { "epoch": 0.92, "learning_rate": 5.052748758473097e-07, "loss": 0.6378, "step": 6420 }, { "epoch": 0.92, "learning_rate": 5.034852814286428e-07, "loss": 0.7095, "step": 6421 }, { "epoch": 0.92, "learning_rate": 5.016988077508633e-07, "loss": 0.5762, "step": 6422 }, { "epoch": 0.92, "learning_rate": 4.999154551985535e-07, "loss": 0.8454, "step": 6423 }, { "epoch": 0.92, "learning_rate": 4.981352241556231e-07, "loss": 0.6995, "step": 6424 }, { "epoch": 0.92, "learning_rate": 4.963581150053154e-07, "loss": 0.8237, "step": 6425 }, { "epoch": 0.92, "learning_rate": 4.945841281301944e-07, "loss": 1.0664, "step": 6426 }, { "epoch": 0.92, "learning_rate": 4.928132639121596e-07, "loss": 0.7146, "step": 6427 }, { "epoch": 0.92, "learning_rate": 4.910455227324295e-07, "loss": 0.6479, "step": 6428 }, { "epoch": 0.92, "learning_rate": 4.89280904971558e-07, "loss": 0.745, "step": 6429 }, { "epoch": 0.92, "learning_rate": 4.875194110094261e-07, "loss": 0.798, "step": 6430 }, { "epoch": 0.92, "learning_rate": 4.857610412252339e-07, "loss": 0.7299, "step": 6431 }, { "epoch": 0.92, "learning_rate": 4.840057959975169e-07, "loss": 0.9609, "step": 6432 }, { "epoch": 0.92, "learning_rate": 4.822536757041396e-07, "loss": 0.7003, "step": 6433 }, { "epoch": 0.92, "learning_rate": 4.805046807222868e-07, "loss": 0.8443, "step": 6434 }, { "epoch": 0.92, "learning_rate": 4.787588114284741e-07, "loss": 0.6802, "step": 6435 }, { "epoch": 0.92, "learning_rate": 4.770160681985442e-07, "loss": 0.846, "step": 6436 }, { "epoch": 0.92, "learning_rate": 4.7527645140766684e-07, "loss": 0.7372, "step": 6437 }, { "epoch": 0.92, "learning_rate": 4.735399614303393e-07, "loss": 0.8496, "step": 6438 }, { "epoch": 0.92, "learning_rate": 4.7180659864038077e-07, "loss": 0.9096, "step": 6439 }, { "epoch": 0.92, "learning_rate": 4.7007636341094785e-07, "loss": 0.87, "step": 6440 }, { "epoch": 0.92, "learning_rate": 4.683492561145142e-07, "loss": 0.8756, "step": 6441 }, { "epoch": 0.92, "learning_rate": 4.6662527712288236e-07, "loss": 0.6186, "step": 6442 }, { "epoch": 0.92, "learning_rate": 4.6490442680718227e-07, "loss": 0.8142, "step": 6443 }, { "epoch": 0.92, "learning_rate": 4.6318670553787245e-07, "loss": 0.6165, "step": 6444 }, { "epoch": 0.92, "learning_rate": 4.6147211368473387e-07, "loss": 0.6867, "step": 6445 }, { "epoch": 0.92, "learning_rate": 4.597606516168762e-07, "loss": 0.7617, "step": 6446 }, { "epoch": 0.92, "learning_rate": 4.5805231970273643e-07, "loss": 0.8117, "step": 6447 }, { "epoch": 0.92, "learning_rate": 4.5634711831007537e-07, "loss": 0.7536, "step": 6448 }, { "epoch": 0.92, "learning_rate": 4.5464504780597773e-07, "loss": 0.8605, "step": 6449 }, { "epoch": 0.92, "learning_rate": 4.529461085568604e-07, "loss": 0.8878, "step": 6450 }, { "epoch": 0.92, "learning_rate": 4.5125030092846574e-07, "loss": 0.726, "step": 6451 }, { "epoch": 0.92, "learning_rate": 4.495576252858519e-07, "loss": 0.6961, "step": 6452 }, { "epoch": 0.92, "learning_rate": 4.4786808199341223e-07, "loss": 0.8803, "step": 6453 }, { "epoch": 0.92, "learning_rate": 4.461816714148692e-07, "loss": 0.6677, "step": 6454 }, { "epoch": 0.92, "learning_rate": 4.4449839391326074e-07, "loss": 0.6267, "step": 6455 }, { "epoch": 0.92, "learning_rate": 4.4281824985095364e-07, "loss": 0.7511, "step": 6456 }, { "epoch": 0.92, "learning_rate": 4.411412395896436e-07, "loss": 0.75, "step": 6457 }, { "epoch": 0.93, "learning_rate": 4.3946736349034845e-07, "loss": 0.7052, "step": 6458 }, { "epoch": 0.93, "learning_rate": 4.3779662191341496e-07, "loss": 0.6987, "step": 6459 }, { "epoch": 0.93, "learning_rate": 4.3612901521850544e-07, "loss": 0.555, "step": 6460 }, { "epoch": 0.93, "learning_rate": 4.3446454376462273e-07, "loss": 0.6141, "step": 6461 }, { "epoch": 0.93, "learning_rate": 4.3280320791008354e-07, "loss": 0.87, "step": 6462 }, { "epoch": 0.93, "learning_rate": 4.311450080125301e-07, "loss": 0.6655, "step": 6463 }, { "epoch": 0.93, "learning_rate": 4.2948994442893184e-07, "loss": 0.8616, "step": 6464 }, { "epoch": 0.93, "learning_rate": 4.2783801751558547e-07, "loss": 0.7176, "step": 6465 }, { "epoch": 0.93, "learning_rate": 4.2618922762810985e-07, "loss": 0.695, "step": 6466 }, { "epoch": 0.93, "learning_rate": 4.2454357512144606e-07, "loss": 0.7321, "step": 6467 }, { "epoch": 0.93, "learning_rate": 4.229010603498656e-07, "loss": 0.7419, "step": 6468 }, { "epoch": 0.93, "learning_rate": 4.212616836669608e-07, "loss": 0.7709, "step": 6469 }, { "epoch": 0.93, "learning_rate": 4.1962544542564597e-07, "loss": 0.6629, "step": 6470 }, { "epoch": 0.93, "learning_rate": 4.1799234597816263e-07, "loss": 0.774, "step": 6471 }, { "epoch": 0.93, "learning_rate": 4.163623856760812e-07, "loss": 0.6604, "step": 6472 }, { "epoch": 0.93, "learning_rate": 4.1473556487028773e-07, "loss": 0.6878, "step": 6473 }, { "epoch": 0.93, "learning_rate": 4.1311188391099873e-07, "loss": 0.6982, "step": 6474 }, { "epoch": 0.93, "learning_rate": 4.114913431477513e-07, "loss": 0.74, "step": 6475 }, { "epoch": 0.93, "learning_rate": 4.098739429294079e-07, "loss": 0.8119, "step": 6476 }, { "epoch": 0.93, "learning_rate": 4.082596836041569e-07, "loss": 0.8711, "step": 6477 }, { "epoch": 0.93, "learning_rate": 4.0664856551950345e-07, "loss": 0.7143, "step": 6478 }, { "epoch": 0.93, "learning_rate": 4.050405890222852e-07, "loss": 0.7757, "step": 6479 }, { "epoch": 0.93, "learning_rate": 4.03435754458662e-07, "loss": 0.5974, "step": 6480 }, { "epoch": 0.93, "learning_rate": 4.01834062174109e-07, "loss": 0.7065, "step": 6481 }, { "epoch": 0.93, "learning_rate": 4.002355125134338e-07, "loss": 0.7628, "step": 6482 }, { "epoch": 0.93, "learning_rate": 3.986401058207662e-07, "loss": 0.8253, "step": 6483 }, { "epoch": 0.93, "learning_rate": 3.970478424395563e-07, "loss": 0.8594, "step": 6484 }, { "epoch": 0.93, "learning_rate": 3.954587227125767e-07, "loss": 0.7341, "step": 6485 }, { "epoch": 0.93, "learning_rate": 3.9387274698193033e-07, "loss": 0.8996, "step": 6486 }, { "epoch": 0.93, "learning_rate": 3.9228991558903416e-07, "loss": 0.6127, "step": 6487 }, { "epoch": 0.93, "learning_rate": 3.9071022887463724e-07, "loss": 0.5496, "step": 6488 }, { "epoch": 0.93, "learning_rate": 3.8913368717880095e-07, "loss": 0.7146, "step": 6489 }, { "epoch": 0.93, "learning_rate": 3.875602908409204e-07, "loss": 0.8298, "step": 6490 }, { "epoch": 0.93, "learning_rate": 3.8599004019970794e-07, "loss": 0.6992, "step": 6491 }, { "epoch": 0.93, "learning_rate": 3.844229355931983e-07, "loss": 0.8041, "step": 6492 }, { "epoch": 0.93, "learning_rate": 3.828589773587515e-07, "loss": 0.8672, "step": 6493 }, { "epoch": 0.93, "learning_rate": 3.8129816583305e-07, "loss": 0.8354, "step": 6494 }, { "epoch": 0.93, "learning_rate": 3.797405013520949e-07, "loss": 0.6998, "step": 6495 }, { "epoch": 0.93, "learning_rate": 3.7818598425121644e-07, "loss": 0.7422, "step": 6496 }, { "epoch": 0.93, "learning_rate": 3.766346148650601e-07, "loss": 0.7835, "step": 6497 }, { "epoch": 0.93, "learning_rate": 3.7508639352760057e-07, "loss": 0.7386, "step": 6498 }, { "epoch": 0.93, "learning_rate": 3.7354132057212943e-07, "loss": 0.7687, "step": 6499 }, { "epoch": 0.93, "learning_rate": 3.7199939633126233e-07, "loss": 0.6445, "step": 6500 }, { "epoch": 0.93, "learning_rate": 3.704606211369405e-07, "loss": 0.6406, "step": 6501 }, { "epoch": 0.93, "learning_rate": 3.6892499532042045e-07, "loss": 0.6345, "step": 6502 }, { "epoch": 0.93, "learning_rate": 3.6739251921228624e-07, "loss": 0.5956, "step": 6503 }, { "epoch": 0.93, "learning_rate": 3.658631931424422e-07, "loss": 0.6406, "step": 6504 }, { "epoch": 0.93, "learning_rate": 3.6433701744011337e-07, "loss": 0.8041, "step": 6505 }, { "epoch": 0.93, "learning_rate": 3.628139924338486e-07, "loss": 0.7369, "step": 6506 }, { "epoch": 0.93, "learning_rate": 3.6129411845151725e-07, "loss": 0.6973, "step": 6507 }, { "epoch": 0.93, "learning_rate": 3.597773958203093e-07, "loss": 0.7924, "step": 6508 }, { "epoch": 0.93, "learning_rate": 3.582638248667419e-07, "loss": 0.6867, "step": 6509 }, { "epoch": 0.93, "learning_rate": 3.5675340591664273e-07, "loss": 0.7087, "step": 6510 }, { "epoch": 0.93, "learning_rate": 3.5524613929517333e-07, "loss": 0.7126, "step": 6511 }, { "epoch": 0.93, "learning_rate": 3.537420253268092e-07, "loss": 0.7673, "step": 6512 }, { "epoch": 0.93, "learning_rate": 3.522410643353463e-07, "loss": 0.6334, "step": 6513 }, { "epoch": 0.93, "learning_rate": 3.507432566439078e-07, "loss": 0.6462, "step": 6514 }, { "epoch": 0.93, "learning_rate": 3.492486025749325e-07, "loss": 0.5717, "step": 6515 }, { "epoch": 0.93, "learning_rate": 3.477571024501847e-07, "loss": 0.9079, "step": 6516 }, { "epoch": 0.93, "learning_rate": 3.462687565907441e-07, "loss": 0.6024, "step": 6517 }, { "epoch": 0.93, "learning_rate": 3.447835653170178e-07, "loss": 0.8139, "step": 6518 }, { "epoch": 0.93, "learning_rate": 3.433015289487318e-07, "loss": 0.6403, "step": 6519 }, { "epoch": 0.93, "learning_rate": 3.4182264780492746e-07, "loss": 0.6159, "step": 6520 }, { "epoch": 0.93, "learning_rate": 3.403469222039718e-07, "loss": 0.6169, "step": 6521 }, { "epoch": 0.93, "learning_rate": 3.388743524635557e-07, "loss": 0.7857, "step": 6522 }, { "epoch": 0.93, "learning_rate": 3.374049389006839e-07, "loss": 0.4643, "step": 6523 }, { "epoch": 0.93, "learning_rate": 3.3593868183168673e-07, "loss": 0.7084, "step": 6524 }, { "epoch": 0.93, "learning_rate": 3.344755815722117e-07, "loss": 0.7589, "step": 6525 }, { "epoch": 0.93, "learning_rate": 3.3301563843722846e-07, "loss": 0.584, "step": 6526 }, { "epoch": 0.93, "learning_rate": 3.3155885274102905e-07, "loss": 0.422, "step": 6527 }, { "epoch": 0.94, "learning_rate": 3.301052247972175e-07, "loss": 0.7282, "step": 6528 }, { "epoch": 0.94, "learning_rate": 3.286547549187285e-07, "loss": 0.8694, "step": 6529 }, { "epoch": 0.94, "learning_rate": 3.2720744341781405e-07, "loss": 0.6931, "step": 6530 }, { "epoch": 0.94, "learning_rate": 3.2576329060603984e-07, "loss": 0.7074, "step": 6531 }, { "epoch": 0.94, "learning_rate": 3.2432229679429715e-07, "loss": 0.5982, "step": 6532 }, { "epoch": 0.94, "learning_rate": 3.2288446229279957e-07, "loss": 0.7081, "step": 6533 }, { "epoch": 0.94, "learning_rate": 3.214497874110728e-07, "loss": 0.6317, "step": 6534 }, { "epoch": 0.94, "learning_rate": 3.2001827245796976e-07, "loss": 0.7796, "step": 6535 }, { "epoch": 0.94, "learning_rate": 3.185899177416607e-07, "loss": 0.9057, "step": 6536 }, { "epoch": 0.94, "learning_rate": 3.171647235696329e-07, "loss": 0.7874, "step": 6537 }, { "epoch": 0.94, "learning_rate": 3.157426902486976e-07, "loss": 0.889, "step": 6538 }, { "epoch": 0.94, "learning_rate": 3.143238180849783e-07, "loss": 0.8041, "step": 6539 }, { "epoch": 0.94, "learning_rate": 3.1290810738393074e-07, "loss": 0.7946, "step": 6540 }, { "epoch": 0.94, "learning_rate": 3.1149555845031764e-07, "loss": 0.8008, "step": 6541 }, { "epoch": 0.94, "learning_rate": 3.100861715882275e-07, "loss": 0.6677, "step": 6542 }, { "epoch": 0.94, "learning_rate": 3.0867994710106427e-07, "loss": 0.7042, "step": 6543 }, { "epoch": 0.94, "learning_rate": 3.0727688529155415e-07, "loss": 0.8393, "step": 6544 }, { "epoch": 0.94, "learning_rate": 3.0587698646174387e-07, "loss": 0.6777, "step": 6545 }, { "epoch": 0.94, "learning_rate": 3.0448025091299404e-07, "loss": 0.721, "step": 6546 }, { "epoch": 0.94, "learning_rate": 3.0308667894598907e-07, "loss": 0.8823, "step": 6547 }, { "epoch": 0.94, "learning_rate": 3.016962708607307e-07, "loss": 0.7522, "step": 6548 }, { "epoch": 0.94, "learning_rate": 3.003090269565378e-07, "loss": 0.6766, "step": 6549 }, { "epoch": 0.94, "learning_rate": 2.989249475320499e-07, "loss": 0.6663, "step": 6550 }, { "epoch": 0.94, "learning_rate": 2.975440328852269e-07, "loss": 0.7765, "step": 6551 }, { "epoch": 0.94, "learning_rate": 2.961662833133444e-07, "loss": 0.8153, "step": 6552 }, { "epoch": 0.94, "learning_rate": 2.9479169911299677e-07, "loss": 0.764, "step": 6553 }, { "epoch": 0.94, "learning_rate": 2.9342028058009893e-07, "loss": 0.6272, "step": 6554 }, { "epoch": 0.94, "learning_rate": 2.9205202800988484e-07, "loss": 0.7676, "step": 6555 }, { "epoch": 0.94, "learning_rate": 2.906869416969038e-07, "loss": 0.6963, "step": 6556 }, { "epoch": 0.94, "learning_rate": 2.893250219350224e-07, "loss": 0.7556, "step": 6557 }, { "epoch": 0.94, "learning_rate": 2.8796626901743286e-07, "loss": 0.8354, "step": 6558 }, { "epoch": 0.94, "learning_rate": 2.866106832366411e-07, "loss": 0.7288, "step": 6559 }, { "epoch": 0.94, "learning_rate": 2.8525826488446704e-07, "loss": 0.7807, "step": 6560 }, { "epoch": 0.94, "learning_rate": 2.8390901425205453e-07, "loss": 0.7386, "step": 6561 }, { "epoch": 0.94, "learning_rate": 2.825629316298678e-07, "loss": 0.6752, "step": 6562 }, { "epoch": 0.94, "learning_rate": 2.8122001730767845e-07, "loss": 0.7394, "step": 6563 }, { "epoch": 0.94, "learning_rate": 2.798802715745885e-07, "loss": 0.6708, "step": 6564 }, { "epoch": 0.94, "learning_rate": 2.7854369471900896e-07, "loss": 0.7785, "step": 6565 }, { "epoch": 0.94, "learning_rate": 2.7721028702867134e-07, "loss": 0.7301, "step": 6566 }, { "epoch": 0.94, "learning_rate": 2.758800487906293e-07, "loss": 0.7955, "step": 6567 }, { "epoch": 0.94, "learning_rate": 2.7455298029124385e-07, "loss": 0.8131, "step": 6568 }, { "epoch": 0.94, "learning_rate": 2.732290818162048e-07, "loss": 0.774, "step": 6569 }, { "epoch": 0.94, "learning_rate": 2.719083536505157e-07, "loss": 0.8711, "step": 6570 }, { "epoch": 0.94, "learning_rate": 2.705907960784909e-07, "loss": 0.6381, "step": 6571 }, { "epoch": 0.94, "learning_rate": 2.692764093837735e-07, "loss": 0.9171, "step": 6572 }, { "epoch": 0.94, "learning_rate": 2.679651938493155e-07, "loss": 0.8655, "step": 6573 }, { "epoch": 0.94, "learning_rate": 2.666571497573911e-07, "loss": 0.7137, "step": 6574 }, { "epoch": 0.94, "learning_rate": 2.653522773895867e-07, "loss": 0.7879, "step": 6575 }, { "epoch": 0.94, "learning_rate": 2.640505770268126e-07, "loss": 0.6409, "step": 6576 }, { "epoch": 0.94, "learning_rate": 2.627520489492913e-07, "loss": 0.8343, "step": 6577 }, { "epoch": 0.94, "learning_rate": 2.614566934365625e-07, "loss": 0.6217, "step": 6578 }, { "epoch": 0.94, "learning_rate": 2.6016451076748657e-07, "loss": 0.8454, "step": 6579 }, { "epoch": 0.94, "learning_rate": 2.588755012202376e-07, "loss": 0.885, "step": 6580 }, { "epoch": 0.94, "learning_rate": 2.575896650723053e-07, "loss": 0.7762, "step": 6581 }, { "epoch": 0.94, "learning_rate": 2.563070026004999e-07, "loss": 0.9208, "step": 6582 }, { "epoch": 0.94, "learning_rate": 2.5502751408094883e-07, "loss": 0.7291, "step": 6583 }, { "epoch": 0.94, "learning_rate": 2.537511997890918e-07, "loss": 0.841, "step": 6584 }, { "epoch": 0.94, "learning_rate": 2.5247805999968734e-07, "loss": 0.8089, "step": 6585 }, { "epoch": 0.94, "learning_rate": 2.5120809498681287e-07, "loss": 0.8131, "step": 6586 }, { "epoch": 0.94, "learning_rate": 2.499413050238597e-07, "loss": 0.6476, "step": 6587 }, { "epoch": 0.94, "learning_rate": 2.4867769038353807e-07, "loss": 0.9342, "step": 6588 }, { "epoch": 0.94, "learning_rate": 2.4741725133786695e-07, "loss": 0.7249, "step": 6589 }, { "epoch": 0.94, "learning_rate": 2.461599881581944e-07, "loss": 0.7073, "step": 6590 }, { "epoch": 0.94, "learning_rate": 2.449059011151772e-07, "loss": 0.7617, "step": 6591 }, { "epoch": 0.94, "learning_rate": 2.4365499047878605e-07, "loss": 0.8309, "step": 6592 }, { "epoch": 0.94, "learning_rate": 2.424072565183122e-07, "loss": 0.8025, "step": 6593 }, { "epoch": 0.94, "learning_rate": 2.4116269950236245e-07, "loss": 0.6981, "step": 6594 }, { "epoch": 0.94, "learning_rate": 2.399213196988592e-07, "loss": 0.7026, "step": 6595 }, { "epoch": 0.94, "learning_rate": 2.3868311737504035e-07, "loss": 0.7779, "step": 6596 }, { "epoch": 0.94, "learning_rate": 2.3744809279745927e-07, "loss": 0.7041, "step": 6597 }, { "epoch": 0.95, "learning_rate": 2.3621624623199e-07, "loss": 0.6177, "step": 6598 }, { "epoch": 0.95, "learning_rate": 2.3498757794381542e-07, "loss": 0.8005, "step": 6599 }, { "epoch": 0.95, "learning_rate": 2.337620881974356e-07, "loss": 0.7584, "step": 6600 }, { "epoch": 0.95, "learning_rate": 2.3253977725667285e-07, "loss": 0.7785, "step": 6601 }, { "epoch": 0.95, "learning_rate": 2.3132064538465835e-07, "loss": 0.7746, "step": 6602 }, { "epoch": 0.95, "learning_rate": 2.3010469284383884e-07, "loss": 0.6942, "step": 6603 }, { "epoch": 0.95, "learning_rate": 2.2889191989598158e-07, "loss": 0.707, "step": 6604 }, { "epoch": 0.95, "learning_rate": 2.2768232680216604e-07, "loss": 0.767, "step": 6605 }, { "epoch": 0.95, "learning_rate": 2.2647591382278898e-07, "loss": 0.6348, "step": 6606 }, { "epoch": 0.95, "learning_rate": 2.2527268121755762e-07, "loss": 0.6906, "step": 6607 }, { "epoch": 0.95, "learning_rate": 2.2407262924549977e-07, "loss": 0.7545, "step": 6608 }, { "epoch": 0.95, "learning_rate": 2.2287575816496042e-07, "loss": 0.6292, "step": 6609 }, { "epoch": 0.95, "learning_rate": 2.2168206823359016e-07, "loss": 0.7913, "step": 6610 }, { "epoch": 0.95, "learning_rate": 2.204915597083651e-07, "loss": 0.7891, "step": 6611 }, { "epoch": 0.95, "learning_rate": 2.1930423284557188e-07, "loss": 0.7427, "step": 6612 }, { "epoch": 0.95, "learning_rate": 2.1812008790081106e-07, "loss": 0.7533, "step": 6613 }, { "epoch": 0.95, "learning_rate": 2.1693912512900038e-07, "loss": 0.7531, "step": 6614 }, { "epoch": 0.95, "learning_rate": 2.1576134478437315e-07, "loss": 0.7634, "step": 6615 }, { "epoch": 0.95, "learning_rate": 2.1458674712047487e-07, "loss": 0.7391, "step": 6616 }, { "epoch": 0.95, "learning_rate": 2.134153323901683e-07, "loss": 0.7154, "step": 6617 }, { "epoch": 0.95, "learning_rate": 2.1224710084562838e-07, "loss": 0.8103, "step": 6618 }, { "epoch": 0.95, "learning_rate": 2.1108205273834725e-07, "loss": 0.762, "step": 6619 }, { "epoch": 0.95, "learning_rate": 2.0992018831913262e-07, "loss": 0.7662, "step": 6620 }, { "epoch": 0.95, "learning_rate": 2.0876150783810444e-07, "loss": 0.7567, "step": 6621 }, { "epoch": 0.95, "learning_rate": 2.0760601154469483e-07, "loss": 0.7737, "step": 6622 }, { "epoch": 0.95, "learning_rate": 2.0645369968765814e-07, "loss": 0.76, "step": 6623 }, { "epoch": 0.95, "learning_rate": 2.053045725150543e-07, "loss": 0.7701, "step": 6624 }, { "epoch": 0.95, "learning_rate": 2.041586302742654e-07, "loss": 0.7813, "step": 6625 }, { "epoch": 0.95, "learning_rate": 2.0301587321198412e-07, "loss": 0.7985, "step": 6626 }, { "epoch": 0.95, "learning_rate": 2.0187630157421698e-07, "loss": 0.5951, "step": 6627 }, { "epoch": 0.95, "learning_rate": 2.0073991560628447e-07, "loss": 0.7701, "step": 6628 }, { "epoch": 0.95, "learning_rate": 1.9960671555282084e-07, "loss": 0.7846, "step": 6629 }, { "epoch": 0.95, "learning_rate": 1.9847670165778264e-07, "loss": 0.7885, "step": 6630 }, { "epoch": 0.95, "learning_rate": 1.9734987416442696e-07, "loss": 0.6362, "step": 6631 }, { "epoch": 0.95, "learning_rate": 1.962262333153364e-07, "loss": 0.815, "step": 6632 }, { "epoch": 0.95, "learning_rate": 1.9510577935239916e-07, "loss": 0.7087, "step": 6633 }, { "epoch": 0.95, "learning_rate": 1.9398851251682558e-07, "loss": 0.7478, "step": 6634 }, { "epoch": 0.95, "learning_rate": 1.9287443304913166e-07, "loss": 0.8019, "step": 6635 }, { "epoch": 0.95, "learning_rate": 1.9176354118915384e-07, "loss": 0.736, "step": 6636 }, { "epoch": 0.95, "learning_rate": 1.9065583717603918e-07, "loss": 0.6409, "step": 6637 }, { "epoch": 0.95, "learning_rate": 1.8955132124824858e-07, "loss": 0.6828, "step": 6638 }, { "epoch": 0.95, "learning_rate": 1.884499936435552e-07, "loss": 0.7634, "step": 6639 }, { "epoch": 0.95, "learning_rate": 1.8735185459904936e-07, "loss": 0.6741, "step": 6640 }, { "epoch": 0.95, "learning_rate": 1.8625690435113364e-07, "loss": 0.7564, "step": 6641 }, { "epoch": 0.95, "learning_rate": 1.8516514313552447e-07, "loss": 0.752, "step": 6642 }, { "epoch": 0.95, "learning_rate": 1.840765711872472e-07, "loss": 0.808, "step": 6643 }, { "epoch": 0.95, "learning_rate": 1.8299118874064936e-07, "loss": 0.7224, "step": 6644 }, { "epoch": 0.95, "learning_rate": 1.8190899602938239e-07, "loss": 0.7785, "step": 6645 }, { "epoch": 0.95, "learning_rate": 1.8082999328641992e-07, "loss": 0.7285, "step": 6646 }, { "epoch": 0.95, "learning_rate": 1.7975418074404115e-07, "loss": 0.6401, "step": 6647 }, { "epoch": 0.95, "learning_rate": 1.7868155863384417e-07, "loss": 0.6574, "step": 6648 }, { "epoch": 0.95, "learning_rate": 1.7761212718673593e-07, "loss": 0.5043, "step": 6649 }, { "epoch": 0.95, "learning_rate": 1.7654588663293725e-07, "loss": 0.7564, "step": 6650 }, { "epoch": 0.95, "learning_rate": 1.7548283720198954e-07, "loss": 0.7701, "step": 6651 }, { "epoch": 0.95, "learning_rate": 1.7442297912273643e-07, "loss": 0.7321, "step": 6652 }, { "epoch": 0.95, "learning_rate": 1.7336631262333868e-07, "loss": 0.7877, "step": 6653 }, { "epoch": 0.95, "learning_rate": 1.7231283793127107e-07, "loss": 0.7416, "step": 6654 }, { "epoch": 0.95, "learning_rate": 1.7126255527332212e-07, "loss": 0.6412, "step": 6655 }, { "epoch": 0.95, "learning_rate": 1.702154648755927e-07, "loss": 0.6621, "step": 6656 }, { "epoch": 0.95, "learning_rate": 1.6917156696348911e-07, "loss": 0.7874, "step": 6657 }, { "epoch": 0.95, "learning_rate": 1.681308617617433e-07, "loss": 0.6819, "step": 6658 }, { "epoch": 0.95, "learning_rate": 1.670933494943927e-07, "loss": 0.7807, "step": 6659 }, { "epoch": 0.95, "learning_rate": 1.660590303847853e-07, "loss": 0.7171, "step": 6660 }, { "epoch": 0.95, "learning_rate": 1.6502790465558472e-07, "loss": 0.7327, "step": 6661 }, { "epoch": 0.95, "learning_rate": 1.6399997252876997e-07, "loss": 0.7793, "step": 6662 }, { "epoch": 0.95, "learning_rate": 1.629752342256241e-07, "loss": 0.7123, "step": 6663 }, { "epoch": 0.95, "learning_rate": 1.6195368996675397e-07, "loss": 0.75, "step": 6664 }, { "epoch": 0.95, "learning_rate": 1.60935339972067e-07, "loss": 0.7179, "step": 6665 }, { "epoch": 0.95, "learning_rate": 1.5992018446079283e-07, "loss": 0.6961, "step": 6666 }, { "epoch": 0.96, "learning_rate": 1.5890822365146995e-07, "loss": 0.6105, "step": 6667 }, { "epoch": 0.96, "learning_rate": 1.5789945776194247e-07, "loss": 0.8873, "step": 6668 }, { "epoch": 0.96, "learning_rate": 1.5689388700937834e-07, "loss": 0.8532, "step": 6669 }, { "epoch": 0.96, "learning_rate": 1.5589151161025273e-07, "loss": 0.8025, "step": 6670 }, { "epoch": 0.96, "learning_rate": 1.54892331780348e-07, "loss": 0.7863, "step": 6671 }, { "epoch": 0.96, "learning_rate": 1.5389634773476546e-07, "loss": 0.7522, "step": 6672 }, { "epoch": 0.96, "learning_rate": 1.529035596879136e-07, "loss": 0.7001, "step": 6673 }, { "epoch": 0.96, "learning_rate": 1.5191396785351808e-07, "loss": 0.7687, "step": 6674 }, { "epoch": 0.96, "learning_rate": 1.5092757244461352e-07, "loss": 0.7249, "step": 6675 }, { "epoch": 0.96, "learning_rate": 1.499443736735434e-07, "loss": 0.7757, "step": 6676 }, { "epoch": 0.96, "learning_rate": 1.489643717519684e-07, "loss": 0.7874, "step": 6677 }, { "epoch": 0.96, "learning_rate": 1.4798756689085812e-07, "loss": 0.7123, "step": 6678 }, { "epoch": 0.96, "learning_rate": 1.4701395930049267e-07, "loss": 0.8371, "step": 6679 }, { "epoch": 0.96, "learning_rate": 1.4604354919046937e-07, "loss": 0.7472, "step": 6680 }, { "epoch": 0.96, "learning_rate": 1.4507633676969113e-07, "loss": 0.5393, "step": 6681 }, { "epoch": 0.96, "learning_rate": 1.4411232224637638e-07, "loss": 0.6928, "step": 6682 }, { "epoch": 0.96, "learning_rate": 1.4315150582805082e-07, "loss": 0.68, "step": 6683 }, { "epoch": 0.96, "learning_rate": 1.4219388772155727e-07, "loss": 0.5898, "step": 6684 }, { "epoch": 0.96, "learning_rate": 1.412394681330459e-07, "loss": 0.7394, "step": 6685 }, { "epoch": 0.96, "learning_rate": 1.4028824726797895e-07, "loss": 0.8323, "step": 6686 }, { "epoch": 0.96, "learning_rate": 1.39340225331131e-07, "loss": 0.7573, "step": 6687 }, { "epoch": 0.96, "learning_rate": 1.3839540252659045e-07, "loss": 0.8061, "step": 6688 }, { "epoch": 0.96, "learning_rate": 1.3745377905775126e-07, "loss": 0.6557, "step": 6689 }, { "epoch": 0.96, "learning_rate": 1.3651535512732127e-07, "loss": 0.6537, "step": 6690 }, { "epoch": 0.96, "learning_rate": 1.355801309373239e-07, "loss": 0.7037, "step": 6691 }, { "epoch": 0.96, "learning_rate": 1.3464810668908477e-07, "loss": 0.8041, "step": 6692 }, { "epoch": 0.96, "learning_rate": 1.3371928258324838e-07, "loss": 0.9955, "step": 6693 }, { "epoch": 0.96, "learning_rate": 1.3279365881976812e-07, "loss": 0.7545, "step": 6694 }, { "epoch": 0.96, "learning_rate": 1.318712355979046e-07, "loss": 0.9637, "step": 6695 }, { "epoch": 0.96, "learning_rate": 1.309520131162373e-07, "loss": 0.7617, "step": 6696 }, { "epoch": 0.96, "learning_rate": 1.3003599157264955e-07, "loss": 0.683, "step": 6697 }, { "epoch": 0.96, "learning_rate": 1.2912317116433704e-07, "loss": 0.7813, "step": 6698 }, { "epoch": 0.96, "learning_rate": 1.2821355208781082e-07, "loss": 0.7958, "step": 6699 }, { "epoch": 0.96, "learning_rate": 1.2730713453888432e-07, "loss": 0.6002, "step": 6700 }, { "epoch": 0.96, "learning_rate": 1.2640391871269142e-07, "loss": 0.798, "step": 6701 }, { "epoch": 0.96, "learning_rate": 1.2550390480366992e-07, "loss": 0.6822, "step": 6702 }, { "epoch": 0.96, "learning_rate": 1.2460709300557148e-07, "loss": 0.8092, "step": 6703 }, { "epoch": 0.96, "learning_rate": 1.237134835114584e-07, "loss": 0.8343, "step": 6704 }, { "epoch": 0.96, "learning_rate": 1.2282307651370007e-07, "loss": 0.716, "step": 6705 }, { "epoch": 0.96, "learning_rate": 1.2193587220397983e-07, "loss": 0.8326, "step": 6706 }, { "epoch": 0.96, "learning_rate": 1.210518707732916e-07, "loss": 0.6272, "step": 6707 }, { "epoch": 0.96, "learning_rate": 1.201710724119398e-07, "loss": 0.6063, "step": 6708 }, { "epoch": 0.96, "learning_rate": 1.1929347730953778e-07, "loss": 0.8968, "step": 6709 }, { "epoch": 0.96, "learning_rate": 1.1841908565500769e-07, "loss": 0.644, "step": 6710 }, { "epoch": 0.96, "learning_rate": 1.1754789763658736e-07, "loss": 0.5552, "step": 6711 }, { "epoch": 0.96, "learning_rate": 1.1667991344182171e-07, "loss": 0.6741, "step": 6712 }, { "epoch": 0.96, "learning_rate": 1.1581513325756632e-07, "loss": 0.6576, "step": 6713 }, { "epoch": 0.96, "learning_rate": 1.1495355726998558e-07, "loss": 0.803, "step": 6714 }, { "epoch": 0.96, "learning_rate": 1.1409518566455612e-07, "loss": 0.6836, "step": 6715 }, { "epoch": 0.96, "learning_rate": 1.1324001862606348e-07, "loss": 0.6702, "step": 6716 }, { "epoch": 0.96, "learning_rate": 1.1238805633860705e-07, "loss": 0.684, "step": 6717 }, { "epoch": 0.96, "learning_rate": 1.1153929898558845e-07, "loss": 0.7612, "step": 6718 }, { "epoch": 0.96, "learning_rate": 1.1069374674972655e-07, "loss": 0.7416, "step": 6719 }, { "epoch": 0.96, "learning_rate": 1.098513998130507e-07, "loss": 0.6758, "step": 6720 }, { "epoch": 0.96, "learning_rate": 1.0901225835689255e-07, "loss": 0.7054, "step": 6721 }, { "epoch": 0.96, "learning_rate": 1.0817632256189924e-07, "loss": 0.861, "step": 6722 }, { "epoch": 0.96, "learning_rate": 1.0734359260803017e-07, "loss": 0.7015, "step": 6723 }, { "epoch": 0.96, "learning_rate": 1.065140686745486e-07, "loss": 0.9174, "step": 6724 }, { "epoch": 0.96, "learning_rate": 1.0568775094003169e-07, "loss": 0.8387, "step": 6725 }, { "epoch": 0.96, "learning_rate": 1.0486463958236548e-07, "loss": 0.8086, "step": 6726 }, { "epoch": 0.96, "learning_rate": 1.0404473477874488e-07, "loss": 0.6593, "step": 6727 }, { "epoch": 0.96, "learning_rate": 1.0322803670567538e-07, "loss": 0.7612, "step": 6728 }, { "epoch": 0.96, "learning_rate": 1.0241454553896967e-07, "loss": 0.7335, "step": 6729 }, { "epoch": 0.96, "learning_rate": 1.0160426145375768e-07, "loss": 0.8047, "step": 6730 }, { "epoch": 0.96, "learning_rate": 1.0079718462446819e-07, "loss": 0.7879, "step": 6731 }, { "epoch": 0.96, "learning_rate": 9.999331522484723e-08, "loss": 0.6889, "step": 6732 }, { "epoch": 0.96, "learning_rate": 9.919265342794637e-08, "loss": 0.734, "step": 6733 }, { "epoch": 0.96, "learning_rate": 9.839519940613107e-08, "loss": 0.7506, "step": 6734 }, { "epoch": 0.96, "learning_rate": 9.760095333107233e-08, "loss": 0.6579, "step": 6735 }, { "epoch": 0.96, "learning_rate": 9.680991537374839e-08, "loss": 0.7985, "step": 6736 }, { "epoch": 0.97, "learning_rate": 9.602208570445637e-08, "loss": 0.726, "step": 6737 }, { "epoch": 0.97, "learning_rate": 9.523746449279225e-08, "loss": 0.606, "step": 6738 }, { "epoch": 0.97, "learning_rate": 9.445605190766426e-08, "loss": 0.8337, "step": 6739 }, { "epoch": 0.97, "learning_rate": 9.367784811729452e-08, "loss": 0.5183, "step": 6740 }, { "epoch": 0.97, "learning_rate": 9.290285328921067e-08, "loss": 0.5545, "step": 6741 }, { "epoch": 0.97, "learning_rate": 9.213106759024926e-08, "loss": 0.6652, "step": 6742 }, { "epoch": 0.97, "learning_rate": 9.136249118655571e-08, "loss": 0.7076, "step": 6743 }, { "epoch": 0.97, "learning_rate": 9.059712424358602e-08, "loss": 0.7974, "step": 6744 }, { "epoch": 0.97, "learning_rate": 8.983496692610504e-08, "loss": 0.745, "step": 6745 }, { "epoch": 0.97, "learning_rate": 8.907601939818822e-08, "loss": 0.6281, "step": 6746 }, { "epoch": 0.97, "learning_rate": 8.832028182321483e-08, "loss": 0.8638, "step": 6747 }, { "epoch": 0.97, "learning_rate": 8.756775436387809e-08, "loss": 0.6903, "step": 6748 }, { "epoch": 0.97, "learning_rate": 8.681843718217842e-08, "loss": 0.6401, "step": 6749 }, { "epoch": 0.97, "learning_rate": 8.607233043942509e-08, "loss": 0.6829, "step": 6750 }, { "epoch": 0.97, "learning_rate": 8.53294342962363e-08, "loss": 0.832, "step": 6751 }, { "epoch": 0.97, "learning_rate": 8.458974891254079e-08, "loss": 0.7768, "step": 6752 }, { "epoch": 0.97, "learning_rate": 8.385327444757285e-08, "loss": 0.8304, "step": 6753 }, { "epoch": 0.97, "learning_rate": 8.312001105987732e-08, "loss": 0.7879, "step": 6754 }, { "epoch": 0.97, "learning_rate": 8.238995890730794e-08, "loss": 0.8426, "step": 6755 }, { "epoch": 0.97, "learning_rate": 8.166311814702732e-08, "loss": 0.8577, "step": 6756 }, { "epoch": 0.97, "learning_rate": 8.093948893550529e-08, "loss": 0.5926, "step": 6757 }, { "epoch": 0.97, "learning_rate": 8.021907142852059e-08, "loss": 0.7076, "step": 6758 }, { "epoch": 0.97, "learning_rate": 7.950186578116414e-08, "loss": 0.8064, "step": 6759 }, { "epoch": 0.97, "learning_rate": 7.878787214783078e-08, "loss": 0.8052, "step": 6760 }, { "epoch": 0.97, "learning_rate": 7.80770906822259e-08, "loss": 0.75, "step": 6761 }, { "epoch": 0.97, "learning_rate": 7.736952153736376e-08, "loss": 0.6362, "step": 6762 }, { "epoch": 0.97, "learning_rate": 7.666516486556418e-08, "loss": 0.7846, "step": 6763 }, { "epoch": 0.97, "learning_rate": 7.596402081846088e-08, "loss": 0.7483, "step": 6764 }, { "epoch": 0.97, "learning_rate": 7.526608954699143e-08, "loss": 0.7885, "step": 6765 }, { "epoch": 0.97, "learning_rate": 7.457137120140234e-08, "loss": 0.716, "step": 6766 }, { "epoch": 0.97, "learning_rate": 7.387986593125062e-08, "loss": 0.8119, "step": 6767 }, { "epoch": 0.97, "learning_rate": 7.319157388539888e-08, "loss": 0.6747, "step": 6768 }, { "epoch": 0.97, "learning_rate": 7.250649521202024e-08, "loss": 0.5342, "step": 6769 }, { "epoch": 0.97, "learning_rate": 7.182463005859507e-08, "loss": 0.7891, "step": 6770 }, { "epoch": 0.97, "learning_rate": 7.114597857191263e-08, "loss": 0.8563, "step": 6771 }, { "epoch": 0.97, "learning_rate": 7.047054089806936e-08, "loss": 0.596, "step": 6772 }, { "epoch": 0.97, "learning_rate": 6.979831718246899e-08, "loss": 0.6889, "step": 6773 }, { "epoch": 0.97, "learning_rate": 6.912930756982572e-08, "loss": 0.5499, "step": 6774 }, { "epoch": 0.97, "learning_rate": 6.84635122041627e-08, "loss": 0.7779, "step": 6775 }, { "epoch": 0.97, "learning_rate": 6.780093122880527e-08, "loss": 0.8058, "step": 6776 }, { "epoch": 0.97, "learning_rate": 6.714156478639432e-08, "loss": 0.659, "step": 6777 }, { "epoch": 0.97, "learning_rate": 6.648541301887301e-08, "loss": 0.6297, "step": 6778 }, { "epoch": 0.97, "learning_rate": 6.583247606749498e-08, "loss": 0.8253, "step": 6779 }, { "epoch": 0.97, "learning_rate": 6.518275407282281e-08, "loss": 0.6632, "step": 6780 }, { "epoch": 0.97, "learning_rate": 6.453624717472295e-08, "loss": 0.74, "step": 6781 }, { "epoch": 0.97, "learning_rate": 6.389295551237407e-08, "loss": 0.7813, "step": 6782 }, { "epoch": 0.97, "learning_rate": 6.325287922426038e-08, "loss": 0.6063, "step": 6783 }, { "epoch": 0.97, "learning_rate": 6.261601844817499e-08, "loss": 0.7539, "step": 6784 }, { "epoch": 0.97, "learning_rate": 6.198237332121826e-08, "loss": 0.6508, "step": 6785 }, { "epoch": 0.97, "learning_rate": 6.135194397979771e-08, "loss": 0.6507, "step": 6786 }, { "epoch": 0.97, "learning_rate": 6.07247305596298e-08, "loss": 0.7453, "step": 6787 }, { "epoch": 0.97, "learning_rate": 6.01007331957365e-08, "loss": 0.5237, "step": 6788 }, { "epoch": 0.97, "learning_rate": 5.947995202245204e-08, "loss": 0.8956, "step": 6789 }, { "epoch": 0.97, "learning_rate": 5.886238717341119e-08, "loss": 0.6848, "step": 6790 }, { "epoch": 0.97, "learning_rate": 5.824803878156426e-08, "loss": 0.7871, "step": 6791 }, { "epoch": 0.97, "learning_rate": 5.763690697916379e-08, "loss": 0.8415, "step": 6792 }, { "epoch": 0.97, "learning_rate": 5.702899189777122e-08, "loss": 0.6738, "step": 6793 }, { "epoch": 0.97, "learning_rate": 5.642429366825519e-08, "loss": 0.8396, "step": 6794 }, { "epoch": 0.97, "learning_rate": 5.582281242079157e-08, "loss": 0.9392, "step": 6795 }, { "epoch": 0.97, "learning_rate": 5.5224548284866783e-08, "loss": 0.8069, "step": 6796 }, { "epoch": 0.97, "learning_rate": 5.462950138927114e-08, "loss": 0.8984, "step": 6797 }, { "epoch": 0.97, "learning_rate": 5.403767186210218e-08, "loss": 0.7054, "step": 6798 }, { "epoch": 0.97, "learning_rate": 5.3449059830767976e-08, "loss": 0.8186, "step": 6799 }, { "epoch": 0.97, "learning_rate": 5.286366542198218e-08, "loss": 0.6562, "step": 6800 }, { "epoch": 0.97, "learning_rate": 5.2281488761763974e-08, "loss": 0.659, "step": 6801 }, { "epoch": 0.97, "learning_rate": 5.170252997544311e-08, "loss": 0.7246, "step": 6802 }, { "epoch": 0.97, "learning_rate": 5.1126789187654896e-08, "loss": 0.8753, "step": 6803 }, { "epoch": 0.97, "learning_rate": 5.055426652234019e-08, "loss": 0.7612, "step": 6804 }, { "epoch": 0.97, "learning_rate": 4.998496210275205e-08, "loss": 0.8136, "step": 6805 }, { "epoch": 0.97, "learning_rate": 4.941887605144746e-08, "loss": 0.8262, "step": 6806 }, { "epoch": 0.98, "learning_rate": 4.885600849028726e-08, "loss": 0.7472, "step": 6807 }, { "epoch": 0.98, "learning_rate": 4.829635954044454e-08, "loss": 0.772, "step": 6808 }, { "epoch": 0.98, "learning_rate": 4.773992932239957e-08, "loss": 0.7383, "step": 6809 }, { "epoch": 0.98, "learning_rate": 4.7186717955936546e-08, "loss": 0.7076, "step": 6810 }, { "epoch": 0.98, "learning_rate": 4.66367255601502e-08, "loss": 0.757, "step": 6811 }, { "epoch": 0.98, "learning_rate": 4.608995225343582e-08, "loss": 0.8817, "step": 6812 }, { "epoch": 0.98, "learning_rate": 4.554639815350425e-08, "loss": 0.7282, "step": 6813 }, { "epoch": 0.98, "learning_rate": 4.5006063377368567e-08, "loss": 0.7511, "step": 6814 }, { "epoch": 0.98, "learning_rate": 4.446894804134738e-08, "loss": 0.8438, "step": 6815 }, { "epoch": 0.98, "learning_rate": 4.393505226106986e-08, "loss": 0.7026, "step": 6816 }, { "epoch": 0.98, "learning_rate": 4.340437615147075e-08, "loss": 0.784, "step": 6817 }, { "epoch": 0.98, "learning_rate": 4.287691982679198e-08, "loss": 0.7199, "step": 6818 }, { "epoch": 0.98, "learning_rate": 4.235268340057941e-08, "loss": 0.7743, "step": 6819 }, { "epoch": 0.98, "learning_rate": 4.1831666985691076e-08, "loss": 0.6501, "step": 6820 }, { "epoch": 0.98, "learning_rate": 4.131387069428727e-08, "loss": 0.6281, "step": 6821 }, { "epoch": 0.98, "learning_rate": 4.079929463783716e-08, "loss": 0.6931, "step": 6822 }, { "epoch": 0.98, "learning_rate": 4.028793892711713e-08, "loss": 0.8783, "step": 6823 }, { "epoch": 0.98, "learning_rate": 3.977980367220746e-08, "loss": 0.6098, "step": 6824 }, { "epoch": 0.98, "learning_rate": 3.9274888982497315e-08, "loss": 0.817, "step": 6825 }, { "epoch": 0.98, "learning_rate": 3.8773194966684743e-08, "loss": 0.623, "step": 6826 }, { "epoch": 0.98, "learning_rate": 3.8274721732770026e-08, "loss": 0.8119, "step": 6827 }, { "epoch": 0.98, "learning_rate": 3.777946938806231e-08, "loss": 0.9314, "step": 6828 }, { "epoch": 0.98, "learning_rate": 3.7287438039176335e-08, "loss": 0.7626, "step": 6829 }, { "epoch": 0.98, "learning_rate": 3.679862779203569e-08, "loss": 0.8923, "step": 6830 }, { "epoch": 0.98, "learning_rate": 3.631303875186953e-08, "loss": 0.9375, "step": 6831 }, { "epoch": 0.98, "learning_rate": 3.583067102321258e-08, "loss": 0.803, "step": 6832 }, { "epoch": 0.98, "learning_rate": 3.53515247099051e-08, "loss": 0.6791, "step": 6833 }, { "epoch": 0.98, "learning_rate": 3.487559991509792e-08, "loss": 0.7305, "step": 6834 }, { "epoch": 0.98, "learning_rate": 3.4402896741244085e-08, "loss": 0.7567, "step": 6835 }, { "epoch": 0.98, "learning_rate": 3.3933415290105516e-08, "loss": 0.6451, "step": 6836 }, { "epoch": 0.98, "learning_rate": 3.346715566274972e-08, "loss": 0.7807, "step": 6837 }, { "epoch": 0.98, "learning_rate": 3.3004117959553045e-08, "loss": 0.9124, "step": 6838 }, { "epoch": 0.98, "learning_rate": 3.254430228019245e-08, "loss": 0.8117, "step": 6839 }, { "epoch": 0.98, "learning_rate": 3.2087708723657075e-08, "loss": 0.7829, "step": 6840 }, { "epoch": 0.98, "learning_rate": 3.1634337388239956e-08, "loss": 0.601, "step": 6841 }, { "epoch": 0.98, "learning_rate": 3.118418837153969e-08, "loss": 0.7453, "step": 6842 }, { "epoch": 0.98, "learning_rate": 3.073726177046376e-08, "loss": 0.7829, "step": 6843 }, { "epoch": 0.98, "learning_rate": 3.029355768122188e-08, "loss": 0.7561, "step": 6844 }, { "epoch": 0.98, "learning_rate": 2.985307619933597e-08, "loss": 0.8203, "step": 6845 }, { "epoch": 0.98, "learning_rate": 2.9415817419630175e-08, "loss": 0.6141, "step": 6846 }, { "epoch": 0.98, "learning_rate": 2.8981781436232537e-08, "loss": 0.8186, "step": 6847 }, { "epoch": 0.98, "learning_rate": 2.855096834258164e-08, "loss": 0.6646, "step": 6848 }, { "epoch": 0.98, "learning_rate": 2.8123378231423302e-08, "loss": 0.784, "step": 6849 }, { "epoch": 0.98, "learning_rate": 2.7699011194803892e-08, "loss": 0.8047, "step": 6850 }, { "epoch": 0.98, "learning_rate": 2.7277867324081994e-08, "loss": 0.7455, "step": 6851 }, { "epoch": 0.98, "learning_rate": 2.6859946709916762e-08, "loss": 0.6956, "step": 6852 }, { "epoch": 0.98, "learning_rate": 2.644524944227622e-08, "loss": 0.75, "step": 6853 }, { "epoch": 0.98, "learning_rate": 2.6033775610437293e-08, "loss": 0.6406, "step": 6854 }, { "epoch": 0.98, "learning_rate": 2.562552530297746e-08, "loss": 0.7673, "step": 6855 }, { "epoch": 0.98, "learning_rate": 2.522049860778475e-08, "loss": 0.6191, "step": 6856 }, { "epoch": 0.98, "learning_rate": 2.4818695612049412e-08, "loss": 0.7235, "step": 6857 }, { "epoch": 0.98, "learning_rate": 2.4420116402270598e-08, "loss": 0.5073, "step": 6858 }, { "epoch": 0.98, "learning_rate": 2.4024761064254664e-08, "loss": 0.6641, "step": 6859 }, { "epoch": 0.98, "learning_rate": 2.3632629683106864e-08, "loss": 0.7377, "step": 6860 }, { "epoch": 0.98, "learning_rate": 2.324372234324801e-08, "loss": 0.7941, "step": 6861 }, { "epoch": 0.98, "learning_rate": 2.2858039128399456e-08, "loss": 0.6825, "step": 6862 }, { "epoch": 0.98, "learning_rate": 2.247558012158646e-08, "loss": 0.7031, "step": 6863 }, { "epoch": 0.98, "learning_rate": 2.209634540514649e-08, "loss": 0.7578, "step": 6864 }, { "epoch": 0.98, "learning_rate": 2.1720335060715913e-08, "loss": 0.7969, "step": 6865 }, { "epoch": 0.98, "learning_rate": 2.134754916924331e-08, "loss": 0.8842, "step": 6866 }, { "epoch": 0.98, "learning_rate": 2.0977987810979483e-08, "loss": 0.7151, "step": 6867 }, { "epoch": 0.98, "learning_rate": 2.061165106548246e-08, "loss": 0.7426, "step": 6868 }, { "epoch": 0.98, "learning_rate": 2.0248539011612498e-08, "loss": 0.5045, "step": 6869 }, { "epoch": 0.98, "learning_rate": 1.988865172754206e-08, "loss": 0.6733, "step": 6870 }, { "epoch": 0.98, "learning_rate": 1.9531989290745845e-08, "loss": 0.6097, "step": 6871 }, { "epoch": 0.98, "learning_rate": 1.9178551778000763e-08, "loss": 0.7863, "step": 6872 }, { "epoch": 0.98, "learning_rate": 1.882833926539762e-08, "loss": 0.7511, "step": 6873 }, { "epoch": 0.98, "learning_rate": 1.848135182832611e-08, "loss": 0.7037, "step": 6874 }, { "epoch": 0.98, "learning_rate": 1.813758954148481e-08, "loss": 0.8108, "step": 6875 }, { "epoch": 0.98, "learning_rate": 1.77970524788762e-08, "loss": 0.9314, "step": 6876 }, { "epoch": 0.99, "learning_rate": 1.7459740713809956e-08, "loss": 0.7026, "step": 6877 }, { "epoch": 0.99, "learning_rate": 1.7125654318903007e-08, "loss": 0.7372, "step": 6878 }, { "epoch": 0.99, "learning_rate": 1.6794793366072813e-08, "loss": 0.7829, "step": 6879 }, { "epoch": 0.99, "learning_rate": 1.6467157926545738e-08, "loss": 0.875, "step": 6880 }, { "epoch": 0.99, "learning_rate": 1.6142748070857026e-08, "loss": 0.6233, "step": 6881 }, { "epoch": 0.99, "learning_rate": 1.5821563868840817e-08, "loss": 0.6155, "step": 6882 }, { "epoch": 0.99, "learning_rate": 1.550360538964013e-08, "loss": 0.8304, "step": 6883 }, { "epoch": 0.99, "learning_rate": 1.5188872701705215e-08, "loss": 0.8616, "step": 6884 }, { "epoch": 0.99, "learning_rate": 1.4877365872790205e-08, "loss": 0.8359, "step": 6885 }, { "epoch": 0.99, "learning_rate": 1.4569084969953128e-08, "loss": 0.7879, "step": 6886 }, { "epoch": 0.99, "learning_rate": 1.4264030059560896e-08, "loss": 0.6641, "step": 6887 }, { "epoch": 0.99, "learning_rate": 1.3962201207282643e-08, "loss": 0.6766, "step": 6888 }, { "epoch": 0.99, "learning_rate": 1.3663598478096395e-08, "loss": 0.822, "step": 6889 }, { "epoch": 0.99, "learning_rate": 1.3368221936282399e-08, "loss": 0.9721, "step": 6890 }, { "epoch": 0.99, "learning_rate": 1.3076071645429789e-08, "loss": 0.7081, "step": 6891 }, { "epoch": 0.99, "learning_rate": 1.2787147668429922e-08, "loss": 0.7148, "step": 6892 }, { "epoch": 0.99, "learning_rate": 1.250145006748138e-08, "loss": 1.1596, "step": 6893 }, { "epoch": 0.99, "learning_rate": 1.2218978904086631e-08, "loss": 0.7182, "step": 6894 }, { "epoch": 0.99, "learning_rate": 1.1939734239057033e-08, "loss": 0.632, "step": 6895 }, { "epoch": 0.99, "learning_rate": 1.1663716132504499e-08, "loss": 0.5971, "step": 6896 }, { "epoch": 0.99, "learning_rate": 1.1390924643851496e-08, "loss": 0.8198, "step": 6897 }, { "epoch": 0.99, "learning_rate": 1.1121359831821054e-08, "loss": 0.7684, "step": 6898 }, { "epoch": 0.99, "learning_rate": 1.0855021754443417e-08, "loss": 0.6362, "step": 6899 }, { "epoch": 0.99, "learning_rate": 1.0591910469057719e-08, "loss": 0.6752, "step": 6900 }, { "epoch": 0.99, "learning_rate": 1.0332026032303654e-08, "loss": 0.7268, "step": 6901 }, { "epoch": 0.99, "learning_rate": 1.007536850012647e-08, "loss": 0.7241, "step": 6902 }, { "epoch": 0.99, "learning_rate": 9.8219379277803e-09, "loss": 0.7628, "step": 6903 }, { "epoch": 0.99, "learning_rate": 9.57173436981984e-09, "loss": 0.8387, "step": 6904 }, { "epoch": 0.99, "learning_rate": 9.324757880112e-09, "loss": 0.8901, "step": 6905 }, { "epoch": 0.99, "learning_rate": 9.08100851182092e-09, "loss": 0.7377, "step": 6906 }, { "epoch": 0.99, "learning_rate": 8.840486317421292e-09, "loss": 0.8527, "step": 6907 }, { "epoch": 0.99, "learning_rate": 8.603191348690032e-09, "loss": 0.7952, "step": 6908 }, { "epoch": 0.99, "learning_rate": 8.369123656714606e-09, "loss": 0.8661, "step": 6909 }, { "epoch": 0.99, "learning_rate": 8.138283291879711e-09, "loss": 0.7723, "step": 6910 }, { "epoch": 0.99, "learning_rate": 7.910670303882261e-09, "loss": 0.7645, "step": 6911 }, { "epoch": 0.99, "learning_rate": 7.68628474171973e-09, "loss": 0.5806, "step": 6912 }, { "epoch": 0.99, "learning_rate": 7.465126653698473e-09, "loss": 0.8432, "step": 6913 }, { "epoch": 0.99, "learning_rate": 7.247196087428742e-09, "loss": 0.5571, "step": 6914 }, { "epoch": 0.99, "learning_rate": 7.0324930898246765e-09, "loss": 0.642, "step": 6915 }, { "epoch": 0.99, "learning_rate": 6.82101770710597e-09, "loss": 0.7246, "step": 6916 }, { "epoch": 0.99, "learning_rate": 6.6127699847978726e-09, "loss": 0.6593, "step": 6917 }, { "epoch": 0.99, "learning_rate": 6.407749967732856e-09, "loss": 0.6853, "step": 6918 }, { "epoch": 0.99, "learning_rate": 6.2059577000439514e-09, "loss": 0.7229, "step": 6919 }, { "epoch": 0.99, "learning_rate": 6.007393225176405e-09, "loss": 0.6324, "step": 6920 }, { "epoch": 0.99, "learning_rate": 5.812056585871028e-09, "loss": 0.6775, "step": 6921 }, { "epoch": 0.99, "learning_rate": 5.619947824180849e-09, "loss": 0.7215, "step": 6922 }, { "epoch": 0.99, "learning_rate": 5.43106698146445e-09, "loss": 0.721, "step": 6923 }, { "epoch": 0.99, "learning_rate": 5.245414098380974e-09, "loss": 0.7497, "step": 6924 }, { "epoch": 0.99, "learning_rate": 5.062989214896785e-09, "loss": 0.6961, "step": 6925 }, { "epoch": 0.99, "learning_rate": 4.883792370283801e-09, "loss": 0.601, "step": 6926 }, { "epoch": 0.99, "learning_rate": 4.707823603117833e-09, "loss": 0.6523, "step": 6927 }, { "epoch": 0.99, "learning_rate": 4.5350829512835756e-09, "loss": 0.7729, "step": 6928 }, { "epoch": 0.99, "learning_rate": 4.365570451964618e-09, "loss": 0.5681, "step": 6929 }, { "epoch": 0.99, "learning_rate": 4.199286141655101e-09, "loss": 0.8717, "step": 6930 }, { "epoch": 0.99, "learning_rate": 4.036230056149726e-09, "loss": 0.6886, "step": 6931 }, { "epoch": 0.99, "learning_rate": 3.876402230552078e-09, "loss": 0.8072, "step": 6932 }, { "epoch": 0.99, "learning_rate": 3.719802699267971e-09, "loss": 0.7545, "step": 6933 }, { "epoch": 0.99, "learning_rate": 3.566431496010436e-09, "loss": 0.6038, "step": 6934 }, { "epoch": 0.99, "learning_rate": 3.4162886537963954e-09, "loss": 0.6239, "step": 6935 }, { "epoch": 0.99, "learning_rate": 3.2693742049466624e-09, "loss": 0.6099, "step": 6936 }, { "epoch": 0.99, "learning_rate": 3.1256881810909354e-09, "loss": 0.7539, "step": 6937 }, { "epoch": 0.99, "learning_rate": 2.985230613157808e-09, "loss": 0.7327, "step": 6938 }, { "epoch": 0.99, "learning_rate": 2.848001531386424e-09, "loss": 0.603, "step": 6939 }, { "epoch": 0.99, "learning_rate": 2.7140009653198182e-09, "loss": 0.875, "step": 6940 }, { "epoch": 0.99, "learning_rate": 2.5832289438015854e-09, "loss": 0.6839, "step": 6941 }, { "epoch": 0.99, "learning_rate": 2.455685494985871e-09, "loss": 0.8968, "step": 6942 }, { "epoch": 0.99, "learning_rate": 2.3313706463307105e-09, "loss": 0.8309, "step": 6943 }, { "epoch": 0.99, "learning_rate": 2.210284424596365e-09, "loss": 0.6177, "step": 6944 }, { "epoch": 0.99, "learning_rate": 2.0924268558503157e-09, "loss": 0.8131, "step": 6945 }, { "epoch": 0.99, "learning_rate": 1.9777979654639344e-09, "loss": 0.8209, "step": 6946 }, { "epoch": 1.0, "learning_rate": 1.8663977781141483e-09, "loss": 0.8133, "step": 6947 }, { "epoch": 1.0, "learning_rate": 1.7582263177817748e-09, "loss": 0.6526, "step": 6948 }, { "epoch": 1.0, "learning_rate": 1.6532836077565173e-09, "loss": 0.8027, "step": 6949 }, { "epoch": 1.0, "learning_rate": 1.551569670625308e-09, "loss": 0.7723, "step": 6950 }, { "epoch": 1.0, "learning_rate": 1.4530845282889616e-09, "loss": 0.803, "step": 6951 }, { "epoch": 1.0, "learning_rate": 1.3578282019471866e-09, "loss": 0.635, "step": 6952 }, { "epoch": 1.0, "learning_rate": 1.2658007121052472e-09, "loss": 0.8025, "step": 6953 }, { "epoch": 1.0, "learning_rate": 1.1770020785756286e-09, "loss": 0.8248, "step": 6954 }, { "epoch": 1.0, "learning_rate": 1.0914323204747057e-09, "loss": 0.7573, "step": 6955 }, { "epoch": 1.0, "learning_rate": 1.0090914562227438e-09, "loss": 0.643, "step": 6956 }, { "epoch": 1.0, "learning_rate": 9.299795035455638e-10, "loss": 0.7372, "step": 6957 }, { "epoch": 1.0, "learning_rate": 8.540964794762074e-10, "loss": 0.8465, "step": 6958 }, { "epoch": 1.0, "learning_rate": 7.814424003466103e-10, "loss": 0.7695, "step": 6959 }, { "epoch": 1.0, "learning_rate": 7.120172818009252e-10, "loss": 0.6978, "step": 6960 }, { "epoch": 1.0, "learning_rate": 6.458211387821988e-10, "loss": 0.6236, "step": 6961 }, { "epoch": 1.0, "learning_rate": 5.828539855423642e-10, "loss": 0.6228, "step": 6962 }, { "epoch": 1.0, "learning_rate": 5.231158356355792e-10, "loss": 0.7369, "step": 6963 }, { "epoch": 1.0, "learning_rate": 4.66606701921557e-10, "loss": 0.6844, "step": 6964 }, { "epoch": 1.0, "learning_rate": 4.13326596568897e-10, "loss": 0.6556, "step": 6965 }, { "epoch": 1.0, "learning_rate": 3.632755310434277e-10, "loss": 0.6598, "step": 6966 }, { "epoch": 1.0, "learning_rate": 3.164535161198634e-10, "loss": 0.5992, "step": 6967 }, { "epoch": 1.0, "learning_rate": 2.7286056188180476e-10, "loss": 0.6501, "step": 6968 }, { "epoch": 1.0, "learning_rate": 2.3249667771008122e-10, "loss": 0.8097, "step": 6969 }, { "epoch": 1.0, "learning_rate": 1.9536187229607372e-10, "loss": 0.6184, "step": 6970 }, { "epoch": 1.0, "learning_rate": 1.6145615363172272e-10, "loss": 0.7896, "step": 6971 }, { "epoch": 1.0, "learning_rate": 1.3077952901785484e-10, "loss": 0.8401, "step": 6972 }, { "epoch": 1.0, "learning_rate": 1.0333200505918684e-10, "loss": 0.7489, "step": 6973 }, { "epoch": 1.0, "learning_rate": 7.91135876626603e-11, "loss": 0.7441, "step": 6974 }, { "epoch": 1.0, "learning_rate": 5.812428204243769e-11, "loss": 0.695, "step": 6975 }, { "epoch": 1.0, "learning_rate": 4.03640927165716e-11, "loss": 0.5957, "step": 6976 }, { "epoch": 1.0, "learning_rate": 2.583302351033545e-11, "loss": 0.6802, "step": 6977 }, { "epoch": 1.0, "learning_rate": 1.4531077549562178e-11, "loss": 0.7207, "step": 6978 }, { "epoch": 1.0, "learning_rate": 6.458257267305534e-12, "loss": 0.7472, "step": 6979 }, { "epoch": 1.0, "learning_rate": 1.6145644038401131e-12, "loss": 0.9146, "step": 6980 }, { "epoch": 1.0, "learning_rate": 0.0, "loss": 0.7324, "step": 6981 }, { "epoch": 1.0, "step": 6981, "total_flos": 358640158814208.0, "train_loss": 0.8420886357403334, "train_runtime": 68132.8659, "train_samples_per_second": 4.303, "train_steps_per_second": 0.102 } ], "max_steps": 6981, "num_train_epochs": 1, "total_flos": 358640158814208.0, "trial_name": null, "trial_params": null }