{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.9999150887322747, "eval_steps": 500, "global_step": 2944, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0, "learning_rate": 2.0000000000000002e-07, "loss": 1.3042, "step": 1 }, { "epoch": 0.0, "learning_rate": 4.0000000000000003e-07, "loss": 0.9504, "step": 2 }, { "epoch": 0.0, "learning_rate": 6.000000000000001e-07, "loss": 1.0178, "step": 3 }, { "epoch": 0.0, "learning_rate": 8.000000000000001e-07, "loss": 0.9327, "step": 4 }, { "epoch": 0.0, "learning_rate": 1.0000000000000002e-06, "loss": 1.0915, "step": 5 }, { "epoch": 0.0, "learning_rate": 1.2000000000000002e-06, "loss": 1.0031, "step": 6 }, { "epoch": 0.0, "learning_rate": 1.4000000000000001e-06, "loss": 0.9832, "step": 7 }, { "epoch": 0.0, "learning_rate": 1.6000000000000001e-06, "loss": 1.1326, "step": 8 }, { "epoch": 0.0, "learning_rate": 1.8e-06, "loss": 1.0474, "step": 9 }, { "epoch": 0.0, "learning_rate": 2.0000000000000003e-06, "loss": 1.1069, "step": 10 }, { "epoch": 0.0, "learning_rate": 2.2e-06, "loss": 0.9859, "step": 11 }, { "epoch": 0.0, "learning_rate": 2.4000000000000003e-06, "loss": 1.0538, "step": 12 }, { "epoch": 0.0, "learning_rate": 2.6e-06, "loss": 0.9548, "step": 13 }, { "epoch": 0.0, "learning_rate": 2.8000000000000003e-06, "loss": 1.0719, "step": 14 }, { "epoch": 0.01, "learning_rate": 3e-06, "loss": 1.2635, "step": 15 }, { "epoch": 0.01, "learning_rate": 3.2000000000000003e-06, "loss": 1.1509, "step": 16 }, { "epoch": 0.01, "learning_rate": 3.4000000000000005e-06, "loss": 1.0657, "step": 17 }, { "epoch": 0.01, "learning_rate": 3.6e-06, "loss": 1.2754, "step": 18 }, { "epoch": 0.01, "learning_rate": 3.8e-06, "loss": 0.9697, "step": 19 }, { "epoch": 0.01, "learning_rate": 4.000000000000001e-06, "loss": 0.9908, "step": 20 }, { "epoch": 0.01, "learning_rate": 4.2000000000000004e-06, "loss": 1.4632, "step": 21 }, { "epoch": 0.01, "learning_rate": 4.4e-06, "loss": 1.0316, "step": 22 }, { "epoch": 0.01, "learning_rate": 4.6e-06, "loss": 1.2311, "step": 23 }, { "epoch": 0.01, "learning_rate": 4.800000000000001e-06, "loss": 1.1077, "step": 24 }, { "epoch": 0.01, "learning_rate": 5e-06, "loss": 1.1171, "step": 25 }, { "epoch": 0.01, "learning_rate": 5.2e-06, "loss": 1.104, "step": 26 }, { "epoch": 0.01, "learning_rate": 5.4e-06, "loss": 0.925, "step": 27 }, { "epoch": 0.01, "learning_rate": 5.600000000000001e-06, "loss": 1.0434, "step": 28 }, { "epoch": 0.01, "learning_rate": 5.8e-06, "loss": 1.3773, "step": 29 }, { "epoch": 0.01, "learning_rate": 6e-06, "loss": 1.2922, "step": 30 }, { "epoch": 0.01, "learning_rate": 6.2e-06, "loss": 1.2745, "step": 31 }, { "epoch": 0.01, "learning_rate": 6.4000000000000006e-06, "loss": 1.0006, "step": 32 }, { "epoch": 0.01, "learning_rate": 6.6e-06, "loss": 1.0357, "step": 33 }, { "epoch": 0.01, "learning_rate": 6.800000000000001e-06, "loss": 0.794, "step": 34 }, { "epoch": 0.01, "learning_rate": 7.000000000000001e-06, "loss": 0.9732, "step": 35 }, { "epoch": 0.01, "learning_rate": 7.2e-06, "loss": 1.1687, "step": 36 }, { "epoch": 0.01, "learning_rate": 7.4e-06, "loss": 1.1174, "step": 37 }, { "epoch": 0.01, "learning_rate": 7.6e-06, "loss": 1.1431, "step": 38 }, { "epoch": 0.01, "learning_rate": 7.8e-06, "loss": 0.9108, "step": 39 }, { "epoch": 0.01, "learning_rate": 8.000000000000001e-06, "loss": 1.2346, "step": 40 }, { "epoch": 0.01, "learning_rate": 8.200000000000001e-06, "loss": 1.0235, "step": 41 }, { "epoch": 0.01, "learning_rate": 8.400000000000001e-06, "loss": 1.0657, "step": 42 }, { "epoch": 0.01, "learning_rate": 8.599999999999999e-06, "loss": 1.3677, "step": 43 }, { "epoch": 0.01, "learning_rate": 8.8e-06, "loss": 1.0487, "step": 44 }, { "epoch": 0.02, "learning_rate": 9e-06, "loss": 1.2712, "step": 45 }, { "epoch": 0.02, "learning_rate": 9.2e-06, "loss": 0.846, "step": 46 }, { "epoch": 0.02, "learning_rate": 9.4e-06, "loss": 1.1446, "step": 47 }, { "epoch": 0.02, "learning_rate": 9.600000000000001e-06, "loss": 0.8558, "step": 48 }, { "epoch": 0.02, "learning_rate": 9.800000000000001e-06, "loss": 1.4719, "step": 49 }, { "epoch": 0.02, "learning_rate": 1e-05, "loss": 1.435, "step": 50 }, { "epoch": 0.02, "learning_rate": 1.02e-05, "loss": 1.2399, "step": 51 }, { "epoch": 0.02, "learning_rate": 1.04e-05, "loss": 1.1599, "step": 52 }, { "epoch": 0.02, "learning_rate": 1.06e-05, "loss": 1.274, "step": 53 }, { "epoch": 0.02, "learning_rate": 1.08e-05, "loss": 1.0344, "step": 54 }, { "epoch": 0.02, "learning_rate": 1.1000000000000001e-05, "loss": 1.0804, "step": 55 }, { "epoch": 0.02, "learning_rate": 1.1200000000000001e-05, "loss": 0.853, "step": 56 }, { "epoch": 0.02, "learning_rate": 1.1400000000000001e-05, "loss": 1.2724, "step": 57 }, { "epoch": 0.02, "learning_rate": 1.16e-05, "loss": 1.3649, "step": 58 }, { "epoch": 0.02, "learning_rate": 1.18e-05, "loss": 1.0567, "step": 59 }, { "epoch": 0.02, "learning_rate": 1.2e-05, "loss": 1.2989, "step": 60 }, { "epoch": 0.02, "learning_rate": 1.22e-05, "loss": 1.3125, "step": 61 }, { "epoch": 0.02, "learning_rate": 1.24e-05, "loss": 1.2558, "step": 62 }, { "epoch": 0.02, "learning_rate": 1.2600000000000001e-05, "loss": 1.0725, "step": 63 }, { "epoch": 0.02, "learning_rate": 1.2800000000000001e-05, "loss": 0.8163, "step": 64 }, { "epoch": 0.02, "learning_rate": 1.3000000000000001e-05, "loss": 0.8683, "step": 65 }, { "epoch": 0.02, "learning_rate": 1.32e-05, "loss": 0.9044, "step": 66 }, { "epoch": 0.02, "learning_rate": 1.3400000000000002e-05, "loss": 0.9762, "step": 67 }, { "epoch": 0.02, "learning_rate": 1.3600000000000002e-05, "loss": 1.1501, "step": 68 }, { "epoch": 0.02, "learning_rate": 1.3800000000000002e-05, "loss": 1.2536, "step": 69 }, { "epoch": 0.02, "learning_rate": 1.4000000000000001e-05, "loss": 1.4515, "step": 70 }, { "epoch": 0.02, "learning_rate": 1.42e-05, "loss": 1.0518, "step": 71 }, { "epoch": 0.02, "learning_rate": 1.44e-05, "loss": 1.2878, "step": 72 }, { "epoch": 0.02, "learning_rate": 1.4599999999999999e-05, "loss": 1.0588, "step": 73 }, { "epoch": 0.03, "learning_rate": 1.48e-05, "loss": 0.9676, "step": 74 }, { "epoch": 0.03, "learning_rate": 1.5e-05, "loss": 1.1548, "step": 75 }, { "epoch": 0.03, "learning_rate": 1.52e-05, "loss": 0.8494, "step": 76 }, { "epoch": 0.03, "learning_rate": 1.54e-05, "loss": 1.0182, "step": 77 }, { "epoch": 0.03, "learning_rate": 1.56e-05, "loss": 1.0368, "step": 78 }, { "epoch": 0.03, "learning_rate": 1.58e-05, "loss": 1.0434, "step": 79 }, { "epoch": 0.03, "learning_rate": 1.6000000000000003e-05, "loss": 1.1522, "step": 80 }, { "epoch": 0.03, "learning_rate": 1.62e-05, "loss": 0.9713, "step": 81 }, { "epoch": 0.03, "learning_rate": 1.6400000000000002e-05, "loss": 1.0958, "step": 82 }, { "epoch": 0.03, "learning_rate": 1.66e-05, "loss": 1.2681, "step": 83 }, { "epoch": 0.03, "learning_rate": 1.6800000000000002e-05, "loss": 1.2481, "step": 84 }, { "epoch": 0.03, "learning_rate": 1.7000000000000003e-05, "loss": 0.9709, "step": 85 }, { "epoch": 0.03, "learning_rate": 1.7199999999999998e-05, "loss": 1.0639, "step": 86 }, { "epoch": 0.03, "learning_rate": 1.74e-05, "loss": 1.2025, "step": 87 }, { "epoch": 0.03, "learning_rate": 1.76e-05, "loss": 0.9136, "step": 88 }, { "epoch": 0.03, "learning_rate": 1.78e-05, "loss": 1.3257, "step": 89 }, { "epoch": 0.03, "learning_rate": 1.8e-05, "loss": 1.1845, "step": 90 }, { "epoch": 0.03, "learning_rate": 1.8200000000000002e-05, "loss": 1.0897, "step": 91 }, { "epoch": 0.03, "learning_rate": 1.84e-05, "loss": 0.8114, "step": 92 }, { "epoch": 0.03, "learning_rate": 1.86e-05, "loss": 1.4733, "step": 93 }, { "epoch": 0.03, "learning_rate": 1.88e-05, "loss": 1.0023, "step": 94 }, { "epoch": 0.03, "learning_rate": 1.9e-05, "loss": 1.2305, "step": 95 }, { "epoch": 0.03, "learning_rate": 1.9200000000000003e-05, "loss": 1.0121, "step": 96 }, { "epoch": 0.03, "learning_rate": 1.94e-05, "loss": 1.3582, "step": 97 }, { "epoch": 0.03, "learning_rate": 1.9600000000000002e-05, "loss": 1.1127, "step": 98 }, { "epoch": 0.03, "learning_rate": 1.9800000000000004e-05, "loss": 0.882, "step": 99 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 1.1614, "step": 100 }, { "epoch": 0.03, "learning_rate": 2.0200000000000003e-05, "loss": 0.9753, "step": 101 }, { "epoch": 0.03, "learning_rate": 2.04e-05, "loss": 1.003, "step": 102 }, { "epoch": 0.03, "learning_rate": 2.06e-05, "loss": 1.0903, "step": 103 }, { "epoch": 0.04, "learning_rate": 2.08e-05, "loss": 1.0653, "step": 104 }, { "epoch": 0.04, "learning_rate": 2.1e-05, "loss": 1.0854, "step": 105 }, { "epoch": 0.04, "learning_rate": 2.12e-05, "loss": 1.0456, "step": 106 }, { "epoch": 0.04, "learning_rate": 2.1400000000000002e-05, "loss": 0.8484, "step": 107 }, { "epoch": 0.04, "learning_rate": 2.16e-05, "loss": 1.1733, "step": 108 }, { "epoch": 0.04, "learning_rate": 2.18e-05, "loss": 1.1247, "step": 109 }, { "epoch": 0.04, "learning_rate": 2.2000000000000003e-05, "loss": 1.11, "step": 110 }, { "epoch": 0.04, "learning_rate": 2.22e-05, "loss": 1.2462, "step": 111 }, { "epoch": 0.04, "learning_rate": 2.2400000000000002e-05, "loss": 0.9104, "step": 112 }, { "epoch": 0.04, "learning_rate": 2.26e-05, "loss": 1.2491, "step": 113 }, { "epoch": 0.04, "learning_rate": 2.2800000000000002e-05, "loss": 1.3274, "step": 114 }, { "epoch": 0.04, "learning_rate": 2.3000000000000003e-05, "loss": 0.8595, "step": 115 }, { "epoch": 0.04, "learning_rate": 2.32e-05, "loss": 1.3044, "step": 116 }, { "epoch": 0.04, "learning_rate": 2.3400000000000003e-05, "loss": 0.9495, "step": 117 }, { "epoch": 0.04, "learning_rate": 2.36e-05, "loss": 1.2426, "step": 118 }, { "epoch": 0.04, "learning_rate": 2.38e-05, "loss": 1.1397, "step": 119 }, { "epoch": 0.04, "learning_rate": 2.4e-05, "loss": 1.3382, "step": 120 }, { "epoch": 0.04, "learning_rate": 2.4200000000000002e-05, "loss": 1.05, "step": 121 }, { "epoch": 0.04, "learning_rate": 2.44e-05, "loss": 1.1322, "step": 122 }, { "epoch": 0.04, "learning_rate": 2.46e-05, "loss": 1.0397, "step": 123 }, { "epoch": 0.04, "learning_rate": 2.48e-05, "loss": 1.1735, "step": 124 }, { "epoch": 0.04, "learning_rate": 2.5e-05, "loss": 1.5592, "step": 125 }, { "epoch": 0.04, "learning_rate": 2.5200000000000003e-05, "loss": 1.3029, "step": 126 }, { "epoch": 0.04, "learning_rate": 2.54e-05, "loss": 1.1156, "step": 127 }, { "epoch": 0.04, "learning_rate": 2.5600000000000002e-05, "loss": 1.1272, "step": 128 }, { "epoch": 0.04, "learning_rate": 2.58e-05, "loss": 1.0726, "step": 129 }, { "epoch": 0.04, "learning_rate": 2.6000000000000002e-05, "loss": 0.9848, "step": 130 }, { "epoch": 0.04, "learning_rate": 2.6200000000000003e-05, "loss": 0.9126, "step": 131 }, { "epoch": 0.04, "learning_rate": 2.64e-05, "loss": 1.0913, "step": 132 }, { "epoch": 0.05, "learning_rate": 2.6600000000000003e-05, "loss": 1.2037, "step": 133 }, { "epoch": 0.05, "learning_rate": 2.6800000000000004e-05, "loss": 1.0029, "step": 134 }, { "epoch": 0.05, "learning_rate": 2.7000000000000002e-05, "loss": 0.9625, "step": 135 }, { "epoch": 0.05, "learning_rate": 2.7200000000000004e-05, "loss": 0.9805, "step": 136 }, { "epoch": 0.05, "learning_rate": 2.7400000000000002e-05, "loss": 0.9376, "step": 137 }, { "epoch": 0.05, "learning_rate": 2.7600000000000003e-05, "loss": 1.0234, "step": 138 }, { "epoch": 0.05, "learning_rate": 2.7800000000000005e-05, "loss": 0.9657, "step": 139 }, { "epoch": 0.05, "learning_rate": 2.8000000000000003e-05, "loss": 1.4375, "step": 140 }, { "epoch": 0.05, "learning_rate": 2.8199999999999998e-05, "loss": 0.8543, "step": 141 }, { "epoch": 0.05, "learning_rate": 2.84e-05, "loss": 0.9477, "step": 142 }, { "epoch": 0.05, "learning_rate": 2.86e-05, "loss": 0.9067, "step": 143 }, { "epoch": 0.05, "learning_rate": 2.88e-05, "loss": 0.9293, "step": 144 }, { "epoch": 0.05, "learning_rate": 2.9e-05, "loss": 0.9171, "step": 145 }, { "epoch": 0.05, "learning_rate": 2.9199999999999998e-05, "loss": 1.1132, "step": 146 }, { "epoch": 0.05, "learning_rate": 2.94e-05, "loss": 1.1732, "step": 147 }, { "epoch": 0.05, "learning_rate": 2.96e-05, "loss": 0.8066, "step": 148 }, { "epoch": 0.05, "learning_rate": 2.98e-05, "loss": 0.9202, "step": 149 }, { "epoch": 0.05, "learning_rate": 3e-05, "loss": 1.085, "step": 150 }, { "epoch": 0.05, "learning_rate": 3.02e-05, "loss": 1.1825, "step": 151 }, { "epoch": 0.05, "learning_rate": 3.04e-05, "loss": 1.0228, "step": 152 }, { "epoch": 0.05, "learning_rate": 3.06e-05, "loss": 0.9687, "step": 153 }, { "epoch": 0.05, "learning_rate": 3.08e-05, "loss": 1.1493, "step": 154 }, { "epoch": 0.05, "learning_rate": 3.1e-05, "loss": 1.1225, "step": 155 }, { "epoch": 0.05, "learning_rate": 3.12e-05, "loss": 1.1042, "step": 156 }, { "epoch": 0.05, "learning_rate": 3.1400000000000004e-05, "loss": 0.9101, "step": 157 }, { "epoch": 0.05, "learning_rate": 3.16e-05, "loss": 0.9214, "step": 158 }, { "epoch": 0.05, "learning_rate": 3.18e-05, "loss": 1.3579, "step": 159 }, { "epoch": 0.05, "learning_rate": 3.2000000000000005e-05, "loss": 1.3856, "step": 160 }, { "epoch": 0.05, "learning_rate": 3.2200000000000003e-05, "loss": 1.0046, "step": 161 }, { "epoch": 0.06, "learning_rate": 3.24e-05, "loss": 1.2557, "step": 162 }, { "epoch": 0.06, "learning_rate": 3.26e-05, "loss": 1.0828, "step": 163 }, { "epoch": 0.06, "learning_rate": 3.2800000000000004e-05, "loss": 0.7402, "step": 164 }, { "epoch": 0.06, "learning_rate": 3.3e-05, "loss": 1.1537, "step": 165 }, { "epoch": 0.06, "learning_rate": 3.32e-05, "loss": 1.1073, "step": 166 }, { "epoch": 0.06, "learning_rate": 3.3400000000000005e-05, "loss": 1.1896, "step": 167 }, { "epoch": 0.06, "learning_rate": 3.3600000000000004e-05, "loss": 1.0392, "step": 168 }, { "epoch": 0.06, "learning_rate": 3.38e-05, "loss": 0.9375, "step": 169 }, { "epoch": 0.06, "learning_rate": 3.4000000000000007e-05, "loss": 1.2697, "step": 170 }, { "epoch": 0.06, "learning_rate": 3.4200000000000005e-05, "loss": 0.9357, "step": 171 }, { "epoch": 0.06, "learning_rate": 3.4399999999999996e-05, "loss": 0.9192, "step": 172 }, { "epoch": 0.06, "learning_rate": 3.46e-05, "loss": 0.8176, "step": 173 }, { "epoch": 0.06, "learning_rate": 3.48e-05, "loss": 0.7519, "step": 174 }, { "epoch": 0.06, "learning_rate": 3.5e-05, "loss": 1.0933, "step": 175 }, { "epoch": 0.06, "learning_rate": 3.52e-05, "loss": 1.1732, "step": 176 }, { "epoch": 0.06, "learning_rate": 3.54e-05, "loss": 1.1382, "step": 177 }, { "epoch": 0.06, "learning_rate": 3.56e-05, "loss": 1.1032, "step": 178 }, { "epoch": 0.06, "learning_rate": 3.58e-05, "loss": 1.0158, "step": 179 }, { "epoch": 0.06, "learning_rate": 3.6e-05, "loss": 1.062, "step": 180 }, { "epoch": 0.06, "learning_rate": 3.62e-05, "loss": 1.0559, "step": 181 }, { "epoch": 0.06, "learning_rate": 3.6400000000000004e-05, "loss": 1.2562, "step": 182 }, { "epoch": 0.06, "learning_rate": 3.66e-05, "loss": 1.1242, "step": 183 }, { "epoch": 0.06, "learning_rate": 3.68e-05, "loss": 1.0644, "step": 184 }, { "epoch": 0.06, "learning_rate": 3.7e-05, "loss": 0.9732, "step": 185 }, { "epoch": 0.06, "learning_rate": 3.72e-05, "loss": 0.9875, "step": 186 }, { "epoch": 0.06, "learning_rate": 3.74e-05, "loss": 0.9166, "step": 187 }, { "epoch": 0.06, "learning_rate": 3.76e-05, "loss": 1.288, "step": 188 }, { "epoch": 0.06, "learning_rate": 3.7800000000000004e-05, "loss": 0.9281, "step": 189 }, { "epoch": 0.06, "learning_rate": 3.8e-05, "loss": 1.0369, "step": 190 }, { "epoch": 0.06, "learning_rate": 3.82e-05, "loss": 1.0053, "step": 191 }, { "epoch": 0.07, "learning_rate": 3.8400000000000005e-05, "loss": 1.0702, "step": 192 }, { "epoch": 0.07, "learning_rate": 3.86e-05, "loss": 1.1292, "step": 193 }, { "epoch": 0.07, "learning_rate": 3.88e-05, "loss": 0.8626, "step": 194 }, { "epoch": 0.07, "learning_rate": 3.9000000000000006e-05, "loss": 1.1731, "step": 195 }, { "epoch": 0.07, "learning_rate": 3.9200000000000004e-05, "loss": 1.0882, "step": 196 }, { "epoch": 0.07, "learning_rate": 3.94e-05, "loss": 1.1651, "step": 197 }, { "epoch": 0.07, "learning_rate": 3.960000000000001e-05, "loss": 0.9222, "step": 198 }, { "epoch": 0.07, "learning_rate": 3.9800000000000005e-05, "loss": 1.3165, "step": 199 }, { "epoch": 0.07, "learning_rate": 4e-05, "loss": 1.2229, "step": 200 }, { "epoch": 0.07, "learning_rate": 4.02e-05, "loss": 1.0394, "step": 201 }, { "epoch": 0.07, "learning_rate": 4.0400000000000006e-05, "loss": 1.1599, "step": 202 }, { "epoch": 0.07, "learning_rate": 4.0600000000000004e-05, "loss": 1.377, "step": 203 }, { "epoch": 0.07, "learning_rate": 4.08e-05, "loss": 0.9441, "step": 204 }, { "epoch": 0.07, "learning_rate": 4.1e-05, "loss": 0.8284, "step": 205 }, { "epoch": 0.07, "learning_rate": 4.12e-05, "loss": 0.7681, "step": 206 }, { "epoch": 0.07, "learning_rate": 4.14e-05, "loss": 0.9902, "step": 207 }, { "epoch": 0.07, "learning_rate": 4.16e-05, "loss": 0.833, "step": 208 }, { "epoch": 0.07, "learning_rate": 4.18e-05, "loss": 1.2702, "step": 209 }, { "epoch": 0.07, "learning_rate": 4.2e-05, "loss": 0.8871, "step": 210 }, { "epoch": 0.07, "learning_rate": 4.22e-05, "loss": 1.1086, "step": 211 }, { "epoch": 0.07, "learning_rate": 4.24e-05, "loss": 0.8249, "step": 212 }, { "epoch": 0.07, "learning_rate": 4.26e-05, "loss": 1.0551, "step": 213 }, { "epoch": 0.07, "learning_rate": 4.2800000000000004e-05, "loss": 1.1374, "step": 214 }, { "epoch": 0.07, "learning_rate": 4.3e-05, "loss": 1.2192, "step": 215 }, { "epoch": 0.07, "learning_rate": 4.32e-05, "loss": 0.8779, "step": 216 }, { "epoch": 0.07, "learning_rate": 4.3400000000000005e-05, "loss": 0.8216, "step": 217 }, { "epoch": 0.07, "learning_rate": 4.36e-05, "loss": 0.9427, "step": 218 }, { "epoch": 0.07, "learning_rate": 4.38e-05, "loss": 0.9762, "step": 219 }, { "epoch": 0.07, "learning_rate": 4.4000000000000006e-05, "loss": 1.1454, "step": 220 }, { "epoch": 0.08, "learning_rate": 4.4200000000000004e-05, "loss": 1.2732, "step": 221 }, { "epoch": 0.08, "learning_rate": 4.44e-05, "loss": 1.0001, "step": 222 }, { "epoch": 0.08, "learning_rate": 4.46e-05, "loss": 1.3287, "step": 223 }, { "epoch": 0.08, "learning_rate": 4.4800000000000005e-05, "loss": 0.99, "step": 224 }, { "epoch": 0.08, "learning_rate": 4.5e-05, "loss": 1.0645, "step": 225 }, { "epoch": 0.08, "learning_rate": 4.52e-05, "loss": 1.2423, "step": 226 }, { "epoch": 0.08, "learning_rate": 4.5400000000000006e-05, "loss": 0.8737, "step": 227 }, { "epoch": 0.08, "learning_rate": 4.5600000000000004e-05, "loss": 1.0079, "step": 228 }, { "epoch": 0.08, "learning_rate": 4.58e-05, "loss": 1.0202, "step": 229 }, { "epoch": 0.08, "learning_rate": 4.600000000000001e-05, "loss": 1.3559, "step": 230 }, { "epoch": 0.08, "learning_rate": 4.6200000000000005e-05, "loss": 1.1469, "step": 231 }, { "epoch": 0.08, "learning_rate": 4.64e-05, "loss": 1.2149, "step": 232 }, { "epoch": 0.08, "learning_rate": 4.660000000000001e-05, "loss": 0.9838, "step": 233 }, { "epoch": 0.08, "learning_rate": 4.6800000000000006e-05, "loss": 1.2363, "step": 234 }, { "epoch": 0.08, "learning_rate": 4.7e-05, "loss": 1.2322, "step": 235 }, { "epoch": 0.08, "learning_rate": 4.72e-05, "loss": 1.2505, "step": 236 }, { "epoch": 0.08, "learning_rate": 4.74e-05, "loss": 1.0421, "step": 237 }, { "epoch": 0.08, "learning_rate": 4.76e-05, "loss": 1.0015, "step": 238 }, { "epoch": 0.08, "learning_rate": 4.78e-05, "loss": 1.4199, "step": 239 }, { "epoch": 0.08, "learning_rate": 4.8e-05, "loss": 0.9814, "step": 240 }, { "epoch": 0.08, "learning_rate": 4.82e-05, "loss": 0.8572, "step": 241 }, { "epoch": 0.08, "learning_rate": 4.8400000000000004e-05, "loss": 0.9791, "step": 242 }, { "epoch": 0.08, "learning_rate": 4.86e-05, "loss": 0.9173, "step": 243 }, { "epoch": 0.08, "learning_rate": 4.88e-05, "loss": 1.3136, "step": 244 }, { "epoch": 0.08, "learning_rate": 4.9e-05, "loss": 1.3671, "step": 245 }, { "epoch": 0.08, "learning_rate": 4.92e-05, "loss": 0.7593, "step": 246 }, { "epoch": 0.08, "learning_rate": 4.94e-05, "loss": 0.9689, "step": 247 }, { "epoch": 0.08, "learning_rate": 4.96e-05, "loss": 1.0316, "step": 248 }, { "epoch": 0.08, "learning_rate": 4.9800000000000004e-05, "loss": 1.2019, "step": 249 }, { "epoch": 0.08, "learning_rate": 5e-05, "loss": 0.7544, "step": 250 }, { "epoch": 0.09, "learning_rate": 5.02e-05, "loss": 1.1063, "step": 251 }, { "epoch": 0.09, "learning_rate": 5.0400000000000005e-05, "loss": 1.2543, "step": 252 }, { "epoch": 0.09, "learning_rate": 5.0600000000000003e-05, "loss": 1.123, "step": 253 }, { "epoch": 0.09, "learning_rate": 5.08e-05, "loss": 1.422, "step": 254 }, { "epoch": 0.09, "learning_rate": 5.1000000000000006e-05, "loss": 1.2677, "step": 255 }, { "epoch": 0.09, "learning_rate": 5.1200000000000004e-05, "loss": 1.0405, "step": 256 }, { "epoch": 0.09, "learning_rate": 5.14e-05, "loss": 1.0652, "step": 257 }, { "epoch": 0.09, "learning_rate": 5.16e-05, "loss": 1.0584, "step": 258 }, { "epoch": 0.09, "learning_rate": 5.1800000000000005e-05, "loss": 1.205, "step": 259 }, { "epoch": 0.09, "learning_rate": 5.2000000000000004e-05, "loss": 1.0404, "step": 260 }, { "epoch": 0.09, "learning_rate": 5.22e-05, "loss": 1.0146, "step": 261 }, { "epoch": 0.09, "learning_rate": 5.2400000000000007e-05, "loss": 1.0297, "step": 262 }, { "epoch": 0.09, "learning_rate": 5.2600000000000005e-05, "loss": 1.0076, "step": 263 }, { "epoch": 0.09, "learning_rate": 5.28e-05, "loss": 1.1728, "step": 264 }, { "epoch": 0.09, "learning_rate": 5.300000000000001e-05, "loss": 0.9752, "step": 265 }, { "epoch": 0.09, "learning_rate": 5.3200000000000006e-05, "loss": 1.1303, "step": 266 }, { "epoch": 0.09, "learning_rate": 5.3400000000000004e-05, "loss": 0.9298, "step": 267 }, { "epoch": 0.09, "learning_rate": 5.360000000000001e-05, "loss": 1.0034, "step": 268 }, { "epoch": 0.09, "learning_rate": 5.380000000000001e-05, "loss": 1.2169, "step": 269 }, { "epoch": 0.09, "learning_rate": 5.4000000000000005e-05, "loss": 1.1117, "step": 270 }, { "epoch": 0.09, "learning_rate": 5.420000000000001e-05, "loss": 1.2457, "step": 271 }, { "epoch": 0.09, "learning_rate": 5.440000000000001e-05, "loss": 0.9889, "step": 272 }, { "epoch": 0.09, "learning_rate": 5.4600000000000006e-05, "loss": 1.0068, "step": 273 }, { "epoch": 0.09, "learning_rate": 5.4800000000000004e-05, "loss": 1.561, "step": 274 }, { "epoch": 0.09, "learning_rate": 5.500000000000001e-05, "loss": 0.8235, "step": 275 }, { "epoch": 0.09, "learning_rate": 5.520000000000001e-05, "loss": 1.1859, "step": 276 }, { "epoch": 0.09, "learning_rate": 5.5400000000000005e-05, "loss": 1.2235, "step": 277 }, { "epoch": 0.09, "learning_rate": 5.560000000000001e-05, "loss": 1.025, "step": 278 }, { "epoch": 0.09, "learning_rate": 5.580000000000001e-05, "loss": 0.9945, "step": 279 }, { "epoch": 0.1, "learning_rate": 5.6000000000000006e-05, "loss": 1.1553, "step": 280 }, { "epoch": 0.1, "learning_rate": 5.620000000000001e-05, "loss": 1.0269, "step": 281 }, { "epoch": 0.1, "learning_rate": 5.6399999999999995e-05, "loss": 0.8173, "step": 282 }, { "epoch": 0.1, "learning_rate": 5.66e-05, "loss": 0.9081, "step": 283 }, { "epoch": 0.1, "learning_rate": 5.68e-05, "loss": 1.1829, "step": 284 }, { "epoch": 0.1, "learning_rate": 5.6999999999999996e-05, "loss": 1.1293, "step": 285 }, { "epoch": 0.1, "learning_rate": 5.72e-05, "loss": 0.816, "step": 286 }, { "epoch": 0.1, "learning_rate": 5.74e-05, "loss": 1.0409, "step": 287 }, { "epoch": 0.1, "learning_rate": 5.76e-05, "loss": 0.8734, "step": 288 }, { "epoch": 0.1, "learning_rate": 5.7799999999999995e-05, "loss": 0.9681, "step": 289 }, { "epoch": 0.1, "learning_rate": 5.8e-05, "loss": 0.535, "step": 290 }, { "epoch": 0.1, "learning_rate": 5.82e-05, "loss": 1.0433, "step": 291 }, { "epoch": 0.1, "learning_rate": 5.8399999999999997e-05, "loss": 1.0958, "step": 292 }, { "epoch": 0.1, "learning_rate": 5.86e-05, "loss": 1.1221, "step": 293 }, { "epoch": 0.1, "learning_rate": 5.88e-05, "loss": 1.1149, "step": 294 }, { "epoch": 0.1, "learning_rate": 5.9e-05, "loss": 1.1072, "step": 295 }, { "epoch": 0.1, "learning_rate": 5.92e-05, "loss": 0.9271, "step": 296 }, { "epoch": 0.1, "learning_rate": 5.94e-05, "loss": 1.078, "step": 297 }, { "epoch": 0.1, "learning_rate": 5.96e-05, "loss": 1.095, "step": 298 }, { "epoch": 0.1, "learning_rate": 5.9800000000000003e-05, "loss": 1.0897, "step": 299 }, { "epoch": 0.1, "learning_rate": 6e-05, "loss": 0.8459, "step": 300 }, { "epoch": 0.1, "learning_rate": 6.02e-05, "loss": 1.1404, "step": 301 }, { "epoch": 0.1, "learning_rate": 6.04e-05, "loss": 1.0806, "step": 302 }, { "epoch": 0.1, "learning_rate": 6.06e-05, "loss": 1.2118, "step": 303 }, { "epoch": 0.1, "learning_rate": 6.08e-05, "loss": 0.9364, "step": 304 }, { "epoch": 0.1, "learning_rate": 6.1e-05, "loss": 0.9578, "step": 305 }, { "epoch": 0.1, "learning_rate": 6.12e-05, "loss": 1.3464, "step": 306 }, { "epoch": 0.1, "learning_rate": 6.14e-05, "loss": 1.0319, "step": 307 }, { "epoch": 0.1, "learning_rate": 6.16e-05, "loss": 1.1353, "step": 308 }, { "epoch": 0.1, "learning_rate": 6.18e-05, "loss": 1.0227, "step": 309 }, { "epoch": 0.11, "learning_rate": 6.2e-05, "loss": 1.1619, "step": 310 }, { "epoch": 0.11, "learning_rate": 6.220000000000001e-05, "loss": 1.1357, "step": 311 }, { "epoch": 0.11, "learning_rate": 6.24e-05, "loss": 0.8716, "step": 312 }, { "epoch": 0.11, "learning_rate": 6.26e-05, "loss": 1.1437, "step": 313 }, { "epoch": 0.11, "learning_rate": 6.280000000000001e-05, "loss": 0.8471, "step": 314 }, { "epoch": 0.11, "learning_rate": 6.3e-05, "loss": 0.9581, "step": 315 }, { "epoch": 0.11, "learning_rate": 6.32e-05, "loss": 1.2124, "step": 316 }, { "epoch": 0.11, "learning_rate": 6.340000000000001e-05, "loss": 1.0421, "step": 317 }, { "epoch": 0.11, "learning_rate": 6.36e-05, "loss": 1.0548, "step": 318 }, { "epoch": 0.11, "learning_rate": 6.38e-05, "loss": 1.191, "step": 319 }, { "epoch": 0.11, "learning_rate": 6.400000000000001e-05, "loss": 0.8576, "step": 320 }, { "epoch": 0.11, "learning_rate": 6.42e-05, "loss": 1.0541, "step": 321 }, { "epoch": 0.11, "learning_rate": 6.440000000000001e-05, "loss": 0.8636, "step": 322 }, { "epoch": 0.11, "learning_rate": 6.460000000000001e-05, "loss": 1.1277, "step": 323 }, { "epoch": 0.11, "learning_rate": 6.48e-05, "loss": 1.3043, "step": 324 }, { "epoch": 0.11, "learning_rate": 6.500000000000001e-05, "loss": 0.7842, "step": 325 }, { "epoch": 0.11, "learning_rate": 6.52e-05, "loss": 0.8742, "step": 326 }, { "epoch": 0.11, "learning_rate": 6.54e-05, "loss": 0.825, "step": 327 }, { "epoch": 0.11, "learning_rate": 6.560000000000001e-05, "loss": 1.1291, "step": 328 }, { "epoch": 0.11, "learning_rate": 6.58e-05, "loss": 1.4211, "step": 329 }, { "epoch": 0.11, "learning_rate": 6.6e-05, "loss": 1.2307, "step": 330 }, { "epoch": 0.11, "learning_rate": 6.620000000000001e-05, "loss": 0.9963, "step": 331 }, { "epoch": 0.11, "learning_rate": 6.64e-05, "loss": 0.966, "step": 332 }, { "epoch": 0.11, "learning_rate": 6.66e-05, "loss": 1.2536, "step": 333 }, { "epoch": 0.11, "learning_rate": 6.680000000000001e-05, "loss": 1.1252, "step": 334 }, { "epoch": 0.11, "learning_rate": 6.7e-05, "loss": 1.3534, "step": 335 }, { "epoch": 0.11, "learning_rate": 6.720000000000001e-05, "loss": 1.2422, "step": 336 }, { "epoch": 0.11, "learning_rate": 6.740000000000001e-05, "loss": 0.9947, "step": 337 }, { "epoch": 0.11, "learning_rate": 6.76e-05, "loss": 0.77, "step": 338 }, { "epoch": 0.12, "learning_rate": 6.780000000000001e-05, "loss": 1.1431, "step": 339 }, { "epoch": 0.12, "learning_rate": 6.800000000000001e-05, "loss": 1.1344, "step": 340 }, { "epoch": 0.12, "learning_rate": 6.82e-05, "loss": 0.9449, "step": 341 }, { "epoch": 0.12, "learning_rate": 6.840000000000001e-05, "loss": 0.9845, "step": 342 }, { "epoch": 0.12, "learning_rate": 6.860000000000001e-05, "loss": 1.1023, "step": 343 }, { "epoch": 0.12, "learning_rate": 6.879999999999999e-05, "loss": 0.9782, "step": 344 }, { "epoch": 0.12, "learning_rate": 6.9e-05, "loss": 1.2512, "step": 345 }, { "epoch": 0.12, "learning_rate": 6.92e-05, "loss": 0.9329, "step": 346 }, { "epoch": 0.12, "learning_rate": 6.939999999999999e-05, "loss": 1.2155, "step": 347 }, { "epoch": 0.12, "learning_rate": 6.96e-05, "loss": 0.8937, "step": 348 }, { "epoch": 0.12, "learning_rate": 6.98e-05, "loss": 0.8908, "step": 349 }, { "epoch": 0.12, "learning_rate": 7e-05, "loss": 1.3316, "step": 350 }, { "epoch": 0.12, "learning_rate": 7.02e-05, "loss": 1.1358, "step": 351 }, { "epoch": 0.12, "learning_rate": 7.04e-05, "loss": 0.7582, "step": 352 }, { "epoch": 0.12, "learning_rate": 7.06e-05, "loss": 0.8916, "step": 353 }, { "epoch": 0.12, "learning_rate": 7.08e-05, "loss": 0.8119, "step": 354 }, { "epoch": 0.12, "learning_rate": 7.1e-05, "loss": 0.8577, "step": 355 }, { "epoch": 0.12, "learning_rate": 7.12e-05, "loss": 0.9068, "step": 356 }, { "epoch": 0.12, "learning_rate": 7.14e-05, "loss": 1.0544, "step": 357 }, { "epoch": 0.12, "learning_rate": 7.16e-05, "loss": 1.1407, "step": 358 }, { "epoch": 0.12, "learning_rate": 7.18e-05, "loss": 0.9417, "step": 359 }, { "epoch": 0.12, "learning_rate": 7.2e-05, "loss": 1.1257, "step": 360 }, { "epoch": 0.12, "learning_rate": 7.22e-05, "loss": 0.9103, "step": 361 }, { "epoch": 0.12, "learning_rate": 7.24e-05, "loss": 1.0995, "step": 362 }, { "epoch": 0.12, "learning_rate": 7.26e-05, "loss": 0.8531, "step": 363 }, { "epoch": 0.12, "learning_rate": 7.280000000000001e-05, "loss": 1.1083, "step": 364 }, { "epoch": 0.12, "learning_rate": 7.3e-05, "loss": 1.0703, "step": 365 }, { "epoch": 0.12, "learning_rate": 7.32e-05, "loss": 1.1166, "step": 366 }, { "epoch": 0.12, "learning_rate": 7.340000000000001e-05, "loss": 1.0186, "step": 367 }, { "epoch": 0.12, "learning_rate": 7.36e-05, "loss": 1.0328, "step": 368 }, { "epoch": 0.13, "learning_rate": 7.38e-05, "loss": 0.9233, "step": 369 }, { "epoch": 0.13, "learning_rate": 7.4e-05, "loss": 1.3662, "step": 370 }, { "epoch": 0.13, "learning_rate": 7.42e-05, "loss": 1.0046, "step": 371 }, { "epoch": 0.13, "learning_rate": 7.44e-05, "loss": 1.0522, "step": 372 }, { "epoch": 0.13, "learning_rate": 7.46e-05, "loss": 1.0791, "step": 373 }, { "epoch": 0.13, "learning_rate": 7.48e-05, "loss": 1.163, "step": 374 }, { "epoch": 0.13, "learning_rate": 7.500000000000001e-05, "loss": 1.0299, "step": 375 }, { "epoch": 0.13, "learning_rate": 7.52e-05, "loss": 0.9361, "step": 376 }, { "epoch": 0.13, "learning_rate": 7.54e-05, "loss": 0.9808, "step": 377 }, { "epoch": 0.13, "learning_rate": 7.560000000000001e-05, "loss": 0.9482, "step": 378 }, { "epoch": 0.13, "learning_rate": 7.58e-05, "loss": 0.9278, "step": 379 }, { "epoch": 0.13, "learning_rate": 7.6e-05, "loss": 1.3477, "step": 380 }, { "epoch": 0.13, "learning_rate": 7.620000000000001e-05, "loss": 1.0585, "step": 381 }, { "epoch": 0.13, "learning_rate": 7.64e-05, "loss": 1.3153, "step": 382 }, { "epoch": 0.13, "learning_rate": 7.66e-05, "loss": 0.9102, "step": 383 }, { "epoch": 0.13, "learning_rate": 7.680000000000001e-05, "loss": 0.9123, "step": 384 }, { "epoch": 0.13, "learning_rate": 7.7e-05, "loss": 1.1594, "step": 385 }, { "epoch": 0.13, "learning_rate": 7.72e-05, "loss": 0.747, "step": 386 }, { "epoch": 0.13, "learning_rate": 7.740000000000001e-05, "loss": 0.9993, "step": 387 }, { "epoch": 0.13, "learning_rate": 7.76e-05, "loss": 1.2205, "step": 388 }, { "epoch": 0.13, "learning_rate": 7.780000000000001e-05, "loss": 1.2019, "step": 389 }, { "epoch": 0.13, "learning_rate": 7.800000000000001e-05, "loss": 1.2638, "step": 390 }, { "epoch": 0.13, "learning_rate": 7.82e-05, "loss": 0.9998, "step": 391 }, { "epoch": 0.13, "learning_rate": 7.840000000000001e-05, "loss": 1.0865, "step": 392 }, { "epoch": 0.13, "learning_rate": 7.860000000000001e-05, "loss": 1.0499, "step": 393 }, { "epoch": 0.13, "learning_rate": 7.88e-05, "loss": 0.869, "step": 394 }, { "epoch": 0.13, "learning_rate": 7.900000000000001e-05, "loss": 0.8419, "step": 395 }, { "epoch": 0.13, "learning_rate": 7.920000000000001e-05, "loss": 1.2251, "step": 396 }, { "epoch": 0.13, "learning_rate": 7.94e-05, "loss": 1.1416, "step": 397 }, { "epoch": 0.14, "learning_rate": 7.960000000000001e-05, "loss": 0.8001, "step": 398 }, { "epoch": 0.14, "learning_rate": 7.98e-05, "loss": 0.9339, "step": 399 }, { "epoch": 0.14, "learning_rate": 8e-05, "loss": 1.0634, "step": 400 }, { "epoch": 0.14, "learning_rate": 8.020000000000001e-05, "loss": 1.1273, "step": 401 }, { "epoch": 0.14, "learning_rate": 8.04e-05, "loss": 1.0285, "step": 402 }, { "epoch": 0.14, "learning_rate": 8.060000000000001e-05, "loss": 1.0114, "step": 403 }, { "epoch": 0.14, "learning_rate": 8.080000000000001e-05, "loss": 1.1819, "step": 404 }, { "epoch": 0.14, "learning_rate": 8.1e-05, "loss": 1.1606, "step": 405 }, { "epoch": 0.14, "learning_rate": 8.120000000000001e-05, "loss": 0.8522, "step": 406 }, { "epoch": 0.14, "learning_rate": 8.14e-05, "loss": 0.8655, "step": 407 }, { "epoch": 0.14, "learning_rate": 8.16e-05, "loss": 0.9447, "step": 408 }, { "epoch": 0.14, "learning_rate": 8.18e-05, "loss": 0.9084, "step": 409 }, { "epoch": 0.14, "learning_rate": 8.2e-05, "loss": 0.8263, "step": 410 }, { "epoch": 0.14, "learning_rate": 8.22e-05, "loss": 1.154, "step": 411 }, { "epoch": 0.14, "learning_rate": 8.24e-05, "loss": 0.6722, "step": 412 }, { "epoch": 0.14, "learning_rate": 8.26e-05, "loss": 0.9552, "step": 413 }, { "epoch": 0.14, "learning_rate": 8.28e-05, "loss": 0.9094, "step": 414 }, { "epoch": 0.14, "learning_rate": 8.3e-05, "loss": 0.8104, "step": 415 }, { "epoch": 0.14, "learning_rate": 8.32e-05, "loss": 1.1676, "step": 416 }, { "epoch": 0.14, "learning_rate": 8.34e-05, "loss": 0.9264, "step": 417 }, { "epoch": 0.14, "learning_rate": 8.36e-05, "loss": 0.9371, "step": 418 }, { "epoch": 0.14, "learning_rate": 8.38e-05, "loss": 0.6897, "step": 419 }, { "epoch": 0.14, "learning_rate": 8.4e-05, "loss": 1.1492, "step": 420 }, { "epoch": 0.14, "learning_rate": 8.42e-05, "loss": 0.9769, "step": 421 }, { "epoch": 0.14, "learning_rate": 8.44e-05, "loss": 1.0581, "step": 422 }, { "epoch": 0.14, "learning_rate": 8.46e-05, "loss": 1.0116, "step": 423 }, { "epoch": 0.14, "learning_rate": 8.48e-05, "loss": 1.0151, "step": 424 }, { "epoch": 0.14, "learning_rate": 8.5e-05, "loss": 1.0845, "step": 425 }, { "epoch": 0.14, "learning_rate": 8.52e-05, "loss": 1.1277, "step": 426 }, { "epoch": 0.15, "learning_rate": 8.54e-05, "loss": 0.979, "step": 427 }, { "epoch": 0.15, "learning_rate": 8.560000000000001e-05, "loss": 1.2729, "step": 428 }, { "epoch": 0.15, "learning_rate": 8.58e-05, "loss": 0.8001, "step": 429 }, { "epoch": 0.15, "learning_rate": 8.6e-05, "loss": 1.3215, "step": 430 }, { "epoch": 0.15, "learning_rate": 8.620000000000001e-05, "loss": 1.0522, "step": 431 }, { "epoch": 0.15, "learning_rate": 8.64e-05, "loss": 1.4081, "step": 432 }, { "epoch": 0.15, "learning_rate": 8.66e-05, "loss": 0.8163, "step": 433 }, { "epoch": 0.15, "learning_rate": 8.680000000000001e-05, "loss": 0.884, "step": 434 }, { "epoch": 0.15, "learning_rate": 8.7e-05, "loss": 1.083, "step": 435 }, { "epoch": 0.15, "learning_rate": 8.72e-05, "loss": 1.1206, "step": 436 }, { "epoch": 0.15, "learning_rate": 8.740000000000001e-05, "loss": 0.7178, "step": 437 }, { "epoch": 0.15, "learning_rate": 8.76e-05, "loss": 1.0228, "step": 438 }, { "epoch": 0.15, "learning_rate": 8.78e-05, "loss": 0.7773, "step": 439 }, { "epoch": 0.15, "learning_rate": 8.800000000000001e-05, "loss": 0.8538, "step": 440 }, { "epoch": 0.15, "learning_rate": 8.82e-05, "loss": 1.1727, "step": 441 }, { "epoch": 0.15, "learning_rate": 8.840000000000001e-05, "loss": 0.9334, "step": 442 }, { "epoch": 0.15, "learning_rate": 8.86e-05, "loss": 1.225, "step": 443 }, { "epoch": 0.15, "learning_rate": 8.88e-05, "loss": 1.1864, "step": 444 }, { "epoch": 0.15, "learning_rate": 8.900000000000001e-05, "loss": 0.949, "step": 445 }, { "epoch": 0.15, "learning_rate": 8.92e-05, "loss": 0.8373, "step": 446 }, { "epoch": 0.15, "learning_rate": 8.94e-05, "loss": 1.1846, "step": 447 }, { "epoch": 0.15, "learning_rate": 8.960000000000001e-05, "loss": 1.2836, "step": 448 }, { "epoch": 0.15, "learning_rate": 8.98e-05, "loss": 1.1378, "step": 449 }, { "epoch": 0.15, "learning_rate": 9e-05, "loss": 1.2676, "step": 450 }, { "epoch": 0.15, "learning_rate": 9.020000000000001e-05, "loss": 1.072, "step": 451 }, { "epoch": 0.15, "learning_rate": 9.04e-05, "loss": 0.997, "step": 452 }, { "epoch": 0.15, "learning_rate": 9.06e-05, "loss": 1.0977, "step": 453 }, { "epoch": 0.15, "learning_rate": 9.080000000000001e-05, "loss": 1.113, "step": 454 }, { "epoch": 0.15, "learning_rate": 9.1e-05, "loss": 1.1956, "step": 455 }, { "epoch": 0.15, "learning_rate": 9.120000000000001e-05, "loss": 0.9959, "step": 456 }, { "epoch": 0.16, "learning_rate": 9.140000000000001e-05, "loss": 0.9334, "step": 457 }, { "epoch": 0.16, "learning_rate": 9.16e-05, "loss": 1.0809, "step": 458 }, { "epoch": 0.16, "learning_rate": 9.180000000000001e-05, "loss": 0.7941, "step": 459 }, { "epoch": 0.16, "learning_rate": 9.200000000000001e-05, "loss": 0.8435, "step": 460 }, { "epoch": 0.16, "learning_rate": 9.22e-05, "loss": 1.0702, "step": 461 }, { "epoch": 0.16, "learning_rate": 9.240000000000001e-05, "loss": 0.7776, "step": 462 }, { "epoch": 0.16, "learning_rate": 9.260000000000001e-05, "loss": 0.6806, "step": 463 }, { "epoch": 0.16, "learning_rate": 9.28e-05, "loss": 1.3112, "step": 464 }, { "epoch": 0.16, "learning_rate": 9.300000000000001e-05, "loss": 0.7585, "step": 465 }, { "epoch": 0.16, "learning_rate": 9.320000000000002e-05, "loss": 0.7196, "step": 466 }, { "epoch": 0.16, "learning_rate": 9.340000000000001e-05, "loss": 1.4181, "step": 467 }, { "epoch": 0.16, "learning_rate": 9.360000000000001e-05, "loss": 1.0533, "step": 468 }, { "epoch": 0.16, "learning_rate": 9.38e-05, "loss": 1.2705, "step": 469 }, { "epoch": 0.16, "learning_rate": 9.4e-05, "loss": 0.9667, "step": 470 }, { "epoch": 0.16, "learning_rate": 9.42e-05, "loss": 1.2206, "step": 471 }, { "epoch": 0.16, "learning_rate": 9.44e-05, "loss": 0.949, "step": 472 }, { "epoch": 0.16, "learning_rate": 9.46e-05, "loss": 1.0479, "step": 473 }, { "epoch": 0.16, "learning_rate": 9.48e-05, "loss": 0.9939, "step": 474 }, { "epoch": 0.16, "learning_rate": 9.5e-05, "loss": 0.9348, "step": 475 }, { "epoch": 0.16, "learning_rate": 9.52e-05, "loss": 0.8957, "step": 476 }, { "epoch": 0.16, "learning_rate": 9.54e-05, "loss": 1.2068, "step": 477 }, { "epoch": 0.16, "learning_rate": 9.56e-05, "loss": 0.9938, "step": 478 }, { "epoch": 0.16, "learning_rate": 9.58e-05, "loss": 1.2731, "step": 479 }, { "epoch": 0.16, "learning_rate": 9.6e-05, "loss": 1.0107, "step": 480 }, { "epoch": 0.16, "learning_rate": 9.620000000000001e-05, "loss": 0.9246, "step": 481 }, { "epoch": 0.16, "learning_rate": 9.64e-05, "loss": 1.0853, "step": 482 }, { "epoch": 0.16, "learning_rate": 9.66e-05, "loss": 0.8877, "step": 483 }, { "epoch": 0.16, "learning_rate": 9.680000000000001e-05, "loss": 1.0362, "step": 484 }, { "epoch": 0.16, "learning_rate": 9.7e-05, "loss": 1.2327, "step": 485 }, { "epoch": 0.17, "learning_rate": 9.72e-05, "loss": 1.0306, "step": 486 }, { "epoch": 0.17, "learning_rate": 9.74e-05, "loss": 1.271, "step": 487 }, { "epoch": 0.17, "learning_rate": 9.76e-05, "loss": 1.0267, "step": 488 }, { "epoch": 0.17, "learning_rate": 9.78e-05, "loss": 0.8182, "step": 489 }, { "epoch": 0.17, "learning_rate": 9.8e-05, "loss": 1.103, "step": 490 }, { "epoch": 0.17, "learning_rate": 9.82e-05, "loss": 0.748, "step": 491 }, { "epoch": 0.17, "learning_rate": 9.84e-05, "loss": 1.1575, "step": 492 }, { "epoch": 0.17, "learning_rate": 9.86e-05, "loss": 1.1123, "step": 493 }, { "epoch": 0.17, "learning_rate": 9.88e-05, "loss": 0.9651, "step": 494 }, { "epoch": 0.17, "learning_rate": 9.900000000000001e-05, "loss": 0.8904, "step": 495 }, { "epoch": 0.17, "learning_rate": 9.92e-05, "loss": 1.1865, "step": 496 }, { "epoch": 0.17, "learning_rate": 9.94e-05, "loss": 1.0227, "step": 497 }, { "epoch": 0.17, "learning_rate": 9.960000000000001e-05, "loss": 1.3148, "step": 498 }, { "epoch": 0.17, "learning_rate": 9.98e-05, "loss": 1.2396, "step": 499 }, { "epoch": 0.17, "learning_rate": 0.0001, "loss": 1.3986, "step": 500 }, { "epoch": 0.17, "learning_rate": 9.999995869170297e-05, "loss": 0.7103, "step": 501 }, { "epoch": 0.17, "learning_rate": 9.999983476688016e-05, "loss": 1.0108, "step": 502 }, { "epoch": 0.17, "learning_rate": 9.999962822573631e-05, "loss": 0.9018, "step": 503 }, { "epoch": 0.17, "learning_rate": 9.999933906861272e-05, "loss": 0.9262, "step": 504 }, { "epoch": 0.17, "learning_rate": 9.999896729598715e-05, "loss": 0.7978, "step": 505 }, { "epoch": 0.17, "learning_rate": 9.999851290847391e-05, "loss": 1.15, "step": 506 }, { "epoch": 0.17, "learning_rate": 9.999797590682379e-05, "loss": 0.938, "step": 507 }, { "epoch": 0.17, "learning_rate": 9.999735629192408e-05, "loss": 1.1561, "step": 508 }, { "epoch": 0.17, "learning_rate": 9.999665406479862e-05, "loss": 1.1484, "step": 509 }, { "epoch": 0.17, "learning_rate": 9.999586922660769e-05, "loss": 0.9513, "step": 510 }, { "epoch": 0.17, "learning_rate": 9.999500177864815e-05, "loss": 1.2961, "step": 511 }, { "epoch": 0.17, "learning_rate": 9.999405172235325e-05, "loss": 0.764, "step": 512 }, { "epoch": 0.17, "learning_rate": 9.999301905929286e-05, "loss": 0.9583, "step": 513 }, { "epoch": 0.17, "learning_rate": 9.999190379117323e-05, "loss": 0.8161, "step": 514 }, { "epoch": 0.17, "learning_rate": 9.999070591983719e-05, "loss": 0.9618, "step": 515 }, { "epoch": 0.18, "learning_rate": 9.9989425447264e-05, "loss": 0.8262, "step": 516 }, { "epoch": 0.18, "learning_rate": 9.998806237556944e-05, "loss": 0.9786, "step": 517 }, { "epoch": 0.18, "learning_rate": 9.998661670700575e-05, "loss": 0.8392, "step": 518 }, { "epoch": 0.18, "learning_rate": 9.998508844396164e-05, "loss": 1.2064, "step": 519 }, { "epoch": 0.18, "learning_rate": 9.998347758896234e-05, "loss": 1.3171, "step": 520 }, { "epoch": 0.18, "learning_rate": 9.998178414466948e-05, "loss": 0.8867, "step": 521 }, { "epoch": 0.18, "learning_rate": 9.998000811388122e-05, "loss": 1.2735, "step": 522 }, { "epoch": 0.18, "learning_rate": 9.997814949953215e-05, "loss": 1.0216, "step": 523 }, { "epoch": 0.18, "learning_rate": 9.99762083046933e-05, "loss": 1.2184, "step": 524 }, { "epoch": 0.18, "learning_rate": 9.997418453257218e-05, "loss": 1.1012, "step": 525 }, { "epoch": 0.18, "learning_rate": 9.997207818651274e-05, "loss": 1.0679, "step": 526 }, { "epoch": 0.18, "learning_rate": 9.996988926999535e-05, "loss": 1.0846, "step": 527 }, { "epoch": 0.18, "learning_rate": 9.996761778663683e-05, "loss": 0.9723, "step": 528 }, { "epoch": 0.18, "learning_rate": 9.996526374019042e-05, "loss": 0.824, "step": 529 }, { "epoch": 0.18, "learning_rate": 9.99628271345458e-05, "loss": 1.1234, "step": 530 }, { "epoch": 0.18, "learning_rate": 9.996030797372904e-05, "loss": 1.2164, "step": 531 }, { "epoch": 0.18, "learning_rate": 9.995770626190263e-05, "loss": 1.3016, "step": 532 }, { "epoch": 0.18, "learning_rate": 9.995502200336547e-05, "loss": 0.8915, "step": 533 }, { "epoch": 0.18, "learning_rate": 9.995225520255282e-05, "loss": 1.0244, "step": 534 }, { "epoch": 0.18, "learning_rate": 9.99494058640364e-05, "loss": 1.1737, "step": 535 }, { "epoch": 0.18, "learning_rate": 9.994647399252422e-05, "loss": 0.8732, "step": 536 }, { "epoch": 0.18, "learning_rate": 9.994345959286073e-05, "loss": 0.9633, "step": 537 }, { "epoch": 0.18, "learning_rate": 9.994036267002671e-05, "loss": 0.9683, "step": 538 }, { "epoch": 0.18, "learning_rate": 9.99371832291393e-05, "loss": 0.7446, "step": 539 }, { "epoch": 0.18, "learning_rate": 9.993392127545198e-05, "loss": 0.9861, "step": 540 }, { "epoch": 0.18, "learning_rate": 9.993057681435461e-05, "loss": 0.7392, "step": 541 }, { "epoch": 0.18, "learning_rate": 9.992714985137334e-05, "loss": 0.7956, "step": 542 }, { "epoch": 0.18, "learning_rate": 9.992364039217063e-05, "loss": 1.0934, "step": 543 }, { "epoch": 0.18, "learning_rate": 9.99200484425453e-05, "loss": 1.1052, "step": 544 }, { "epoch": 0.19, "learning_rate": 9.991637400843243e-05, "loss": 1.1224, "step": 545 }, { "epoch": 0.19, "learning_rate": 9.99126170959034e-05, "loss": 1.0617, "step": 546 }, { "epoch": 0.19, "learning_rate": 9.990877771116589e-05, "loss": 1.0025, "step": 547 }, { "epoch": 0.19, "learning_rate": 9.99048558605638e-05, "loss": 1.1439, "step": 548 }, { "epoch": 0.19, "learning_rate": 9.990085155057738e-05, "loss": 1.0878, "step": 549 }, { "epoch": 0.19, "learning_rate": 9.989676478782304e-05, "loss": 1.075, "step": 550 }, { "epoch": 0.19, "learning_rate": 9.989259557905351e-05, "loss": 0.9342, "step": 551 }, { "epoch": 0.19, "learning_rate": 9.988834393115767e-05, "loss": 1.1751, "step": 552 }, { "epoch": 0.19, "learning_rate": 9.988400985116065e-05, "loss": 1.329, "step": 553 }, { "epoch": 0.19, "learning_rate": 9.987959334622381e-05, "loss": 1.1593, "step": 554 }, { "epoch": 0.19, "learning_rate": 9.987509442364469e-05, "loss": 0.9043, "step": 555 }, { "epoch": 0.19, "learning_rate": 9.987051309085697e-05, "loss": 1.2056, "step": 556 }, { "epoch": 0.19, "learning_rate": 9.986584935543056e-05, "loss": 1.122, "step": 557 }, { "epoch": 0.19, "learning_rate": 9.986110322507149e-05, "loss": 1.1376, "step": 558 }, { "epoch": 0.19, "learning_rate": 9.985627470762194e-05, "loss": 0.777, "step": 559 }, { "epoch": 0.19, "learning_rate": 9.985136381106022e-05, "loss": 1.1734, "step": 560 }, { "epoch": 0.19, "learning_rate": 9.984637054350076e-05, "loss": 0.8742, "step": 561 }, { "epoch": 0.19, "learning_rate": 9.984129491319411e-05, "loss": 1.0455, "step": 562 }, { "epoch": 0.19, "learning_rate": 9.983613692852688e-05, "loss": 1.0325, "step": 563 }, { "epoch": 0.19, "learning_rate": 9.983089659802178e-05, "loss": 1.0385, "step": 564 }, { "epoch": 0.19, "learning_rate": 9.982557393033758e-05, "loss": 0.8214, "step": 565 }, { "epoch": 0.19, "learning_rate": 9.982016893426908e-05, "loss": 0.9628, "step": 566 }, { "epoch": 0.19, "learning_rate": 9.981468161874715e-05, "loss": 1.0957, "step": 567 }, { "epoch": 0.19, "learning_rate": 9.980911199283863e-05, "loss": 0.8065, "step": 568 }, { "epoch": 0.19, "learning_rate": 9.980346006574641e-05, "loss": 1.0722, "step": 569 }, { "epoch": 0.19, "learning_rate": 9.979772584680933e-05, "loss": 0.8153, "step": 570 }, { "epoch": 0.19, "learning_rate": 9.979190934550224e-05, "loss": 1.1704, "step": 571 }, { "epoch": 0.19, "learning_rate": 9.978601057143593e-05, "loss": 1.2302, "step": 572 }, { "epoch": 0.19, "learning_rate": 9.978002953435712e-05, "loss": 0.9719, "step": 573 }, { "epoch": 0.19, "learning_rate": 9.977396624414848e-05, "loss": 1.2587, "step": 574 }, { "epoch": 0.2, "learning_rate": 9.976782071082857e-05, "loss": 0.9017, "step": 575 }, { "epoch": 0.2, "learning_rate": 9.976159294455185e-05, "loss": 0.9049, "step": 576 }, { "epoch": 0.2, "learning_rate": 9.975528295560867e-05, "loss": 1.266, "step": 577 }, { "epoch": 0.2, "learning_rate": 9.974889075442521e-05, "loss": 1.0567, "step": 578 }, { "epoch": 0.2, "learning_rate": 9.974241635156352e-05, "loss": 0.9594, "step": 579 }, { "epoch": 0.2, "learning_rate": 9.973585975772144e-05, "loss": 1.3431, "step": 580 }, { "epoch": 0.2, "learning_rate": 9.972922098373268e-05, "loss": 1.2005, "step": 581 }, { "epoch": 0.2, "learning_rate": 9.972250004056664e-05, "loss": 1.1235, "step": 582 }, { "epoch": 0.2, "learning_rate": 9.971569693932861e-05, "loss": 0.864, "step": 583 }, { "epoch": 0.2, "learning_rate": 9.970881169125954e-05, "loss": 0.8226, "step": 584 }, { "epoch": 0.2, "learning_rate": 9.970184430773615e-05, "loss": 1.0466, "step": 585 }, { "epoch": 0.2, "learning_rate": 9.969479480027086e-05, "loss": 1.014, "step": 586 }, { "epoch": 0.2, "learning_rate": 9.968766318051179e-05, "loss": 0.9592, "step": 587 }, { "epoch": 0.2, "learning_rate": 9.968044946024278e-05, "loss": 0.9874, "step": 588 }, { "epoch": 0.2, "learning_rate": 9.967315365138325e-05, "loss": 1.0626, "step": 589 }, { "epoch": 0.2, "learning_rate": 9.966577576598833e-05, "loss": 0.8579, "step": 590 }, { "epoch": 0.2, "learning_rate": 9.965831581624871e-05, "loss": 1.1816, "step": 591 }, { "epoch": 0.2, "learning_rate": 9.965077381449071e-05, "loss": 0.9066, "step": 592 }, { "epoch": 0.2, "learning_rate": 9.964314977317624e-05, "loss": 1.395, "step": 593 }, { "epoch": 0.2, "learning_rate": 9.96354437049027e-05, "loss": 0.96, "step": 594 }, { "epoch": 0.2, "learning_rate": 9.962765562240313e-05, "loss": 1.1053, "step": 595 }, { "epoch": 0.2, "learning_rate": 9.961978553854598e-05, "loss": 1.1798, "step": 596 }, { "epoch": 0.2, "learning_rate": 9.961183346633525e-05, "loss": 1.1807, "step": 597 }, { "epoch": 0.2, "learning_rate": 9.960379941891042e-05, "loss": 0.8425, "step": 598 }, { "epoch": 0.2, "learning_rate": 9.95956834095464e-05, "loss": 1.2614, "step": 599 }, { "epoch": 0.2, "learning_rate": 9.958748545165353e-05, "loss": 1.3131, "step": 600 }, { "epoch": 0.2, "learning_rate": 9.957920555877754e-05, "loss": 1.1813, "step": 601 }, { "epoch": 0.2, "learning_rate": 9.957084374459958e-05, "loss": 0.9543, "step": 602 }, { "epoch": 0.2, "learning_rate": 9.956240002293611e-05, "loss": 1.0004, "step": 603 }, { "epoch": 0.21, "learning_rate": 9.9553874407739e-05, "loss": 1.0681, "step": 604 }, { "epoch": 0.21, "learning_rate": 9.954526691309541e-05, "loss": 0.9732, "step": 605 }, { "epoch": 0.21, "learning_rate": 9.953657755322771e-05, "loss": 1.0982, "step": 606 }, { "epoch": 0.21, "learning_rate": 9.952780634249365e-05, "loss": 1.4031, "step": 607 }, { "epoch": 0.21, "learning_rate": 9.95189532953862e-05, "loss": 0.8401, "step": 608 }, { "epoch": 0.21, "learning_rate": 9.95100184265335e-05, "loss": 1.334, "step": 609 }, { "epoch": 0.21, "learning_rate": 9.95010017506989e-05, "loss": 1.1879, "step": 610 }, { "epoch": 0.21, "learning_rate": 9.9491903282781e-05, "loss": 1.2676, "step": 611 }, { "epoch": 0.21, "learning_rate": 9.948272303781345e-05, "loss": 0.7195, "step": 612 }, { "epoch": 0.21, "learning_rate": 9.947346103096506e-05, "loss": 0.8089, "step": 613 }, { "epoch": 0.21, "learning_rate": 9.946411727753975e-05, "loss": 1.188, "step": 614 }, { "epoch": 0.21, "learning_rate": 9.94546917929765e-05, "loss": 0.8942, "step": 615 }, { "epoch": 0.21, "learning_rate": 9.944518459284934e-05, "loss": 1.0629, "step": 616 }, { "epoch": 0.21, "learning_rate": 9.94355956928673e-05, "loss": 1.095, "step": 617 }, { "epoch": 0.21, "learning_rate": 9.942592510887448e-05, "loss": 0.769, "step": 618 }, { "epoch": 0.21, "learning_rate": 9.941617285684982e-05, "loss": 1.0425, "step": 619 }, { "epoch": 0.21, "learning_rate": 9.940633895290733e-05, "loss": 1.2281, "step": 620 }, { "epoch": 0.21, "learning_rate": 9.939642341329587e-05, "loss": 0.8932, "step": 621 }, { "epoch": 0.21, "learning_rate": 9.938642625439918e-05, "loss": 1.058, "step": 622 }, { "epoch": 0.21, "learning_rate": 9.937634749273593e-05, "loss": 1.0739, "step": 623 }, { "epoch": 0.21, "learning_rate": 9.936618714495953e-05, "loss": 0.9774, "step": 624 }, { "epoch": 0.21, "learning_rate": 9.935594522785826e-05, "loss": 0.9943, "step": 625 }, { "epoch": 0.21, "learning_rate": 9.934562175835522e-05, "loss": 0.8749, "step": 626 }, { "epoch": 0.21, "learning_rate": 9.933521675350812e-05, "loss": 1.452, "step": 627 }, { "epoch": 0.21, "learning_rate": 9.932473023050955e-05, "loss": 1.0515, "step": 628 }, { "epoch": 0.21, "learning_rate": 9.931416220668669e-05, "loss": 1.0638, "step": 629 }, { "epoch": 0.21, "learning_rate": 9.930351269950143e-05, "loss": 0.9141, "step": 630 }, { "epoch": 0.21, "learning_rate": 9.92927817265503e-05, "loss": 0.687, "step": 631 }, { "epoch": 0.21, "learning_rate": 9.928196930556442e-05, "loss": 1.0163, "step": 632 }, { "epoch": 0.21, "learning_rate": 9.92710754544095e-05, "loss": 1.2437, "step": 633 }, { "epoch": 0.22, "learning_rate": 9.92601001910858e-05, "loss": 0.9689, "step": 634 }, { "epoch": 0.22, "learning_rate": 9.924904353372808e-05, "loss": 0.9104, "step": 635 }, { "epoch": 0.22, "learning_rate": 9.923790550060563e-05, "loss": 0.9598, "step": 636 }, { "epoch": 0.22, "learning_rate": 9.922668611012218e-05, "loss": 0.9555, "step": 637 }, { "epoch": 0.22, "learning_rate": 9.921538538081587e-05, "loss": 1.195, "step": 638 }, { "epoch": 0.22, "learning_rate": 9.920400333135926e-05, "loss": 1.027, "step": 639 }, { "epoch": 0.22, "learning_rate": 9.919253998055928e-05, "loss": 1.2479, "step": 640 }, { "epoch": 0.22, "learning_rate": 9.918099534735718e-05, "loss": 1.2236, "step": 641 }, { "epoch": 0.22, "learning_rate": 9.916936945082853e-05, "loss": 1.0462, "step": 642 }, { "epoch": 0.22, "learning_rate": 9.915766231018318e-05, "loss": 1.0599, "step": 643 }, { "epoch": 0.22, "learning_rate": 9.914587394476519e-05, "loss": 0.7946, "step": 644 }, { "epoch": 0.22, "learning_rate": 9.913400437405287e-05, "loss": 1.1167, "step": 645 }, { "epoch": 0.22, "learning_rate": 9.912205361765867e-05, "loss": 0.9484, "step": 646 }, { "epoch": 0.22, "learning_rate": 9.911002169532924e-05, "loss": 0.8547, "step": 647 }, { "epoch": 0.22, "learning_rate": 9.909790862694528e-05, "loss": 1.111, "step": 648 }, { "epoch": 0.22, "learning_rate": 9.90857144325216e-05, "loss": 1.0342, "step": 649 }, { "epoch": 0.22, "learning_rate": 9.907343913220707e-05, "loss": 0.9308, "step": 650 }, { "epoch": 0.22, "learning_rate": 9.906108274628455e-05, "loss": 0.777, "step": 651 }, { "epoch": 0.22, "learning_rate": 9.90486452951709e-05, "loss": 1.2538, "step": 652 }, { "epoch": 0.22, "learning_rate": 9.90361267994169e-05, "loss": 0.8371, "step": 653 }, { "epoch": 0.22, "learning_rate": 9.902352727970729e-05, "loss": 1.2707, "step": 654 }, { "epoch": 0.22, "learning_rate": 9.901084675686062e-05, "loss": 0.976, "step": 655 }, { "epoch": 0.22, "learning_rate": 9.899808525182935e-05, "loss": 1.2887, "step": 656 }, { "epoch": 0.22, "learning_rate": 9.898524278569971e-05, "loss": 0.935, "step": 657 }, { "epoch": 0.22, "learning_rate": 9.897231937969171e-05, "loss": 1.3353, "step": 658 }, { "epoch": 0.22, "learning_rate": 9.895931505515913e-05, "loss": 0.786, "step": 659 }, { "epoch": 0.22, "learning_rate": 9.894622983358941e-05, "loss": 1.0992, "step": 660 }, { "epoch": 0.22, "learning_rate": 9.893306373660366e-05, "loss": 1.0753, "step": 661 }, { "epoch": 0.22, "learning_rate": 9.891981678595669e-05, "loss": 0.9363, "step": 662 }, { "epoch": 0.23, "learning_rate": 9.890648900353685e-05, "loss": 0.9322, "step": 663 }, { "epoch": 0.23, "learning_rate": 9.889308041136602e-05, "loss": 0.8061, "step": 664 }, { "epoch": 0.23, "learning_rate": 9.887959103159968e-05, "loss": 1.4907, "step": 665 }, { "epoch": 0.23, "learning_rate": 9.886602088652672e-05, "loss": 0.6783, "step": 666 }, { "epoch": 0.23, "learning_rate": 9.885236999856958e-05, "loss": 1.2966, "step": 667 }, { "epoch": 0.23, "learning_rate": 9.883863839028403e-05, "loss": 0.9598, "step": 668 }, { "epoch": 0.23, "learning_rate": 9.882482608435923e-05, "loss": 1.1738, "step": 669 }, { "epoch": 0.23, "learning_rate": 9.881093310361773e-05, "loss": 0.9746, "step": 670 }, { "epoch": 0.23, "learning_rate": 9.879695947101531e-05, "loss": 1.2173, "step": 671 }, { "epoch": 0.23, "learning_rate": 9.878290520964107e-05, "loss": 0.9556, "step": 672 }, { "epoch": 0.23, "learning_rate": 9.87687703427173e-05, "loss": 1.0402, "step": 673 }, { "epoch": 0.23, "learning_rate": 9.87545548935995e-05, "loss": 0.8971, "step": 674 }, { "epoch": 0.23, "learning_rate": 9.874025888577632e-05, "loss": 1.1925, "step": 675 }, { "epoch": 0.23, "learning_rate": 9.872588234286946e-05, "loss": 1.1512, "step": 676 }, { "epoch": 0.23, "learning_rate": 9.871142528863382e-05, "loss": 1.0581, "step": 677 }, { "epoch": 0.23, "learning_rate": 9.869688774695718e-05, "loss": 1.232, "step": 678 }, { "epoch": 0.23, "learning_rate": 9.868226974186043e-05, "loss": 0.9767, "step": 679 }, { "epoch": 0.23, "learning_rate": 9.866757129749733e-05, "loss": 1.0562, "step": 680 }, { "epoch": 0.23, "learning_rate": 9.865279243815462e-05, "loss": 1.4259, "step": 681 }, { "epoch": 0.23, "learning_rate": 9.863793318825186e-05, "loss": 1.0912, "step": 682 }, { "epoch": 0.23, "learning_rate": 9.862299357234149e-05, "loss": 0.9612, "step": 683 }, { "epoch": 0.23, "learning_rate": 9.860797361510866e-05, "loss": 0.8139, "step": 684 }, { "epoch": 0.23, "learning_rate": 9.859287334137137e-05, "loss": 1.0463, "step": 685 }, { "epoch": 0.23, "learning_rate": 9.857769277608026e-05, "loss": 1.2403, "step": 686 }, { "epoch": 0.23, "learning_rate": 9.85624319443187e-05, "loss": 0.8445, "step": 687 }, { "epoch": 0.23, "learning_rate": 9.85470908713026e-05, "loss": 1.1537, "step": 688 }, { "epoch": 0.23, "learning_rate": 9.853166958238053e-05, "loss": 0.9336, "step": 689 }, { "epoch": 0.23, "learning_rate": 9.851616810303359e-05, "loss": 0.9155, "step": 690 }, { "epoch": 0.23, "learning_rate": 9.850058645887533e-05, "loss": 0.8309, "step": 691 }, { "epoch": 0.24, "learning_rate": 9.848492467565182e-05, "loss": 1.3628, "step": 692 }, { "epoch": 0.24, "learning_rate": 9.846918277924153e-05, "loss": 1.1703, "step": 693 }, { "epoch": 0.24, "learning_rate": 9.845336079565529e-05, "loss": 1.1511, "step": 694 }, { "epoch": 0.24, "learning_rate": 9.843745875103627e-05, "loss": 0.9034, "step": 695 }, { "epoch": 0.24, "learning_rate": 9.842147667165992e-05, "loss": 1.1098, "step": 696 }, { "epoch": 0.24, "learning_rate": 9.840541458393393e-05, "loss": 1.1436, "step": 697 }, { "epoch": 0.24, "learning_rate": 9.838927251439822e-05, "loss": 0.8489, "step": 698 }, { "epoch": 0.24, "learning_rate": 9.837305048972484e-05, "loss": 0.9817, "step": 699 }, { "epoch": 0.24, "learning_rate": 9.835674853671797e-05, "loss": 1.0849, "step": 700 }, { "epoch": 0.24, "learning_rate": 9.834036668231382e-05, "loss": 1.1275, "step": 701 }, { "epoch": 0.24, "learning_rate": 9.832390495358066e-05, "loss": 1.1772, "step": 702 }, { "epoch": 0.24, "learning_rate": 9.830736337771874e-05, "loss": 0.9435, "step": 703 }, { "epoch": 0.24, "learning_rate": 9.829074198206023e-05, "loss": 1.3481, "step": 704 }, { "epoch": 0.24, "learning_rate": 9.827404079406917e-05, "loss": 0.8829, "step": 705 }, { "epoch": 0.24, "learning_rate": 9.82572598413415e-05, "loss": 0.9019, "step": 706 }, { "epoch": 0.24, "learning_rate": 9.82403991516049e-05, "loss": 1.1318, "step": 707 }, { "epoch": 0.24, "learning_rate": 9.822345875271883e-05, "loss": 0.8718, "step": 708 }, { "epoch": 0.24, "learning_rate": 9.820643867267447e-05, "loss": 1.0161, "step": 709 }, { "epoch": 0.24, "learning_rate": 9.81893389395946e-05, "loss": 0.8397, "step": 710 }, { "epoch": 0.24, "learning_rate": 9.817215958173369e-05, "loss": 0.9561, "step": 711 }, { "epoch": 0.24, "learning_rate": 9.815490062747772e-05, "loss": 1.0652, "step": 712 }, { "epoch": 0.24, "learning_rate": 9.813756210534422e-05, "loss": 1.0037, "step": 713 }, { "epoch": 0.24, "learning_rate": 9.812014404398219e-05, "loss": 1.163, "step": 714 }, { "epoch": 0.24, "learning_rate": 9.810264647217205e-05, "loss": 1.1434, "step": 715 }, { "epoch": 0.24, "learning_rate": 9.808506941882555e-05, "loss": 1.0718, "step": 716 }, { "epoch": 0.24, "learning_rate": 9.806741291298588e-05, "loss": 0.8113, "step": 717 }, { "epoch": 0.24, "learning_rate": 9.80496769838274e-05, "loss": 1.0096, "step": 718 }, { "epoch": 0.24, "learning_rate": 9.803186166065579e-05, "loss": 1.1531, "step": 719 }, { "epoch": 0.24, "learning_rate": 9.801396697290786e-05, "loss": 1.0726, "step": 720 }, { "epoch": 0.24, "learning_rate": 9.799599295015154e-05, "loss": 0.8579, "step": 721 }, { "epoch": 0.25, "learning_rate": 9.797793962208592e-05, "loss": 0.9528, "step": 722 }, { "epoch": 0.25, "learning_rate": 9.79598070185411e-05, "loss": 0.8258, "step": 723 }, { "epoch": 0.25, "learning_rate": 9.794159516947812e-05, "loss": 0.9595, "step": 724 }, { "epoch": 0.25, "learning_rate": 9.792330410498902e-05, "loss": 0.853, "step": 725 }, { "epoch": 0.25, "learning_rate": 9.790493385529671e-05, "loss": 0.9881, "step": 726 }, { "epoch": 0.25, "learning_rate": 9.788648445075495e-05, "loss": 1.0074, "step": 727 }, { "epoch": 0.25, "learning_rate": 9.786795592184823e-05, "loss": 1.0239, "step": 728 }, { "epoch": 0.25, "learning_rate": 9.78493482991919e-05, "loss": 0.8949, "step": 729 }, { "epoch": 0.25, "learning_rate": 9.783066161353187e-05, "loss": 0.9337, "step": 730 }, { "epoch": 0.25, "learning_rate": 9.781189589574477e-05, "loss": 1.03, "step": 731 }, { "epoch": 0.25, "learning_rate": 9.77930511768378e-05, "loss": 1.2378, "step": 732 }, { "epoch": 0.25, "learning_rate": 9.777412748794868e-05, "loss": 0.9148, "step": 733 }, { "epoch": 0.25, "learning_rate": 9.775512486034563e-05, "loss": 0.9906, "step": 734 }, { "epoch": 0.25, "learning_rate": 9.773604332542729e-05, "loss": 1.4706, "step": 735 }, { "epoch": 0.25, "learning_rate": 9.77168829147227e-05, "loss": 1.0886, "step": 736 }, { "epoch": 0.25, "learning_rate": 9.76976436598912e-05, "loss": 0.917, "step": 737 }, { "epoch": 0.25, "learning_rate": 9.767832559272243e-05, "loss": 1.2221, "step": 738 }, { "epoch": 0.25, "learning_rate": 9.765892874513626e-05, "loss": 0.8822, "step": 739 }, { "epoch": 0.25, "learning_rate": 9.76394531491827e-05, "loss": 1.2101, "step": 740 }, { "epoch": 0.25, "learning_rate": 9.761989883704192e-05, "loss": 0.8987, "step": 741 }, { "epoch": 0.25, "learning_rate": 9.760026584102414e-05, "loss": 1.1077, "step": 742 }, { "epoch": 0.25, "learning_rate": 9.758055419356955e-05, "loss": 1.1182, "step": 743 }, { "epoch": 0.25, "learning_rate": 9.756076392724836e-05, "loss": 0.9484, "step": 744 }, { "epoch": 0.25, "learning_rate": 9.754089507476064e-05, "loss": 1.0253, "step": 745 }, { "epoch": 0.25, "learning_rate": 9.752094766893634e-05, "loss": 0.9128, "step": 746 }, { "epoch": 0.25, "learning_rate": 9.750092174273521e-05, "loss": 0.8807, "step": 747 }, { "epoch": 0.25, "learning_rate": 9.74808173292467e-05, "loss": 0.9398, "step": 748 }, { "epoch": 0.25, "learning_rate": 9.746063446168996e-05, "loss": 0.7548, "step": 749 }, { "epoch": 0.25, "learning_rate": 9.744037317341383e-05, "loss": 0.9848, "step": 750 }, { "epoch": 0.26, "learning_rate": 9.742003349789668e-05, "loss": 0.8608, "step": 751 }, { "epoch": 0.26, "learning_rate": 9.739961546874637e-05, "loss": 1.0789, "step": 752 }, { "epoch": 0.26, "learning_rate": 9.737911911970026e-05, "loss": 0.9311, "step": 753 }, { "epoch": 0.26, "learning_rate": 9.735854448462515e-05, "loss": 0.9095, "step": 754 }, { "epoch": 0.26, "learning_rate": 9.733789159751715e-05, "loss": 1.1177, "step": 755 }, { "epoch": 0.26, "learning_rate": 9.731716049250168e-05, "loss": 1.1641, "step": 756 }, { "epoch": 0.26, "learning_rate": 9.729635120383343e-05, "loss": 1.1566, "step": 757 }, { "epoch": 0.26, "learning_rate": 9.727546376589622e-05, "loss": 0.9053, "step": 758 }, { "epoch": 0.26, "learning_rate": 9.725449821320303e-05, "loss": 1.1806, "step": 759 }, { "epoch": 0.26, "learning_rate": 9.723345458039594e-05, "loss": 1.0056, "step": 760 }, { "epoch": 0.26, "learning_rate": 9.7212332902246e-05, "loss": 0.9919, "step": 761 }, { "epoch": 0.26, "learning_rate": 9.719113321365323e-05, "loss": 0.8612, "step": 762 }, { "epoch": 0.26, "learning_rate": 9.716985554964655e-05, "loss": 1.0587, "step": 763 }, { "epoch": 0.26, "learning_rate": 9.714849994538373e-05, "loss": 1.142, "step": 764 }, { "epoch": 0.26, "learning_rate": 9.71270664361513e-05, "loss": 0.9303, "step": 765 }, { "epoch": 0.26, "learning_rate": 9.710555505736455e-05, "loss": 0.8318, "step": 766 }, { "epoch": 0.26, "learning_rate": 9.70839658445674e-05, "loss": 1.2149, "step": 767 }, { "epoch": 0.26, "learning_rate": 9.70622988334324e-05, "loss": 0.8926, "step": 768 }, { "epoch": 0.26, "learning_rate": 9.704055405976067e-05, "loss": 1.3801, "step": 769 }, { "epoch": 0.26, "learning_rate": 9.701873155948176e-05, "loss": 0.8362, "step": 770 }, { "epoch": 0.26, "learning_rate": 9.699683136865368e-05, "loss": 0.8698, "step": 771 }, { "epoch": 0.26, "learning_rate": 9.697485352346283e-05, "loss": 1.3974, "step": 772 }, { "epoch": 0.26, "learning_rate": 9.69527980602239e-05, "loss": 0.719, "step": 773 }, { "epoch": 0.26, "learning_rate": 9.693066501537984e-05, "loss": 1.0604, "step": 774 }, { "epoch": 0.26, "learning_rate": 9.690845442550179e-05, "loss": 1.1718, "step": 775 }, { "epoch": 0.26, "learning_rate": 9.688616632728897e-05, "loss": 0.9281, "step": 776 }, { "epoch": 0.26, "learning_rate": 9.686380075756878e-05, "loss": 1.1904, "step": 777 }, { "epoch": 0.26, "learning_rate": 9.684135775329654e-05, "loss": 1.2129, "step": 778 }, { "epoch": 0.26, "learning_rate": 9.681883735155552e-05, "loss": 1.0231, "step": 779 }, { "epoch": 0.26, "learning_rate": 9.679623958955692e-05, "loss": 1.3008, "step": 780 }, { "epoch": 0.27, "learning_rate": 9.677356450463975e-05, "loss": 1.1248, "step": 781 }, { "epoch": 0.27, "learning_rate": 9.675081213427076e-05, "loss": 1.2848, "step": 782 }, { "epoch": 0.27, "learning_rate": 9.67279825160444e-05, "loss": 0.913, "step": 783 }, { "epoch": 0.27, "learning_rate": 9.670507568768281e-05, "loss": 1.0365, "step": 784 }, { "epoch": 0.27, "learning_rate": 9.668209168703566e-05, "loss": 1.0933, "step": 785 }, { "epoch": 0.27, "learning_rate": 9.665903055208014e-05, "loss": 0.7632, "step": 786 }, { "epoch": 0.27, "learning_rate": 9.66358923209209e-05, "loss": 1.1747, "step": 787 }, { "epoch": 0.27, "learning_rate": 9.661267703179e-05, "loss": 0.9078, "step": 788 }, { "epoch": 0.27, "learning_rate": 9.658938472304676e-05, "loss": 0.898, "step": 789 }, { "epoch": 0.27, "learning_rate": 9.656601543317783e-05, "loss": 1.1109, "step": 790 }, { "epoch": 0.27, "learning_rate": 9.654256920079705e-05, "loss": 1.1505, "step": 791 }, { "epoch": 0.27, "learning_rate": 9.651904606464535e-05, "loss": 1.0081, "step": 792 }, { "epoch": 0.27, "learning_rate": 9.649544606359076e-05, "loss": 1.0133, "step": 793 }, { "epoch": 0.27, "learning_rate": 9.647176923662831e-05, "loss": 0.7545, "step": 794 }, { "epoch": 0.27, "learning_rate": 9.644801562288e-05, "loss": 1.0297, "step": 795 }, { "epoch": 0.27, "learning_rate": 9.642418526159466e-05, "loss": 0.9222, "step": 796 }, { "epoch": 0.27, "learning_rate": 9.640027819214799e-05, "loss": 0.9532, "step": 797 }, { "epoch": 0.27, "learning_rate": 9.637629445404235e-05, "loss": 0.918, "step": 798 }, { "epoch": 0.27, "learning_rate": 9.635223408690688e-05, "loss": 1.1732, "step": 799 }, { "epoch": 0.27, "learning_rate": 9.632809713049727e-05, "loss": 1.0789, "step": 800 }, { "epoch": 0.27, "learning_rate": 9.630388362469578e-05, "loss": 0.8592, "step": 801 }, { "epoch": 0.27, "learning_rate": 9.627959360951117e-05, "loss": 1.15, "step": 802 }, { "epoch": 0.27, "learning_rate": 9.62552271250786e-05, "loss": 0.9492, "step": 803 }, { "epoch": 0.27, "learning_rate": 9.623078421165959e-05, "loss": 1.2464, "step": 804 }, { "epoch": 0.27, "learning_rate": 9.620626490964195e-05, "loss": 0.7465, "step": 805 }, { "epoch": 0.27, "learning_rate": 9.618166925953968e-05, "loss": 0.9402, "step": 806 }, { "epoch": 0.27, "learning_rate": 9.615699730199302e-05, "loss": 0.8343, "step": 807 }, { "epoch": 0.27, "learning_rate": 9.613224907776814e-05, "loss": 0.9862, "step": 808 }, { "epoch": 0.27, "learning_rate": 9.610742462775739e-05, "loss": 1.2931, "step": 809 }, { "epoch": 0.28, "learning_rate": 9.608252399297899e-05, "loss": 0.8937, "step": 810 }, { "epoch": 0.28, "learning_rate": 9.605754721457703e-05, "loss": 0.7652, "step": 811 }, { "epoch": 0.28, "learning_rate": 9.603249433382144e-05, "loss": 0.9624, "step": 812 }, { "epoch": 0.28, "learning_rate": 9.60073653921079e-05, "loss": 0.8804, "step": 813 }, { "epoch": 0.28, "learning_rate": 9.598216043095777e-05, "loss": 0.9315, "step": 814 }, { "epoch": 0.28, "learning_rate": 9.595687949201803e-05, "loss": 1.0226, "step": 815 }, { "epoch": 0.28, "learning_rate": 9.593152261706113e-05, "loss": 1.2923, "step": 816 }, { "epoch": 0.28, "learning_rate": 9.590608984798508e-05, "loss": 0.9304, "step": 817 }, { "epoch": 0.28, "learning_rate": 9.588058122681323e-05, "loss": 1.0805, "step": 818 }, { "epoch": 0.28, "learning_rate": 9.585499679569432e-05, "loss": 0.8293, "step": 819 }, { "epoch": 0.28, "learning_rate": 9.582933659690228e-05, "loss": 1.1582, "step": 820 }, { "epoch": 0.28, "learning_rate": 9.580360067283632e-05, "loss": 0.9317, "step": 821 }, { "epoch": 0.28, "learning_rate": 9.577778906602068e-05, "loss": 0.9194, "step": 822 }, { "epoch": 0.28, "learning_rate": 9.575190181910476e-05, "loss": 1.1167, "step": 823 }, { "epoch": 0.28, "learning_rate": 9.572593897486282e-05, "loss": 1.1541, "step": 824 }, { "epoch": 0.28, "learning_rate": 9.569990057619414e-05, "loss": 0.7004, "step": 825 }, { "epoch": 0.28, "learning_rate": 9.567378666612279e-05, "loss": 1.0647, "step": 826 }, { "epoch": 0.28, "learning_rate": 9.564759728779759e-05, "loss": 0.9955, "step": 827 }, { "epoch": 0.28, "learning_rate": 9.56213324844921e-05, "loss": 1.0646, "step": 828 }, { "epoch": 0.28, "learning_rate": 9.559499229960451e-05, "loss": 1.0893, "step": 829 }, { "epoch": 0.28, "learning_rate": 9.556857677665751e-05, "loss": 0.9099, "step": 830 }, { "epoch": 0.28, "learning_rate": 9.554208595929836e-05, "loss": 1.1403, "step": 831 }, { "epoch": 0.28, "learning_rate": 9.551551989129863e-05, "loss": 1.1414, "step": 832 }, { "epoch": 0.28, "learning_rate": 9.548887861655433e-05, "loss": 0.967, "step": 833 }, { "epoch": 0.28, "learning_rate": 9.546216217908563e-05, "loss": 1.2456, "step": 834 }, { "epoch": 0.28, "learning_rate": 9.543537062303702e-05, "loss": 1.0066, "step": 835 }, { "epoch": 0.28, "learning_rate": 9.540850399267698e-05, "loss": 1.2899, "step": 836 }, { "epoch": 0.28, "learning_rate": 9.538156233239814e-05, "loss": 0.9196, "step": 837 }, { "epoch": 0.28, "learning_rate": 9.535454568671704e-05, "loss": 1.144, "step": 838 }, { "epoch": 0.28, "learning_rate": 9.532745410027417e-05, "loss": 0.9624, "step": 839 }, { "epoch": 0.29, "learning_rate": 9.530028761783378e-05, "loss": 0.9381, "step": 840 }, { "epoch": 0.29, "learning_rate": 9.527304628428398e-05, "loss": 0.9972, "step": 841 }, { "epoch": 0.29, "learning_rate": 9.524573014463643e-05, "loss": 0.8468, "step": 842 }, { "epoch": 0.29, "learning_rate": 9.521833924402649e-05, "loss": 0.8694, "step": 843 }, { "epoch": 0.29, "learning_rate": 9.519087362771302e-05, "loss": 1.0133, "step": 844 }, { "epoch": 0.29, "learning_rate": 9.516333334107832e-05, "loss": 1.3313, "step": 845 }, { "epoch": 0.29, "learning_rate": 9.51357184296281e-05, "loss": 1.047, "step": 846 }, { "epoch": 0.29, "learning_rate": 9.510802893899134e-05, "loss": 1.0006, "step": 847 }, { "epoch": 0.29, "learning_rate": 9.508026491492026e-05, "loss": 0.9454, "step": 848 }, { "epoch": 0.29, "learning_rate": 9.505242640329028e-05, "loss": 1.048, "step": 849 }, { "epoch": 0.29, "learning_rate": 9.502451345009984e-05, "loss": 1.2726, "step": 850 }, { "epoch": 0.29, "learning_rate": 9.49965261014704e-05, "loss": 0.5806, "step": 851 }, { "epoch": 0.29, "learning_rate": 9.496846440364635e-05, "loss": 1.0225, "step": 852 }, { "epoch": 0.29, "learning_rate": 9.494032840299491e-05, "loss": 0.9792, "step": 853 }, { "epoch": 0.29, "learning_rate": 9.491211814600614e-05, "loss": 1.0092, "step": 854 }, { "epoch": 0.29, "learning_rate": 9.48838336792927e-05, "loss": 1.1568, "step": 855 }, { "epoch": 0.29, "learning_rate": 9.485547504958993e-05, "loss": 1.2255, "step": 856 }, { "epoch": 0.29, "learning_rate": 9.482704230375569e-05, "loss": 0.8098, "step": 857 }, { "epoch": 0.29, "learning_rate": 9.479853548877033e-05, "loss": 1.2417, "step": 858 }, { "epoch": 0.29, "learning_rate": 9.476995465173658e-05, "loss": 0.8792, "step": 859 }, { "epoch": 0.29, "learning_rate": 9.474129983987944e-05, "loss": 0.8964, "step": 860 }, { "epoch": 0.29, "learning_rate": 9.471257110054616e-05, "loss": 0.9005, "step": 861 }, { "epoch": 0.29, "learning_rate": 9.468376848120619e-05, "loss": 1.046, "step": 862 }, { "epoch": 0.29, "learning_rate": 9.4654892029451e-05, "loss": 1.1082, "step": 863 }, { "epoch": 0.29, "learning_rate": 9.462594179299406e-05, "loss": 1.0971, "step": 864 }, { "epoch": 0.29, "learning_rate": 9.45969178196708e-05, "loss": 0.8087, "step": 865 }, { "epoch": 0.29, "learning_rate": 9.45678201574384e-05, "loss": 1.0799, "step": 866 }, { "epoch": 0.29, "learning_rate": 9.45386488543759e-05, "loss": 1.0542, "step": 867 }, { "epoch": 0.29, "learning_rate": 9.450940395868397e-05, "loss": 1.0708, "step": 868 }, { "epoch": 0.3, "learning_rate": 9.448008551868488e-05, "loss": 1.1218, "step": 869 }, { "epoch": 0.3, "learning_rate": 9.445069358282242e-05, "loss": 1.103, "step": 870 }, { "epoch": 0.3, "learning_rate": 9.44212281996618e-05, "loss": 0.7964, "step": 871 }, { "epoch": 0.3, "learning_rate": 9.439168941788965e-05, "loss": 1.2042, "step": 872 }, { "epoch": 0.3, "learning_rate": 9.436207728631384e-05, "loss": 1.0421, "step": 873 }, { "epoch": 0.3, "learning_rate": 9.43323918538634e-05, "loss": 1.0025, "step": 874 }, { "epoch": 0.3, "learning_rate": 9.430263316958854e-05, "loss": 1.261, "step": 875 }, { "epoch": 0.3, "learning_rate": 9.42728012826605e-05, "loss": 1.298, "step": 876 }, { "epoch": 0.3, "learning_rate": 9.424289624237144e-05, "loss": 1.0899, "step": 877 }, { "epoch": 0.3, "learning_rate": 9.42129180981344e-05, "loss": 1.1326, "step": 878 }, { "epoch": 0.3, "learning_rate": 9.418286689948324e-05, "loss": 1.1086, "step": 879 }, { "epoch": 0.3, "learning_rate": 9.415274269607253e-05, "loss": 1.0757, "step": 880 }, { "epoch": 0.3, "learning_rate": 9.412254553767741e-05, "loss": 1.1237, "step": 881 }, { "epoch": 0.3, "learning_rate": 9.409227547419363e-05, "loss": 1.0463, "step": 882 }, { "epoch": 0.3, "learning_rate": 9.406193255563737e-05, "loss": 1.042, "step": 883 }, { "epoch": 0.3, "learning_rate": 9.403151683214525e-05, "loss": 0.9995, "step": 884 }, { "epoch": 0.3, "learning_rate": 9.400102835397406e-05, "loss": 1.2485, "step": 885 }, { "epoch": 0.3, "learning_rate": 9.397046717150096e-05, "loss": 1.0357, "step": 886 }, { "epoch": 0.3, "learning_rate": 9.39398333352231e-05, "loss": 1.0826, "step": 887 }, { "epoch": 0.3, "learning_rate": 9.390912689575779e-05, "loss": 1.0659, "step": 888 }, { "epoch": 0.3, "learning_rate": 9.387834790384225e-05, "loss": 1.0674, "step": 889 }, { "epoch": 0.3, "learning_rate": 9.384749641033359e-05, "loss": 1.1996, "step": 890 }, { "epoch": 0.3, "learning_rate": 9.38165724662087e-05, "loss": 1.1351, "step": 891 }, { "epoch": 0.3, "learning_rate": 9.37855761225642e-05, "loss": 1.0983, "step": 892 }, { "epoch": 0.3, "learning_rate": 9.375450743061636e-05, "loss": 1.1178, "step": 893 }, { "epoch": 0.3, "learning_rate": 9.372336644170095e-05, "loss": 0.8222, "step": 894 }, { "epoch": 0.3, "learning_rate": 9.369215320727324e-05, "loss": 1.1073, "step": 895 }, { "epoch": 0.3, "learning_rate": 9.366086777890784e-05, "loss": 1.0111, "step": 896 }, { "epoch": 0.3, "learning_rate": 9.362951020829863e-05, "loss": 1.1758, "step": 897 }, { "epoch": 0.31, "learning_rate": 9.359808054725877e-05, "loss": 1.1208, "step": 898 }, { "epoch": 0.31, "learning_rate": 9.356657884772048e-05, "loss": 1.1234, "step": 899 }, { "epoch": 0.31, "learning_rate": 9.3535005161735e-05, "loss": 1.0212, "step": 900 }, { "epoch": 0.31, "learning_rate": 9.350335954147255e-05, "loss": 1.1291, "step": 901 }, { "epoch": 0.31, "learning_rate": 9.347164203922223e-05, "loss": 1.0811, "step": 902 }, { "epoch": 0.31, "learning_rate": 9.343985270739182e-05, "loss": 0.9648, "step": 903 }, { "epoch": 0.31, "learning_rate": 9.34079915985079e-05, "loss": 1.2675, "step": 904 }, { "epoch": 0.31, "learning_rate": 9.337605876521556e-05, "loss": 1.2332, "step": 905 }, { "epoch": 0.31, "learning_rate": 9.334405426027846e-05, "loss": 0.8153, "step": 906 }, { "epoch": 0.31, "learning_rate": 9.331197813657867e-05, "loss": 1.4154, "step": 907 }, { "epoch": 0.31, "learning_rate": 9.327983044711655e-05, "loss": 0.9532, "step": 908 }, { "epoch": 0.31, "learning_rate": 9.324761124501078e-05, "loss": 0.7831, "step": 909 }, { "epoch": 0.31, "learning_rate": 9.321532058349818e-05, "loss": 0.9692, "step": 910 }, { "epoch": 0.31, "learning_rate": 9.318295851593363e-05, "loss": 1.0814, "step": 911 }, { "epoch": 0.31, "learning_rate": 9.315052509579002e-05, "loss": 0.7133, "step": 912 }, { "epoch": 0.31, "learning_rate": 9.311802037665812e-05, "loss": 0.8592, "step": 913 }, { "epoch": 0.31, "learning_rate": 9.30854444122465e-05, "loss": 0.9691, "step": 914 }, { "epoch": 0.31, "learning_rate": 9.305279725638148e-05, "loss": 1.4755, "step": 915 }, { "epoch": 0.31, "learning_rate": 9.302007896300698e-05, "loss": 1.2686, "step": 916 }, { "epoch": 0.31, "learning_rate": 9.298728958618448e-05, "loss": 0.8982, "step": 917 }, { "epoch": 0.31, "learning_rate": 9.295442918009295e-05, "loss": 0.8225, "step": 918 }, { "epoch": 0.31, "learning_rate": 9.292149779902863e-05, "loss": 0.8752, "step": 919 }, { "epoch": 0.31, "learning_rate": 9.288849549740512e-05, "loss": 1.114, "step": 920 }, { "epoch": 0.31, "learning_rate": 9.285542232975318e-05, "loss": 0.908, "step": 921 }, { "epoch": 0.31, "learning_rate": 9.282227835072063e-05, "loss": 0.8795, "step": 922 }, { "epoch": 0.31, "learning_rate": 9.278906361507238e-05, "loss": 1.0355, "step": 923 }, { "epoch": 0.31, "learning_rate": 9.275577817769015e-05, "loss": 1.032, "step": 924 }, { "epoch": 0.31, "learning_rate": 9.272242209357253e-05, "loss": 0.8892, "step": 925 }, { "epoch": 0.31, "learning_rate": 9.268899541783487e-05, "loss": 1.0059, "step": 926 }, { "epoch": 0.31, "learning_rate": 9.265549820570911e-05, "loss": 0.8292, "step": 927 }, { "epoch": 0.32, "learning_rate": 9.262193051254377e-05, "loss": 1.1143, "step": 928 }, { "epoch": 0.32, "learning_rate": 9.25882923938038e-05, "loss": 0.7051, "step": 929 }, { "epoch": 0.32, "learning_rate": 9.255458390507058e-05, "loss": 1.3533, "step": 930 }, { "epoch": 0.32, "learning_rate": 9.252080510204168e-05, "loss": 0.7656, "step": 931 }, { "epoch": 0.32, "learning_rate": 9.24869560405309e-05, "loss": 0.9758, "step": 932 }, { "epoch": 0.32, "learning_rate": 9.245303677646813e-05, "loss": 1.0279, "step": 933 }, { "epoch": 0.32, "learning_rate": 9.241904736589926e-05, "loss": 1.1606, "step": 934 }, { "epoch": 0.32, "learning_rate": 9.238498786498605e-05, "loss": 0.9855, "step": 935 }, { "epoch": 0.32, "learning_rate": 9.235085833000613e-05, "loss": 1.2291, "step": 936 }, { "epoch": 0.32, "learning_rate": 9.23166588173528e-05, "loss": 0.9385, "step": 937 }, { "epoch": 0.32, "learning_rate": 9.228238938353501e-05, "loss": 0.618, "step": 938 }, { "epoch": 0.32, "learning_rate": 9.224805008517725e-05, "loss": 1.4071, "step": 939 }, { "epoch": 0.32, "learning_rate": 9.221364097901941e-05, "loss": 1.0657, "step": 940 }, { "epoch": 0.32, "learning_rate": 9.217916212191678e-05, "loss": 0.7575, "step": 941 }, { "epoch": 0.32, "learning_rate": 9.214461357083985e-05, "loss": 1.1951, "step": 942 }, { "epoch": 0.32, "learning_rate": 9.210999538287433e-05, "loss": 1.1924, "step": 943 }, { "epoch": 0.32, "learning_rate": 9.207530761522092e-05, "loss": 1.0177, "step": 944 }, { "epoch": 0.32, "learning_rate": 9.204055032519533e-05, "loss": 1.0267, "step": 945 }, { "epoch": 0.32, "learning_rate": 9.200572357022815e-05, "loss": 0.8595, "step": 946 }, { "epoch": 0.32, "learning_rate": 9.197082740786473e-05, "loss": 1.09, "step": 947 }, { "epoch": 0.32, "learning_rate": 9.19358618957651e-05, "loss": 0.8859, "step": 948 }, { "epoch": 0.32, "learning_rate": 9.190082709170393e-05, "loss": 1.0127, "step": 949 }, { "epoch": 0.32, "learning_rate": 9.186572305357031e-05, "loss": 0.9726, "step": 950 }, { "epoch": 0.32, "learning_rate": 9.183054983936777e-05, "loss": 1.1346, "step": 951 }, { "epoch": 0.32, "learning_rate": 9.179530750721412e-05, "loss": 0.9534, "step": 952 }, { "epoch": 0.32, "learning_rate": 9.17599961153414e-05, "loss": 0.882, "step": 953 }, { "epoch": 0.32, "learning_rate": 9.172461572209578e-05, "loss": 0.9878, "step": 954 }, { "epoch": 0.32, "learning_rate": 9.168916638593736e-05, "loss": 1.1131, "step": 955 }, { "epoch": 0.32, "learning_rate": 9.165364816544022e-05, "loss": 1.0415, "step": 956 }, { "epoch": 0.33, "learning_rate": 9.161806111929227e-05, "loss": 1.1306, "step": 957 }, { "epoch": 0.33, "learning_rate": 9.158240530629511e-05, "loss": 1.0081, "step": 958 }, { "epoch": 0.33, "learning_rate": 9.154668078536398e-05, "loss": 0.9231, "step": 959 }, { "epoch": 0.33, "learning_rate": 9.151088761552762e-05, "loss": 1.221, "step": 960 }, { "epoch": 0.33, "learning_rate": 9.147502585592827e-05, "loss": 1.162, "step": 961 }, { "epoch": 0.33, "learning_rate": 9.143909556582141e-05, "loss": 1.0276, "step": 962 }, { "epoch": 0.33, "learning_rate": 9.140309680457586e-05, "loss": 1.0981, "step": 963 }, { "epoch": 0.33, "learning_rate": 9.136702963167348e-05, "loss": 0.7658, "step": 964 }, { "epoch": 0.33, "learning_rate": 9.133089410670921e-05, "loss": 1.106, "step": 965 }, { "epoch": 0.33, "learning_rate": 9.129469028939094e-05, "loss": 1.2693, "step": 966 }, { "epoch": 0.33, "learning_rate": 9.125841823953938e-05, "loss": 0.8468, "step": 967 }, { "epoch": 0.33, "learning_rate": 9.122207801708802e-05, "loss": 0.8012, "step": 968 }, { "epoch": 0.33, "learning_rate": 9.118566968208296e-05, "loss": 1.1251, "step": 969 }, { "epoch": 0.33, "learning_rate": 9.114919329468282e-05, "loss": 1.1592, "step": 970 }, { "epoch": 0.33, "learning_rate": 9.111264891515874e-05, "loss": 0.9422, "step": 971 }, { "epoch": 0.33, "learning_rate": 9.107603660389414e-05, "loss": 1.0158, "step": 972 }, { "epoch": 0.33, "learning_rate": 9.103935642138472e-05, "loss": 0.9636, "step": 973 }, { "epoch": 0.33, "learning_rate": 9.100260842823831e-05, "loss": 0.7893, "step": 974 }, { "epoch": 0.33, "learning_rate": 9.09657926851748e-05, "loss": 0.9443, "step": 975 }, { "epoch": 0.33, "learning_rate": 9.0928909253026e-05, "loss": 0.7753, "step": 976 }, { "epoch": 0.33, "learning_rate": 9.08919581927356e-05, "loss": 1.3223, "step": 977 }, { "epoch": 0.33, "learning_rate": 9.085493956535898e-05, "loss": 1.0684, "step": 978 }, { "epoch": 0.33, "learning_rate": 9.081785343206324e-05, "loss": 0.9109, "step": 979 }, { "epoch": 0.33, "learning_rate": 9.078069985412696e-05, "loss": 0.7877, "step": 980 }, { "epoch": 0.33, "learning_rate": 9.074347889294016e-05, "loss": 0.96, "step": 981 }, { "epoch": 0.33, "learning_rate": 9.070619061000428e-05, "loss": 0.9384, "step": 982 }, { "epoch": 0.33, "learning_rate": 9.066883506693188e-05, "loss": 0.9182, "step": 983 }, { "epoch": 0.33, "learning_rate": 9.063141232544676e-05, "loss": 1.2048, "step": 984 }, { "epoch": 0.33, "learning_rate": 9.059392244738366e-05, "loss": 0.6311, "step": 985 }, { "epoch": 0.33, "learning_rate": 9.055636549468834e-05, "loss": 0.9734, "step": 986 }, { "epoch": 0.34, "learning_rate": 9.051874152941733e-05, "loss": 0.908, "step": 987 }, { "epoch": 0.34, "learning_rate": 9.048105061373792e-05, "loss": 0.8485, "step": 988 }, { "epoch": 0.34, "learning_rate": 9.0443292809928e-05, "loss": 0.9312, "step": 989 }, { "epoch": 0.34, "learning_rate": 9.0405468180376e-05, "loss": 0.8793, "step": 990 }, { "epoch": 0.34, "learning_rate": 9.036757678758077e-05, "loss": 1.1993, "step": 991 }, { "epoch": 0.34, "learning_rate": 9.032961869415146e-05, "loss": 0.7122, "step": 992 }, { "epoch": 0.34, "learning_rate": 9.029159396280743e-05, "loss": 0.7623, "step": 993 }, { "epoch": 0.34, "learning_rate": 9.025350265637815e-05, "loss": 0.8884, "step": 994 }, { "epoch": 0.34, "learning_rate": 9.021534483780312e-05, "loss": 1.0034, "step": 995 }, { "epoch": 0.34, "learning_rate": 9.01771205701317e-05, "loss": 1.1159, "step": 996 }, { "epoch": 0.34, "learning_rate": 9.013882991652309e-05, "loss": 1.2711, "step": 997 }, { "epoch": 0.34, "learning_rate": 9.010047294024614e-05, "loss": 0.9225, "step": 998 }, { "epoch": 0.34, "learning_rate": 9.00620497046793e-05, "loss": 0.6847, "step": 999 }, { "epoch": 0.34, "learning_rate": 9.002356027331054e-05, "loss": 1.0997, "step": 1000 }, { "epoch": 0.34, "learning_rate": 8.998500470973714e-05, "loss": 1.1557, "step": 1001 }, { "epoch": 0.34, "learning_rate": 8.99463830776657e-05, "loss": 1.2749, "step": 1002 }, { "epoch": 0.34, "learning_rate": 8.990769544091195e-05, "loss": 0.9791, "step": 1003 }, { "epoch": 0.34, "learning_rate": 8.986894186340075e-05, "loss": 0.7425, "step": 1004 }, { "epoch": 0.34, "learning_rate": 8.983012240916584e-05, "loss": 1.019, "step": 1005 }, { "epoch": 0.34, "learning_rate": 8.979123714234985e-05, "loss": 0.9782, "step": 1006 }, { "epoch": 0.34, "learning_rate": 8.975228612720416e-05, "loss": 0.801, "step": 1007 }, { "epoch": 0.34, "learning_rate": 8.971326942808875e-05, "loss": 1.2239, "step": 1008 }, { "epoch": 0.34, "learning_rate": 8.967418710947216e-05, "loss": 1.1717, "step": 1009 }, { "epoch": 0.34, "learning_rate": 8.963503923593138e-05, "loss": 0.8316, "step": 1010 }, { "epoch": 0.34, "learning_rate": 8.959582587215166e-05, "loss": 0.8905, "step": 1011 }, { "epoch": 0.34, "learning_rate": 8.955654708292648e-05, "loss": 0.7955, "step": 1012 }, { "epoch": 0.34, "learning_rate": 8.951720293315746e-05, "loss": 0.8456, "step": 1013 }, { "epoch": 0.34, "learning_rate": 8.947779348785418e-05, "loss": 0.9369, "step": 1014 }, { "epoch": 0.34, "learning_rate": 8.943831881213416e-05, "loss": 1.2324, "step": 1015 }, { "epoch": 0.35, "learning_rate": 8.939877897122262e-05, "loss": 0.9473, "step": 1016 }, { "epoch": 0.35, "learning_rate": 8.935917403045251e-05, "loss": 0.9737, "step": 1017 }, { "epoch": 0.35, "learning_rate": 8.931950405526434e-05, "loss": 0.6901, "step": 1018 }, { "epoch": 0.35, "learning_rate": 8.927976911120609e-05, "loss": 1.0834, "step": 1019 }, { "epoch": 0.35, "learning_rate": 8.923996926393305e-05, "loss": 0.5385, "step": 1020 }, { "epoch": 0.35, "learning_rate": 8.92001045792078e-05, "loss": 0.9275, "step": 1021 }, { "epoch": 0.35, "learning_rate": 8.916017512290001e-05, "loss": 0.8818, "step": 1022 }, { "epoch": 0.35, "learning_rate": 8.912018096098639e-05, "loss": 1.1229, "step": 1023 }, { "epoch": 0.35, "learning_rate": 8.908012215955058e-05, "loss": 0.9998, "step": 1024 }, { "epoch": 0.35, "learning_rate": 8.903999878478301e-05, "loss": 0.8981, "step": 1025 }, { "epoch": 0.35, "learning_rate": 8.899981090298084e-05, "loss": 1.0817, "step": 1026 }, { "epoch": 0.35, "learning_rate": 8.895955858054774e-05, "loss": 0.9932, "step": 1027 }, { "epoch": 0.35, "learning_rate": 8.891924188399395e-05, "loss": 0.9238, "step": 1028 }, { "epoch": 0.35, "learning_rate": 8.8878860879936e-05, "loss": 1.194, "step": 1029 }, { "epoch": 0.35, "learning_rate": 8.883841563509671e-05, "loss": 1.0561, "step": 1030 }, { "epoch": 0.35, "learning_rate": 8.879790621630508e-05, "loss": 1.1191, "step": 1031 }, { "epoch": 0.35, "learning_rate": 8.875733269049609e-05, "loss": 0.7127, "step": 1032 }, { "epoch": 0.35, "learning_rate": 8.871669512471068e-05, "loss": 0.9161, "step": 1033 }, { "epoch": 0.35, "learning_rate": 8.867599358609557e-05, "loss": 1.1141, "step": 1034 }, { "epoch": 0.35, "learning_rate": 8.863522814190326e-05, "loss": 1.2128, "step": 1035 }, { "epoch": 0.35, "learning_rate": 8.859439885949174e-05, "loss": 1.1588, "step": 1036 }, { "epoch": 0.35, "learning_rate": 8.855350580632456e-05, "loss": 0.8318, "step": 1037 }, { "epoch": 0.35, "learning_rate": 8.851254904997062e-05, "loss": 1.0339, "step": 1038 }, { "epoch": 0.35, "learning_rate": 8.847152865810407e-05, "loss": 0.9616, "step": 1039 }, { "epoch": 0.35, "learning_rate": 8.84304446985042e-05, "loss": 1.0683, "step": 1040 }, { "epoch": 0.35, "learning_rate": 8.838929723905536e-05, "loss": 1.0882, "step": 1041 }, { "epoch": 0.35, "learning_rate": 8.834808634774681e-05, "loss": 0.9221, "step": 1042 }, { "epoch": 0.35, "learning_rate": 8.83068120926726e-05, "loss": 0.8571, "step": 1043 }, { "epoch": 0.35, "learning_rate": 8.826547454203151e-05, "loss": 1.1493, "step": 1044 }, { "epoch": 0.35, "learning_rate": 8.822407376412691e-05, "loss": 0.9528, "step": 1045 }, { "epoch": 0.36, "learning_rate": 8.818260982736661e-05, "loss": 0.9512, "step": 1046 }, { "epoch": 0.36, "learning_rate": 8.814108280026279e-05, "loss": 0.9169, "step": 1047 }, { "epoch": 0.36, "learning_rate": 8.809949275143187e-05, "loss": 1.0533, "step": 1048 }, { "epoch": 0.36, "learning_rate": 8.805783974959445e-05, "loss": 1.1051, "step": 1049 }, { "epoch": 0.36, "learning_rate": 8.801612386357507e-05, "loss": 1.4548, "step": 1050 }, { "epoch": 0.36, "learning_rate": 8.797434516230226e-05, "loss": 0.9311, "step": 1051 }, { "epoch": 0.36, "learning_rate": 8.793250371480827e-05, "loss": 0.9964, "step": 1052 }, { "epoch": 0.36, "learning_rate": 8.789059959022907e-05, "loss": 1.1481, "step": 1053 }, { "epoch": 0.36, "learning_rate": 8.784863285780418e-05, "loss": 1.1182, "step": 1054 }, { "epoch": 0.36, "learning_rate": 8.780660358687656e-05, "loss": 0.8432, "step": 1055 }, { "epoch": 0.36, "learning_rate": 8.776451184689253e-05, "loss": 0.9551, "step": 1056 }, { "epoch": 0.36, "learning_rate": 8.772235770740161e-05, "loss": 0.9475, "step": 1057 }, { "epoch": 0.36, "learning_rate": 8.768014123805641e-05, "loss": 0.9236, "step": 1058 }, { "epoch": 0.36, "learning_rate": 8.763786250861256e-05, "loss": 0.8219, "step": 1059 }, { "epoch": 0.36, "learning_rate": 8.759552158892859e-05, "loss": 1.1512, "step": 1060 }, { "epoch": 0.36, "learning_rate": 8.755311854896568e-05, "loss": 0.6647, "step": 1061 }, { "epoch": 0.36, "learning_rate": 8.751065345878778e-05, "loss": 1.0313, "step": 1062 }, { "epoch": 0.36, "learning_rate": 8.74681263885613e-05, "loss": 0.8713, "step": 1063 }, { "epoch": 0.36, "learning_rate": 8.742553740855506e-05, "loss": 0.8023, "step": 1064 }, { "epoch": 0.36, "learning_rate": 8.73828865891402e-05, "loss": 0.937, "step": 1065 }, { "epoch": 0.36, "learning_rate": 8.734017400079001e-05, "loss": 0.9998, "step": 1066 }, { "epoch": 0.36, "learning_rate": 8.729739971407991e-05, "loss": 0.7682, "step": 1067 }, { "epoch": 0.36, "learning_rate": 8.725456379968717e-05, "loss": 0.9714, "step": 1068 }, { "epoch": 0.36, "learning_rate": 8.721166632839094e-05, "loss": 0.8538, "step": 1069 }, { "epoch": 0.36, "learning_rate": 8.716870737107211e-05, "loss": 0.9801, "step": 1070 }, { "epoch": 0.36, "learning_rate": 8.71256869987131e-05, "loss": 1.0996, "step": 1071 }, { "epoch": 0.36, "learning_rate": 8.708260528239788e-05, "loss": 1.058, "step": 1072 }, { "epoch": 0.36, "learning_rate": 8.703946229331171e-05, "loss": 1.0979, "step": 1073 }, { "epoch": 0.36, "learning_rate": 8.699625810274114e-05, "loss": 0.8716, "step": 1074 }, { "epoch": 0.37, "learning_rate": 8.695299278207384e-05, "loss": 0.9654, "step": 1075 }, { "epoch": 0.37, "learning_rate": 8.690966640279847e-05, "loss": 0.7832, "step": 1076 }, { "epoch": 0.37, "learning_rate": 8.686627903650457e-05, "loss": 1.031, "step": 1077 }, { "epoch": 0.37, "learning_rate": 8.682283075488249e-05, "loss": 1.0707, "step": 1078 }, { "epoch": 0.37, "learning_rate": 8.677932162972322e-05, "loss": 1.1282, "step": 1079 }, { "epoch": 0.37, "learning_rate": 8.673575173291825e-05, "loss": 1.0484, "step": 1080 }, { "epoch": 0.37, "learning_rate": 8.669212113645952e-05, "loss": 0.888, "step": 1081 }, { "epoch": 0.37, "learning_rate": 8.664842991243927e-05, "loss": 1.0607, "step": 1082 }, { "epoch": 0.37, "learning_rate": 8.660467813304986e-05, "loss": 1.0576, "step": 1083 }, { "epoch": 0.37, "learning_rate": 8.656086587058381e-05, "loss": 0.9831, "step": 1084 }, { "epoch": 0.37, "learning_rate": 8.651699319743347e-05, "loss": 0.8067, "step": 1085 }, { "epoch": 0.37, "learning_rate": 8.647306018609107e-05, "loss": 1.1123, "step": 1086 }, { "epoch": 0.37, "learning_rate": 8.642906690914853e-05, "loss": 0.8515, "step": 1087 }, { "epoch": 0.37, "learning_rate": 8.638501343929736e-05, "loss": 0.845, "step": 1088 }, { "epoch": 0.37, "learning_rate": 8.634089984932848e-05, "loss": 0.9342, "step": 1089 }, { "epoch": 0.37, "learning_rate": 8.62967262121322e-05, "loss": 0.9965, "step": 1090 }, { "epoch": 0.37, "learning_rate": 8.625249260069803e-05, "loss": 1.0539, "step": 1091 }, { "epoch": 0.37, "learning_rate": 8.620819908811455e-05, "loss": 0.9698, "step": 1092 }, { "epoch": 0.37, "learning_rate": 8.61638457475694e-05, "loss": 0.9064, "step": 1093 }, { "epoch": 0.37, "learning_rate": 8.611943265234896e-05, "loss": 1.201, "step": 1094 }, { "epoch": 0.37, "learning_rate": 8.607495987583843e-05, "loss": 1.0805, "step": 1095 }, { "epoch": 0.37, "learning_rate": 8.60304274915216e-05, "loss": 1.2458, "step": 1096 }, { "epoch": 0.37, "learning_rate": 8.598583557298072e-05, "loss": 1.1629, "step": 1097 }, { "epoch": 0.37, "learning_rate": 8.594118419389647e-05, "loss": 1.2841, "step": 1098 }, { "epoch": 0.37, "learning_rate": 8.589647342804775e-05, "loss": 1.0221, "step": 1099 }, { "epoch": 0.37, "learning_rate": 8.585170334931155e-05, "loss": 0.9637, "step": 1100 }, { "epoch": 0.37, "learning_rate": 8.580687403166295e-05, "loss": 1.0417, "step": 1101 }, { "epoch": 0.37, "learning_rate": 8.576198554917479e-05, "loss": 1.1771, "step": 1102 }, { "epoch": 0.37, "learning_rate": 8.57170379760178e-05, "loss": 1.3966, "step": 1103 }, { "epoch": 0.37, "learning_rate": 8.567203138646027e-05, "loss": 0.7332, "step": 1104 }, { "epoch": 0.38, "learning_rate": 8.562696585486802e-05, "loss": 0.9345, "step": 1105 }, { "epoch": 0.38, "learning_rate": 8.558184145570427e-05, "loss": 1.3037, "step": 1106 }, { "epoch": 0.38, "learning_rate": 8.55366582635295e-05, "loss": 1.1765, "step": 1107 }, { "epoch": 0.38, "learning_rate": 8.549141635300134e-05, "loss": 1.0073, "step": 1108 }, { "epoch": 0.38, "learning_rate": 8.544611579887443e-05, "loss": 0.9767, "step": 1109 }, { "epoch": 0.38, "learning_rate": 8.540075667600034e-05, "loss": 1.1918, "step": 1110 }, { "epoch": 0.38, "learning_rate": 8.535533905932738e-05, "loss": 0.7245, "step": 1111 }, { "epoch": 0.38, "learning_rate": 8.530986302390053e-05, "loss": 1.1281, "step": 1112 }, { "epoch": 0.38, "learning_rate": 8.526432864486129e-05, "loss": 0.8507, "step": 1113 }, { "epoch": 0.38, "learning_rate": 8.521873599744758e-05, "loss": 1.0684, "step": 1114 }, { "epoch": 0.38, "learning_rate": 8.517308515699356e-05, "loss": 0.8558, "step": 1115 }, { "epoch": 0.38, "learning_rate": 8.512737619892959e-05, "loss": 1.0997, "step": 1116 }, { "epoch": 0.38, "learning_rate": 8.508160919878203e-05, "loss": 0.6956, "step": 1117 }, { "epoch": 0.38, "learning_rate": 8.503578423217315e-05, "loss": 1.268, "step": 1118 }, { "epoch": 0.38, "learning_rate": 8.498990137482104e-05, "loss": 0.9251, "step": 1119 }, { "epoch": 0.38, "learning_rate": 8.494396070253933e-05, "loss": 0.9487, "step": 1120 }, { "epoch": 0.38, "learning_rate": 8.489796229123735e-05, "loss": 1.0355, "step": 1121 }, { "epoch": 0.38, "learning_rate": 8.485190621691967e-05, "loss": 1.0579, "step": 1122 }, { "epoch": 0.38, "learning_rate": 8.480579255568624e-05, "loss": 1.0543, "step": 1123 }, { "epoch": 0.38, "learning_rate": 8.475962138373213e-05, "loss": 1.0368, "step": 1124 }, { "epoch": 0.38, "learning_rate": 8.471339277734742e-05, "loss": 1.1895, "step": 1125 }, { "epoch": 0.38, "learning_rate": 8.466710681291713e-05, "loss": 0.9667, "step": 1126 }, { "epoch": 0.38, "learning_rate": 8.462076356692106e-05, "loss": 1.2272, "step": 1127 }, { "epoch": 0.38, "learning_rate": 8.457436311593358e-05, "loss": 0.7709, "step": 1128 }, { "epoch": 0.38, "learning_rate": 8.452790553662367e-05, "loss": 0.9871, "step": 1129 }, { "epoch": 0.38, "learning_rate": 8.448139090575466e-05, "loss": 0.7043, "step": 1130 }, { "epoch": 0.38, "learning_rate": 8.443481930018416e-05, "loss": 0.7847, "step": 1131 }, { "epoch": 0.38, "learning_rate": 8.438819079686391e-05, "loss": 0.9859, "step": 1132 }, { "epoch": 0.38, "learning_rate": 8.434150547283968e-05, "loss": 0.8892, "step": 1133 }, { "epoch": 0.39, "learning_rate": 8.429476340525111e-05, "loss": 1.0734, "step": 1134 }, { "epoch": 0.39, "learning_rate": 8.424796467133163e-05, "loss": 1.2201, "step": 1135 }, { "epoch": 0.39, "learning_rate": 8.420110934840826e-05, "loss": 0.9725, "step": 1136 }, { "epoch": 0.39, "learning_rate": 8.415419751390155e-05, "loss": 1.1368, "step": 1137 }, { "epoch": 0.39, "learning_rate": 8.410722924532541e-05, "loss": 0.7961, "step": 1138 }, { "epoch": 0.39, "learning_rate": 8.406020462028701e-05, "loss": 1.0009, "step": 1139 }, { "epoch": 0.39, "learning_rate": 8.401312371648666e-05, "loss": 0.844, "step": 1140 }, { "epoch": 0.39, "learning_rate": 8.396598661171763e-05, "loss": 1.1228, "step": 1141 }, { "epoch": 0.39, "learning_rate": 8.391879338386603e-05, "loss": 0.9785, "step": 1142 }, { "epoch": 0.39, "learning_rate": 8.387154411091078e-05, "loss": 0.9105, "step": 1143 }, { "epoch": 0.39, "learning_rate": 8.382423887092332e-05, "loss": 1.1509, "step": 1144 }, { "epoch": 0.39, "learning_rate": 8.377687774206763e-05, "loss": 0.7655, "step": 1145 }, { "epoch": 0.39, "learning_rate": 8.372946080260003e-05, "loss": 1.5266, "step": 1146 }, { "epoch": 0.39, "learning_rate": 8.3681988130869e-05, "loss": 0.8968, "step": 1147 }, { "epoch": 0.39, "learning_rate": 8.363445980531514e-05, "loss": 1.0466, "step": 1148 }, { "epoch": 0.39, "learning_rate": 8.358687590447109e-05, "loss": 0.8721, "step": 1149 }, { "epoch": 0.39, "learning_rate": 8.353923650696118e-05, "loss": 1.0031, "step": 1150 }, { "epoch": 0.39, "learning_rate": 8.349154169150153e-05, "loss": 1.204, "step": 1151 }, { "epoch": 0.39, "learning_rate": 8.34437915368998e-05, "loss": 1.0761, "step": 1152 }, { "epoch": 0.39, "learning_rate": 8.339598612205508e-05, "loss": 1.1869, "step": 1153 }, { "epoch": 0.39, "learning_rate": 8.334812552595781e-05, "loss": 1.1026, "step": 1154 }, { "epoch": 0.39, "learning_rate": 8.330020982768957e-05, "loss": 0.8891, "step": 1155 }, { "epoch": 0.39, "learning_rate": 8.325223910642297e-05, "loss": 0.8581, "step": 1156 }, { "epoch": 0.39, "learning_rate": 8.32042134414216e-05, "loss": 1.0325, "step": 1157 }, { "epoch": 0.39, "learning_rate": 8.315613291203976e-05, "loss": 1.2507, "step": 1158 }, { "epoch": 0.39, "learning_rate": 8.310799759772248e-05, "loss": 1.1548, "step": 1159 }, { "epoch": 0.39, "learning_rate": 8.305980757800525e-05, "loss": 0.9088, "step": 1160 }, { "epoch": 0.39, "learning_rate": 8.301156293251397e-05, "loss": 1.0156, "step": 1161 }, { "epoch": 0.39, "learning_rate": 8.296326374096482e-05, "loss": 0.9521, "step": 1162 }, { "epoch": 0.4, "learning_rate": 8.291491008316409e-05, "loss": 0.8887, "step": 1163 }, { "epoch": 0.4, "learning_rate": 8.286650203900808e-05, "loss": 0.8787, "step": 1164 }, { "epoch": 0.4, "learning_rate": 8.281803968848292e-05, "loss": 1.1336, "step": 1165 }, { "epoch": 0.4, "learning_rate": 8.276952311166452e-05, "loss": 1.1694, "step": 1166 }, { "epoch": 0.4, "learning_rate": 8.272095238871835e-05, "loss": 1.0572, "step": 1167 }, { "epoch": 0.4, "learning_rate": 8.267232759989938e-05, "loss": 0.8619, "step": 1168 }, { "epoch": 0.4, "learning_rate": 8.262364882555189e-05, "loss": 1.2025, "step": 1169 }, { "epoch": 0.4, "learning_rate": 8.257491614610939e-05, "loss": 0.9091, "step": 1170 }, { "epoch": 0.4, "learning_rate": 8.25261296420944e-05, "loss": 0.7346, "step": 1171 }, { "epoch": 0.4, "learning_rate": 8.247728939411845e-05, "loss": 1.1791, "step": 1172 }, { "epoch": 0.4, "learning_rate": 8.242839548288181e-05, "loss": 0.9015, "step": 1173 }, { "epoch": 0.4, "learning_rate": 8.237944798917347e-05, "loss": 0.9365, "step": 1174 }, { "epoch": 0.4, "learning_rate": 8.233044699387096e-05, "loss": 1.0604, "step": 1175 }, { "epoch": 0.4, "learning_rate": 8.228139257794012e-05, "loss": 1.3326, "step": 1176 }, { "epoch": 0.4, "learning_rate": 8.223228482243519e-05, "loss": 0.9618, "step": 1177 }, { "epoch": 0.4, "learning_rate": 8.218312380849843e-05, "loss": 1.367, "step": 1178 }, { "epoch": 0.4, "learning_rate": 8.213390961736019e-05, "loss": 0.9165, "step": 1179 }, { "epoch": 0.4, "learning_rate": 8.208464233033861e-05, "loss": 1.0935, "step": 1180 }, { "epoch": 0.4, "learning_rate": 8.203532202883963e-05, "loss": 1.2947, "step": 1181 }, { "epoch": 0.4, "learning_rate": 8.198594879435673e-05, "loss": 0.9801, "step": 1182 }, { "epoch": 0.4, "learning_rate": 8.193652270847092e-05, "loss": 1.3747, "step": 1183 }, { "epoch": 0.4, "learning_rate": 8.188704385285045e-05, "loss": 1.4243, "step": 1184 }, { "epoch": 0.4, "learning_rate": 8.183751230925084e-05, "loss": 1.2853, "step": 1185 }, { "epoch": 0.4, "learning_rate": 8.178792815951464e-05, "loss": 1.3507, "step": 1186 }, { "epoch": 0.4, "learning_rate": 8.173829148557131e-05, "loss": 0.753, "step": 1187 }, { "epoch": 0.4, "learning_rate": 8.16886023694371e-05, "loss": 0.868, "step": 1188 }, { "epoch": 0.4, "learning_rate": 8.163886089321493e-05, "loss": 1.0687, "step": 1189 }, { "epoch": 0.4, "learning_rate": 8.158906713909425e-05, "loss": 1.3367, "step": 1190 }, { "epoch": 0.4, "learning_rate": 8.153922118935082e-05, "loss": 1.2212, "step": 1191 }, { "epoch": 0.4, "learning_rate": 8.148932312634674e-05, "loss": 0.7764, "step": 1192 }, { "epoch": 0.41, "learning_rate": 8.143937303253014e-05, "loss": 1.1209, "step": 1193 }, { "epoch": 0.41, "learning_rate": 8.138937099043515e-05, "loss": 1.2671, "step": 1194 }, { "epoch": 0.41, "learning_rate": 8.133931708268177e-05, "loss": 0.909, "step": 1195 }, { "epoch": 0.41, "learning_rate": 8.128921139197563e-05, "loss": 1.1164, "step": 1196 }, { "epoch": 0.41, "learning_rate": 8.123905400110799e-05, "loss": 1.033, "step": 1197 }, { "epoch": 0.41, "learning_rate": 8.118884499295549e-05, "loss": 1.0096, "step": 1198 }, { "epoch": 0.41, "learning_rate": 8.113858445048006e-05, "loss": 0.8292, "step": 1199 }, { "epoch": 0.41, "learning_rate": 8.108827245672883e-05, "loss": 0.7382, "step": 1200 }, { "epoch": 0.41, "learning_rate": 8.103790909483391e-05, "loss": 0.8406, "step": 1201 }, { "epoch": 0.41, "learning_rate": 8.098749444801224e-05, "loss": 1.3016, "step": 1202 }, { "epoch": 0.41, "learning_rate": 8.093702859956562e-05, "loss": 1.0975, "step": 1203 }, { "epoch": 0.41, "learning_rate": 8.08865116328803e-05, "loss": 0.9835, "step": 1204 }, { "epoch": 0.41, "learning_rate": 8.083594363142717e-05, "loss": 0.968, "step": 1205 }, { "epoch": 0.41, "learning_rate": 8.078532467876126e-05, "loss": 1.2603, "step": 1206 }, { "epoch": 0.41, "learning_rate": 8.073465485852192e-05, "loss": 1.057, "step": 1207 }, { "epoch": 0.41, "learning_rate": 8.068393425443252e-05, "loss": 1.1679, "step": 1208 }, { "epoch": 0.41, "learning_rate": 8.06331629503003e-05, "loss": 1.1294, "step": 1209 }, { "epoch": 0.41, "learning_rate": 8.058234103001634e-05, "loss": 1.0, "step": 1210 }, { "epoch": 0.41, "learning_rate": 8.053146857755532e-05, "loss": 0.9034, "step": 1211 }, { "epoch": 0.41, "learning_rate": 8.048054567697535e-05, "loss": 0.9725, "step": 1212 }, { "epoch": 0.41, "learning_rate": 8.042957241241804e-05, "loss": 1.0401, "step": 1213 }, { "epoch": 0.41, "learning_rate": 8.037854886810812e-05, "loss": 1.0346, "step": 1214 }, { "epoch": 0.41, "learning_rate": 8.032747512835337e-05, "loss": 0.792, "step": 1215 }, { "epoch": 0.41, "learning_rate": 8.027635127754462e-05, "loss": 0.9809, "step": 1216 }, { "epoch": 0.41, "learning_rate": 8.022517740015542e-05, "loss": 0.8778, "step": 1217 }, { "epoch": 0.41, "learning_rate": 8.017395358074198e-05, "loss": 0.7033, "step": 1218 }, { "epoch": 0.41, "learning_rate": 8.012267990394307e-05, "loss": 1.1968, "step": 1219 }, { "epoch": 0.41, "learning_rate": 8.007135645447982e-05, "loss": 1.2326, "step": 1220 }, { "epoch": 0.41, "learning_rate": 8.001998331715559e-05, "loss": 1.0712, "step": 1221 }, { "epoch": 0.42, "learning_rate": 7.996856057685587e-05, "loss": 1.1981, "step": 1222 }, { "epoch": 0.42, "learning_rate": 7.991708831854806e-05, "loss": 0.919, "step": 1223 }, { "epoch": 0.42, "learning_rate": 7.986556662728145e-05, "loss": 1.2049, "step": 1224 }, { "epoch": 0.42, "learning_rate": 7.981399558818697e-05, "loss": 0.894, "step": 1225 }, { "epoch": 0.42, "learning_rate": 7.976237528647705e-05, "loss": 1.2494, "step": 1226 }, { "epoch": 0.42, "learning_rate": 7.971070580744562e-05, "loss": 1.2061, "step": 1227 }, { "epoch": 0.42, "learning_rate": 7.965898723646776e-05, "loss": 1.0071, "step": 1228 }, { "epoch": 0.42, "learning_rate": 7.960721965899974e-05, "loss": 0.7334, "step": 1229 }, { "epoch": 0.42, "learning_rate": 7.955540316057877e-05, "loss": 0.9455, "step": 1230 }, { "epoch": 0.42, "learning_rate": 7.950353782682291e-05, "loss": 0.8254, "step": 1231 }, { "epoch": 0.42, "learning_rate": 7.94516237434309e-05, "loss": 0.7854, "step": 1232 }, { "epoch": 0.42, "learning_rate": 7.939966099618203e-05, "loss": 0.9294, "step": 1233 }, { "epoch": 0.42, "learning_rate": 7.9347649670936e-05, "loss": 0.7782, "step": 1234 }, { "epoch": 0.42, "learning_rate": 7.929558985363278e-05, "loss": 1.3221, "step": 1235 }, { "epoch": 0.42, "learning_rate": 7.924348163029249e-05, "loss": 0.8853, "step": 1236 }, { "epoch": 0.42, "learning_rate": 7.919132508701518e-05, "loss": 1.0136, "step": 1237 }, { "epoch": 0.42, "learning_rate": 7.913912030998078e-05, "loss": 1.0029, "step": 1238 }, { "epoch": 0.42, "learning_rate": 7.908686738544893e-05, "loss": 0.8706, "step": 1239 }, { "epoch": 0.42, "learning_rate": 7.903456639975875e-05, "loss": 1.0037, "step": 1240 }, { "epoch": 0.42, "learning_rate": 7.898221743932888e-05, "loss": 1.0868, "step": 1241 }, { "epoch": 0.42, "learning_rate": 7.892982059065714e-05, "loss": 0.9837, "step": 1242 }, { "epoch": 0.42, "learning_rate": 7.887737594032054e-05, "loss": 1.1385, "step": 1243 }, { "epoch": 0.42, "learning_rate": 7.882488357497504e-05, "loss": 1.0477, "step": 1244 }, { "epoch": 0.42, "learning_rate": 7.877234358135543e-05, "loss": 1.1323, "step": 1245 }, { "epoch": 0.42, "learning_rate": 7.871975604627524e-05, "loss": 1.0543, "step": 1246 }, { "epoch": 0.42, "learning_rate": 7.866712105662653e-05, "loss": 1.038, "step": 1247 }, { "epoch": 0.42, "learning_rate": 7.861443869937974e-05, "loss": 0.8819, "step": 1248 }, { "epoch": 0.42, "learning_rate": 7.856170906158363e-05, "loss": 0.927, "step": 1249 }, { "epoch": 0.42, "learning_rate": 7.850893223036509e-05, "loss": 0.9945, "step": 1250 }, { "epoch": 0.42, "learning_rate": 7.845610829292891e-05, "loss": 1.1037, "step": 1251 }, { "epoch": 0.43, "learning_rate": 7.840323733655778e-05, "loss": 0.848, "step": 1252 }, { "epoch": 0.43, "learning_rate": 7.835031944861211e-05, "loss": 1.0035, "step": 1253 }, { "epoch": 0.43, "learning_rate": 7.829735471652978e-05, "loss": 0.9732, "step": 1254 }, { "epoch": 0.43, "learning_rate": 7.824434322782608e-05, "loss": 0.9965, "step": 1255 }, { "epoch": 0.43, "learning_rate": 7.819128507009361e-05, "loss": 0.8346, "step": 1256 }, { "epoch": 0.43, "learning_rate": 7.813818033100206e-05, "loss": 1.1361, "step": 1257 }, { "epoch": 0.43, "learning_rate": 7.808502909829807e-05, "loss": 0.9584, "step": 1258 }, { "epoch": 0.43, "learning_rate": 7.803183145980514e-05, "loss": 0.957, "step": 1259 }, { "epoch": 0.43, "learning_rate": 7.79785875034234e-05, "loss": 1.1767, "step": 1260 }, { "epoch": 0.43, "learning_rate": 7.792529731712954e-05, "loss": 0.8672, "step": 1261 }, { "epoch": 0.43, "learning_rate": 7.787196098897664e-05, "loss": 1.1808, "step": 1262 }, { "epoch": 0.43, "learning_rate": 7.7818578607094e-05, "loss": 1.1241, "step": 1263 }, { "epoch": 0.43, "learning_rate": 7.776515025968707e-05, "loss": 0.8533, "step": 1264 }, { "epoch": 0.43, "learning_rate": 7.771167603503717e-05, "loss": 1.1037, "step": 1265 }, { "epoch": 0.43, "learning_rate": 7.765815602150149e-05, "loss": 0.9796, "step": 1266 }, { "epoch": 0.43, "learning_rate": 7.760459030751284e-05, "loss": 1.0567, "step": 1267 }, { "epoch": 0.43, "learning_rate": 7.755097898157957e-05, "loss": 0.7107, "step": 1268 }, { "epoch": 0.43, "learning_rate": 7.749732213228537e-05, "loss": 1.2755, "step": 1269 }, { "epoch": 0.43, "learning_rate": 7.744361984828919e-05, "loss": 0.9611, "step": 1270 }, { "epoch": 0.43, "learning_rate": 7.738987221832498e-05, "loss": 0.8535, "step": 1271 }, { "epoch": 0.43, "learning_rate": 7.733607933120169e-05, "loss": 1.0751, "step": 1272 }, { "epoch": 0.43, "learning_rate": 7.728224127580303e-05, "loss": 1.1492, "step": 1273 }, { "epoch": 0.43, "learning_rate": 7.722835814108733e-05, "loss": 1.0798, "step": 1274 }, { "epoch": 0.43, "learning_rate": 7.71744300160874e-05, "loss": 0.91, "step": 1275 }, { "epoch": 0.43, "learning_rate": 7.712045698991041e-05, "loss": 0.9607, "step": 1276 }, { "epoch": 0.43, "learning_rate": 7.70664391517377e-05, "loss": 1.0185, "step": 1277 }, { "epoch": 0.43, "learning_rate": 7.70123765908247e-05, "loss": 0.8179, "step": 1278 }, { "epoch": 0.43, "learning_rate": 7.695826939650066e-05, "loss": 1.2276, "step": 1279 }, { "epoch": 0.43, "learning_rate": 7.690411765816864e-05, "loss": 0.7487, "step": 1280 }, { "epoch": 0.44, "learning_rate": 7.684992146530529e-05, "loss": 0.8832, "step": 1281 }, { "epoch": 0.44, "learning_rate": 7.67956809074607e-05, "loss": 0.9475, "step": 1282 }, { "epoch": 0.44, "learning_rate": 7.674139607425826e-05, "loss": 1.0093, "step": 1283 }, { "epoch": 0.44, "learning_rate": 7.668706705539457e-05, "loss": 1.1049, "step": 1284 }, { "epoch": 0.44, "learning_rate": 7.663269394063917e-05, "loss": 1.1938, "step": 1285 }, { "epoch": 0.44, "learning_rate": 7.657827681983448e-05, "loss": 0.842, "step": 1286 }, { "epoch": 0.44, "learning_rate": 7.652381578289568e-05, "loss": 1.1828, "step": 1287 }, { "epoch": 0.44, "learning_rate": 7.646931091981045e-05, "loss": 1.1977, "step": 1288 }, { "epoch": 0.44, "learning_rate": 7.641476232063891e-05, "loss": 0.7324, "step": 1289 }, { "epoch": 0.44, "learning_rate": 7.636017007551348e-05, "loss": 0.9004, "step": 1290 }, { "epoch": 0.44, "learning_rate": 7.630553427463862e-05, "loss": 1.2284, "step": 1291 }, { "epoch": 0.44, "learning_rate": 7.625085500829086e-05, "loss": 1.002, "step": 1292 }, { "epoch": 0.44, "learning_rate": 7.619613236681843e-05, "loss": 1.0344, "step": 1293 }, { "epoch": 0.44, "learning_rate": 7.614136644064135e-05, "loss": 1.234, "step": 1294 }, { "epoch": 0.44, "learning_rate": 7.608655732025112e-05, "loss": 0.8096, "step": 1295 }, { "epoch": 0.44, "learning_rate": 7.603170509621053e-05, "loss": 0.9509, "step": 1296 }, { "epoch": 0.44, "learning_rate": 7.59768098591537e-05, "loss": 1.0231, "step": 1297 }, { "epoch": 0.44, "learning_rate": 7.592187169978579e-05, "loss": 0.8157, "step": 1298 }, { "epoch": 0.44, "learning_rate": 7.586689070888284e-05, "loss": 1.0357, "step": 1299 }, { "epoch": 0.44, "learning_rate": 7.581186697729173e-05, "loss": 1.1388, "step": 1300 }, { "epoch": 0.44, "learning_rate": 7.57568005959299e-05, "loss": 0.986, "step": 1301 }, { "epoch": 0.44, "learning_rate": 7.570169165578527e-05, "loss": 1.1364, "step": 1302 }, { "epoch": 0.44, "learning_rate": 7.564654024791615e-05, "loss": 1.1418, "step": 1303 }, { "epoch": 0.44, "learning_rate": 7.559134646345092e-05, "loss": 1.2501, "step": 1304 }, { "epoch": 0.44, "learning_rate": 7.553611039358806e-05, "loss": 1.1381, "step": 1305 }, { "epoch": 0.44, "learning_rate": 7.548083212959588e-05, "loss": 0.9016, "step": 1306 }, { "epoch": 0.44, "learning_rate": 7.542551176281242e-05, "loss": 0.8712, "step": 1307 }, { "epoch": 0.44, "learning_rate": 7.537014938464528e-05, "loss": 0.8187, "step": 1308 }, { "epoch": 0.44, "learning_rate": 7.53147450865715e-05, "loss": 0.8543, "step": 1309 }, { "epoch": 0.44, "learning_rate": 7.525929896013735e-05, "loss": 0.9261, "step": 1310 }, { "epoch": 0.45, "learning_rate": 7.520381109695822e-05, "loss": 1.0717, "step": 1311 }, { "epoch": 0.45, "learning_rate": 7.514828158871852e-05, "loss": 1.0684, "step": 1312 }, { "epoch": 0.45, "learning_rate": 7.509271052717139e-05, "loss": 0.6846, "step": 1313 }, { "epoch": 0.45, "learning_rate": 7.503709800413868e-05, "loss": 1.1465, "step": 1314 }, { "epoch": 0.45, "learning_rate": 7.498144411151073e-05, "loss": 1.2496, "step": 1315 }, { "epoch": 0.45, "learning_rate": 7.492574894124624e-05, "loss": 0.6703, "step": 1316 }, { "epoch": 0.45, "learning_rate": 7.48700125853721e-05, "loss": 0.8595, "step": 1317 }, { "epoch": 0.45, "learning_rate": 7.481423513598331e-05, "loss": 1.0891, "step": 1318 }, { "epoch": 0.45, "learning_rate": 7.475841668524268e-05, "loss": 0.7548, "step": 1319 }, { "epoch": 0.45, "learning_rate": 7.470255732538086e-05, "loss": 0.9805, "step": 1320 }, { "epoch": 0.45, "learning_rate": 7.464665714869602e-05, "loss": 0.7253, "step": 1321 }, { "epoch": 0.45, "learning_rate": 7.459071624755381e-05, "loss": 1.1128, "step": 1322 }, { "epoch": 0.45, "learning_rate": 7.453473471438719e-05, "loss": 1.2157, "step": 1323 }, { "epoch": 0.45, "learning_rate": 7.44787126416962e-05, "loss": 1.0998, "step": 1324 }, { "epoch": 0.45, "learning_rate": 7.44226501220479e-05, "loss": 1.0762, "step": 1325 }, { "epoch": 0.45, "learning_rate": 7.436654724807619e-05, "loss": 1.2027, "step": 1326 }, { "epoch": 0.45, "learning_rate": 7.431040411248162e-05, "loss": 1.2007, "step": 1327 }, { "epoch": 0.45, "learning_rate": 7.425422080803132e-05, "loss": 1.1174, "step": 1328 }, { "epoch": 0.45, "learning_rate": 7.419799742755871e-05, "loss": 1.1856, "step": 1329 }, { "epoch": 0.45, "learning_rate": 7.414173406396351e-05, "loss": 0.8158, "step": 1330 }, { "epoch": 0.45, "learning_rate": 7.408543081021145e-05, "loss": 0.9521, "step": 1331 }, { "epoch": 0.45, "learning_rate": 7.402908775933419e-05, "loss": 1.097, "step": 1332 }, { "epoch": 0.45, "learning_rate": 7.397270500442916e-05, "loss": 1.0881, "step": 1333 }, { "epoch": 0.45, "learning_rate": 7.391628263865939e-05, "loss": 0.9636, "step": 1334 }, { "epoch": 0.45, "learning_rate": 7.385982075525332e-05, "loss": 0.9554, "step": 1335 }, { "epoch": 0.45, "learning_rate": 7.380331944750476e-05, "loss": 1.0945, "step": 1336 }, { "epoch": 0.45, "learning_rate": 7.37467788087726e-05, "loss": 0.9706, "step": 1337 }, { "epoch": 0.45, "learning_rate": 7.369019893248074e-05, "loss": 0.9917, "step": 1338 }, { "epoch": 0.45, "learning_rate": 7.363357991211791e-05, "loss": 0.9956, "step": 1339 }, { "epoch": 0.46, "learning_rate": 7.357692184123753e-05, "loss": 0.9692, "step": 1340 }, { "epoch": 0.46, "learning_rate": 7.352022481345755e-05, "loss": 0.9827, "step": 1341 }, { "epoch": 0.46, "learning_rate": 7.346348892246025e-05, "loss": 1.0565, "step": 1342 }, { "epoch": 0.46, "learning_rate": 7.34067142619922e-05, "loss": 0.6972, "step": 1343 }, { "epoch": 0.46, "learning_rate": 7.33499009258639e-05, "loss": 0.9352, "step": 1344 }, { "epoch": 0.46, "learning_rate": 7.329304900794991e-05, "loss": 0.9592, "step": 1345 }, { "epoch": 0.46, "learning_rate": 7.323615860218843e-05, "loss": 0.6823, "step": 1346 }, { "epoch": 0.46, "learning_rate": 7.31792298025813e-05, "loss": 0.9401, "step": 1347 }, { "epoch": 0.46, "learning_rate": 7.31222627031938e-05, "loss": 0.9133, "step": 1348 }, { "epoch": 0.46, "learning_rate": 7.306525739815447e-05, "loss": 1.1434, "step": 1349 }, { "epoch": 0.46, "learning_rate": 7.3008213981655e-05, "loss": 1.0579, "step": 1350 }, { "epoch": 0.46, "learning_rate": 7.295113254795005e-05, "loss": 1.09, "step": 1351 }, { "epoch": 0.46, "learning_rate": 7.289401319135707e-05, "loss": 1.0102, "step": 1352 }, { "epoch": 0.46, "learning_rate": 7.283685600625621e-05, "loss": 1.0512, "step": 1353 }, { "epoch": 0.46, "learning_rate": 7.277966108709013e-05, "loss": 0.903, "step": 1354 }, { "epoch": 0.46, "learning_rate": 7.272242852836378e-05, "loss": 1.002, "step": 1355 }, { "epoch": 0.46, "learning_rate": 7.266515842464437e-05, "loss": 1.0707, "step": 1356 }, { "epoch": 0.46, "learning_rate": 7.260785087056112e-05, "loss": 1.0596, "step": 1357 }, { "epoch": 0.46, "learning_rate": 7.255050596080509e-05, "loss": 1.1223, "step": 1358 }, { "epoch": 0.46, "learning_rate": 7.249312379012915e-05, "loss": 1.1039, "step": 1359 }, { "epoch": 0.46, "learning_rate": 7.243570445334766e-05, "loss": 0.9334, "step": 1360 }, { "epoch": 0.46, "learning_rate": 7.237824804533644e-05, "loss": 1.3723, "step": 1361 }, { "epoch": 0.46, "learning_rate": 7.232075466103253e-05, "loss": 0.9042, "step": 1362 }, { "epoch": 0.46, "learning_rate": 7.226322439543407e-05, "loss": 1.0148, "step": 1363 }, { "epoch": 0.46, "learning_rate": 7.220565734360018e-05, "loss": 1.2219, "step": 1364 }, { "epoch": 0.46, "learning_rate": 7.214805360065073e-05, "loss": 1.0604, "step": 1365 }, { "epoch": 0.46, "learning_rate": 7.20904132617662e-05, "loss": 0.8128, "step": 1366 }, { "epoch": 0.46, "learning_rate": 7.203273642218758e-05, "loss": 0.8846, "step": 1367 }, { "epoch": 0.46, "learning_rate": 7.197502317721616e-05, "loss": 1.1068, "step": 1368 }, { "epoch": 0.46, "learning_rate": 7.191727362221334e-05, "loss": 0.984, "step": 1369 }, { "epoch": 0.47, "learning_rate": 7.185948785260058e-05, "loss": 0.8394, "step": 1370 }, { "epoch": 0.47, "learning_rate": 7.180166596385914e-05, "loss": 1.1457, "step": 1371 }, { "epoch": 0.47, "learning_rate": 7.174380805152997e-05, "loss": 1.0143, "step": 1372 }, { "epoch": 0.47, "learning_rate": 7.168591421121353e-05, "loss": 1.2137, "step": 1373 }, { "epoch": 0.47, "learning_rate": 7.162798453856969e-05, "loss": 1.1731, "step": 1374 }, { "epoch": 0.47, "learning_rate": 7.157001912931747e-05, "loss": 1.0153, "step": 1375 }, { "epoch": 0.47, "learning_rate": 7.151201807923497e-05, "loss": 1.1745, "step": 1376 }, { "epoch": 0.47, "learning_rate": 7.145398148415916e-05, "loss": 0.9129, "step": 1377 }, { "epoch": 0.47, "learning_rate": 7.139590943998579e-05, "loss": 1.161, "step": 1378 }, { "epoch": 0.47, "learning_rate": 7.133780204266912e-05, "loss": 0.9564, "step": 1379 }, { "epoch": 0.47, "learning_rate": 7.127965938822186e-05, "loss": 1.096, "step": 1380 }, { "epoch": 0.47, "learning_rate": 7.122148157271499e-05, "loss": 1.1669, "step": 1381 }, { "epoch": 0.47, "learning_rate": 7.116326869227756e-05, "loss": 0.9738, "step": 1382 }, { "epoch": 0.47, "learning_rate": 7.110502084309655e-05, "loss": 0.9476, "step": 1383 }, { "epoch": 0.47, "learning_rate": 7.104673812141675e-05, "loss": 0.9529, "step": 1384 }, { "epoch": 0.47, "learning_rate": 7.098842062354057e-05, "loss": 1.1069, "step": 1385 }, { "epoch": 0.47, "learning_rate": 7.093006844582786e-05, "loss": 1.0901, "step": 1386 }, { "epoch": 0.47, "learning_rate": 7.087168168469579e-05, "loss": 0.9785, "step": 1387 }, { "epoch": 0.47, "learning_rate": 7.081326043661867e-05, "loss": 0.9507, "step": 1388 }, { "epoch": 0.47, "learning_rate": 7.075480479812777e-05, "loss": 1.2056, "step": 1389 }, { "epoch": 0.47, "learning_rate": 7.069631486581123e-05, "loss": 0.6984, "step": 1390 }, { "epoch": 0.47, "learning_rate": 7.06377907363138e-05, "loss": 1.1632, "step": 1391 }, { "epoch": 0.47, "learning_rate": 7.05792325063368e-05, "loss": 1.1104, "step": 1392 }, { "epoch": 0.47, "learning_rate": 7.052064027263786e-05, "loss": 1.1522, "step": 1393 }, { "epoch": 0.47, "learning_rate": 7.046201413203075e-05, "loss": 1.1299, "step": 1394 }, { "epoch": 0.47, "learning_rate": 7.040335418138535e-05, "loss": 0.9037, "step": 1395 }, { "epoch": 0.47, "learning_rate": 7.034466051762736e-05, "loss": 1.1395, "step": 1396 }, { "epoch": 0.47, "learning_rate": 7.02859332377382e-05, "loss": 0.9128, "step": 1397 }, { "epoch": 0.47, "learning_rate": 7.02271724387548e-05, "loss": 1.1159, "step": 1398 }, { "epoch": 0.48, "learning_rate": 7.016837821776952e-05, "loss": 0.974, "step": 1399 }, { "epoch": 0.48, "learning_rate": 7.010955067192992e-05, "loss": 0.8739, "step": 1400 }, { "epoch": 0.48, "learning_rate": 7.005068989843864e-05, "loss": 1.1559, "step": 1401 }, { "epoch": 0.48, "learning_rate": 6.999179599455322e-05, "loss": 0.9316, "step": 1402 }, { "epoch": 0.48, "learning_rate": 6.99328690575859e-05, "loss": 0.8837, "step": 1403 }, { "epoch": 0.48, "learning_rate": 6.987390918490357e-05, "loss": 0.9755, "step": 1404 }, { "epoch": 0.48, "learning_rate": 6.981491647392748e-05, "loss": 0.9544, "step": 1405 }, { "epoch": 0.48, "learning_rate": 6.975589102213318e-05, "loss": 0.9335, "step": 1406 }, { "epoch": 0.48, "learning_rate": 6.969683292705032e-05, "loss": 1.2468, "step": 1407 }, { "epoch": 0.48, "learning_rate": 6.963774228626246e-05, "loss": 0.8528, "step": 1408 }, { "epoch": 0.48, "learning_rate": 6.957861919740695e-05, "loss": 1.0372, "step": 1409 }, { "epoch": 0.48, "learning_rate": 6.951946375817474e-05, "loss": 1.0093, "step": 1410 }, { "epoch": 0.48, "learning_rate": 6.946027606631028e-05, "loss": 1.0156, "step": 1411 }, { "epoch": 0.48, "learning_rate": 6.940105621961125e-05, "loss": 0.7378, "step": 1412 }, { "epoch": 0.48, "learning_rate": 6.934180431592853e-05, "loss": 1.1554, "step": 1413 }, { "epoch": 0.48, "learning_rate": 6.928252045316588e-05, "loss": 1.2318, "step": 1414 }, { "epoch": 0.48, "learning_rate": 6.922320472927994e-05, "loss": 0.709, "step": 1415 }, { "epoch": 0.48, "learning_rate": 6.916385724227998e-05, "loss": 1.1225, "step": 1416 }, { "epoch": 0.48, "learning_rate": 6.910447809022775e-05, "loss": 1.2661, "step": 1417 }, { "epoch": 0.48, "learning_rate": 6.904506737123728e-05, "loss": 1.104, "step": 1418 }, { "epoch": 0.48, "learning_rate": 6.898562518347482e-05, "loss": 1.0701, "step": 1419 }, { "epoch": 0.48, "learning_rate": 6.892615162515859e-05, "loss": 0.9994, "step": 1420 }, { "epoch": 0.48, "learning_rate": 6.886664679455867e-05, "loss": 0.7689, "step": 1421 }, { "epoch": 0.48, "learning_rate": 6.880711078999673e-05, "loss": 1.1222, "step": 1422 }, { "epoch": 0.48, "learning_rate": 6.874754370984606e-05, "loss": 0.8599, "step": 1423 }, { "epoch": 0.48, "learning_rate": 6.868794565253123e-05, "loss": 1.3028, "step": 1424 }, { "epoch": 0.48, "learning_rate": 6.8628316716528e-05, "loss": 0.9777, "step": 1425 }, { "epoch": 0.48, "learning_rate": 6.856865700036317e-05, "loss": 1.089, "step": 1426 }, { "epoch": 0.48, "learning_rate": 6.850896660261441e-05, "loss": 0.9551, "step": 1427 }, { "epoch": 0.49, "learning_rate": 6.844924562191003e-05, "loss": 0.9398, "step": 1428 }, { "epoch": 0.49, "learning_rate": 6.838949415692893e-05, "loss": 1.1729, "step": 1429 }, { "epoch": 0.49, "learning_rate": 6.832971230640038e-05, "loss": 0.9585, "step": 1430 }, { "epoch": 0.49, "learning_rate": 6.82699001691038e-05, "loss": 1.1939, "step": 1431 }, { "epoch": 0.49, "learning_rate": 6.821005784386872e-05, "loss": 0.848, "step": 1432 }, { "epoch": 0.49, "learning_rate": 6.815018542957449e-05, "loss": 0.9523, "step": 1433 }, { "epoch": 0.49, "learning_rate": 6.809028302515024e-05, "loss": 1.1162, "step": 1434 }, { "epoch": 0.49, "learning_rate": 6.803035072957461e-05, "loss": 1.1373, "step": 1435 }, { "epoch": 0.49, "learning_rate": 6.797038864187564e-05, "loss": 0.7348, "step": 1436 }, { "epoch": 0.49, "learning_rate": 6.79103968611306e-05, "loss": 1.207, "step": 1437 }, { "epoch": 0.49, "learning_rate": 6.785037548646585e-05, "loss": 0.9127, "step": 1438 }, { "epoch": 0.49, "learning_rate": 6.779032461705658e-05, "loss": 1.1951, "step": 1439 }, { "epoch": 0.49, "learning_rate": 6.773024435212678e-05, "loss": 0.9068, "step": 1440 }, { "epoch": 0.49, "learning_rate": 6.767013479094899e-05, "loss": 1.0619, "step": 1441 }, { "epoch": 0.49, "learning_rate": 6.760999603284413e-05, "loss": 0.8151, "step": 1442 }, { "epoch": 0.49, "learning_rate": 6.75498281771814e-05, "loss": 0.8822, "step": 1443 }, { "epoch": 0.49, "learning_rate": 6.748963132337806e-05, "loss": 0.937, "step": 1444 }, { "epoch": 0.49, "learning_rate": 6.742940557089931e-05, "loss": 1.2148, "step": 1445 }, { "epoch": 0.49, "learning_rate": 6.736915101925806e-05, "loss": 0.9719, "step": 1446 }, { "epoch": 0.49, "learning_rate": 6.730886776801485e-05, "loss": 0.9331, "step": 1447 }, { "epoch": 0.49, "learning_rate": 6.72485559167776e-05, "loss": 0.8502, "step": 1448 }, { "epoch": 0.49, "learning_rate": 6.718821556520151e-05, "loss": 1.3111, "step": 1449 }, { "epoch": 0.49, "learning_rate": 6.712784681298885e-05, "loss": 0.7736, "step": 1450 }, { "epoch": 0.49, "learning_rate": 6.706744975988885e-05, "loss": 1.0907, "step": 1451 }, { "epoch": 0.49, "learning_rate": 6.70070245056975e-05, "loss": 1.2127, "step": 1452 }, { "epoch": 0.49, "learning_rate": 6.694657115025736e-05, "loss": 1.0008, "step": 1453 }, { "epoch": 0.49, "learning_rate": 6.688608979345742e-05, "loss": 0.8601, "step": 1454 }, { "epoch": 0.49, "learning_rate": 6.682558053523298e-05, "loss": 0.881, "step": 1455 }, { "epoch": 0.49, "learning_rate": 6.676504347556541e-05, "loss": 1.088, "step": 1456 }, { "epoch": 0.49, "learning_rate": 6.6704478714482e-05, "loss": 1.2683, "step": 1457 }, { "epoch": 0.5, "learning_rate": 6.664388635205587e-05, "loss": 1.2617, "step": 1458 }, { "epoch": 0.5, "learning_rate": 6.658326648840569e-05, "loss": 0.8452, "step": 1459 }, { "epoch": 0.5, "learning_rate": 6.652261922369562e-05, "loss": 0.9553, "step": 1460 }, { "epoch": 0.5, "learning_rate": 6.646194465813503e-05, "loss": 1.2134, "step": 1461 }, { "epoch": 0.5, "learning_rate": 6.640124289197845e-05, "loss": 1.1072, "step": 1462 }, { "epoch": 0.5, "learning_rate": 6.634051402552538e-05, "loss": 0.8501, "step": 1463 }, { "epoch": 0.5, "learning_rate": 6.627975815912002e-05, "loss": 0.9498, "step": 1464 }, { "epoch": 0.5, "learning_rate": 6.621897539315124e-05, "loss": 0.8689, "step": 1465 }, { "epoch": 0.5, "learning_rate": 6.615816582805234e-05, "loss": 1.0125, "step": 1466 }, { "epoch": 0.5, "learning_rate": 6.609732956430091e-05, "loss": 1.0928, "step": 1467 }, { "epoch": 0.5, "learning_rate": 6.603646670241863e-05, "loss": 0.8105, "step": 1468 }, { "epoch": 0.5, "learning_rate": 6.597557734297118e-05, "loss": 1.2783, "step": 1469 }, { "epoch": 0.5, "learning_rate": 6.591466158656795e-05, "loss": 0.9291, "step": 1470 }, { "epoch": 0.5, "learning_rate": 6.585371953386201e-05, "loss": 0.8808, "step": 1471 }, { "epoch": 0.5, "learning_rate": 6.579275128554985e-05, "loss": 0.9887, "step": 1472 }, { "epoch": 0.5, "learning_rate": 6.573175694237128e-05, "loss": 0.6852, "step": 1473 }, { "epoch": 0.5, "learning_rate": 6.567073660510914e-05, "loss": 1.2279, "step": 1474 }, { "epoch": 0.5, "learning_rate": 6.560969037458933e-05, "loss": 1.0804, "step": 1475 }, { "epoch": 0.5, "learning_rate": 6.554861835168045e-05, "loss": 0.9863, "step": 1476 }, { "epoch": 0.5, "learning_rate": 6.548752063729377e-05, "loss": 1.2279, "step": 1477 }, { "epoch": 0.5, "learning_rate": 6.542639733238297e-05, "loss": 0.7584, "step": 1478 }, { "epoch": 0.5, "learning_rate": 6.536524853794406e-05, "loss": 0.9817, "step": 1479 }, { "epoch": 0.5, "learning_rate": 6.530407435501513e-05, "loss": 1.027, "step": 1480 }, { "epoch": 0.5, "learning_rate": 6.524287488467623e-05, "loss": 0.9684, "step": 1481 }, { "epoch": 0.5, "learning_rate": 6.51816502280492e-05, "loss": 1.205, "step": 1482 }, { "epoch": 0.5, "learning_rate": 6.51204004862975e-05, "loss": 0.9251, "step": 1483 }, { "epoch": 0.5, "learning_rate": 6.505912576062602e-05, "loss": 0.7882, "step": 1484 }, { "epoch": 0.5, "learning_rate": 6.499782615228093e-05, "loss": 0.9375, "step": 1485 }, { "epoch": 0.5, "learning_rate": 6.493650176254958e-05, "loss": 1.1361, "step": 1486 }, { "epoch": 0.51, "learning_rate": 6.487515269276016e-05, "loss": 0.9536, "step": 1487 }, { "epoch": 0.51, "learning_rate": 6.481377904428171e-05, "loss": 0.608, "step": 1488 }, { "epoch": 0.51, "learning_rate": 6.475238091852388e-05, "loss": 1.126, "step": 1489 }, { "epoch": 0.51, "learning_rate": 6.469095841693671e-05, "loss": 1.1508, "step": 1490 }, { "epoch": 0.51, "learning_rate": 6.462951164101061e-05, "loss": 0.9305, "step": 1491 }, { "epoch": 0.51, "learning_rate": 6.456804069227601e-05, "loss": 1.1425, "step": 1492 }, { "epoch": 0.51, "learning_rate": 6.450654567230333e-05, "loss": 0.9645, "step": 1493 }, { "epoch": 0.51, "learning_rate": 6.444502668270276e-05, "loss": 1.0726, "step": 1494 }, { "epoch": 0.51, "learning_rate": 6.438348382512407e-05, "loss": 0.8361, "step": 1495 }, { "epoch": 0.51, "learning_rate": 6.432191720125651e-05, "loss": 0.8541, "step": 1496 }, { "epoch": 0.51, "learning_rate": 6.426032691282857e-05, "loss": 0.8771, "step": 1497 }, { "epoch": 0.51, "learning_rate": 6.419871306160782e-05, "loss": 1.2057, "step": 1498 }, { "epoch": 0.51, "learning_rate": 6.413707574940081e-05, "loss": 1.1805, "step": 1499 }, { "epoch": 0.51, "learning_rate": 6.407541507805286e-05, "loss": 1.1445, "step": 1500 }, { "epoch": 0.51, "learning_rate": 6.401373114944781e-05, "loss": 1.2273, "step": 1501 }, { "epoch": 0.51, "learning_rate": 6.395202406550804e-05, "loss": 1.4446, "step": 1502 }, { "epoch": 0.51, "learning_rate": 6.389029392819407e-05, "loss": 1.22, "step": 1503 }, { "epoch": 0.51, "learning_rate": 6.382854083950462e-05, "loss": 0.9219, "step": 1504 }, { "epoch": 0.51, "learning_rate": 6.376676490147628e-05, "loss": 1.1998, "step": 1505 }, { "epoch": 0.51, "learning_rate": 6.370496621618338e-05, "loss": 0.7189, "step": 1506 }, { "epoch": 0.51, "learning_rate": 6.364314488573789e-05, "loss": 1.1212, "step": 1507 }, { "epoch": 0.51, "learning_rate": 6.358130101228914e-05, "loss": 1.2224, "step": 1508 }, { "epoch": 0.51, "learning_rate": 6.351943469802375e-05, "loss": 1.0255, "step": 1509 }, { "epoch": 0.51, "learning_rate": 6.345754604516538e-05, "loss": 0.9712, "step": 1510 }, { "epoch": 0.51, "learning_rate": 6.339563515597466e-05, "loss": 1.1603, "step": 1511 }, { "epoch": 0.51, "learning_rate": 6.33337021327489e-05, "loss": 1.1376, "step": 1512 }, { "epoch": 0.51, "learning_rate": 6.327174707782201e-05, "loss": 1.4016, "step": 1513 }, { "epoch": 0.51, "learning_rate": 6.320977009356431e-05, "loss": 1.1725, "step": 1514 }, { "epoch": 0.51, "learning_rate": 6.314777128238235e-05, "loss": 0.8652, "step": 1515 }, { "epoch": 0.51, "learning_rate": 6.308575074671872e-05, "loss": 0.9279, "step": 1516 }, { "epoch": 0.52, "learning_rate": 6.302370858905196e-05, "loss": 1.004, "step": 1517 }, { "epoch": 0.52, "learning_rate": 6.296164491189627e-05, "loss": 0.9504, "step": 1518 }, { "epoch": 0.52, "learning_rate": 6.289955981780149e-05, "loss": 1.0236, "step": 1519 }, { "epoch": 0.52, "learning_rate": 6.283745340935277e-05, "loss": 0.9678, "step": 1520 }, { "epoch": 0.52, "learning_rate": 6.27753257891705e-05, "loss": 1.0945, "step": 1521 }, { "epoch": 0.52, "learning_rate": 6.271317705991013e-05, "loss": 1.0671, "step": 1522 }, { "epoch": 0.52, "learning_rate": 6.265100732426201e-05, "loss": 1.1852, "step": 1523 }, { "epoch": 0.52, "learning_rate": 6.258881668495116e-05, "loss": 0.9568, "step": 1524 }, { "epoch": 0.52, "learning_rate": 6.252660524473716e-05, "loss": 0.8163, "step": 1525 }, { "epoch": 0.52, "learning_rate": 6.246437310641395e-05, "loss": 1.042, "step": 1526 }, { "epoch": 0.52, "learning_rate": 6.240212037280966e-05, "loss": 1.0423, "step": 1527 }, { "epoch": 0.52, "learning_rate": 6.233984714678652e-05, "loss": 1.0501, "step": 1528 }, { "epoch": 0.52, "learning_rate": 6.227755353124052e-05, "loss": 0.8944, "step": 1529 }, { "epoch": 0.52, "learning_rate": 6.221523962910138e-05, "loss": 1.0133, "step": 1530 }, { "epoch": 0.52, "learning_rate": 6.21529055433324e-05, "loss": 1.2808, "step": 1531 }, { "epoch": 0.52, "learning_rate": 6.209055137693014e-05, "loss": 1.1015, "step": 1532 }, { "epoch": 0.52, "learning_rate": 6.202817723292438e-05, "loss": 0.9896, "step": 1533 }, { "epoch": 0.52, "learning_rate": 6.19657832143779e-05, "loss": 0.9992, "step": 1534 }, { "epoch": 0.52, "learning_rate": 6.190336942438633e-05, "loss": 0.8868, "step": 1535 }, { "epoch": 0.52, "learning_rate": 6.184093596607799e-05, "loss": 1.1235, "step": 1536 }, { "epoch": 0.52, "learning_rate": 6.177848294261362e-05, "loss": 1.1607, "step": 1537 }, { "epoch": 0.52, "learning_rate": 6.171601045718639e-05, "loss": 1.1614, "step": 1538 }, { "epoch": 0.52, "learning_rate": 6.165351861302157e-05, "loss": 1.0255, "step": 1539 }, { "epoch": 0.52, "learning_rate": 6.159100751337642e-05, "loss": 1.1595, "step": 1540 }, { "epoch": 0.52, "learning_rate": 6.152847726154e-05, "loss": 1.1724, "step": 1541 }, { "epoch": 0.52, "learning_rate": 6.14659279608331e-05, "loss": 1.2108, "step": 1542 }, { "epoch": 0.52, "learning_rate": 6.140335971460787e-05, "loss": 1.234, "step": 1543 }, { "epoch": 0.52, "learning_rate": 6.134077262624782e-05, "loss": 0.8333, "step": 1544 }, { "epoch": 0.52, "learning_rate": 6.127816679916762e-05, "loss": 1.1344, "step": 1545 }, { "epoch": 0.53, "learning_rate": 6.121554233681286e-05, "loss": 0.9033, "step": 1546 }, { "epoch": 0.53, "learning_rate": 6.115289934265993e-05, "loss": 0.8733, "step": 1547 }, { "epoch": 0.53, "learning_rate": 6.109023792021585e-05, "loss": 1.06, "step": 1548 }, { "epoch": 0.53, "learning_rate": 6.1027558173018084e-05, "loss": 1.0678, "step": 1549 }, { "epoch": 0.53, "learning_rate": 6.09648602046344e-05, "loss": 0.9581, "step": 1550 }, { "epoch": 0.53, "learning_rate": 6.090214411866261e-05, "loss": 0.8761, "step": 1551 }, { "epoch": 0.53, "learning_rate": 6.0839410018730514e-05, "loss": 1.1855, "step": 1552 }, { "epoch": 0.53, "learning_rate": 6.077665800849568e-05, "loss": 0.8944, "step": 1553 }, { "epoch": 0.53, "learning_rate": 6.071388819164525e-05, "loss": 0.8032, "step": 1554 }, { "epoch": 0.53, "learning_rate": 6.06511006718958e-05, "loss": 0.8592, "step": 1555 }, { "epoch": 0.53, "learning_rate": 6.058829555299315e-05, "loss": 0.904, "step": 1556 }, { "epoch": 0.53, "learning_rate": 6.052547293871217e-05, "loss": 0.8644, "step": 1557 }, { "epoch": 0.53, "learning_rate": 6.04626329328567e-05, "loss": 0.8555, "step": 1558 }, { "epoch": 0.53, "learning_rate": 6.0399775639259284e-05, "loss": 0.8755, "step": 1559 }, { "epoch": 0.53, "learning_rate": 6.0336901161781014e-05, "loss": 0.7802, "step": 1560 }, { "epoch": 0.53, "learning_rate": 6.027400960431141e-05, "loss": 1.0764, "step": 1561 }, { "epoch": 0.53, "learning_rate": 6.0211101070768186e-05, "loss": 0.814, "step": 1562 }, { "epoch": 0.53, "learning_rate": 6.014817566509713e-05, "loss": 1.186, "step": 1563 }, { "epoch": 0.53, "learning_rate": 6.0085233491271875e-05, "loss": 0.9794, "step": 1564 }, { "epoch": 0.53, "learning_rate": 6.002227465329381e-05, "loss": 0.9691, "step": 1565 }, { "epoch": 0.53, "learning_rate": 5.99592992551918e-05, "loss": 1.0254, "step": 1566 }, { "epoch": 0.53, "learning_rate": 5.989630740102214e-05, "loss": 0.8992, "step": 1567 }, { "epoch": 0.53, "learning_rate": 5.983329919486824e-05, "loss": 1.1605, "step": 1568 }, { "epoch": 0.53, "learning_rate": 5.977027474084059e-05, "loss": 1.2324, "step": 1569 }, { "epoch": 0.53, "learning_rate": 5.970723414307652e-05, "loss": 0.9887, "step": 1570 }, { "epoch": 0.53, "learning_rate": 5.964417750573997e-05, "loss": 1.2275, "step": 1571 }, { "epoch": 0.53, "learning_rate": 5.958110493302148e-05, "loss": 1.4255, "step": 1572 }, { "epoch": 0.53, "learning_rate": 5.9518016529137856e-05, "loss": 0.748, "step": 1573 }, { "epoch": 0.53, "learning_rate": 5.945491239833206e-05, "loss": 1.1173, "step": 1574 }, { "epoch": 0.53, "learning_rate": 5.939179264487309e-05, "loss": 1.2252, "step": 1575 }, { "epoch": 0.54, "learning_rate": 5.932865737305571e-05, "loss": 1.1254, "step": 1576 }, { "epoch": 0.54, "learning_rate": 5.926550668720033e-05, "loss": 1.3563, "step": 1577 }, { "epoch": 0.54, "learning_rate": 5.920234069165289e-05, "loss": 0.903, "step": 1578 }, { "epoch": 0.54, "learning_rate": 5.913915949078452e-05, "loss": 0.7434, "step": 1579 }, { "epoch": 0.54, "learning_rate": 5.907596318899157e-05, "loss": 0.9425, "step": 1580 }, { "epoch": 0.54, "learning_rate": 5.90127518906953e-05, "loss": 1.0016, "step": 1581 }, { "epoch": 0.54, "learning_rate": 5.894952570034173e-05, "loss": 1.0095, "step": 1582 }, { "epoch": 0.54, "learning_rate": 5.888628472240153e-05, "loss": 0.8137, "step": 1583 }, { "epoch": 0.54, "learning_rate": 5.882302906136978e-05, "loss": 0.8489, "step": 1584 }, { "epoch": 0.54, "learning_rate": 5.875975882176583e-05, "loss": 1.0808, "step": 1585 }, { "epoch": 0.54, "learning_rate": 5.8696474108133105e-05, "loss": 0.8442, "step": 1586 }, { "epoch": 0.54, "learning_rate": 5.8633175025038956e-05, "loss": 0.796, "step": 1587 }, { "epoch": 0.54, "learning_rate": 5.8569861677074475e-05, "loss": 1.4076, "step": 1588 }, { "epoch": 0.54, "learning_rate": 5.8506534168854344e-05, "loss": 0.9341, "step": 1589 }, { "epoch": 0.54, "learning_rate": 5.844319260501661e-05, "loss": 1.0711, "step": 1590 }, { "epoch": 0.54, "learning_rate": 5.837983709022254e-05, "loss": 0.8354, "step": 1591 }, { "epoch": 0.54, "learning_rate": 5.831646772915651e-05, "loss": 1.0734, "step": 1592 }, { "epoch": 0.54, "learning_rate": 5.82530846265257e-05, "loss": 1.1089, "step": 1593 }, { "epoch": 0.54, "learning_rate": 5.818968788706005e-05, "loss": 0.7978, "step": 1594 }, { "epoch": 0.54, "learning_rate": 5.812627761551201e-05, "loss": 1.1271, "step": 1595 }, { "epoch": 0.54, "learning_rate": 5.806285391665639e-05, "loss": 1.12, "step": 1596 }, { "epoch": 0.54, "learning_rate": 5.799941689529019e-05, "loss": 1.1562, "step": 1597 }, { "epoch": 0.54, "learning_rate": 5.793596665623243e-05, "loss": 0.8495, "step": 1598 }, { "epoch": 0.54, "learning_rate": 5.787250330432394e-05, "loss": 0.9752, "step": 1599 }, { "epoch": 0.54, "learning_rate": 5.780902694442727e-05, "loss": 1.0133, "step": 1600 }, { "epoch": 0.54, "learning_rate": 5.774553768142643e-05, "loss": 0.7279, "step": 1601 }, { "epoch": 0.54, "learning_rate": 5.768203562022674e-05, "loss": 0.8997, "step": 1602 }, { "epoch": 0.54, "learning_rate": 5.761852086575469e-05, "loss": 0.9058, "step": 1603 }, { "epoch": 0.54, "learning_rate": 5.755499352295771e-05, "loss": 0.947, "step": 1604 }, { "epoch": 0.55, "learning_rate": 5.749145369680407e-05, "loss": 0.748, "step": 1605 }, { "epoch": 0.55, "learning_rate": 5.7427901492282676e-05, "loss": 1.1003, "step": 1606 }, { "epoch": 0.55, "learning_rate": 5.736433701440282e-05, "loss": 0.8087, "step": 1607 }, { "epoch": 0.55, "learning_rate": 5.730076036819414e-05, "loss": 1.119, "step": 1608 }, { "epoch": 0.55, "learning_rate": 5.723717165870635e-05, "loss": 0.8326, "step": 1609 }, { "epoch": 0.55, "learning_rate": 5.7173570991009096e-05, "loss": 1.2217, "step": 1610 }, { "epoch": 0.55, "learning_rate": 5.710995847019178e-05, "loss": 1.017, "step": 1611 }, { "epoch": 0.55, "learning_rate": 5.7046334201363424e-05, "loss": 1.2804, "step": 1612 }, { "epoch": 0.55, "learning_rate": 5.6982698289652424e-05, "loss": 1.1212, "step": 1613 }, { "epoch": 0.55, "learning_rate": 5.691905084020642e-05, "loss": 1.2462, "step": 1614 }, { "epoch": 0.55, "learning_rate": 5.685539195819214e-05, "loss": 1.2251, "step": 1615 }, { "epoch": 0.55, "learning_rate": 5.6791721748795155e-05, "loss": 0.9461, "step": 1616 }, { "epoch": 0.55, "learning_rate": 5.672804031721981e-05, "loss": 1.0951, "step": 1617 }, { "epoch": 0.55, "learning_rate": 5.666434776868895e-05, "loss": 1.0297, "step": 1618 }, { "epoch": 0.55, "learning_rate": 5.66006442084438e-05, "loss": 1.0143, "step": 1619 }, { "epoch": 0.55, "learning_rate": 5.653692974174381e-05, "loss": 0.8653, "step": 1620 }, { "epoch": 0.55, "learning_rate": 5.647320447386637e-05, "loss": 1.0929, "step": 1621 }, { "epoch": 0.55, "learning_rate": 5.6409468510106824e-05, "loss": 1.0483, "step": 1622 }, { "epoch": 0.55, "learning_rate": 5.6345721955778116e-05, "loss": 0.874, "step": 1623 }, { "epoch": 0.55, "learning_rate": 5.628196491621071e-05, "loss": 1.1437, "step": 1624 }, { "epoch": 0.55, "learning_rate": 5.62181974967524e-05, "loss": 1.1363, "step": 1625 }, { "epoch": 0.55, "learning_rate": 5.6154419802768135e-05, "loss": 0.6734, "step": 1626 }, { "epoch": 0.55, "learning_rate": 5.6090631939639804e-05, "loss": 1.1138, "step": 1627 }, { "epoch": 0.55, "learning_rate": 5.602683401276615e-05, "loss": 1.3301, "step": 1628 }, { "epoch": 0.55, "learning_rate": 5.596302612756252e-05, "loss": 0.6244, "step": 1629 }, { "epoch": 0.55, "learning_rate": 5.589920838946071e-05, "loss": 0.9858, "step": 1630 }, { "epoch": 0.55, "learning_rate": 5.583538090390882e-05, "loss": 0.8883, "step": 1631 }, { "epoch": 0.55, "learning_rate": 5.5771543776371e-05, "loss": 1.0445, "step": 1632 }, { "epoch": 0.55, "learning_rate": 5.5707697112327404e-05, "loss": 0.8106, "step": 1633 }, { "epoch": 0.55, "learning_rate": 5.564384101727391e-05, "loss": 0.8035, "step": 1634 }, { "epoch": 0.56, "learning_rate": 5.557997559672197e-05, "loss": 0.9627, "step": 1635 }, { "epoch": 0.56, "learning_rate": 5.551610095619845e-05, "loss": 0.9723, "step": 1636 }, { "epoch": 0.56, "learning_rate": 5.545221720124547e-05, "loss": 0.8614, "step": 1637 }, { "epoch": 0.56, "learning_rate": 5.538832443742018e-05, "loss": 1.1471, "step": 1638 }, { "epoch": 0.56, "learning_rate": 5.532442277029464e-05, "loss": 0.8242, "step": 1639 }, { "epoch": 0.56, "learning_rate": 5.52605123054556e-05, "loss": 0.9834, "step": 1640 }, { "epoch": 0.56, "learning_rate": 5.5196593148504385e-05, "loss": 0.8254, "step": 1641 }, { "epoch": 0.56, "learning_rate": 5.5132665405056616e-05, "loss": 0.7048, "step": 1642 }, { "epoch": 0.56, "learning_rate": 5.5068729180742183e-05, "loss": 1.0154, "step": 1643 }, { "epoch": 0.56, "learning_rate": 5.5004784581204927e-05, "loss": 1.2341, "step": 1644 }, { "epoch": 0.56, "learning_rate": 5.4940831712102536e-05, "loss": 1.0304, "step": 1645 }, { "epoch": 0.56, "learning_rate": 5.4876870679106396e-05, "loss": 1.2646, "step": 1646 }, { "epoch": 0.56, "learning_rate": 5.481290158790136e-05, "loss": 1.1142, "step": 1647 }, { "epoch": 0.56, "learning_rate": 5.4748924544185586e-05, "loss": 1.1155, "step": 1648 }, { "epoch": 0.56, "learning_rate": 5.4684939653670384e-05, "loss": 0.9311, "step": 1649 }, { "epoch": 0.56, "learning_rate": 5.462094702208004e-05, "loss": 0.9559, "step": 1650 }, { "epoch": 0.56, "learning_rate": 5.4556946755151616e-05, "loss": 0.861, "step": 1651 }, { "epoch": 0.56, "learning_rate": 5.4492938958634775e-05, "loss": 1.1805, "step": 1652 }, { "epoch": 0.56, "learning_rate": 5.442892373829165e-05, "loss": 1.0238, "step": 1653 }, { "epoch": 0.56, "learning_rate": 5.436490119989665e-05, "loss": 1.1703, "step": 1654 }, { "epoch": 0.56, "learning_rate": 5.430087144923624e-05, "loss": 0.925, "step": 1655 }, { "epoch": 0.56, "learning_rate": 5.4236834592108795e-05, "loss": 0.8205, "step": 1656 }, { "epoch": 0.56, "learning_rate": 5.41727907343245e-05, "loss": 0.9316, "step": 1657 }, { "epoch": 0.56, "learning_rate": 5.4108739981705025e-05, "loss": 1.2308, "step": 1658 }, { "epoch": 0.56, "learning_rate": 5.404468244008351e-05, "loss": 0.9992, "step": 1659 }, { "epoch": 0.56, "learning_rate": 5.3980618215304234e-05, "loss": 1.1032, "step": 1660 }, { "epoch": 0.56, "learning_rate": 5.391654741322257e-05, "loss": 1.0407, "step": 1661 }, { "epoch": 0.56, "learning_rate": 5.385247013970478e-05, "loss": 0.8816, "step": 1662 }, { "epoch": 0.56, "learning_rate": 5.378838650062774e-05, "loss": 1.1964, "step": 1663 }, { "epoch": 0.57, "learning_rate": 5.37242966018789e-05, "loss": 1.2654, "step": 1664 }, { "epoch": 0.57, "learning_rate": 5.3660200549356064e-05, "loss": 0.8882, "step": 1665 }, { "epoch": 0.57, "learning_rate": 5.359609844896717e-05, "loss": 0.8567, "step": 1666 }, { "epoch": 0.57, "learning_rate": 5.353199040663015e-05, "loss": 1.0241, "step": 1667 }, { "epoch": 0.57, "learning_rate": 5.346787652827279e-05, "loss": 1.2183, "step": 1668 }, { "epoch": 0.57, "learning_rate": 5.340375691983247e-05, "loss": 0.733, "step": 1669 }, { "epoch": 0.57, "learning_rate": 5.3339631687256084e-05, "loss": 1.2671, "step": 1670 }, { "epoch": 0.57, "learning_rate": 5.327550093649979e-05, "loss": 1.034, "step": 1671 }, { "epoch": 0.57, "learning_rate": 5.321136477352887e-05, "loss": 0.7736, "step": 1672 }, { "epoch": 0.57, "learning_rate": 5.314722330431756e-05, "loss": 1.1551, "step": 1673 }, { "epoch": 0.57, "learning_rate": 5.308307663484884e-05, "loss": 1.1262, "step": 1674 }, { "epoch": 0.57, "learning_rate": 5.3018924871114305e-05, "loss": 1.2136, "step": 1675 }, { "epoch": 0.57, "learning_rate": 5.295476811911397e-05, "loss": 1.1537, "step": 1676 }, { "epoch": 0.57, "learning_rate": 5.2890606484856067e-05, "loss": 1.176, "step": 1677 }, { "epoch": 0.57, "learning_rate": 5.28264400743569e-05, "loss": 0.9032, "step": 1678 }, { "epoch": 0.57, "learning_rate": 5.2762268993640703e-05, "loss": 0.8854, "step": 1679 }, { "epoch": 0.57, "learning_rate": 5.2698093348739386e-05, "loss": 0.9525, "step": 1680 }, { "epoch": 0.57, "learning_rate": 5.26339132456924e-05, "loss": 0.841, "step": 1681 }, { "epoch": 0.57, "learning_rate": 5.256972879054659e-05, "loss": 1.0759, "step": 1682 }, { "epoch": 0.57, "learning_rate": 5.250554008935596e-05, "loss": 1.2518, "step": 1683 }, { "epoch": 0.57, "learning_rate": 5.244134724818158e-05, "loss": 0.9731, "step": 1684 }, { "epoch": 0.57, "learning_rate": 5.237715037309131e-05, "loss": 1.2572, "step": 1685 }, { "epoch": 0.57, "learning_rate": 5.231294957015969e-05, "loss": 1.0814, "step": 1686 }, { "epoch": 0.57, "learning_rate": 5.2248744945467765e-05, "loss": 0.8759, "step": 1687 }, { "epoch": 0.57, "learning_rate": 5.218453660510286e-05, "loss": 0.915, "step": 1688 }, { "epoch": 0.57, "learning_rate": 5.212032465515848e-05, "loss": 0.9229, "step": 1689 }, { "epoch": 0.57, "learning_rate": 5.2056109201734086e-05, "loss": 1.1698, "step": 1690 }, { "epoch": 0.57, "learning_rate": 5.199189035093489e-05, "loss": 1.0125, "step": 1691 }, { "epoch": 0.57, "learning_rate": 5.192766820887177e-05, "loss": 1.0012, "step": 1692 }, { "epoch": 0.58, "learning_rate": 5.1863442881661006e-05, "loss": 1.0101, "step": 1693 }, { "epoch": 0.58, "learning_rate": 5.1799214475424166e-05, "loss": 0.8465, "step": 1694 }, { "epoch": 0.58, "learning_rate": 5.173498309628788e-05, "loss": 0.9764, "step": 1695 }, { "epoch": 0.58, "learning_rate": 5.167074885038373e-05, "loss": 0.9195, "step": 1696 }, { "epoch": 0.58, "learning_rate": 5.160651184384798e-05, "loss": 1.3706, "step": 1697 }, { "epoch": 0.58, "learning_rate": 5.15422721828215e-05, "loss": 0.9541, "step": 1698 }, { "epoch": 0.58, "learning_rate": 5.1478029973449515e-05, "loss": 1.2296, "step": 1699 }, { "epoch": 0.58, "learning_rate": 5.141378532188148e-05, "loss": 0.9917, "step": 1700 }, { "epoch": 0.58, "learning_rate": 5.13495383342709e-05, "loss": 0.8195, "step": 1701 }, { "epoch": 0.58, "learning_rate": 5.128528911677509e-05, "loss": 0.8589, "step": 1702 }, { "epoch": 0.58, "learning_rate": 5.12210377755551e-05, "loss": 0.8482, "step": 1703 }, { "epoch": 0.58, "learning_rate": 5.115678441677546e-05, "loss": 0.9358, "step": 1704 }, { "epoch": 0.58, "learning_rate": 5.109252914660406e-05, "loss": 0.9859, "step": 1705 }, { "epoch": 0.58, "learning_rate": 5.1028272071211914e-05, "loss": 1.1453, "step": 1706 }, { "epoch": 0.58, "learning_rate": 5.096401329677305e-05, "loss": 0.9634, "step": 1707 }, { "epoch": 0.58, "learning_rate": 5.0899752929464275e-05, "loss": 1.047, "step": 1708 }, { "epoch": 0.58, "learning_rate": 5.0835491075465045e-05, "loss": 0.8691, "step": 1709 }, { "epoch": 0.58, "learning_rate": 5.0771227840957295e-05, "loss": 0.9825, "step": 1710 }, { "epoch": 0.58, "learning_rate": 5.0706963332125176e-05, "loss": 1.2613, "step": 1711 }, { "epoch": 0.58, "learning_rate": 5.0642697655155e-05, "loss": 0.8457, "step": 1712 }, { "epoch": 0.58, "learning_rate": 5.057843091623502e-05, "loss": 0.8289, "step": 1713 }, { "epoch": 0.58, "learning_rate": 5.0514163221555186e-05, "loss": 0.9811, "step": 1714 }, { "epoch": 0.58, "learning_rate": 5.0449894677307063e-05, "loss": 0.8991, "step": 1715 }, { "epoch": 0.58, "learning_rate": 5.0385625389683636e-05, "loss": 1.1188, "step": 1716 }, { "epoch": 0.58, "learning_rate": 5.0321355464879064e-05, "loss": 0.9342, "step": 1717 }, { "epoch": 0.58, "learning_rate": 5.025708500908863e-05, "loss": 0.9323, "step": 1718 }, { "epoch": 0.58, "learning_rate": 5.019281412850843e-05, "loss": 0.9909, "step": 1719 }, { "epoch": 0.58, "learning_rate": 5.01285429293353e-05, "loss": 1.1988, "step": 1720 }, { "epoch": 0.58, "learning_rate": 5.006427151776659e-05, "loss": 1.1462, "step": 1721 }, { "epoch": 0.58, "learning_rate": 5e-05, "loss": 1.0483, "step": 1722 }, { "epoch": 0.59, "learning_rate": 4.9935728482233414e-05, "loss": 1.2397, "step": 1723 }, { "epoch": 0.59, "learning_rate": 4.98714570706647e-05, "loss": 0.8348, "step": 1724 }, { "epoch": 0.59, "learning_rate": 4.980718587149159e-05, "loss": 0.9489, "step": 1725 }, { "epoch": 0.59, "learning_rate": 4.974291499091139e-05, "loss": 0.7892, "step": 1726 }, { "epoch": 0.59, "learning_rate": 4.967864453512095e-05, "loss": 1.0563, "step": 1727 }, { "epoch": 0.59, "learning_rate": 4.961437461031638e-05, "loss": 0.8163, "step": 1728 }, { "epoch": 0.59, "learning_rate": 4.955010532269294e-05, "loss": 0.9059, "step": 1729 }, { "epoch": 0.59, "learning_rate": 4.948583677844482e-05, "loss": 1.0051, "step": 1730 }, { "epoch": 0.59, "learning_rate": 4.9421569083764995e-05, "loss": 0.7225, "step": 1731 }, { "epoch": 0.59, "learning_rate": 4.9357302344845004e-05, "loss": 0.9423, "step": 1732 }, { "epoch": 0.59, "learning_rate": 4.9293036667874836e-05, "loss": 1.1784, "step": 1733 }, { "epoch": 0.59, "learning_rate": 4.9228772159042717e-05, "loss": 1.1577, "step": 1734 }, { "epoch": 0.59, "learning_rate": 4.916450892453495e-05, "loss": 0.8913, "step": 1735 }, { "epoch": 0.59, "learning_rate": 4.910024707053572e-05, "loss": 0.9663, "step": 1736 }, { "epoch": 0.59, "learning_rate": 4.903598670322697e-05, "loss": 0.9343, "step": 1737 }, { "epoch": 0.59, "learning_rate": 4.89717279287881e-05, "loss": 0.6731, "step": 1738 }, { "epoch": 0.59, "learning_rate": 4.890747085339595e-05, "loss": 1.1786, "step": 1739 }, { "epoch": 0.59, "learning_rate": 4.8843215583224544e-05, "loss": 1.11, "step": 1740 }, { "epoch": 0.59, "learning_rate": 4.8778962224444904e-05, "loss": 0.7466, "step": 1741 }, { "epoch": 0.59, "learning_rate": 4.871471088322493e-05, "loss": 1.061, "step": 1742 }, { "epoch": 0.59, "learning_rate": 4.865046166572912e-05, "loss": 0.959, "step": 1743 }, { "epoch": 0.59, "learning_rate": 4.858621467811853e-05, "loss": 1.1841, "step": 1744 }, { "epoch": 0.59, "learning_rate": 4.85219700265505e-05, "loss": 1.0828, "step": 1745 }, { "epoch": 0.59, "learning_rate": 4.845772781717851e-05, "loss": 1.0057, "step": 1746 }, { "epoch": 0.59, "learning_rate": 4.839348815615202e-05, "loss": 1.0401, "step": 1747 }, { "epoch": 0.59, "learning_rate": 4.832925114961629e-05, "loss": 0.742, "step": 1748 }, { "epoch": 0.59, "learning_rate": 4.8265016903712126e-05, "loss": 1.0679, "step": 1749 }, { "epoch": 0.59, "learning_rate": 4.820078552457584e-05, "loss": 0.9697, "step": 1750 }, { "epoch": 0.59, "learning_rate": 4.8136557118339005e-05, "loss": 1.2517, "step": 1751 }, { "epoch": 0.6, "learning_rate": 4.8072331791128246e-05, "loss": 0.7689, "step": 1752 }, { "epoch": 0.6, "learning_rate": 4.800810964906513e-05, "loss": 0.8612, "step": 1753 }, { "epoch": 0.6, "learning_rate": 4.794389079826594e-05, "loss": 0.9264, "step": 1754 }, { "epoch": 0.6, "learning_rate": 4.787967534484153e-05, "loss": 0.631, "step": 1755 }, { "epoch": 0.6, "learning_rate": 4.781546339489715e-05, "loss": 0.9173, "step": 1756 }, { "epoch": 0.6, "learning_rate": 4.775125505453225e-05, "loss": 1.1534, "step": 1757 }, { "epoch": 0.6, "learning_rate": 4.768705042984031e-05, "loss": 1.0714, "step": 1758 }, { "epoch": 0.6, "learning_rate": 4.76228496269087e-05, "loss": 0.8827, "step": 1759 }, { "epoch": 0.6, "learning_rate": 4.7558652751818426e-05, "loss": 0.9812, "step": 1760 }, { "epoch": 0.6, "learning_rate": 4.749445991064404e-05, "loss": 0.8379, "step": 1761 }, { "epoch": 0.6, "learning_rate": 4.7430271209453426e-05, "loss": 1.3089, "step": 1762 }, { "epoch": 0.6, "learning_rate": 4.7366086754307616e-05, "loss": 0.8379, "step": 1763 }, { "epoch": 0.6, "learning_rate": 4.730190665126063e-05, "loss": 1.1526, "step": 1764 }, { "epoch": 0.6, "learning_rate": 4.7237731006359315e-05, "loss": 1.0103, "step": 1765 }, { "epoch": 0.6, "learning_rate": 4.717355992564311e-05, "loss": 0.9091, "step": 1766 }, { "epoch": 0.6, "learning_rate": 4.710939351514395e-05, "loss": 0.8976, "step": 1767 }, { "epoch": 0.6, "learning_rate": 4.704523188088604e-05, "loss": 1.1714, "step": 1768 }, { "epoch": 0.6, "learning_rate": 4.6981075128885693e-05, "loss": 0.9105, "step": 1769 }, { "epoch": 0.6, "learning_rate": 4.691692336515118e-05, "loss": 1.011, "step": 1770 }, { "epoch": 0.6, "learning_rate": 4.685277669568246e-05, "loss": 1.0843, "step": 1771 }, { "epoch": 0.6, "learning_rate": 4.678863522647114e-05, "loss": 1.4692, "step": 1772 }, { "epoch": 0.6, "learning_rate": 4.6724499063500224e-05, "loss": 1.1154, "step": 1773 }, { "epoch": 0.6, "learning_rate": 4.666036831274392e-05, "loss": 1.1087, "step": 1774 }, { "epoch": 0.6, "learning_rate": 4.659624308016753e-05, "loss": 1.1883, "step": 1775 }, { "epoch": 0.6, "learning_rate": 4.653212347172723e-05, "loss": 1.0201, "step": 1776 }, { "epoch": 0.6, "learning_rate": 4.646800959336986e-05, "loss": 1.2835, "step": 1777 }, { "epoch": 0.6, "learning_rate": 4.6403901551032846e-05, "loss": 1.0998, "step": 1778 }, { "epoch": 0.6, "learning_rate": 4.633979945064395e-05, "loss": 1.0814, "step": 1779 }, { "epoch": 0.6, "learning_rate": 4.6275703398121094e-05, "loss": 1.0425, "step": 1780 }, { "epoch": 0.6, "learning_rate": 4.6211613499372284e-05, "loss": 1.1444, "step": 1781 }, { "epoch": 0.61, "learning_rate": 4.614752986029524e-05, "loss": 0.8235, "step": 1782 }, { "epoch": 0.61, "learning_rate": 4.6083452586777435e-05, "loss": 0.9397, "step": 1783 }, { "epoch": 0.61, "learning_rate": 4.601938178469578e-05, "loss": 1.2204, "step": 1784 }, { "epoch": 0.61, "learning_rate": 4.59553175599165e-05, "loss": 0.945, "step": 1785 }, { "epoch": 0.61, "learning_rate": 4.589126001829497e-05, "loss": 0.8906, "step": 1786 }, { "epoch": 0.61, "learning_rate": 4.582720926567552e-05, "loss": 1.225, "step": 1787 }, { "epoch": 0.61, "learning_rate": 4.5763165407891216e-05, "loss": 0.9764, "step": 1788 }, { "epoch": 0.61, "learning_rate": 4.569912855076378e-05, "loss": 0.9791, "step": 1789 }, { "epoch": 0.61, "learning_rate": 4.563509880010336e-05, "loss": 1.2451, "step": 1790 }, { "epoch": 0.61, "learning_rate": 4.557107626170835e-05, "loss": 1.1769, "step": 1791 }, { "epoch": 0.61, "learning_rate": 4.550706104136523e-05, "loss": 1.1722, "step": 1792 }, { "epoch": 0.61, "learning_rate": 4.544305324484841e-05, "loss": 1.0211, "step": 1793 }, { "epoch": 0.61, "learning_rate": 4.537905297791997e-05, "loss": 1.4866, "step": 1794 }, { "epoch": 0.61, "learning_rate": 4.531506034632963e-05, "loss": 0.9489, "step": 1795 }, { "epoch": 0.61, "learning_rate": 4.525107545581442e-05, "loss": 1.0361, "step": 1796 }, { "epoch": 0.61, "learning_rate": 4.518709841209865e-05, "loss": 0.8991, "step": 1797 }, { "epoch": 0.61, "learning_rate": 4.512312932089361e-05, "loss": 0.9998, "step": 1798 }, { "epoch": 0.61, "learning_rate": 4.505916828789747e-05, "loss": 1.1658, "step": 1799 }, { "epoch": 0.61, "learning_rate": 4.4995215418795085e-05, "loss": 0.9583, "step": 1800 }, { "epoch": 0.61, "learning_rate": 4.493127081925782e-05, "loss": 0.8129, "step": 1801 }, { "epoch": 0.61, "learning_rate": 4.4867334594943375e-05, "loss": 1.1137, "step": 1802 }, { "epoch": 0.61, "learning_rate": 4.480340685149562e-05, "loss": 0.7234, "step": 1803 }, { "epoch": 0.61, "learning_rate": 4.473948769454441e-05, "loss": 0.9731, "step": 1804 }, { "epoch": 0.61, "learning_rate": 4.467557722970538e-05, "loss": 1.0714, "step": 1805 }, { "epoch": 0.61, "learning_rate": 4.461167556257983e-05, "loss": 0.9362, "step": 1806 }, { "epoch": 0.61, "learning_rate": 4.454778279875455e-05, "loss": 0.903, "step": 1807 }, { "epoch": 0.61, "learning_rate": 4.448389904380156e-05, "loss": 1.1834, "step": 1808 }, { "epoch": 0.61, "learning_rate": 4.442002440327805e-05, "loss": 0.9717, "step": 1809 }, { "epoch": 0.61, "learning_rate": 4.4356158982726107e-05, "loss": 0.9324, "step": 1810 }, { "epoch": 0.62, "learning_rate": 4.42923028876726e-05, "loss": 1.0902, "step": 1811 }, { "epoch": 0.62, "learning_rate": 4.422845622362901e-05, "loss": 0.8578, "step": 1812 }, { "epoch": 0.62, "learning_rate": 4.416461909609119e-05, "loss": 1.1931, "step": 1813 }, { "epoch": 0.62, "learning_rate": 4.410079161053928e-05, "loss": 0.8651, "step": 1814 }, { "epoch": 0.62, "learning_rate": 4.403697387243749e-05, "loss": 1.2287, "step": 1815 }, { "epoch": 0.62, "learning_rate": 4.397316598723385e-05, "loss": 1.0066, "step": 1816 }, { "epoch": 0.62, "learning_rate": 4.39093680603602e-05, "loss": 0.9568, "step": 1817 }, { "epoch": 0.62, "learning_rate": 4.384558019723188e-05, "loss": 1.0268, "step": 1818 }, { "epoch": 0.62, "learning_rate": 4.37818025032476e-05, "loss": 0.8178, "step": 1819 }, { "epoch": 0.62, "learning_rate": 4.371803508378929e-05, "loss": 1.2928, "step": 1820 }, { "epoch": 0.62, "learning_rate": 4.3654278044221896e-05, "loss": 1.2163, "step": 1821 }, { "epoch": 0.62, "learning_rate": 4.359053148989318e-05, "loss": 1.1256, "step": 1822 }, { "epoch": 0.62, "learning_rate": 4.352679552613363e-05, "loss": 0.6627, "step": 1823 }, { "epoch": 0.62, "learning_rate": 4.346307025825621e-05, "loss": 1.0364, "step": 1824 }, { "epoch": 0.62, "learning_rate": 4.33993557915562e-05, "loss": 0.9861, "step": 1825 }, { "epoch": 0.62, "learning_rate": 4.333565223131107e-05, "loss": 0.8605, "step": 1826 }, { "epoch": 0.62, "learning_rate": 4.3271959682780204e-05, "loss": 0.9997, "step": 1827 }, { "epoch": 0.62, "learning_rate": 4.320827825120486e-05, "loss": 0.9317, "step": 1828 }, { "epoch": 0.62, "learning_rate": 4.3144608041807875e-05, "loss": 0.9587, "step": 1829 }, { "epoch": 0.62, "learning_rate": 4.308094915979358e-05, "loss": 1.1885, "step": 1830 }, { "epoch": 0.62, "learning_rate": 4.3017301710347574e-05, "loss": 1.4564, "step": 1831 }, { "epoch": 0.62, "learning_rate": 4.295366579863658e-05, "loss": 1.0766, "step": 1832 }, { "epoch": 0.62, "learning_rate": 4.289004152980822e-05, "loss": 1.4114, "step": 1833 }, { "epoch": 0.62, "learning_rate": 4.282642900899092e-05, "loss": 1.2212, "step": 1834 }, { "epoch": 0.62, "learning_rate": 4.276282834129366e-05, "loss": 1.0806, "step": 1835 }, { "epoch": 0.62, "learning_rate": 4.2699239631805866e-05, "loss": 0.9794, "step": 1836 }, { "epoch": 0.62, "learning_rate": 4.2635662985597195e-05, "loss": 0.8739, "step": 1837 }, { "epoch": 0.62, "learning_rate": 4.257209850771734e-05, "loss": 1.1403, "step": 1838 }, { "epoch": 0.62, "learning_rate": 4.250854630319593e-05, "loss": 1.0278, "step": 1839 }, { "epoch": 0.62, "learning_rate": 4.2445006477042296e-05, "loss": 1.0423, "step": 1840 }, { "epoch": 0.63, "learning_rate": 4.238147913424532e-05, "loss": 1.273, "step": 1841 }, { "epoch": 0.63, "learning_rate": 4.2317964379773265e-05, "loss": 0.833, "step": 1842 }, { "epoch": 0.63, "learning_rate": 4.225446231857359e-05, "loss": 0.8662, "step": 1843 }, { "epoch": 0.63, "learning_rate": 4.2190973055572737e-05, "loss": 1.1087, "step": 1844 }, { "epoch": 0.63, "learning_rate": 4.212749669567607e-05, "loss": 0.8653, "step": 1845 }, { "epoch": 0.63, "learning_rate": 4.2064033343767576e-05, "loss": 0.8787, "step": 1846 }, { "epoch": 0.63, "learning_rate": 4.200058310470981e-05, "loss": 1.0618, "step": 1847 }, { "epoch": 0.63, "learning_rate": 4.193714608334361e-05, "loss": 0.9839, "step": 1848 }, { "epoch": 0.63, "learning_rate": 4.1873722384488006e-05, "loss": 1.0599, "step": 1849 }, { "epoch": 0.63, "learning_rate": 4.1810312112939966e-05, "loss": 1.0272, "step": 1850 }, { "epoch": 0.63, "learning_rate": 4.174691537347431e-05, "loss": 0.9458, "step": 1851 }, { "epoch": 0.63, "learning_rate": 4.1683532270843504e-05, "loss": 0.7723, "step": 1852 }, { "epoch": 0.63, "learning_rate": 4.1620162909777456e-05, "loss": 0.9937, "step": 1853 }, { "epoch": 0.63, "learning_rate": 4.155680739498342e-05, "loss": 1.1018, "step": 1854 }, { "epoch": 0.63, "learning_rate": 4.1493465831145675e-05, "loss": 0.8114, "step": 1855 }, { "epoch": 0.63, "learning_rate": 4.143013832292553e-05, "loss": 1.3097, "step": 1856 }, { "epoch": 0.63, "learning_rate": 4.1366824974961056e-05, "loss": 0.9682, "step": 1857 }, { "epoch": 0.63, "learning_rate": 4.1303525891866906e-05, "loss": 0.8974, "step": 1858 }, { "epoch": 0.63, "learning_rate": 4.124024117823418e-05, "loss": 0.9682, "step": 1859 }, { "epoch": 0.63, "learning_rate": 4.117697093863023e-05, "loss": 0.7309, "step": 1860 }, { "epoch": 0.63, "learning_rate": 4.111371527759849e-05, "loss": 0.9376, "step": 1861 }, { "epoch": 0.63, "learning_rate": 4.105047429965828e-05, "loss": 0.948, "step": 1862 }, { "epoch": 0.63, "learning_rate": 4.0987248109304714e-05, "loss": 1.0539, "step": 1863 }, { "epoch": 0.63, "learning_rate": 4.0924036811008434e-05, "loss": 0.7512, "step": 1864 }, { "epoch": 0.63, "learning_rate": 4.0860840509215496e-05, "loss": 1.0271, "step": 1865 }, { "epoch": 0.63, "learning_rate": 4.079765930834714e-05, "loss": 1.0573, "step": 1866 }, { "epoch": 0.63, "learning_rate": 4.0734493312799674e-05, "loss": 1.0163, "step": 1867 }, { "epoch": 0.63, "learning_rate": 4.067134262694431e-05, "loss": 1.1488, "step": 1868 }, { "epoch": 0.63, "learning_rate": 4.0608207355126926e-05, "loss": 0.924, "step": 1869 }, { "epoch": 0.64, "learning_rate": 4.054508760166795e-05, "loss": 0.8339, "step": 1870 }, { "epoch": 0.64, "learning_rate": 4.048198347086217e-05, "loss": 1.133, "step": 1871 }, { "epoch": 0.64, "learning_rate": 4.0418895066978534e-05, "loss": 1.1988, "step": 1872 }, { "epoch": 0.64, "learning_rate": 4.035582249426003e-05, "loss": 1.1099, "step": 1873 }, { "epoch": 0.64, "learning_rate": 4.029276585692349e-05, "loss": 0.9877, "step": 1874 }, { "epoch": 0.64, "learning_rate": 4.02297252591594e-05, "loss": 1.118, "step": 1875 }, { "epoch": 0.64, "learning_rate": 4.0166700805131755e-05, "loss": 1.2378, "step": 1876 }, { "epoch": 0.64, "learning_rate": 4.010369259897788e-05, "loss": 0.977, "step": 1877 }, { "epoch": 0.64, "learning_rate": 4.0040700744808204e-05, "loss": 1.2349, "step": 1878 }, { "epoch": 0.64, "learning_rate": 3.997772534670621e-05, "loss": 1.0066, "step": 1879 }, { "epoch": 0.64, "learning_rate": 3.991476650872813e-05, "loss": 1.4166, "step": 1880 }, { "epoch": 0.64, "learning_rate": 3.9851824334902884e-05, "loss": 0.9916, "step": 1881 }, { "epoch": 0.64, "learning_rate": 3.978889892923183e-05, "loss": 1.1245, "step": 1882 }, { "epoch": 0.64, "learning_rate": 3.9725990395688614e-05, "loss": 0.9598, "step": 1883 }, { "epoch": 0.64, "learning_rate": 3.9663098838219005e-05, "loss": 1.0369, "step": 1884 }, { "epoch": 0.64, "learning_rate": 3.9600224360740734e-05, "loss": 0.8998, "step": 1885 }, { "epoch": 0.64, "learning_rate": 3.953736706714331e-05, "loss": 0.9617, "step": 1886 }, { "epoch": 0.64, "learning_rate": 3.947452706128784e-05, "loss": 0.9365, "step": 1887 }, { "epoch": 0.64, "learning_rate": 3.941170444700688e-05, "loss": 0.7317, "step": 1888 }, { "epoch": 0.64, "learning_rate": 3.934889932810422e-05, "loss": 0.9631, "step": 1889 }, { "epoch": 0.64, "learning_rate": 3.928611180835476e-05, "loss": 0.9031, "step": 1890 }, { "epoch": 0.64, "learning_rate": 3.922334199150432e-05, "loss": 0.9061, "step": 1891 }, { "epoch": 0.64, "learning_rate": 3.916058998126949e-05, "loss": 0.9417, "step": 1892 }, { "epoch": 0.64, "learning_rate": 3.9097855881337416e-05, "loss": 0.8315, "step": 1893 }, { "epoch": 0.64, "learning_rate": 3.9035139795365626e-05, "loss": 0.8324, "step": 1894 }, { "epoch": 0.64, "learning_rate": 3.897244182698192e-05, "loss": 0.9447, "step": 1895 }, { "epoch": 0.64, "learning_rate": 3.8909762079784155e-05, "loss": 0.9341, "step": 1896 }, { "epoch": 0.64, "learning_rate": 3.884710065734008e-05, "loss": 0.7996, "step": 1897 }, { "epoch": 0.64, "learning_rate": 3.878445766318714e-05, "loss": 0.6865, "step": 1898 }, { "epoch": 0.64, "learning_rate": 3.8721833200832396e-05, "loss": 1.1745, "step": 1899 }, { "epoch": 0.65, "learning_rate": 3.865922737375219e-05, "loss": 1.1621, "step": 1900 }, { "epoch": 0.65, "learning_rate": 3.859664028539215e-05, "loss": 0.8379, "step": 1901 }, { "epoch": 0.65, "learning_rate": 3.8534072039166916e-05, "loss": 1.1151, "step": 1902 }, { "epoch": 0.65, "learning_rate": 3.8471522738459986e-05, "loss": 0.8981, "step": 1903 }, { "epoch": 0.65, "learning_rate": 3.840899248662358e-05, "loss": 1.2133, "step": 1904 }, { "epoch": 0.65, "learning_rate": 3.834648138697844e-05, "loss": 0.8912, "step": 1905 }, { "epoch": 0.65, "learning_rate": 3.8283989542813615e-05, "loss": 0.8683, "step": 1906 }, { "epoch": 0.65, "learning_rate": 3.822151705738639e-05, "loss": 1.1396, "step": 1907 }, { "epoch": 0.65, "learning_rate": 3.815906403392203e-05, "loss": 0.6748, "step": 1908 }, { "epoch": 0.65, "learning_rate": 3.809663057561367e-05, "loss": 1.0364, "step": 1909 }, { "epoch": 0.65, "learning_rate": 3.803421678562213e-05, "loss": 1.2523, "step": 1910 }, { "epoch": 0.65, "learning_rate": 3.797182276707565e-05, "loss": 0.7966, "step": 1911 }, { "epoch": 0.65, "learning_rate": 3.790944862306988e-05, "loss": 0.973, "step": 1912 }, { "epoch": 0.65, "learning_rate": 3.7847094456667604e-05, "loss": 1.0489, "step": 1913 }, { "epoch": 0.65, "learning_rate": 3.7784760370898606e-05, "loss": 1.1548, "step": 1914 }, { "epoch": 0.65, "learning_rate": 3.772244646875949e-05, "loss": 0.8821, "step": 1915 }, { "epoch": 0.65, "learning_rate": 3.7660152853213494e-05, "loss": 1.1645, "step": 1916 }, { "epoch": 0.65, "learning_rate": 3.7597879627190334e-05, "loss": 1.1348, "step": 1917 }, { "epoch": 0.65, "learning_rate": 3.753562689358606e-05, "loss": 1.1532, "step": 1918 }, { "epoch": 0.65, "learning_rate": 3.747339475526285e-05, "loss": 1.0442, "step": 1919 }, { "epoch": 0.65, "learning_rate": 3.741118331504885e-05, "loss": 0.9784, "step": 1920 }, { "epoch": 0.65, "learning_rate": 3.734899267573801e-05, "loss": 1.1178, "step": 1921 }, { "epoch": 0.65, "learning_rate": 3.728682294008988e-05, "loss": 0.9142, "step": 1922 }, { "epoch": 0.65, "learning_rate": 3.722467421082952e-05, "loss": 1.3647, "step": 1923 }, { "epoch": 0.65, "learning_rate": 3.716254659064725e-05, "loss": 0.8944, "step": 1924 }, { "epoch": 0.65, "learning_rate": 3.710044018219852e-05, "loss": 1.0975, "step": 1925 }, { "epoch": 0.65, "learning_rate": 3.703835508810373e-05, "loss": 1.3173, "step": 1926 }, { "epoch": 0.65, "learning_rate": 3.697629141094806e-05, "loss": 1.0221, "step": 1927 }, { "epoch": 0.65, "learning_rate": 3.691424925328129e-05, "loss": 1.1422, "step": 1928 }, { "epoch": 0.66, "learning_rate": 3.6852228717617666e-05, "loss": 0.9872, "step": 1929 }, { "epoch": 0.66, "learning_rate": 3.6790229906435705e-05, "loss": 0.9358, "step": 1930 }, { "epoch": 0.66, "learning_rate": 3.6728252922178e-05, "loss": 1.2959, "step": 1931 }, { "epoch": 0.66, "learning_rate": 3.666629786725111e-05, "loss": 1.3021, "step": 1932 }, { "epoch": 0.66, "learning_rate": 3.660436484402536e-05, "loss": 0.8906, "step": 1933 }, { "epoch": 0.66, "learning_rate": 3.654245395483463e-05, "loss": 0.803, "step": 1934 }, { "epoch": 0.66, "learning_rate": 3.648056530197627e-05, "loss": 0.8562, "step": 1935 }, { "epoch": 0.66, "learning_rate": 3.641869898771087e-05, "loss": 1.3917, "step": 1936 }, { "epoch": 0.66, "learning_rate": 3.635685511426212e-05, "loss": 0.8431, "step": 1937 }, { "epoch": 0.66, "learning_rate": 3.6295033783816636e-05, "loss": 0.7598, "step": 1938 }, { "epoch": 0.66, "learning_rate": 3.623323509852374e-05, "loss": 0.6688, "step": 1939 }, { "epoch": 0.66, "learning_rate": 3.617145916049539e-05, "loss": 0.8986, "step": 1940 }, { "epoch": 0.66, "learning_rate": 3.610970607180594e-05, "loss": 0.762, "step": 1941 }, { "epoch": 0.66, "learning_rate": 3.604797593449198e-05, "loss": 1.0772, "step": 1942 }, { "epoch": 0.66, "learning_rate": 3.598626885055219e-05, "loss": 0.8723, "step": 1943 }, { "epoch": 0.66, "learning_rate": 3.592458492194717e-05, "loss": 0.7893, "step": 1944 }, { "epoch": 0.66, "learning_rate": 3.58629242505992e-05, "loss": 0.8981, "step": 1945 }, { "epoch": 0.66, "learning_rate": 3.5801286938392194e-05, "loss": 1.35, "step": 1946 }, { "epoch": 0.66, "learning_rate": 3.5739673087171446e-05, "loss": 1.1785, "step": 1947 }, { "epoch": 0.66, "learning_rate": 3.5678082798743494e-05, "loss": 0.8049, "step": 1948 }, { "epoch": 0.66, "learning_rate": 3.5616516174875926e-05, "loss": 1.0044, "step": 1949 }, { "epoch": 0.66, "learning_rate": 3.5554973317297255e-05, "loss": 1.1126, "step": 1950 }, { "epoch": 0.66, "learning_rate": 3.549345432769668e-05, "loss": 0.81, "step": 1951 }, { "epoch": 0.66, "learning_rate": 3.5431959307724e-05, "loss": 0.8754, "step": 1952 }, { "epoch": 0.66, "learning_rate": 3.5370488358989406e-05, "loss": 1.2511, "step": 1953 }, { "epoch": 0.66, "learning_rate": 3.530904158306329e-05, "loss": 0.8729, "step": 1954 }, { "epoch": 0.66, "learning_rate": 3.5247619081476157e-05, "loss": 0.8642, "step": 1955 }, { "epoch": 0.66, "learning_rate": 3.5186220955718306e-05, "loss": 1.0178, "step": 1956 }, { "epoch": 0.66, "learning_rate": 3.512484730723986e-05, "loss": 0.683, "step": 1957 }, { "epoch": 0.67, "learning_rate": 3.506349823745043e-05, "loss": 1.119, "step": 1958 }, { "epoch": 0.67, "learning_rate": 3.500217384771906e-05, "loss": 0.9785, "step": 1959 }, { "epoch": 0.67, "learning_rate": 3.494087423937399e-05, "loss": 0.9811, "step": 1960 }, { "epoch": 0.67, "learning_rate": 3.487959951370252e-05, "loss": 0.939, "step": 1961 }, { "epoch": 0.67, "learning_rate": 3.481834977195081e-05, "loss": 1.1444, "step": 1962 }, { "epoch": 0.67, "learning_rate": 3.475712511532378e-05, "loss": 0.7997, "step": 1963 }, { "epoch": 0.67, "learning_rate": 3.469592564498488e-05, "loss": 0.9899, "step": 1964 }, { "epoch": 0.67, "learning_rate": 3.4634751462055946e-05, "loss": 0.9378, "step": 1965 }, { "epoch": 0.67, "learning_rate": 3.4573602667617053e-05, "loss": 0.9641, "step": 1966 }, { "epoch": 0.67, "learning_rate": 3.451247936270626e-05, "loss": 1.0171, "step": 1967 }, { "epoch": 0.67, "learning_rate": 3.445138164831957e-05, "loss": 1.1738, "step": 1968 }, { "epoch": 0.67, "learning_rate": 3.4390309625410686e-05, "loss": 0.9696, "step": 1969 }, { "epoch": 0.67, "learning_rate": 3.432926339489086e-05, "loss": 1.0116, "step": 1970 }, { "epoch": 0.67, "learning_rate": 3.4268243057628736e-05, "loss": 1.0732, "step": 1971 }, { "epoch": 0.67, "learning_rate": 3.420724871445016e-05, "loss": 0.7454, "step": 1972 }, { "epoch": 0.67, "learning_rate": 3.4146280466138e-05, "loss": 1.2827, "step": 1973 }, { "epoch": 0.67, "learning_rate": 3.4085338413432066e-05, "loss": 1.1638, "step": 1974 }, { "epoch": 0.67, "learning_rate": 3.402442265702884e-05, "loss": 1.275, "step": 1975 }, { "epoch": 0.67, "learning_rate": 3.3963533297581375e-05, "loss": 0.9361, "step": 1976 }, { "epoch": 0.67, "learning_rate": 3.39026704356991e-05, "loss": 0.948, "step": 1977 }, { "epoch": 0.67, "learning_rate": 3.384183417194767e-05, "loss": 1.2072, "step": 1978 }, { "epoch": 0.67, "learning_rate": 3.378102460684878e-05, "loss": 1.1038, "step": 1979 }, { "epoch": 0.67, "learning_rate": 3.372024184087999e-05, "loss": 0.8892, "step": 1980 }, { "epoch": 0.67, "learning_rate": 3.365948597447463e-05, "loss": 0.919, "step": 1981 }, { "epoch": 0.67, "learning_rate": 3.3598757108021546e-05, "loss": 0.9507, "step": 1982 }, { "epoch": 0.67, "learning_rate": 3.353805534186499e-05, "loss": 0.8405, "step": 1983 }, { "epoch": 0.67, "learning_rate": 3.347738077630441e-05, "loss": 1.0285, "step": 1984 }, { "epoch": 0.67, "learning_rate": 3.3416733511594326e-05, "loss": 1.0815, "step": 1985 }, { "epoch": 0.67, "learning_rate": 3.335611364794414e-05, "loss": 1.3129, "step": 1986 }, { "epoch": 0.67, "learning_rate": 3.3295521285518006e-05, "loss": 0.9627, "step": 1987 }, { "epoch": 0.68, "learning_rate": 3.323495652443461e-05, "loss": 0.9509, "step": 1988 }, { "epoch": 0.68, "learning_rate": 3.3174419464767036e-05, "loss": 0.9744, "step": 1989 }, { "epoch": 0.68, "learning_rate": 3.311391020654259e-05, "loss": 1.0398, "step": 1990 }, { "epoch": 0.68, "learning_rate": 3.3053428849742654e-05, "loss": 0.928, "step": 1991 }, { "epoch": 0.68, "learning_rate": 3.29929754943025e-05, "loss": 0.8318, "step": 1992 }, { "epoch": 0.68, "learning_rate": 3.293255024011115e-05, "loss": 1.0246, "step": 1993 }, { "epoch": 0.68, "learning_rate": 3.287215318701117e-05, "loss": 1.0594, "step": 1994 }, { "epoch": 0.68, "learning_rate": 3.281178443479852e-05, "loss": 1.0787, "step": 1995 }, { "epoch": 0.68, "learning_rate": 3.275144408322242e-05, "loss": 1.1039, "step": 1996 }, { "epoch": 0.68, "learning_rate": 3.2691132231985164e-05, "loss": 1.0435, "step": 1997 }, { "epoch": 0.68, "learning_rate": 3.263084898074194e-05, "loss": 1.0689, "step": 1998 }, { "epoch": 0.68, "learning_rate": 3.25705944291007e-05, "loss": 1.1981, "step": 1999 }, { "epoch": 0.68, "learning_rate": 3.251036867662195e-05, "loss": 0.7602, "step": 2000 }, { "epoch": 0.68, "learning_rate": 3.245017182281862e-05, "loss": 1.0287, "step": 2001 }, { "epoch": 0.68, "learning_rate": 3.239000396715589e-05, "loss": 1.0978, "step": 2002 }, { "epoch": 0.68, "learning_rate": 3.2329865209051026e-05, "loss": 0.8883, "step": 2003 }, { "epoch": 0.68, "learning_rate": 3.226975564787322e-05, "loss": 0.9448, "step": 2004 }, { "epoch": 0.68, "learning_rate": 3.220967538294341e-05, "loss": 1.1688, "step": 2005 }, { "epoch": 0.68, "learning_rate": 3.214962451353416e-05, "loss": 1.2097, "step": 2006 }, { "epoch": 0.68, "learning_rate": 3.20896031388694e-05, "loss": 0.847, "step": 2007 }, { "epoch": 0.68, "learning_rate": 3.202961135812437e-05, "loss": 0.9076, "step": 2008 }, { "epoch": 0.68, "learning_rate": 3.19696492704254e-05, "loss": 0.9636, "step": 2009 }, { "epoch": 0.68, "learning_rate": 3.1909716974849766e-05, "loss": 1.0381, "step": 2010 }, { "epoch": 0.68, "learning_rate": 3.184981457042553e-05, "loss": 1.2043, "step": 2011 }, { "epoch": 0.68, "learning_rate": 3.1789942156131307e-05, "loss": 1.0848, "step": 2012 }, { "epoch": 0.68, "learning_rate": 3.173009983089622e-05, "loss": 1.017, "step": 2013 }, { "epoch": 0.68, "learning_rate": 3.1670287693599636e-05, "loss": 1.2326, "step": 2014 }, { "epoch": 0.68, "learning_rate": 3.161050584307107e-05, "loss": 0.8244, "step": 2015 }, { "epoch": 0.68, "learning_rate": 3.155075437808997e-05, "loss": 1.1821, "step": 2016 }, { "epoch": 0.69, "learning_rate": 3.149103339738561e-05, "loss": 0.8336, "step": 2017 }, { "epoch": 0.69, "learning_rate": 3.1431342999636836e-05, "loss": 1.127, "step": 2018 }, { "epoch": 0.69, "learning_rate": 3.1371683283472014e-05, "loss": 1.0699, "step": 2019 }, { "epoch": 0.69, "learning_rate": 3.131205434746879e-05, "loss": 1.1913, "step": 2020 }, { "epoch": 0.69, "learning_rate": 3.1252456290153954e-05, "loss": 1.069, "step": 2021 }, { "epoch": 0.69, "learning_rate": 3.119288921000329e-05, "loss": 1.0871, "step": 2022 }, { "epoch": 0.69, "learning_rate": 3.113335320544136e-05, "loss": 0.9509, "step": 2023 }, { "epoch": 0.69, "learning_rate": 3.107384837484141e-05, "loss": 0.9932, "step": 2024 }, { "epoch": 0.69, "learning_rate": 3.101437481652518e-05, "loss": 1.1955, "step": 2025 }, { "epoch": 0.69, "learning_rate": 3.0954932628762726e-05, "loss": 1.155, "step": 2026 }, { "epoch": 0.69, "learning_rate": 3.0895521909772264e-05, "loss": 1.0509, "step": 2027 }, { "epoch": 0.69, "learning_rate": 3.0836142757720034e-05, "loss": 1.2851, "step": 2028 }, { "epoch": 0.69, "learning_rate": 3.077679527072007e-05, "loss": 0.8782, "step": 2029 }, { "epoch": 0.69, "learning_rate": 3.071747954683413e-05, "loss": 1.3336, "step": 2030 }, { "epoch": 0.69, "learning_rate": 3.065819568407149e-05, "loss": 0.9432, "step": 2031 }, { "epoch": 0.69, "learning_rate": 3.059894378038875e-05, "loss": 1.1343, "step": 2032 }, { "epoch": 0.69, "learning_rate": 3.053972393368972e-05, "loss": 0.7663, "step": 2033 }, { "epoch": 0.69, "learning_rate": 3.0480536241825263e-05, "loss": 0.9926, "step": 2034 }, { "epoch": 0.69, "learning_rate": 3.042138080259307e-05, "loss": 0.8277, "step": 2035 }, { "epoch": 0.69, "learning_rate": 3.0362257713737552e-05, "loss": 1.1834, "step": 2036 }, { "epoch": 0.69, "learning_rate": 3.030316707294969e-05, "loss": 1.1013, "step": 2037 }, { "epoch": 0.69, "learning_rate": 3.024410897786682e-05, "loss": 0.7082, "step": 2038 }, { "epoch": 0.69, "learning_rate": 3.0185083526072543e-05, "loss": 1.2033, "step": 2039 }, { "epoch": 0.69, "learning_rate": 3.0126090815096463e-05, "loss": 1.1659, "step": 2040 }, { "epoch": 0.69, "learning_rate": 3.0067130942414123e-05, "loss": 0.837, "step": 2041 }, { "epoch": 0.69, "learning_rate": 3.0008204005446804e-05, "loss": 0.9257, "step": 2042 }, { "epoch": 0.69, "learning_rate": 2.9949310101561367e-05, "loss": 1.3292, "step": 2043 }, { "epoch": 0.69, "learning_rate": 2.989044932807008e-05, "loss": 1.019, "step": 2044 }, { "epoch": 0.69, "learning_rate": 2.98316217822305e-05, "loss": 1.1176, "step": 2045 }, { "epoch": 0.69, "learning_rate": 2.977282756124522e-05, "loss": 0.7504, "step": 2046 }, { "epoch": 0.7, "learning_rate": 2.9714066762261823e-05, "loss": 0.6321, "step": 2047 }, { "epoch": 0.7, "learning_rate": 2.9655339482372647e-05, "loss": 0.8766, "step": 2048 }, { "epoch": 0.7, "learning_rate": 2.9596645818614653e-05, "loss": 1.1699, "step": 2049 }, { "epoch": 0.7, "learning_rate": 2.9537985867969277e-05, "loss": 0.8131, "step": 2050 }, { "epoch": 0.7, "learning_rate": 2.9479359727362173e-05, "loss": 1.0058, "step": 2051 }, { "epoch": 0.7, "learning_rate": 2.942076749366321e-05, "loss": 1.0123, "step": 2052 }, { "epoch": 0.7, "learning_rate": 2.93622092636862e-05, "loss": 0.9732, "step": 2053 }, { "epoch": 0.7, "learning_rate": 2.9303685134188787e-05, "loss": 0.7385, "step": 2054 }, { "epoch": 0.7, "learning_rate": 2.9245195201872233e-05, "loss": 1.0305, "step": 2055 }, { "epoch": 0.7, "learning_rate": 2.918673956338136e-05, "loss": 1.0341, "step": 2056 }, { "epoch": 0.7, "learning_rate": 2.9128318315304226e-05, "loss": 0.828, "step": 2057 }, { "epoch": 0.7, "learning_rate": 2.9069931554172152e-05, "loss": 1.0621, "step": 2058 }, { "epoch": 0.7, "learning_rate": 2.901157937645944e-05, "loss": 1.0421, "step": 2059 }, { "epoch": 0.7, "learning_rate": 2.895326187858326e-05, "loss": 1.1456, "step": 2060 }, { "epoch": 0.7, "learning_rate": 2.889497915690346e-05, "loss": 1.2421, "step": 2061 }, { "epoch": 0.7, "learning_rate": 2.8836731307722454e-05, "loss": 1.045, "step": 2062 }, { "epoch": 0.7, "learning_rate": 2.8778518427285013e-05, "loss": 1.044, "step": 2063 }, { "epoch": 0.7, "learning_rate": 2.8720340611778135e-05, "loss": 0.8506, "step": 2064 }, { "epoch": 0.7, "learning_rate": 2.8662197957330877e-05, "loss": 1.0414, "step": 2065 }, { "epoch": 0.7, "learning_rate": 2.860409056001421e-05, "loss": 1.2015, "step": 2066 }, { "epoch": 0.7, "learning_rate": 2.8546018515840846e-05, "loss": 0.9124, "step": 2067 }, { "epoch": 0.7, "learning_rate": 2.8487981920765045e-05, "loss": 1.1906, "step": 2068 }, { "epoch": 0.7, "learning_rate": 2.8429980870682537e-05, "loss": 1.0633, "step": 2069 }, { "epoch": 0.7, "learning_rate": 2.8372015461430312e-05, "loss": 0.8828, "step": 2070 }, { "epoch": 0.7, "learning_rate": 2.831408578878646e-05, "loss": 1.0698, "step": 2071 }, { "epoch": 0.7, "learning_rate": 2.8256191948470033e-05, "loss": 1.1849, "step": 2072 }, { "epoch": 0.7, "learning_rate": 2.8198334036140874e-05, "loss": 1.0277, "step": 2073 }, { "epoch": 0.7, "learning_rate": 2.8140512147399432e-05, "loss": 0.7905, "step": 2074 }, { "epoch": 0.7, "learning_rate": 2.8082726377786668e-05, "loss": 0.9098, "step": 2075 }, { "epoch": 0.71, "learning_rate": 2.802497682278385e-05, "loss": 1.1651, "step": 2076 }, { "epoch": 0.71, "learning_rate": 2.796726357781242e-05, "loss": 0.7615, "step": 2077 }, { "epoch": 0.71, "learning_rate": 2.7909586738233818e-05, "loss": 0.9298, "step": 2078 }, { "epoch": 0.71, "learning_rate": 2.7851946399349295e-05, "loss": 0.7551, "step": 2079 }, { "epoch": 0.71, "learning_rate": 2.7794342656399835e-05, "loss": 1.0595, "step": 2080 }, { "epoch": 0.71, "learning_rate": 2.7736775604565944e-05, "loss": 0.7837, "step": 2081 }, { "epoch": 0.71, "learning_rate": 2.7679245338967495e-05, "loss": 0.9953, "step": 2082 }, { "epoch": 0.71, "learning_rate": 2.7621751954663577e-05, "loss": 1.0993, "step": 2083 }, { "epoch": 0.71, "learning_rate": 2.7564295546652363e-05, "loss": 1.0301, "step": 2084 }, { "epoch": 0.71, "learning_rate": 2.7506876209870875e-05, "loss": 0.9359, "step": 2085 }, { "epoch": 0.71, "learning_rate": 2.74494940391949e-05, "loss": 1.0524, "step": 2086 }, { "epoch": 0.71, "learning_rate": 2.739214912943888e-05, "loss": 1.245, "step": 2087 }, { "epoch": 0.71, "learning_rate": 2.733484157535562e-05, "loss": 1.0892, "step": 2088 }, { "epoch": 0.71, "learning_rate": 2.7277571471636204e-05, "loss": 1.283, "step": 2089 }, { "epoch": 0.71, "learning_rate": 2.7220338912909883e-05, "loss": 1.0643, "step": 2090 }, { "epoch": 0.71, "learning_rate": 2.716314399374379e-05, "loss": 1.1966, "step": 2091 }, { "epoch": 0.71, "learning_rate": 2.7105986808642937e-05, "loss": 0.9538, "step": 2092 }, { "epoch": 0.71, "learning_rate": 2.7048867452049965e-05, "loss": 1.1513, "step": 2093 }, { "epoch": 0.71, "learning_rate": 2.6991786018345e-05, "loss": 1.2558, "step": 2094 }, { "epoch": 0.71, "learning_rate": 2.693474260184555e-05, "loss": 1.0842, "step": 2095 }, { "epoch": 0.71, "learning_rate": 2.6877737296806216e-05, "loss": 0.9166, "step": 2096 }, { "epoch": 0.71, "learning_rate": 2.682077019741871e-05, "loss": 0.9733, "step": 2097 }, { "epoch": 0.71, "learning_rate": 2.6763841397811573e-05, "loss": 1.1768, "step": 2098 }, { "epoch": 0.71, "learning_rate": 2.6706950992050094e-05, "loss": 0.9626, "step": 2099 }, { "epoch": 0.71, "learning_rate": 2.6650099074136093e-05, "loss": 0.9713, "step": 2100 }, { "epoch": 0.71, "learning_rate": 2.6593285738007834e-05, "loss": 1.2028, "step": 2101 }, { "epoch": 0.71, "learning_rate": 2.6536511077539757e-05, "loss": 0.9134, "step": 2102 }, { "epoch": 0.71, "learning_rate": 2.647977518654246e-05, "loss": 1.2054, "step": 2103 }, { "epoch": 0.71, "learning_rate": 2.642307815876247e-05, "loss": 1.0521, "step": 2104 }, { "epoch": 0.71, "learning_rate": 2.6366420087882098e-05, "loss": 0.8283, "step": 2105 }, { "epoch": 0.72, "learning_rate": 2.630980106751929e-05, "loss": 0.803, "step": 2106 }, { "epoch": 0.72, "learning_rate": 2.6253221191227427e-05, "loss": 1.2112, "step": 2107 }, { "epoch": 0.72, "learning_rate": 2.6196680552495266e-05, "loss": 1.139, "step": 2108 }, { "epoch": 0.72, "learning_rate": 2.6140179244746697e-05, "loss": 0.8777, "step": 2109 }, { "epoch": 0.72, "learning_rate": 2.6083717361340632e-05, "loss": 1.3304, "step": 2110 }, { "epoch": 0.72, "learning_rate": 2.602729499557085e-05, "loss": 1.1596, "step": 2111 }, { "epoch": 0.72, "learning_rate": 2.5970912240665813e-05, "loss": 0.9953, "step": 2112 }, { "epoch": 0.72, "learning_rate": 2.5914569189788563e-05, "loss": 0.844, "step": 2113 }, { "epoch": 0.72, "learning_rate": 2.5858265936036496e-05, "loss": 0.9079, "step": 2114 }, { "epoch": 0.72, "learning_rate": 2.5802002572441285e-05, "loss": 0.9952, "step": 2115 }, { "epoch": 0.72, "learning_rate": 2.5745779191968683e-05, "loss": 1.1375, "step": 2116 }, { "epoch": 0.72, "learning_rate": 2.568959588751837e-05, "loss": 1.0449, "step": 2117 }, { "epoch": 0.72, "learning_rate": 2.5633452751923825e-05, "loss": 0.8751, "step": 2118 }, { "epoch": 0.72, "learning_rate": 2.5577349877952117e-05, "loss": 0.9257, "step": 2119 }, { "epoch": 0.72, "learning_rate": 2.5521287358303814e-05, "loss": 0.7594, "step": 2120 }, { "epoch": 0.72, "learning_rate": 2.546526528561282e-05, "loss": 0.8785, "step": 2121 }, { "epoch": 0.72, "learning_rate": 2.5409283752446185e-05, "loss": 1.1818, "step": 2122 }, { "epoch": 0.72, "learning_rate": 2.5353342851303997e-05, "loss": 0.8934, "step": 2123 }, { "epoch": 0.72, "learning_rate": 2.5297442674619155e-05, "loss": 0.7623, "step": 2124 }, { "epoch": 0.72, "learning_rate": 2.5241583314757327e-05, "loss": 1.2445, "step": 2125 }, { "epoch": 0.72, "learning_rate": 2.518576486401671e-05, "loss": 1.0058, "step": 2126 }, { "epoch": 0.72, "learning_rate": 2.512998741462791e-05, "loss": 1.0079, "step": 2127 }, { "epoch": 0.72, "learning_rate": 2.5074251058753784e-05, "loss": 0.8691, "step": 2128 }, { "epoch": 0.72, "learning_rate": 2.5018555888489303e-05, "loss": 0.8765, "step": 2129 }, { "epoch": 0.72, "learning_rate": 2.496290199586135e-05, "loss": 0.9515, "step": 2130 }, { "epoch": 0.72, "learning_rate": 2.4907289472828632e-05, "loss": 0.9581, "step": 2131 }, { "epoch": 0.72, "learning_rate": 2.4851718411281493e-05, "loss": 0.8629, "step": 2132 }, { "epoch": 0.72, "learning_rate": 2.4796188903041783e-05, "loss": 0.9669, "step": 2133 }, { "epoch": 0.72, "learning_rate": 2.4740701039862663e-05, "loss": 0.9695, "step": 2134 }, { "epoch": 0.73, "learning_rate": 2.4685254913428512e-05, "loss": 1.0699, "step": 2135 }, { "epoch": 0.73, "learning_rate": 2.462985061535472e-05, "loss": 1.0864, "step": 2136 }, { "epoch": 0.73, "learning_rate": 2.4574488237187586e-05, "loss": 0.824, "step": 2137 }, { "epoch": 0.73, "learning_rate": 2.4519167870404125e-05, "loss": 1.0658, "step": 2138 }, { "epoch": 0.73, "learning_rate": 2.4463889606411945e-05, "loss": 1.1506, "step": 2139 }, { "epoch": 0.73, "learning_rate": 2.4408653536549104e-05, "loss": 1.066, "step": 2140 }, { "epoch": 0.73, "learning_rate": 2.435345975208388e-05, "loss": 1.2472, "step": 2141 }, { "epoch": 0.73, "learning_rate": 2.4298308344214745e-05, "loss": 0.9437, "step": 2142 }, { "epoch": 0.73, "learning_rate": 2.4243199404070128e-05, "loss": 0.9417, "step": 2143 }, { "epoch": 0.73, "learning_rate": 2.4188133022708286e-05, "loss": 1.376, "step": 2144 }, { "epoch": 0.73, "learning_rate": 2.4133109291117156e-05, "loss": 0.7561, "step": 2145 }, { "epoch": 0.73, "learning_rate": 2.4078128300214225e-05, "loss": 1.2398, "step": 2146 }, { "epoch": 0.73, "learning_rate": 2.4023190140846302e-05, "loss": 0.9589, "step": 2147 }, { "epoch": 0.73, "learning_rate": 2.3968294903789473e-05, "loss": 0.9647, "step": 2148 }, { "epoch": 0.73, "learning_rate": 2.3913442679748892e-05, "loss": 1.1293, "step": 2149 }, { "epoch": 0.73, "learning_rate": 2.3858633559358633e-05, "loss": 0.9185, "step": 2150 }, { "epoch": 0.73, "learning_rate": 2.3803867633181574e-05, "loss": 1.2827, "step": 2151 }, { "epoch": 0.73, "learning_rate": 2.3749144991709173e-05, "loss": 0.7709, "step": 2152 }, { "epoch": 0.73, "learning_rate": 2.3694465725361393e-05, "loss": 1.1937, "step": 2153 }, { "epoch": 0.73, "learning_rate": 2.3639829924486544e-05, "loss": 0.9818, "step": 2154 }, { "epoch": 0.73, "learning_rate": 2.35852376793611e-05, "loss": 1.017, "step": 2155 }, { "epoch": 0.73, "learning_rate": 2.3530689080189566e-05, "loss": 1.1758, "step": 2156 }, { "epoch": 0.73, "learning_rate": 2.347618421710433e-05, "loss": 1.1436, "step": 2157 }, { "epoch": 0.73, "learning_rate": 2.3421723180165517e-05, "loss": 1.1098, "step": 2158 }, { "epoch": 0.73, "learning_rate": 2.3367306059360837e-05, "loss": 1.3586, "step": 2159 }, { "epoch": 0.73, "learning_rate": 2.331293294460543e-05, "loss": 0.835, "step": 2160 }, { "epoch": 0.73, "learning_rate": 2.3258603925741728e-05, "loss": 1.023, "step": 2161 }, { "epoch": 0.73, "learning_rate": 2.32043190925393e-05, "loss": 1.2612, "step": 2162 }, { "epoch": 0.73, "learning_rate": 2.3150078534694725e-05, "loss": 1.1079, "step": 2163 }, { "epoch": 0.73, "learning_rate": 2.3095882341831372e-05, "loss": 0.7921, "step": 2164 }, { "epoch": 0.74, "learning_rate": 2.3041730603499352e-05, "loss": 0.8896, "step": 2165 }, { "epoch": 0.74, "learning_rate": 2.2987623409175314e-05, "loss": 1.0264, "step": 2166 }, { "epoch": 0.74, "learning_rate": 2.293356084826229e-05, "loss": 1.1236, "step": 2167 }, { "epoch": 0.74, "learning_rate": 2.287954301008961e-05, "loss": 1.1596, "step": 2168 }, { "epoch": 0.74, "learning_rate": 2.2825569983912613e-05, "loss": 1.0961, "step": 2169 }, { "epoch": 0.74, "learning_rate": 2.2771641858912685e-05, "loss": 1.336, "step": 2170 }, { "epoch": 0.74, "learning_rate": 2.271775872419698e-05, "loss": 1.2669, "step": 2171 }, { "epoch": 0.74, "learning_rate": 2.2663920668798316e-05, "loss": 1.1486, "step": 2172 }, { "epoch": 0.74, "learning_rate": 2.2610127781675027e-05, "loss": 0.9301, "step": 2173 }, { "epoch": 0.74, "learning_rate": 2.255638015171085e-05, "loss": 1.115, "step": 2174 }, { "epoch": 0.74, "learning_rate": 2.250267786771465e-05, "loss": 0.9034, "step": 2175 }, { "epoch": 0.74, "learning_rate": 2.244902101842045e-05, "loss": 0.7934, "step": 2176 }, { "epoch": 0.74, "learning_rate": 2.2395409692487175e-05, "loss": 1.2969, "step": 2177 }, { "epoch": 0.74, "learning_rate": 2.2341843978498523e-05, "loss": 1.0594, "step": 2178 }, { "epoch": 0.74, "learning_rate": 2.2288323964962838e-05, "loss": 1.0371, "step": 2179 }, { "epoch": 0.74, "learning_rate": 2.2234849740312942e-05, "loss": 0.9398, "step": 2180 }, { "epoch": 0.74, "learning_rate": 2.2181421392905992e-05, "loss": 1.1056, "step": 2181 }, { "epoch": 0.74, "learning_rate": 2.2128039011023367e-05, "loss": 1.1855, "step": 2182 }, { "epoch": 0.74, "learning_rate": 2.2074702682870464e-05, "loss": 0.8585, "step": 2183 }, { "epoch": 0.74, "learning_rate": 2.2021412496576598e-05, "loss": 1.057, "step": 2184 }, { "epoch": 0.74, "learning_rate": 2.1968168540194872e-05, "loss": 0.8023, "step": 2185 }, { "epoch": 0.74, "learning_rate": 2.191497090170193e-05, "loss": 0.8482, "step": 2186 }, { "epoch": 0.74, "learning_rate": 2.1861819668997945e-05, "loss": 0.8971, "step": 2187 }, { "epoch": 0.74, "learning_rate": 2.180871492990639e-05, "loss": 1.2835, "step": 2188 }, { "epoch": 0.74, "learning_rate": 2.1755656772173926e-05, "loss": 0.8907, "step": 2189 }, { "epoch": 0.74, "learning_rate": 2.1702645283470236e-05, "loss": 1.2872, "step": 2190 }, { "epoch": 0.74, "learning_rate": 2.1649680551387907e-05, "loss": 0.5502, "step": 2191 }, { "epoch": 0.74, "learning_rate": 2.1596762663442218e-05, "loss": 0.9961, "step": 2192 }, { "epoch": 0.74, "learning_rate": 2.15438917070711e-05, "loss": 1.2815, "step": 2193 }, { "epoch": 0.75, "learning_rate": 2.1491067769634927e-05, "loss": 1.012, "step": 2194 }, { "epoch": 0.75, "learning_rate": 2.143829093841637e-05, "loss": 1.0494, "step": 2195 }, { "epoch": 0.75, "learning_rate": 2.1385561300620288e-05, "loss": 1.1834, "step": 2196 }, { "epoch": 0.75, "learning_rate": 2.133287894337351e-05, "loss": 1.2094, "step": 2197 }, { "epoch": 0.75, "learning_rate": 2.128024395372478e-05, "loss": 1.118, "step": 2198 }, { "epoch": 0.75, "learning_rate": 2.1227656418644587e-05, "loss": 1.0948, "step": 2199 }, { "epoch": 0.75, "learning_rate": 2.1175116425024975e-05, "loss": 0.8589, "step": 2200 }, { "epoch": 0.75, "learning_rate": 2.112262405967947e-05, "loss": 0.8839, "step": 2201 }, { "epoch": 0.75, "learning_rate": 2.107017940934286e-05, "loss": 1.0106, "step": 2202 }, { "epoch": 0.75, "learning_rate": 2.1017782560671123e-05, "loss": 0.9141, "step": 2203 }, { "epoch": 0.75, "learning_rate": 2.0965433600241246e-05, "loss": 0.9969, "step": 2204 }, { "epoch": 0.75, "learning_rate": 2.0913132614551083e-05, "loss": 0.8699, "step": 2205 }, { "epoch": 0.75, "learning_rate": 2.0860879690019214e-05, "loss": 1.0407, "step": 2206 }, { "epoch": 0.75, "learning_rate": 2.080867491298483e-05, "loss": 1.0185, "step": 2207 }, { "epoch": 0.75, "learning_rate": 2.075651836970753e-05, "loss": 1.1424, "step": 2208 }, { "epoch": 0.75, "learning_rate": 2.0704410146367227e-05, "loss": 1.061, "step": 2209 }, { "epoch": 0.75, "learning_rate": 2.065235032906401e-05, "loss": 0.9865, "step": 2210 }, { "epoch": 0.75, "learning_rate": 2.060033900381798e-05, "loss": 1.1153, "step": 2211 }, { "epoch": 0.75, "learning_rate": 2.0548376256569108e-05, "loss": 1.0867, "step": 2212 }, { "epoch": 0.75, "learning_rate": 2.0496462173177105e-05, "loss": 0.8053, "step": 2213 }, { "epoch": 0.75, "learning_rate": 2.0444596839421238e-05, "loss": 0.5923, "step": 2214 }, { "epoch": 0.75, "learning_rate": 2.0392780341000263e-05, "loss": 1.1278, "step": 2215 }, { "epoch": 0.75, "learning_rate": 2.0341012763532243e-05, "loss": 0.9483, "step": 2216 }, { "epoch": 0.75, "learning_rate": 2.0289294192554386e-05, "loss": 0.7303, "step": 2217 }, { "epoch": 0.75, "learning_rate": 2.0237624713522946e-05, "loss": 0.8173, "step": 2218 }, { "epoch": 0.75, "learning_rate": 2.0186004411813057e-05, "loss": 1.006, "step": 2219 }, { "epoch": 0.75, "learning_rate": 2.0134433372718565e-05, "loss": 1.2913, "step": 2220 }, { "epoch": 0.75, "learning_rate": 2.008291168145195e-05, "loss": 1.0341, "step": 2221 }, { "epoch": 0.75, "learning_rate": 2.0031439423144148e-05, "loss": 0.8275, "step": 2222 }, { "epoch": 0.76, "learning_rate": 1.9980016682844422e-05, "loss": 1.1533, "step": 2223 }, { "epoch": 0.76, "learning_rate": 1.9928643545520204e-05, "loss": 0.8611, "step": 2224 }, { "epoch": 0.76, "learning_rate": 1.987732009605695e-05, "loss": 1.1173, "step": 2225 }, { "epoch": 0.76, "learning_rate": 1.9826046419258038e-05, "loss": 1.1239, "step": 2226 }, { "epoch": 0.76, "learning_rate": 1.97748225998446e-05, "loss": 0.8841, "step": 2227 }, { "epoch": 0.76, "learning_rate": 1.972364872245539e-05, "loss": 0.8057, "step": 2228 }, { "epoch": 0.76, "learning_rate": 1.967252487164663e-05, "loss": 0.905, "step": 2229 }, { "epoch": 0.76, "learning_rate": 1.9621451131891898e-05, "loss": 0.8123, "step": 2230 }, { "epoch": 0.76, "learning_rate": 1.9570427587581963e-05, "loss": 0.8317, "step": 2231 }, { "epoch": 0.76, "learning_rate": 1.9519454323024643e-05, "loss": 1.0384, "step": 2232 }, { "epoch": 0.76, "learning_rate": 1.9468531422444692e-05, "loss": 1.0024, "step": 2233 }, { "epoch": 0.76, "learning_rate": 1.941765896998365e-05, "loss": 0.7963, "step": 2234 }, { "epoch": 0.76, "learning_rate": 1.93668370496997e-05, "loss": 1.1224, "step": 2235 }, { "epoch": 0.76, "learning_rate": 1.931606574556749e-05, "loss": 0.9895, "step": 2236 }, { "epoch": 0.76, "learning_rate": 1.9265345141478087e-05, "loss": 1.1275, "step": 2237 }, { "epoch": 0.76, "learning_rate": 1.9214675321238752e-05, "loss": 1.1623, "step": 2238 }, { "epoch": 0.76, "learning_rate": 1.9164056368572846e-05, "loss": 0.9531, "step": 2239 }, { "epoch": 0.76, "learning_rate": 1.9113488367119685e-05, "loss": 1.1779, "step": 2240 }, { "epoch": 0.76, "learning_rate": 1.906297140043441e-05, "loss": 0.797, "step": 2241 }, { "epoch": 0.76, "learning_rate": 1.9012505551987765e-05, "loss": 1.0343, "step": 2242 }, { "epoch": 0.76, "learning_rate": 1.8962090905166115e-05, "loss": 1.0305, "step": 2243 }, { "epoch": 0.76, "learning_rate": 1.8911727543271175e-05, "loss": 0.9631, "step": 2244 }, { "epoch": 0.76, "learning_rate": 1.8861415549519946e-05, "loss": 1.1, "step": 2245 }, { "epoch": 0.76, "learning_rate": 1.8811155007044523e-05, "loss": 0.7762, "step": 2246 }, { "epoch": 0.76, "learning_rate": 1.8760945998892038e-05, "loss": 1.0167, "step": 2247 }, { "epoch": 0.76, "learning_rate": 1.8710788608024388e-05, "loss": 1.2516, "step": 2248 }, { "epoch": 0.76, "learning_rate": 1.866068291731825e-05, "loss": 0.8868, "step": 2249 }, { "epoch": 0.76, "learning_rate": 1.8610629009564862e-05, "loss": 1.3549, "step": 2250 }, { "epoch": 0.76, "learning_rate": 1.856062696746988e-05, "loss": 1.0605, "step": 2251 }, { "epoch": 0.76, "learning_rate": 1.8510676873653275e-05, "loss": 1.0451, "step": 2252 }, { "epoch": 0.77, "learning_rate": 1.8460778810649187e-05, "loss": 1.0495, "step": 2253 }, { "epoch": 0.77, "learning_rate": 1.8410932860905766e-05, "loss": 0.7625, "step": 2254 }, { "epoch": 0.77, "learning_rate": 1.836113910678507e-05, "loss": 1.1647, "step": 2255 }, { "epoch": 0.77, "learning_rate": 1.8311397630562904e-05, "loss": 1.2544, "step": 2256 }, { "epoch": 0.77, "learning_rate": 1.82617085144287e-05, "loss": 1.1735, "step": 2257 }, { "epoch": 0.77, "learning_rate": 1.8212071840485378e-05, "loss": 1.1162, "step": 2258 }, { "epoch": 0.77, "learning_rate": 1.816248769074917e-05, "loss": 0.7892, "step": 2259 }, { "epoch": 0.77, "learning_rate": 1.8112956147149556e-05, "loss": 0.9717, "step": 2260 }, { "epoch": 0.77, "learning_rate": 1.806347729152909e-05, "loss": 1.0187, "step": 2261 }, { "epoch": 0.77, "learning_rate": 1.801405120564327e-05, "loss": 1.0912, "step": 2262 }, { "epoch": 0.77, "learning_rate": 1.7964677971160392e-05, "loss": 0.9341, "step": 2263 }, { "epoch": 0.77, "learning_rate": 1.791535766966141e-05, "loss": 1.0746, "step": 2264 }, { "epoch": 0.77, "learning_rate": 1.7866090382639837e-05, "loss": 1.2244, "step": 2265 }, { "epoch": 0.77, "learning_rate": 1.7816876191501586e-05, "loss": 0.9647, "step": 2266 }, { "epoch": 0.77, "learning_rate": 1.776771517756483e-05, "loss": 1.0821, "step": 2267 }, { "epoch": 0.77, "learning_rate": 1.771860742205988e-05, "loss": 1.2555, "step": 2268 }, { "epoch": 0.77, "learning_rate": 1.766955300612907e-05, "loss": 1.1131, "step": 2269 }, { "epoch": 0.77, "learning_rate": 1.7620552010826535e-05, "loss": 0.9761, "step": 2270 }, { "epoch": 0.77, "learning_rate": 1.7571604517118202e-05, "loss": 0.9682, "step": 2271 }, { "epoch": 0.77, "learning_rate": 1.752271060588157e-05, "loss": 1.0092, "step": 2272 }, { "epoch": 0.77, "learning_rate": 1.7473870357905613e-05, "loss": 1.2239, "step": 2273 }, { "epoch": 0.77, "learning_rate": 1.7425083853890628e-05, "loss": 1.2019, "step": 2274 }, { "epoch": 0.77, "learning_rate": 1.7376351174448114e-05, "loss": 0.762, "step": 2275 }, { "epoch": 0.77, "learning_rate": 1.732767240010062e-05, "loss": 1.2845, "step": 2276 }, { "epoch": 0.77, "learning_rate": 1.7279047611281652e-05, "loss": 0.9835, "step": 2277 }, { "epoch": 0.77, "learning_rate": 1.7230476888335485e-05, "loss": 1.2166, "step": 2278 }, { "epoch": 0.77, "learning_rate": 1.718196031151708e-05, "loss": 1.1468, "step": 2279 }, { "epoch": 0.77, "learning_rate": 1.7133497960991946e-05, "loss": 1.1688, "step": 2280 }, { "epoch": 0.77, "learning_rate": 1.7085089916835923e-05, "loss": 1.3361, "step": 2281 }, { "epoch": 0.78, "learning_rate": 1.7036736259035195e-05, "loss": 1.1435, "step": 2282 }, { "epoch": 0.78, "learning_rate": 1.698843706748604e-05, "loss": 0.8509, "step": 2283 }, { "epoch": 0.78, "learning_rate": 1.6940192421994768e-05, "loss": 0.649, "step": 2284 }, { "epoch": 0.78, "learning_rate": 1.6892002402277528e-05, "loss": 0.948, "step": 2285 }, { "epoch": 0.78, "learning_rate": 1.684386708796025e-05, "loss": 1.2118, "step": 2286 }, { "epoch": 0.78, "learning_rate": 1.679578655857842e-05, "loss": 0.9305, "step": 2287 }, { "epoch": 0.78, "learning_rate": 1.6747760893577035e-05, "loss": 1.0226, "step": 2288 }, { "epoch": 0.78, "learning_rate": 1.6699790172310444e-05, "loss": 1.2472, "step": 2289 }, { "epoch": 0.78, "learning_rate": 1.665187447404219e-05, "loss": 0.8466, "step": 2290 }, { "epoch": 0.78, "learning_rate": 1.6604013877944935e-05, "loss": 1.1642, "step": 2291 }, { "epoch": 0.78, "learning_rate": 1.6556208463100225e-05, "loss": 1.2063, "step": 2292 }, { "epoch": 0.78, "learning_rate": 1.650845830849849e-05, "loss": 1.1369, "step": 2293 }, { "epoch": 0.78, "learning_rate": 1.646076349303884e-05, "loss": 1.0145, "step": 2294 }, { "epoch": 0.78, "learning_rate": 1.6413124095528924e-05, "loss": 1.1163, "step": 2295 }, { "epoch": 0.78, "learning_rate": 1.6365540194684852e-05, "loss": 1.2934, "step": 2296 }, { "epoch": 0.78, "learning_rate": 1.6318011869131016e-05, "loss": 1.189, "step": 2297 }, { "epoch": 0.78, "learning_rate": 1.6270539197399988e-05, "loss": 1.0577, "step": 2298 }, { "epoch": 0.78, "learning_rate": 1.622312225793236e-05, "loss": 1.0435, "step": 2299 }, { "epoch": 0.78, "learning_rate": 1.6175761129076673e-05, "loss": 1.0064, "step": 2300 }, { "epoch": 0.78, "learning_rate": 1.612845588908922e-05, "loss": 0.68, "step": 2301 }, { "epoch": 0.78, "learning_rate": 1.608120661613396e-05, "loss": 1.0575, "step": 2302 }, { "epoch": 0.78, "learning_rate": 1.603401338828238e-05, "loss": 1.0838, "step": 2303 }, { "epoch": 0.78, "learning_rate": 1.598687628351334e-05, "loss": 1.0554, "step": 2304 }, { "epoch": 0.78, "learning_rate": 1.5939795379712984e-05, "loss": 0.8154, "step": 2305 }, { "epoch": 0.78, "learning_rate": 1.5892770754674595e-05, "loss": 0.8196, "step": 2306 }, { "epoch": 0.78, "learning_rate": 1.584580248609846e-05, "loss": 1.0896, "step": 2307 }, { "epoch": 0.78, "learning_rate": 1.5798890651591758e-05, "loss": 1.102, "step": 2308 }, { "epoch": 0.78, "learning_rate": 1.5752035328668384e-05, "loss": 1.1338, "step": 2309 }, { "epoch": 0.78, "learning_rate": 1.570523659474889e-05, "loss": 1.1833, "step": 2310 }, { "epoch": 0.78, "learning_rate": 1.565849452716033e-05, "loss": 1.0699, "step": 2311 }, { "epoch": 0.79, "learning_rate": 1.5611809203136092e-05, "loss": 1.0493, "step": 2312 }, { "epoch": 0.79, "learning_rate": 1.556518069981584e-05, "loss": 0.9063, "step": 2313 }, { "epoch": 0.79, "learning_rate": 1.551860909424535e-05, "loss": 1.1344, "step": 2314 }, { "epoch": 0.79, "learning_rate": 1.5472094463376334e-05, "loss": 0.7693, "step": 2315 }, { "epoch": 0.79, "learning_rate": 1.5425636884066426e-05, "loss": 1.1199, "step": 2316 }, { "epoch": 0.79, "learning_rate": 1.5379236433078952e-05, "loss": 0.9143, "step": 2317 }, { "epoch": 0.79, "learning_rate": 1.5332893187082864e-05, "loss": 1.1315, "step": 2318 }, { "epoch": 0.79, "learning_rate": 1.528660722265258e-05, "loss": 0.9626, "step": 2319 }, { "epoch": 0.79, "learning_rate": 1.5240378616267886e-05, "loss": 1.0857, "step": 2320 }, { "epoch": 0.79, "learning_rate": 1.5194207444313774e-05, "loss": 1.0381, "step": 2321 }, { "epoch": 0.79, "learning_rate": 1.5148093783080336e-05, "loss": 1.092, "step": 2322 }, { "epoch": 0.79, "learning_rate": 1.5102037708762662e-05, "loss": 0.9476, "step": 2323 }, { "epoch": 0.79, "learning_rate": 1.5056039297460656e-05, "loss": 0.6398, "step": 2324 }, { "epoch": 0.79, "learning_rate": 1.501009862517898e-05, "loss": 0.998, "step": 2325 }, { "epoch": 0.79, "learning_rate": 1.4964215767826845e-05, "loss": 1.0167, "step": 2326 }, { "epoch": 0.79, "learning_rate": 1.4918390801217974e-05, "loss": 0.9452, "step": 2327 }, { "epoch": 0.79, "learning_rate": 1.4872623801070412e-05, "loss": 1.2008, "step": 2328 }, { "epoch": 0.79, "learning_rate": 1.482691484300644e-05, "loss": 0.8948, "step": 2329 }, { "epoch": 0.79, "learning_rate": 1.4781264002552425e-05, "loss": 1.1953, "step": 2330 }, { "epoch": 0.79, "learning_rate": 1.473567135513872e-05, "loss": 0.8102, "step": 2331 }, { "epoch": 0.79, "learning_rate": 1.4690136976099478e-05, "loss": 1.1035, "step": 2332 }, { "epoch": 0.79, "learning_rate": 1.4644660940672627e-05, "loss": 0.6055, "step": 2333 }, { "epoch": 0.79, "learning_rate": 1.4599243323999667e-05, "loss": 1.1474, "step": 2334 }, { "epoch": 0.79, "learning_rate": 1.4553884201125567e-05, "loss": 1.019, "step": 2335 }, { "epoch": 0.79, "learning_rate": 1.4508583646998675e-05, "loss": 1.1592, "step": 2336 }, { "epoch": 0.79, "learning_rate": 1.4463341736470509e-05, "loss": 1.1537, "step": 2337 }, { "epoch": 0.79, "learning_rate": 1.4418158544295734e-05, "loss": 1.3137, "step": 2338 }, { "epoch": 0.79, "learning_rate": 1.4373034145131981e-05, "loss": 0.7991, "step": 2339 }, { "epoch": 0.79, "learning_rate": 1.4327968613539733e-05, "loss": 0.9505, "step": 2340 }, { "epoch": 0.8, "learning_rate": 1.42829620239822e-05, "loss": 1.2622, "step": 2341 }, { "epoch": 0.8, "learning_rate": 1.4238014450825227e-05, "loss": 0.8015, "step": 2342 }, { "epoch": 0.8, "learning_rate": 1.4193125968337084e-05, "loss": 1.2089, "step": 2343 }, { "epoch": 0.8, "learning_rate": 1.4148296650688464e-05, "loss": 0.8374, "step": 2344 }, { "epoch": 0.8, "learning_rate": 1.4103526571952275e-05, "loss": 0.9171, "step": 2345 }, { "epoch": 0.8, "learning_rate": 1.4058815806103542e-05, "loss": 1.3795, "step": 2346 }, { "epoch": 0.8, "learning_rate": 1.4014164427019294e-05, "loss": 0.7203, "step": 2347 }, { "epoch": 0.8, "learning_rate": 1.3969572508478423e-05, "loss": 1.0695, "step": 2348 }, { "epoch": 0.8, "learning_rate": 1.3925040124161587e-05, "loss": 0.8199, "step": 2349 }, { "epoch": 0.8, "learning_rate": 1.3880567347651052e-05, "loss": 1.1295, "step": 2350 }, { "epoch": 0.8, "learning_rate": 1.3836154252430611e-05, "loss": 0.7595, "step": 2351 }, { "epoch": 0.8, "learning_rate": 1.3791800911885444e-05, "loss": 0.9131, "step": 2352 }, { "epoch": 0.8, "learning_rate": 1.374750739930199e-05, "loss": 1.0846, "step": 2353 }, { "epoch": 0.8, "learning_rate": 1.370327378786781e-05, "loss": 0.6886, "step": 2354 }, { "epoch": 0.8, "learning_rate": 1.3659100150671528e-05, "loss": 0.9361, "step": 2355 }, { "epoch": 0.8, "learning_rate": 1.3614986560702647e-05, "loss": 1.0154, "step": 2356 }, { "epoch": 0.8, "learning_rate": 1.3570933090851462e-05, "loss": 1.2338, "step": 2357 }, { "epoch": 0.8, "learning_rate": 1.3526939813908929e-05, "loss": 1.1126, "step": 2358 }, { "epoch": 0.8, "learning_rate": 1.3483006802566544e-05, "loss": 1.1553, "step": 2359 }, { "epoch": 0.8, "learning_rate": 1.3439134129416209e-05, "loss": 1.3455, "step": 2360 }, { "epoch": 0.8, "learning_rate": 1.3395321866950144e-05, "loss": 1.0736, "step": 2361 }, { "epoch": 0.8, "learning_rate": 1.335157008756075e-05, "loss": 0.9983, "step": 2362 }, { "epoch": 0.8, "learning_rate": 1.3307878863540484e-05, "loss": 1.0749, "step": 2363 }, { "epoch": 0.8, "learning_rate": 1.3264248267081769e-05, "loss": 1.0869, "step": 2364 }, { "epoch": 0.8, "learning_rate": 1.32206783702768e-05, "loss": 0.8537, "step": 2365 }, { "epoch": 0.8, "learning_rate": 1.3177169245117522e-05, "loss": 1.3868, "step": 2366 }, { "epoch": 0.8, "learning_rate": 1.3133720963495443e-05, "loss": 1.0324, "step": 2367 }, { "epoch": 0.8, "learning_rate": 1.3090333597201548e-05, "loss": 1.0954, "step": 2368 }, { "epoch": 0.8, "learning_rate": 1.3047007217926171e-05, "loss": 0.9475, "step": 2369 }, { "epoch": 0.8, "learning_rate": 1.3003741897258865e-05, "loss": 0.9468, "step": 2370 }, { "epoch": 0.81, "learning_rate": 1.2960537706688292e-05, "loss": 0.9107, "step": 2371 }, { "epoch": 0.81, "learning_rate": 1.2917394717602121e-05, "loss": 0.8248, "step": 2372 }, { "epoch": 0.81, "learning_rate": 1.2874313001286892e-05, "loss": 1.0773, "step": 2373 }, { "epoch": 0.81, "learning_rate": 1.2831292628927888e-05, "loss": 1.017, "step": 2374 }, { "epoch": 0.81, "learning_rate": 1.2788333671609049e-05, "loss": 0.834, "step": 2375 }, { "epoch": 0.81, "learning_rate": 1.2745436200312843e-05, "loss": 0.8403, "step": 2376 }, { "epoch": 0.81, "learning_rate": 1.2702600285920102e-05, "loss": 0.8531, "step": 2377 }, { "epoch": 0.81, "learning_rate": 1.2659825999209984e-05, "loss": 0.8541, "step": 2378 }, { "epoch": 0.81, "learning_rate": 1.2617113410859805e-05, "loss": 1.0655, "step": 2379 }, { "epoch": 0.81, "learning_rate": 1.257446259144494e-05, "loss": 1.1658, "step": 2380 }, { "epoch": 0.81, "learning_rate": 1.2531873611438716e-05, "loss": 1.2947, "step": 2381 }, { "epoch": 0.81, "learning_rate": 1.2489346541212227e-05, "loss": 1.1069, "step": 2382 }, { "epoch": 0.81, "learning_rate": 1.2446881451034325e-05, "loss": 0.9959, "step": 2383 }, { "epoch": 0.81, "learning_rate": 1.240447841107143e-05, "loss": 1.2444, "step": 2384 }, { "epoch": 0.81, "learning_rate": 1.2362137491387432e-05, "loss": 0.9013, "step": 2385 }, { "epoch": 0.81, "learning_rate": 1.2319858761943597e-05, "loss": 0.8929, "step": 2386 }, { "epoch": 0.81, "learning_rate": 1.2277642292598418e-05, "loss": 0.8993, "step": 2387 }, { "epoch": 0.81, "learning_rate": 1.2235488153107489e-05, "loss": 1.0735, "step": 2388 }, { "epoch": 0.81, "learning_rate": 1.2193396413123454e-05, "loss": 0.8965, "step": 2389 }, { "epoch": 0.81, "learning_rate": 1.215136714219584e-05, "loss": 1.2615, "step": 2390 }, { "epoch": 0.81, "learning_rate": 1.2109400409770943e-05, "loss": 1.0808, "step": 2391 }, { "epoch": 0.81, "learning_rate": 1.2067496285191743e-05, "loss": 0.9692, "step": 2392 }, { "epoch": 0.81, "learning_rate": 1.2025654837697748e-05, "loss": 0.7178, "step": 2393 }, { "epoch": 0.81, "learning_rate": 1.1983876136424927e-05, "loss": 0.7407, "step": 2394 }, { "epoch": 0.81, "learning_rate": 1.194216025040556e-05, "loss": 1.0862, "step": 2395 }, { "epoch": 0.81, "learning_rate": 1.1900507248568127e-05, "loss": 0.8039, "step": 2396 }, { "epoch": 0.81, "learning_rate": 1.1858917199737219e-05, "loss": 1.1791, "step": 2397 }, { "epoch": 0.81, "learning_rate": 1.1817390172633403e-05, "loss": 1.0947, "step": 2398 }, { "epoch": 0.81, "learning_rate": 1.1775926235873097e-05, "loss": 0.9428, "step": 2399 }, { "epoch": 0.82, "learning_rate": 1.1734525457968488e-05, "loss": 1.0724, "step": 2400 }, { "epoch": 0.82, "learning_rate": 1.1693187907327402e-05, "loss": 1.1134, "step": 2401 }, { "epoch": 0.82, "learning_rate": 1.16519136522532e-05, "loss": 0.7783, "step": 2402 }, { "epoch": 0.82, "learning_rate": 1.1610702760944641e-05, "loss": 0.848, "step": 2403 }, { "epoch": 0.82, "learning_rate": 1.1569555301495816e-05, "loss": 0.9411, "step": 2404 }, { "epoch": 0.82, "learning_rate": 1.1528471341895947e-05, "loss": 1.2191, "step": 2405 }, { "epoch": 0.82, "learning_rate": 1.1487450950029389e-05, "loss": 1.0967, "step": 2406 }, { "epoch": 0.82, "learning_rate": 1.1446494193675444e-05, "loss": 1.0685, "step": 2407 }, { "epoch": 0.82, "learning_rate": 1.1405601140508266e-05, "loss": 1.2786, "step": 2408 }, { "epoch": 0.82, "learning_rate": 1.1364771858096767e-05, "loss": 0.9356, "step": 2409 }, { "epoch": 0.82, "learning_rate": 1.1324006413904437e-05, "loss": 0.9345, "step": 2410 }, { "epoch": 0.82, "learning_rate": 1.1283304875289336e-05, "loss": 0.7042, "step": 2411 }, { "epoch": 0.82, "learning_rate": 1.1242667309503918e-05, "loss": 1.2072, "step": 2412 }, { "epoch": 0.82, "learning_rate": 1.1202093783694922e-05, "loss": 1.1604, "step": 2413 }, { "epoch": 0.82, "learning_rate": 1.1161584364903287e-05, "loss": 1.2383, "step": 2414 }, { "epoch": 0.82, "learning_rate": 1.1121139120064006e-05, "loss": 0.8647, "step": 2415 }, { "epoch": 0.82, "learning_rate": 1.1080758116006057e-05, "loss": 0.8908, "step": 2416 }, { "epoch": 0.82, "learning_rate": 1.1040441419452253e-05, "loss": 0.9487, "step": 2417 }, { "epoch": 0.82, "learning_rate": 1.1000189097019164e-05, "loss": 0.9406, "step": 2418 }, { "epoch": 0.82, "learning_rate": 1.0960001215216975e-05, "loss": 1.1479, "step": 2419 }, { "epoch": 0.82, "learning_rate": 1.0919877840449427e-05, "loss": 1.0693, "step": 2420 }, { "epoch": 0.82, "learning_rate": 1.0879819039013622e-05, "loss": 0.9942, "step": 2421 }, { "epoch": 0.82, "learning_rate": 1.0839824877100008e-05, "loss": 1.0923, "step": 2422 }, { "epoch": 0.82, "learning_rate": 1.0799895420792212e-05, "loss": 1.04, "step": 2423 }, { "epoch": 0.82, "learning_rate": 1.0760030736066951e-05, "loss": 1.034, "step": 2424 }, { "epoch": 0.82, "learning_rate": 1.072023088879391e-05, "loss": 0.8709, "step": 2425 }, { "epoch": 0.82, "learning_rate": 1.0680495944735664e-05, "loss": 1.0289, "step": 2426 }, { "epoch": 0.82, "learning_rate": 1.0640825969547496e-05, "loss": 1.2801, "step": 2427 }, { "epoch": 0.82, "learning_rate": 1.0601221028777392e-05, "loss": 0.8754, "step": 2428 }, { "epoch": 0.82, "learning_rate": 1.0561681187865847e-05, "loss": 1.026, "step": 2429 }, { "epoch": 0.83, "learning_rate": 1.052220651214581e-05, "loss": 1.2942, "step": 2430 }, { "epoch": 0.83, "learning_rate": 1.0482797066842542e-05, "loss": 1.1457, "step": 2431 }, { "epoch": 0.83, "learning_rate": 1.0443452917073537e-05, "loss": 1.1469, "step": 2432 }, { "epoch": 0.83, "learning_rate": 1.0404174127848371e-05, "loss": 1.0833, "step": 2433 }, { "epoch": 0.83, "learning_rate": 1.0364960764068644e-05, "loss": 0.8623, "step": 2434 }, { "epoch": 0.83, "learning_rate": 1.0325812890527847e-05, "loss": 0.8807, "step": 2435 }, { "epoch": 0.83, "learning_rate": 1.0286730571911262e-05, "loss": 0.976, "step": 2436 }, { "epoch": 0.83, "learning_rate": 1.024771387279585e-05, "loss": 1.2377, "step": 2437 }, { "epoch": 0.83, "learning_rate": 1.020876285765015e-05, "loss": 1.0697, "step": 2438 }, { "epoch": 0.83, "learning_rate": 1.0169877590834164e-05, "loss": 0.7937, "step": 2439 }, { "epoch": 0.83, "learning_rate": 1.0131058136599252e-05, "loss": 0.9152, "step": 2440 }, { "epoch": 0.83, "learning_rate": 1.0092304559088045e-05, "loss": 0.9736, "step": 2441 }, { "epoch": 0.83, "learning_rate": 1.0053616922334309e-05, "loss": 0.9861, "step": 2442 }, { "epoch": 0.83, "learning_rate": 1.0014995290262874e-05, "loss": 0.8711, "step": 2443 }, { "epoch": 0.83, "learning_rate": 9.976439726689469e-06, "loss": 1.0217, "step": 2444 }, { "epoch": 0.83, "learning_rate": 9.937950295320692e-06, "loss": 1.1418, "step": 2445 }, { "epoch": 0.83, "learning_rate": 9.899527059753861e-06, "loss": 1.0147, "step": 2446 }, { "epoch": 0.83, "learning_rate": 9.861170083476912e-06, "loss": 0.8072, "step": 2447 }, { "epoch": 0.83, "learning_rate": 9.822879429868304e-06, "loss": 1.1326, "step": 2448 }, { "epoch": 0.83, "learning_rate": 9.784655162196892e-06, "loss": 0.673, "step": 2449 }, { "epoch": 0.83, "learning_rate": 9.746497343621857e-06, "loss": 0.8123, "step": 2450 }, { "epoch": 0.83, "learning_rate": 9.708406037192586e-06, "loss": 0.7425, "step": 2451 }, { "epoch": 0.83, "learning_rate": 9.670381305848547e-06, "loss": 0.9491, "step": 2452 }, { "epoch": 0.83, "learning_rate": 9.632423212419228e-06, "loss": 1.2057, "step": 2453 }, { "epoch": 0.83, "learning_rate": 9.594531819624004e-06, "loss": 0.8949, "step": 2454 }, { "epoch": 0.83, "learning_rate": 9.556707190072007e-06, "loss": 1.0086, "step": 2455 }, { "epoch": 0.83, "learning_rate": 9.518949386262088e-06, "loss": 0.8342, "step": 2456 }, { "epoch": 0.83, "learning_rate": 9.481258470582676e-06, "loss": 1.0786, "step": 2457 }, { "epoch": 0.83, "learning_rate": 9.44363450531167e-06, "loss": 1.1075, "step": 2458 }, { "epoch": 0.84, "learning_rate": 9.406077552616343e-06, "loss": 0.9052, "step": 2459 }, { "epoch": 0.84, "learning_rate": 9.368587674553264e-06, "loss": 1.0017, "step": 2460 }, { "epoch": 0.84, "learning_rate": 9.331164933068126e-06, "loss": 1.0802, "step": 2461 }, { "epoch": 0.84, "learning_rate": 9.293809389995733e-06, "loss": 1.0738, "step": 2462 }, { "epoch": 0.84, "learning_rate": 9.256521107059834e-06, "loss": 0.848, "step": 2463 }, { "epoch": 0.84, "learning_rate": 9.21930014587305e-06, "loss": 1.0343, "step": 2464 }, { "epoch": 0.84, "learning_rate": 9.182146567936767e-06, "loss": 1.2166, "step": 2465 }, { "epoch": 0.84, "learning_rate": 9.145060434641017e-06, "loss": 0.9364, "step": 2466 }, { "epoch": 0.84, "learning_rate": 9.108041807264412e-06, "loss": 1.1065, "step": 2467 }, { "epoch": 0.84, "learning_rate": 9.071090746973998e-06, "loss": 1.4447, "step": 2468 }, { "epoch": 0.84, "learning_rate": 9.034207314825199e-06, "loss": 1.0452, "step": 2469 }, { "epoch": 0.84, "learning_rate": 8.997391571761681e-06, "loss": 1.0695, "step": 2470 }, { "epoch": 0.84, "learning_rate": 8.96064357861528e-06, "loss": 0.9709, "step": 2471 }, { "epoch": 0.84, "learning_rate": 8.92396339610586e-06, "loss": 0.9974, "step": 2472 }, { "epoch": 0.84, "learning_rate": 8.887351084841266e-06, "loss": 1.1768, "step": 2473 }, { "epoch": 0.84, "learning_rate": 8.850806705317183e-06, "loss": 0.9743, "step": 2474 }, { "epoch": 0.84, "learning_rate": 8.814330317917052e-06, "loss": 1.0771, "step": 2475 }, { "epoch": 0.84, "learning_rate": 8.777921982911996e-06, "loss": 0.7596, "step": 2476 }, { "epoch": 0.84, "learning_rate": 8.741581760460626e-06, "loss": 1.1192, "step": 2477 }, { "epoch": 0.84, "learning_rate": 8.705309710609078e-06, "loss": 0.8587, "step": 2478 }, { "epoch": 0.84, "learning_rate": 8.669105893290807e-06, "loss": 0.9042, "step": 2479 }, { "epoch": 0.84, "learning_rate": 8.632970368326537e-06, "loss": 0.83, "step": 2480 }, { "epoch": 0.84, "learning_rate": 8.596903195424149e-06, "loss": 1.0611, "step": 2481 }, { "epoch": 0.84, "learning_rate": 8.560904434178591e-06, "loss": 0.8624, "step": 2482 }, { "epoch": 0.84, "learning_rate": 8.524974144071745e-06, "loss": 1.1271, "step": 2483 }, { "epoch": 0.84, "learning_rate": 8.489112384472386e-06, "loss": 0.9157, "step": 2484 }, { "epoch": 0.84, "learning_rate": 8.453319214636036e-06, "loss": 1.0634, "step": 2485 }, { "epoch": 0.84, "learning_rate": 8.4175946937049e-06, "loss": 0.8443, "step": 2486 }, { "epoch": 0.84, "learning_rate": 8.381938880707736e-06, "loss": 0.8799, "step": 2487 }, { "epoch": 0.85, "learning_rate": 8.346351834559785e-06, "loss": 1.1776, "step": 2488 }, { "epoch": 0.85, "learning_rate": 8.310833614062651e-06, "loss": 1.1083, "step": 2489 }, { "epoch": 0.85, "learning_rate": 8.27538427790423e-06, "loss": 0.8052, "step": 2490 }, { "epoch": 0.85, "learning_rate": 8.240003884658592e-06, "loss": 1.145, "step": 2491 }, { "epoch": 0.85, "learning_rate": 8.204692492785875e-06, "loss": 1.0236, "step": 2492 }, { "epoch": 0.85, "learning_rate": 8.169450160632246e-06, "loss": 1.3165, "step": 2493 }, { "epoch": 0.85, "learning_rate": 8.134276946429703e-06, "loss": 0.8472, "step": 2494 }, { "epoch": 0.85, "learning_rate": 8.099172908296076e-06, "loss": 0.9898, "step": 2495 }, { "epoch": 0.85, "learning_rate": 8.064138104234897e-06, "loss": 0.8994, "step": 2496 }, { "epoch": 0.85, "learning_rate": 8.029172592135276e-06, "loss": 0.8586, "step": 2497 }, { "epoch": 0.85, "learning_rate": 7.994276429771858e-06, "loss": 0.8284, "step": 2498 }, { "epoch": 0.85, "learning_rate": 7.959449674804687e-06, "loss": 0.895, "step": 2499 }, { "epoch": 0.85, "learning_rate": 7.924692384779098e-06, "loss": 0.9604, "step": 2500 }, { "epoch": 0.85, "learning_rate": 7.890004617125684e-06, "loss": 0.9761, "step": 2501 }, { "epoch": 0.85, "learning_rate": 7.85538642916015e-06, "loss": 1.0677, "step": 2502 }, { "epoch": 0.85, "learning_rate": 7.820837878083231e-06, "loss": 1.1091, "step": 2503 }, { "epoch": 0.85, "learning_rate": 7.786359020980605e-06, "loss": 0.9477, "step": 2504 }, { "epoch": 0.85, "learning_rate": 7.75194991482277e-06, "loss": 1.0909, "step": 2505 }, { "epoch": 0.85, "learning_rate": 7.717610616464998e-06, "loss": 1.0145, "step": 2506 }, { "epoch": 0.85, "learning_rate": 7.683341182647214e-06, "loss": 0.8747, "step": 2507 }, { "epoch": 0.85, "learning_rate": 7.649141669993881e-06, "loss": 1.1465, "step": 2508 }, { "epoch": 0.85, "learning_rate": 7.6150121350139545e-06, "loss": 1.1148, "step": 2509 }, { "epoch": 0.85, "learning_rate": 7.580952634100758e-06, "loss": 0.8969, "step": 2510 }, { "epoch": 0.85, "learning_rate": 7.546963223531877e-06, "loss": 0.9775, "step": 2511 }, { "epoch": 0.85, "learning_rate": 7.513043959469107e-06, "loss": 1.17, "step": 2512 }, { "epoch": 0.85, "learning_rate": 7.4791948979583305e-06, "loss": 1.0388, "step": 2513 }, { "epoch": 0.85, "learning_rate": 7.445416094929425e-06, "loss": 1.1483, "step": 2514 }, { "epoch": 0.85, "learning_rate": 7.4117076061961885e-06, "loss": 0.9244, "step": 2515 }, { "epoch": 0.85, "learning_rate": 7.37806948745624e-06, "loss": 0.9507, "step": 2516 }, { "epoch": 0.85, "learning_rate": 7.344501794290892e-06, "loss": 0.9696, "step": 2517 }, { "epoch": 0.86, "learning_rate": 7.3110045821651315e-06, "loss": 1.2121, "step": 2518 }, { "epoch": 0.86, "learning_rate": 7.277577906427463e-06, "loss": 1.1673, "step": 2519 }, { "epoch": 0.86, "learning_rate": 7.244221822309855e-06, "loss": 1.1547, "step": 2520 }, { "epoch": 0.86, "learning_rate": 7.21093638492763e-06, "loss": 0.86, "step": 2521 }, { "epoch": 0.86, "learning_rate": 7.1777216492793675e-06, "loss": 1.279, "step": 2522 }, { "epoch": 0.86, "learning_rate": 7.144577670246838e-06, "loss": 1.0952, "step": 2523 }, { "epoch": 0.86, "learning_rate": 7.111504502594896e-06, "loss": 0.7879, "step": 2524 }, { "epoch": 0.86, "learning_rate": 7.078502200971387e-06, "loss": 0.91, "step": 2525 }, { "epoch": 0.86, "learning_rate": 7.0455708199070716e-06, "loss": 1.3209, "step": 2526 }, { "epoch": 0.86, "learning_rate": 7.012710413815532e-06, "loss": 1.1399, "step": 2527 }, { "epoch": 0.86, "learning_rate": 6.979921036993042e-06, "loss": 1.0645, "step": 2528 }, { "epoch": 0.86, "learning_rate": 6.947202743618541e-06, "loss": 0.9408, "step": 2529 }, { "epoch": 0.86, "learning_rate": 6.9145555877535076e-06, "loss": 1.0063, "step": 2530 }, { "epoch": 0.86, "learning_rate": 6.8819796233418886e-06, "loss": 0.8228, "step": 2531 }, { "epoch": 0.86, "learning_rate": 6.849474904209979e-06, "loss": 1.0364, "step": 2532 }, { "epoch": 0.86, "learning_rate": 6.817041484066366e-06, "loss": 0.8794, "step": 2533 }, { "epoch": 0.86, "learning_rate": 6.784679416501822e-06, "loss": 0.9855, "step": 2534 }, { "epoch": 0.86, "learning_rate": 6.752388754989225e-06, "loss": 0.9987, "step": 2535 }, { "epoch": 0.86, "learning_rate": 6.72016955288346e-06, "loss": 0.8834, "step": 2536 }, { "epoch": 0.86, "learning_rate": 6.688021863421351e-06, "loss": 0.9553, "step": 2537 }, { "epoch": 0.86, "learning_rate": 6.655945739721548e-06, "loss": 0.889, "step": 2538 }, { "epoch": 0.86, "learning_rate": 6.623941234784442e-06, "loss": 1.2477, "step": 2539 }, { "epoch": 0.86, "learning_rate": 6.592008401492106e-06, "loss": 1.1649, "step": 2540 }, { "epoch": 0.86, "learning_rate": 6.5601472926081766e-06, "loss": 0.9493, "step": 2541 }, { "epoch": 0.86, "learning_rate": 6.528357960777775e-06, "loss": 0.8941, "step": 2542 }, { "epoch": 0.86, "learning_rate": 6.496640458527436e-06, "loss": 1.0781, "step": 2543 }, { "epoch": 0.86, "learning_rate": 6.4649948382650096e-06, "loss": 0.9507, "step": 2544 }, { "epoch": 0.86, "learning_rate": 6.433421152279534e-06, "loss": 1.1691, "step": 2545 }, { "epoch": 0.86, "learning_rate": 6.401919452741234e-06, "loss": 0.7599, "step": 2546 }, { "epoch": 0.87, "learning_rate": 6.370489791701373e-06, "loss": 0.884, "step": 2547 }, { "epoch": 0.87, "learning_rate": 6.339132221092181e-06, "loss": 0.9286, "step": 2548 }, { "epoch": 0.87, "learning_rate": 6.307846792726779e-06, "loss": 0.6113, "step": 2549 }, { "epoch": 0.87, "learning_rate": 6.276633558299055e-06, "loss": 0.9529, "step": 2550 }, { "epoch": 0.87, "learning_rate": 6.2454925693836485e-06, "loss": 0.9953, "step": 2551 }, { "epoch": 0.87, "learning_rate": 6.214423877435805e-06, "loss": 0.9351, "step": 2552 }, { "epoch": 0.87, "learning_rate": 6.183427533791314e-06, "loss": 0.9243, "step": 2553 }, { "epoch": 0.87, "learning_rate": 6.152503589666425e-06, "loss": 1.0589, "step": 2554 }, { "epoch": 0.87, "learning_rate": 6.121652096157754e-06, "loss": 0.9674, "step": 2555 }, { "epoch": 0.87, "learning_rate": 6.090873104242212e-06, "loss": 0.777, "step": 2556 }, { "epoch": 0.87, "learning_rate": 6.0601666647769005e-06, "loss": 1.067, "step": 2557 }, { "epoch": 0.87, "learning_rate": 6.029532828499051e-06, "loss": 0.793, "step": 2558 }, { "epoch": 0.87, "learning_rate": 5.998971646025936e-06, "loss": 0.8049, "step": 2559 }, { "epoch": 0.87, "learning_rate": 5.9684831678547606e-06, "loss": 1.1004, "step": 2560 }, { "epoch": 0.87, "learning_rate": 5.9380674443626275e-06, "loss": 1.1101, "step": 2561 }, { "epoch": 0.87, "learning_rate": 5.90772452580638e-06, "loss": 1.0819, "step": 2562 }, { "epoch": 0.87, "learning_rate": 5.877454462322601e-06, "loss": 1.2432, "step": 2563 }, { "epoch": 0.87, "learning_rate": 5.847257303927484e-06, "loss": 1.2702, "step": 2564 }, { "epoch": 0.87, "learning_rate": 5.817133100516759e-06, "loss": 1.1472, "step": 2565 }, { "epoch": 0.87, "learning_rate": 5.7870819018656096e-06, "loss": 0.9363, "step": 2566 }, { "epoch": 0.87, "learning_rate": 5.757103757628573e-06, "loss": 0.9569, "step": 2567 }, { "epoch": 0.87, "learning_rate": 5.727198717339511e-06, "loss": 0.8216, "step": 2568 }, { "epoch": 0.87, "learning_rate": 5.69736683041146e-06, "loss": 0.8895, "step": 2569 }, { "epoch": 0.87, "learning_rate": 5.667608146136611e-06, "loss": 1.0818, "step": 2570 }, { "epoch": 0.87, "learning_rate": 5.6379227136861775e-06, "loss": 1.3868, "step": 2571 }, { "epoch": 0.87, "learning_rate": 5.60831058211036e-06, "loss": 1.0394, "step": 2572 }, { "epoch": 0.87, "learning_rate": 5.578771800338212e-06, "loss": 0.9605, "step": 2573 }, { "epoch": 0.87, "learning_rate": 5.549306417177602e-06, "loss": 1.0894, "step": 2574 }, { "epoch": 0.87, "learning_rate": 5.519914481315136e-06, "loss": 0.9115, "step": 2575 }, { "epoch": 0.87, "learning_rate": 5.490596041316037e-06, "loss": 1.2207, "step": 2576 }, { "epoch": 0.88, "learning_rate": 5.461351145624111e-06, "loss": 1.0146, "step": 2577 }, { "epoch": 0.88, "learning_rate": 5.432179842561613e-06, "loss": 1.0642, "step": 2578 }, { "epoch": 0.88, "learning_rate": 5.4030821803292275e-06, "loss": 0.7553, "step": 2579 }, { "epoch": 0.88, "learning_rate": 5.374058207005944e-06, "loss": 1.0769, "step": 2580 }, { "epoch": 0.88, "learning_rate": 5.345107970548996e-06, "loss": 0.8195, "step": 2581 }, { "epoch": 0.88, "learning_rate": 5.316231518793801e-06, "loss": 0.9985, "step": 2582 }, { "epoch": 0.88, "learning_rate": 5.287428899453834e-06, "loss": 0.9762, "step": 2583 }, { "epoch": 0.88, "learning_rate": 5.2587001601205666e-06, "loss": 1.0639, "step": 2584 }, { "epoch": 0.88, "learning_rate": 5.2300453482634235e-06, "loss": 1.0755, "step": 2585 }, { "epoch": 0.88, "learning_rate": 5.201464511229659e-06, "loss": 0.8523, "step": 2586 }, { "epoch": 0.88, "learning_rate": 5.172957696244301e-06, "loss": 0.9018, "step": 2587 }, { "epoch": 0.88, "learning_rate": 5.144524950410074e-06, "loss": 1.0272, "step": 2588 }, { "epoch": 0.88, "learning_rate": 5.116166320707317e-06, "loss": 0.9388, "step": 2589 }, { "epoch": 0.88, "learning_rate": 5.087881853993876e-06, "loss": 1.2083, "step": 2590 }, { "epoch": 0.88, "learning_rate": 5.059671597005089e-06, "loss": 1.1692, "step": 2591 }, { "epoch": 0.88, "learning_rate": 5.031535596353665e-06, "loss": 1.0823, "step": 2592 }, { "epoch": 0.88, "learning_rate": 5.0034738985296095e-06, "loss": 0.7992, "step": 2593 }, { "epoch": 0.88, "learning_rate": 4.975486549900177e-06, "loss": 1.0382, "step": 2594 }, { "epoch": 0.88, "learning_rate": 4.947573596709732e-06, "loss": 0.9839, "step": 2595 }, { "epoch": 0.88, "learning_rate": 4.919735085079746e-06, "loss": 1.106, "step": 2596 }, { "epoch": 0.88, "learning_rate": 4.891971061008682e-06, "loss": 0.9454, "step": 2597 }, { "epoch": 0.88, "learning_rate": 4.86428157037192e-06, "loss": 0.8553, "step": 2598 }, { "epoch": 0.88, "learning_rate": 4.83666665892169e-06, "loss": 0.776, "step": 2599 }, { "epoch": 0.88, "learning_rate": 4.809126372286998e-06, "loss": 0.7069, "step": 2600 }, { "epoch": 0.88, "learning_rate": 4.781660755973522e-06, "loss": 1.1391, "step": 2601 }, { "epoch": 0.88, "learning_rate": 4.7542698553635855e-06, "loss": 0.7717, "step": 2602 }, { "epoch": 0.88, "learning_rate": 4.726953715716037e-06, "loss": 1.0558, "step": 2603 }, { "epoch": 0.88, "learning_rate": 4.699712382166216e-06, "loss": 0.9577, "step": 2604 }, { "epoch": 0.88, "learning_rate": 4.672545899725844e-06, "loss": 1.0254, "step": 2605 }, { "epoch": 0.89, "learning_rate": 4.645454313282965e-06, "loss": 0.757, "step": 2606 }, { "epoch": 0.89, "learning_rate": 4.618437667601866e-06, "loss": 0.8431, "step": 2607 }, { "epoch": 0.89, "learning_rate": 4.591496007323021e-06, "loss": 1.0393, "step": 2608 }, { "epoch": 0.89, "learning_rate": 4.564629376962987e-06, "loss": 0.9041, "step": 2609 }, { "epoch": 0.89, "learning_rate": 4.537837820914359e-06, "loss": 1.282, "step": 2610 }, { "epoch": 0.89, "learning_rate": 4.511121383445688e-06, "loss": 0.7904, "step": 2611 }, { "epoch": 0.89, "learning_rate": 4.484480108701372e-06, "loss": 0.9081, "step": 2612 }, { "epoch": 0.89, "learning_rate": 4.457914040701649e-06, "loss": 0.7258, "step": 2613 }, { "epoch": 0.89, "learning_rate": 4.431423223342485e-06, "loss": 1.0629, "step": 2614 }, { "epoch": 0.89, "learning_rate": 4.405007700395497e-06, "loss": 1.0251, "step": 2615 }, { "epoch": 0.89, "learning_rate": 4.378667515507895e-06, "loss": 1.0505, "step": 2616 }, { "epoch": 0.89, "learning_rate": 4.3524027122024245e-06, "loss": 0.6726, "step": 2617 }, { "epoch": 0.89, "learning_rate": 4.326213333877227e-06, "loss": 1.0557, "step": 2618 }, { "epoch": 0.89, "learning_rate": 4.3000994238058644e-06, "loss": 1.0708, "step": 2619 }, { "epoch": 0.89, "learning_rate": 4.274061025137183e-06, "loss": 1.096, "step": 2620 }, { "epoch": 0.89, "learning_rate": 4.248098180895254e-06, "loss": 1.0023, "step": 2621 }, { "epoch": 0.89, "learning_rate": 4.222210933979326e-06, "loss": 0.9947, "step": 2622 }, { "epoch": 0.89, "learning_rate": 4.196399327163697e-06, "loss": 0.7598, "step": 2623 }, { "epoch": 0.89, "learning_rate": 4.17066340309773e-06, "loss": 0.8699, "step": 2624 }, { "epoch": 0.89, "learning_rate": 4.145003204305697e-06, "loss": 0.9823, "step": 2625 }, { "epoch": 0.89, "learning_rate": 4.1194187731867785e-06, "loss": 0.8626, "step": 2626 }, { "epoch": 0.89, "learning_rate": 4.093910152014935e-06, "loss": 1.0345, "step": 2627 }, { "epoch": 0.89, "learning_rate": 4.068477382938874e-06, "loss": 0.6479, "step": 2628 }, { "epoch": 0.89, "learning_rate": 4.043120507981979e-06, "loss": 1.0648, "step": 2629 }, { "epoch": 0.89, "learning_rate": 4.017839569042214e-06, "loss": 0.8469, "step": 2630 }, { "epoch": 0.89, "learning_rate": 3.992634607892087e-06, "loss": 1.0875, "step": 2631 }, { "epoch": 0.89, "learning_rate": 3.967505666178556e-06, "loss": 0.9504, "step": 2632 }, { "epoch": 0.89, "learning_rate": 3.942452785422985e-06, "loss": 0.9694, "step": 2633 }, { "epoch": 0.89, "learning_rate": 3.91747600702102e-06, "loss": 0.9267, "step": 2634 }, { "epoch": 0.89, "learning_rate": 3.892575372242607e-06, "loss": 0.9869, "step": 2635 }, { "epoch": 0.9, "learning_rate": 3.867750922231855e-06, "loss": 1.01, "step": 2636 }, { "epoch": 0.9, "learning_rate": 3.843002698006998e-06, "loss": 1.2168, "step": 2637 }, { "epoch": 0.9, "learning_rate": 3.818330740460308e-06, "loss": 0.7112, "step": 2638 }, { "epoch": 0.9, "learning_rate": 3.793735090358064e-06, "loss": 1.1402, "step": 2639 }, { "epoch": 0.9, "learning_rate": 3.769215788340419e-06, "loss": 1.2191, "step": 2640 }, { "epoch": 0.9, "learning_rate": 3.744772874921404e-06, "loss": 1.0154, "step": 2641 }, { "epoch": 0.9, "learning_rate": 3.7204063904888342e-06, "loss": 0.8629, "step": 2642 }, { "epoch": 0.9, "learning_rate": 3.696116375304226e-06, "loss": 0.7715, "step": 2643 }, { "epoch": 0.9, "learning_rate": 3.6719028695027357e-06, "loss": 0.7234, "step": 2644 }, { "epoch": 0.9, "learning_rate": 3.647765913093132e-06, "loss": 0.9144, "step": 2645 }, { "epoch": 0.9, "learning_rate": 3.6237055459576506e-06, "loss": 1.1511, "step": 2646 }, { "epoch": 0.9, "learning_rate": 3.5997218078520246e-06, "loss": 0.9595, "step": 2647 }, { "epoch": 0.9, "learning_rate": 3.5758147384053307e-06, "loss": 1.0183, "step": 2648 }, { "epoch": 0.9, "learning_rate": 3.551984377119999e-06, "loss": 1.2582, "step": 2649 }, { "epoch": 0.9, "learning_rate": 3.5282307633716872e-06, "loss": 0.8067, "step": 2650 }, { "epoch": 0.9, "learning_rate": 3.5045539364092495e-06, "loss": 0.8314, "step": 2651 }, { "epoch": 0.9, "learning_rate": 3.4809539353546583e-06, "loss": 0.9678, "step": 2652 }, { "epoch": 0.9, "learning_rate": 3.4574307992029543e-06, "loss": 1.1083, "step": 2653 }, { "epoch": 0.9, "learning_rate": 3.4339845668221626e-06, "loss": 0.8629, "step": 2654 }, { "epoch": 0.9, "learning_rate": 3.410615276953244e-06, "loss": 1.0266, "step": 2655 }, { "epoch": 0.9, "learning_rate": 3.3873229682100214e-06, "loss": 1.0726, "step": 2656 }, { "epoch": 0.9, "learning_rate": 3.364107679079109e-06, "loss": 1.0759, "step": 2657 }, { "epoch": 0.9, "learning_rate": 3.340969447919873e-06, "loss": 0.6954, "step": 2658 }, { "epoch": 0.9, "learning_rate": 3.3179083129643518e-06, "loss": 1.0621, "step": 2659 }, { "epoch": 0.9, "learning_rate": 3.294924312317199e-06, "loss": 0.9002, "step": 2660 }, { "epoch": 0.9, "learning_rate": 3.272017483955614e-06, "loss": 0.8812, "step": 2661 }, { "epoch": 0.9, "learning_rate": 3.249187865729264e-06, "loss": 1.0831, "step": 2662 }, { "epoch": 0.9, "learning_rate": 3.2264354953602637e-06, "loss": 0.7349, "step": 2663 }, { "epoch": 0.9, "learning_rate": 3.2037604104430794e-06, "loss": 1.2471, "step": 2664 }, { "epoch": 0.91, "learning_rate": 3.181162648444486e-06, "loss": 0.991, "step": 2665 }, { "epoch": 0.91, "learning_rate": 3.158642246703469e-06, "loss": 1.3059, "step": 2666 }, { "epoch": 0.91, "learning_rate": 3.136199242431226e-06, "loss": 0.7361, "step": 2667 }, { "epoch": 0.91, "learning_rate": 3.1138336727110307e-06, "loss": 1.0371, "step": 2668 }, { "epoch": 0.91, "learning_rate": 3.0915455744982336e-06, "loss": 1.1099, "step": 2669 }, { "epoch": 0.91, "learning_rate": 3.0693349846201703e-06, "loss": 0.7829, "step": 2670 }, { "epoch": 0.91, "learning_rate": 3.0472019397761064e-06, "loss": 1.0217, "step": 2671 }, { "epoch": 0.91, "learning_rate": 3.0251464765371774e-06, "loss": 0.9484, "step": 2672 }, { "epoch": 0.91, "learning_rate": 3.003168631346326e-06, "loss": 1.1828, "step": 2673 }, { "epoch": 0.91, "learning_rate": 2.9812684405182533e-06, "loss": 1.0921, "step": 2674 }, { "epoch": 0.91, "learning_rate": 2.959445940239336e-06, "loss": 1.1109, "step": 2675 }, { "epoch": 0.91, "learning_rate": 2.9377011665675913e-06, "loss": 1.2052, "step": 2676 }, { "epoch": 0.91, "learning_rate": 2.9160341554325954e-06, "loss": 0.8899, "step": 2677 }, { "epoch": 0.91, "learning_rate": 2.89444494263546e-06, "loss": 0.8418, "step": 2678 }, { "epoch": 0.91, "learning_rate": 2.8729335638487066e-06, "loss": 1.1006, "step": 2679 }, { "epoch": 0.91, "learning_rate": 2.8515000546162797e-06, "loss": 0.8136, "step": 2680 }, { "epoch": 0.91, "learning_rate": 2.8301444503534557e-06, "loss": 0.6412, "step": 2681 }, { "epoch": 0.91, "learning_rate": 2.8088667863467756e-06, "loss": 1.1138, "step": 2682 }, { "epoch": 0.91, "learning_rate": 2.787667097754004e-06, "loss": 0.774, "step": 2683 }, { "epoch": 0.91, "learning_rate": 2.7665454196040664e-06, "loss": 1.026, "step": 2684 }, { "epoch": 0.91, "learning_rate": 2.745501786796972e-06, "loss": 0.8028, "step": 2685 }, { "epoch": 0.91, "learning_rate": 2.7245362341037915e-06, "loss": 0.8631, "step": 2686 }, { "epoch": 0.91, "learning_rate": 2.7036487961665858e-06, "loss": 1.1998, "step": 2687 }, { "epoch": 0.91, "learning_rate": 2.6828395074983195e-06, "loss": 1.0197, "step": 2688 }, { "epoch": 0.91, "learning_rate": 2.6621084024828656e-06, "loss": 0.9597, "step": 2689 }, { "epoch": 0.91, "learning_rate": 2.641455515374863e-06, "loss": 0.963, "step": 2690 }, { "epoch": 0.91, "learning_rate": 2.6208808802997475e-06, "loss": 0.962, "step": 2691 }, { "epoch": 0.91, "learning_rate": 2.6003845312536525e-06, "loss": 0.9406, "step": 2692 }, { "epoch": 0.91, "learning_rate": 2.579966502103337e-06, "loss": 0.7403, "step": 2693 }, { "epoch": 0.92, "learning_rate": 2.5596268265861643e-06, "loss": 1.0463, "step": 2694 }, { "epoch": 0.92, "learning_rate": 2.5393655383100402e-06, "loss": 0.9788, "step": 2695 }, { "epoch": 0.92, "learning_rate": 2.519182670753317e-06, "loss": 0.9062, "step": 2696 }, { "epoch": 0.92, "learning_rate": 2.4990782572647975e-06, "loss": 0.6495, "step": 2697 }, { "epoch": 0.92, "learning_rate": 2.479052331063658e-06, "loss": 1.094, "step": 2698 }, { "epoch": 0.92, "learning_rate": 2.4591049252393582e-06, "loss": 0.8611, "step": 2699 }, { "epoch": 0.92, "learning_rate": 2.4392360727516438e-06, "loss": 0.9759, "step": 2700 }, { "epoch": 0.92, "learning_rate": 2.419445806430459e-06, "loss": 0.9736, "step": 2701 }, { "epoch": 0.92, "learning_rate": 2.3997341589758693e-06, "loss": 1.0615, "step": 2702 }, { "epoch": 0.92, "learning_rate": 2.380101162958076e-06, "loss": 1.0451, "step": 2703 }, { "epoch": 0.92, "learning_rate": 2.3605468508172966e-06, "loss": 0.6444, "step": 2704 }, { "epoch": 0.92, "learning_rate": 2.341071254863747e-06, "loss": 0.974, "step": 2705 }, { "epoch": 0.92, "learning_rate": 2.3216744072775796e-06, "loss": 0.9495, "step": 2706 }, { "epoch": 0.92, "learning_rate": 2.302356340108819e-06, "loss": 0.7862, "step": 2707 }, { "epoch": 0.92, "learning_rate": 2.2831170852773197e-06, "loss": 0.9298, "step": 2708 }, { "epoch": 0.92, "learning_rate": 2.2639566745727205e-06, "loss": 0.9579, "step": 2709 }, { "epoch": 0.92, "learning_rate": 2.2448751396543787e-06, "loss": 0.9448, "step": 2710 }, { "epoch": 0.92, "learning_rate": 2.2258725120513247e-06, "loss": 1.1698, "step": 2711 }, { "epoch": 0.92, "learning_rate": 2.2069488231622083e-06, "loss": 0.9424, "step": 2712 }, { "epoch": 0.92, "learning_rate": 2.1881041042552342e-06, "loss": 1.164, "step": 2713 }, { "epoch": 0.92, "learning_rate": 2.1693383864681393e-06, "loss": 0.9561, "step": 2714 }, { "epoch": 0.92, "learning_rate": 2.150651700808115e-06, "loss": 0.9999, "step": 2715 }, { "epoch": 0.92, "learning_rate": 2.132044078151768e-06, "loss": 1.2861, "step": 2716 }, { "epoch": 0.92, "learning_rate": 2.113515549245071e-06, "loss": 1.0191, "step": 2717 }, { "epoch": 0.92, "learning_rate": 2.095066144703295e-06, "loss": 0.8744, "step": 2718 }, { "epoch": 0.92, "learning_rate": 2.0766958950109826e-06, "loss": 0.6824, "step": 2719 }, { "epoch": 0.92, "learning_rate": 2.058404830521887e-06, "loss": 1.2851, "step": 2720 }, { "epoch": 0.92, "learning_rate": 2.0401929814589104e-06, "loss": 1.1929, "step": 2721 }, { "epoch": 0.92, "learning_rate": 2.022060377914076e-06, "loss": 0.8254, "step": 2722 }, { "epoch": 0.92, "learning_rate": 2.004007049848461e-06, "loss": 0.8291, "step": 2723 }, { "epoch": 0.93, "learning_rate": 1.98603302709216e-06, "loss": 0.9353, "step": 2724 }, { "epoch": 0.93, "learning_rate": 1.9681383393442165e-06, "loss": 0.9542, "step": 2725 }, { "epoch": 0.93, "learning_rate": 1.950323016172595e-06, "loss": 0.5998, "step": 2726 }, { "epoch": 0.93, "learning_rate": 1.932587087014126e-06, "loss": 1.1864, "step": 2727 }, { "epoch": 0.93, "learning_rate": 1.9149305811744455e-06, "loss": 1.0758, "step": 2728 }, { "epoch": 0.93, "learning_rate": 1.8973535278279719e-06, "loss": 0.9041, "step": 2729 }, { "epoch": 0.93, "learning_rate": 1.8798559560178174e-06, "loss": 1.2222, "step": 2730 }, { "epoch": 0.93, "learning_rate": 1.862437894655783e-06, "loss": 1.0126, "step": 2731 }, { "epoch": 0.93, "learning_rate": 1.8450993725222854e-06, "loss": 1.1057, "step": 2732 }, { "epoch": 0.93, "learning_rate": 1.827840418266319e-06, "loss": 0.9417, "step": 2733 }, { "epoch": 0.93, "learning_rate": 1.8106610604054108e-06, "loss": 1.1397, "step": 2734 }, { "epoch": 0.93, "learning_rate": 1.7935613273255491e-06, "loss": 0.9585, "step": 2735 }, { "epoch": 0.93, "learning_rate": 1.7765412472811771e-06, "loss": 1.1297, "step": 2736 }, { "epoch": 0.93, "learning_rate": 1.7596008483951044e-06, "loss": 1.1608, "step": 2737 }, { "epoch": 0.93, "learning_rate": 1.7427401586585068e-06, "loss": 0.7325, "step": 2738 }, { "epoch": 0.93, "learning_rate": 1.7259592059308272e-06, "loss": 1.1777, "step": 2739 }, { "epoch": 0.93, "learning_rate": 1.7092580179397855e-06, "loss": 1.1683, "step": 2740 }, { "epoch": 0.93, "learning_rate": 1.6926366222812685e-06, "loss": 0.9686, "step": 2741 }, { "epoch": 0.93, "learning_rate": 1.6760950464193459e-06, "loss": 0.7897, "step": 2742 }, { "epoch": 0.93, "learning_rate": 1.6596333176861878e-06, "loss": 1.138, "step": 2743 }, { "epoch": 0.93, "learning_rate": 1.6432514632820362e-06, "loss": 1.0759, "step": 2744 }, { "epoch": 0.93, "learning_rate": 1.6269495102751553e-06, "loss": 1.0634, "step": 2745 }, { "epoch": 0.93, "learning_rate": 1.6107274856017762e-06, "loss": 1.1255, "step": 2746 }, { "epoch": 0.93, "learning_rate": 1.594585416066069e-06, "loss": 1.1417, "step": 2747 }, { "epoch": 0.93, "learning_rate": 1.5785233283400868e-06, "loss": 1.0153, "step": 2748 }, { "epoch": 0.93, "learning_rate": 1.5625412489637337e-06, "loss": 1.068, "step": 2749 }, { "epoch": 0.93, "learning_rate": 1.546639204344713e-06, "loss": 1.1061, "step": 2750 }, { "epoch": 0.93, "learning_rate": 1.5308172207584738e-06, "loss": 0.7879, "step": 2751 }, { "epoch": 0.93, "learning_rate": 1.5150753243481807e-06, "loss": 1.0129, "step": 2752 }, { "epoch": 0.94, "learning_rate": 1.4994135411246778e-06, "loss": 0.9927, "step": 2753 }, { "epoch": 0.94, "learning_rate": 1.48383189696642e-06, "loss": 1.1163, "step": 2754 }, { "epoch": 0.94, "learning_rate": 1.468330417619468e-06, "loss": 0.9742, "step": 2755 }, { "epoch": 0.94, "learning_rate": 1.4529091286973995e-06, "loss": 1.3103, "step": 2756 }, { "epoch": 0.94, "learning_rate": 1.4375680556813097e-06, "loss": 1.2538, "step": 2757 }, { "epoch": 0.94, "learning_rate": 1.4223072239197333e-06, "loss": 1.1108, "step": 2758 }, { "epoch": 0.94, "learning_rate": 1.407126658628638e-06, "loss": 0.8755, "step": 2759 }, { "epoch": 0.94, "learning_rate": 1.3920263848913484e-06, "loss": 1.1653, "step": 2760 }, { "epoch": 0.94, "learning_rate": 1.3770064276585281e-06, "loss": 1.191, "step": 2761 }, { "epoch": 0.94, "learning_rate": 1.3620668117481472e-06, "loss": 1.2128, "step": 2762 }, { "epoch": 0.94, "learning_rate": 1.3472075618453872e-06, "loss": 0.9504, "step": 2763 }, { "epoch": 0.94, "learning_rate": 1.332428702502675e-06, "loss": 0.9316, "step": 2764 }, { "epoch": 0.94, "learning_rate": 1.3177302581395823e-06, "loss": 0.9802, "step": 2765 }, { "epoch": 0.94, "learning_rate": 1.3031122530428264e-06, "loss": 1.3051, "step": 2766 }, { "epoch": 0.94, "learning_rate": 1.2885747113661917e-06, "loss": 0.8334, "step": 2767 }, { "epoch": 0.94, "learning_rate": 1.2741176571305358e-06, "loss": 0.9455, "step": 2768 }, { "epoch": 0.94, "learning_rate": 1.2597411142237004e-06, "loss": 1.1361, "step": 2769 }, { "epoch": 0.94, "learning_rate": 1.2454451064005057e-06, "loss": 1.341, "step": 2770 }, { "epoch": 0.94, "learning_rate": 1.231229657282701e-06, "loss": 1.3244, "step": 2771 }, { "epoch": 0.94, "learning_rate": 1.2170947903589358e-06, "loss": 0.9065, "step": 2772 }, { "epoch": 0.94, "learning_rate": 1.2030405289846892e-06, "loss": 1.0773, "step": 2773 }, { "epoch": 0.94, "learning_rate": 1.1890668963822793e-06, "loss": 1.0584, "step": 2774 }, { "epoch": 0.94, "learning_rate": 1.1751739156407649e-06, "loss": 0.8556, "step": 2775 }, { "epoch": 0.94, "learning_rate": 1.1613616097159774e-06, "loss": 0.9617, "step": 2776 }, { "epoch": 0.94, "learning_rate": 1.147630001430422e-06, "loss": 1.2048, "step": 2777 }, { "epoch": 0.94, "learning_rate": 1.133979113473277e-06, "loss": 1.075, "step": 2778 }, { "epoch": 0.94, "learning_rate": 1.1204089684003438e-06, "loss": 1.0409, "step": 2779 }, { "epoch": 0.94, "learning_rate": 1.1069195886339924e-06, "loss": 1.1069, "step": 2780 }, { "epoch": 0.94, "learning_rate": 1.093510996463165e-06, "loss": 1.1799, "step": 2781 }, { "epoch": 0.94, "learning_rate": 1.0801832140433066e-06, "loss": 1.0804, "step": 2782 }, { "epoch": 0.95, "learning_rate": 1.0669362633963287e-06, "loss": 0.9801, "step": 2783 }, { "epoch": 0.95, "learning_rate": 1.0537701664106002e-06, "loss": 0.9372, "step": 2784 }, { "epoch": 0.95, "learning_rate": 1.0406849448408806e-06, "loss": 1.1676, "step": 2785 }, { "epoch": 0.95, "learning_rate": 1.0276806203082966e-06, "loss": 0.7782, "step": 2786 }, { "epoch": 0.95, "learning_rate": 1.0147572143002992e-06, "loss": 0.8321, "step": 2787 }, { "epoch": 0.95, "learning_rate": 1.0019147481706625e-06, "loss": 1.003, "step": 2788 }, { "epoch": 0.95, "learning_rate": 9.891532431393902e-07, "loss": 1.0933, "step": 2789 }, { "epoch": 0.95, "learning_rate": 9.76472720292726e-07, "loss": 1.2431, "step": 2790 }, { "epoch": 0.95, "learning_rate": 9.638732005830985e-07, "loss": 1.1105, "step": 2791 }, { "epoch": 0.95, "learning_rate": 9.51354704829105e-07, "loss": 0.8305, "step": 2792 }, { "epoch": 0.95, "learning_rate": 9.389172537154545e-07, "loss": 1.0652, "step": 2793 }, { "epoch": 0.95, "learning_rate": 9.26560867792936e-07, "loss": 0.9783, "step": 2794 }, { "epoch": 0.95, "learning_rate": 9.142855674784012e-07, "loss": 1.1734, "step": 2795 }, { "epoch": 0.95, "learning_rate": 9.020913730547309e-07, "loss": 0.7998, "step": 2796 }, { "epoch": 0.95, "learning_rate": 8.899783046707688e-07, "loss": 0.7071, "step": 2797 }, { "epoch": 0.95, "learning_rate": 8.77946382341327e-07, "loss": 1.2971, "step": 2798 }, { "epoch": 0.95, "learning_rate": 8.659956259471358e-07, "loss": 1.3021, "step": 2799 }, { "epoch": 0.95, "learning_rate": 8.541260552348107e-07, "loss": 0.9733, "step": 2800 }, { "epoch": 0.95, "learning_rate": 8.423376898168245e-07, "loss": 1.0403, "step": 2801 }, { "epoch": 0.95, "learning_rate": 8.306305491714683e-07, "loss": 1.2157, "step": 2802 }, { "epoch": 0.95, "learning_rate": 8.190046526428242e-07, "loss": 0.8581, "step": 2803 }, { "epoch": 0.95, "learning_rate": 8.074600194407256e-07, "loss": 1.1153, "step": 2804 }, { "epoch": 0.95, "learning_rate": 7.959966686407416e-07, "loss": 1.042, "step": 2805 }, { "epoch": 0.95, "learning_rate": 7.846146191841319e-07, "loss": 0.8708, "step": 2806 }, { "epoch": 0.95, "learning_rate": 7.733138898778247e-07, "loss": 0.9755, "step": 2807 }, { "epoch": 0.95, "learning_rate": 7.620944993943668e-07, "loss": 0.9075, "step": 2808 }, { "epoch": 0.95, "learning_rate": 7.509564662719238e-07, "loss": 1.0675, "step": 2809 }, { "epoch": 0.95, "learning_rate": 7.398998089142129e-07, "loss": 1.0429, "step": 2810 }, { "epoch": 0.95, "learning_rate": 7.289245455905091e-07, "loss": 1.2234, "step": 2811 }, { "epoch": 0.96, "learning_rate": 7.180306944355897e-07, "loss": 0.9043, "step": 2812 }, { "epoch": 0.96, "learning_rate": 7.072182734497057e-07, "loss": 1.103, "step": 2813 }, { "epoch": 0.96, "learning_rate": 6.964873004985717e-07, "loss": 1.3733, "step": 2814 }, { "epoch": 0.96, "learning_rate": 6.858377933133153e-07, "loss": 1.3427, "step": 2815 }, { "epoch": 0.96, "learning_rate": 6.752697694904553e-07, "loss": 0.8101, "step": 2816 }, { "epoch": 0.96, "learning_rate": 6.647832464918735e-07, "loss": 1.0565, "step": 2817 }, { "epoch": 0.96, "learning_rate": 6.54378241644793e-07, "loss": 1.3004, "step": 2818 }, { "epoch": 0.96, "learning_rate": 6.440547721417278e-07, "loss": 1.0087, "step": 2819 }, { "epoch": 0.96, "learning_rate": 6.338128550404721e-07, "loss": 1.1053, "step": 2820 }, { "epoch": 0.96, "learning_rate": 6.236525072640831e-07, "loss": 1.0848, "step": 2821 }, { "epoch": 0.96, "learning_rate": 6.135737456008206e-07, "loss": 0.8995, "step": 2822 }, { "epoch": 0.96, "learning_rate": 6.035765867041409e-07, "loss": 1.051, "step": 2823 }, { "epoch": 0.96, "learning_rate": 5.936610470926751e-07, "loss": 0.7778, "step": 2824 }, { "epoch": 0.96, "learning_rate": 5.83827143150184e-07, "loss": 0.9916, "step": 2825 }, { "epoch": 0.96, "learning_rate": 5.740748911255366e-07, "loss": 1.1247, "step": 2826 }, { "epoch": 0.96, "learning_rate": 5.644043071326932e-07, "loss": 1.0907, "step": 2827 }, { "epoch": 0.96, "learning_rate": 5.548154071506661e-07, "loss": 0.9537, "step": 2828 }, { "epoch": 0.96, "learning_rate": 5.453082070235038e-07, "loss": 0.9658, "step": 2829 }, { "epoch": 0.96, "learning_rate": 5.35882722460257e-07, "loss": 0.8362, "step": 2830 }, { "epoch": 0.96, "learning_rate": 5.265389690349509e-07, "loss": 1.2667, "step": 2831 }, { "epoch": 0.96, "learning_rate": 5.172769621865636e-07, "loss": 0.7819, "step": 2832 }, { "epoch": 0.96, "learning_rate": 5.080967172190087e-07, "loss": 0.8992, "step": 2833 }, { "epoch": 0.96, "learning_rate": 4.989982493010969e-07, "loss": 0.9941, "step": 2834 }, { "epoch": 0.96, "learning_rate": 4.899815734665192e-07, "loss": 1.0914, "step": 2835 }, { "epoch": 0.96, "learning_rate": 4.810467046138134e-07, "loss": 1.0846, "step": 2836 }, { "epoch": 0.96, "learning_rate": 4.721936575063479e-07, "loss": 0.823, "step": 2837 }, { "epoch": 0.96, "learning_rate": 4.6342244677229916e-07, "loss": 0.8687, "step": 2838 }, { "epoch": 0.96, "learning_rate": 4.547330869046129e-07, "loss": 0.8091, "step": 2839 }, { "epoch": 0.96, "learning_rate": 4.461255922609986e-07, "loss": 0.9388, "step": 2840 }, { "epoch": 0.96, "learning_rate": 4.3759997706389055e-07, "loss": 1.0827, "step": 2841 }, { "epoch": 0.97, "learning_rate": 4.291562554004369e-07, "loss": 1.0725, "step": 2842 }, { "epoch": 0.97, "learning_rate": 4.207944412224718e-07, "loss": 1.2882, "step": 2843 }, { "epoch": 0.97, "learning_rate": 4.1251454834648207e-07, "loss": 0.9097, "step": 2844 }, { "epoch": 0.97, "learning_rate": 4.043165904536017e-07, "loss": 0.9427, "step": 2845 }, { "epoch": 0.97, "learning_rate": 3.9620058108957856e-07, "loss": 0.9268, "step": 2846 }, { "epoch": 0.97, "learning_rate": 3.881665336647522e-07, "loss": 1.2267, "step": 2847 }, { "epoch": 0.97, "learning_rate": 3.802144614540315e-07, "loss": 1.2473, "step": 2848 }, { "epoch": 0.97, "learning_rate": 3.723443775968838e-07, "loss": 1.0649, "step": 2849 }, { "epoch": 0.97, "learning_rate": 3.6455629509730136e-07, "loss": 0.9586, "step": 2850 }, { "epoch": 0.97, "learning_rate": 3.568502268237739e-07, "loss": 1.1556, "step": 2851 }, { "epoch": 0.97, "learning_rate": 3.4922618550929374e-07, "loss": 0.8354, "step": 2852 }, { "epoch": 0.97, "learning_rate": 3.416841837512952e-07, "loss": 0.9368, "step": 2853 }, { "epoch": 0.97, "learning_rate": 3.3422423401167634e-07, "loss": 0.9442, "step": 2854 }, { "epoch": 0.97, "learning_rate": 3.2684634861674944e-07, "loss": 1.0168, "step": 2855 }, { "epoch": 0.97, "learning_rate": 3.19550539757224e-07, "loss": 0.962, "step": 2856 }, { "epoch": 0.97, "learning_rate": 3.1233681948820703e-07, "loss": 0.9553, "step": 2857 }, { "epoch": 0.97, "learning_rate": 3.052051997291527e-07, "loss": 1.184, "step": 2858 }, { "epoch": 0.97, "learning_rate": 2.9815569226386284e-07, "loss": 0.969, "step": 2859 }, { "epoch": 0.97, "learning_rate": 2.9118830874046986e-07, "loss": 0.8832, "step": 2860 }, { "epoch": 0.97, "learning_rate": 2.8430306067139257e-07, "loss": 1.0043, "step": 2861 }, { "epoch": 0.97, "learning_rate": 2.774999594333527e-07, "loss": 1.2031, "step": 2862 }, { "epoch": 0.97, "learning_rate": 2.707790162673363e-07, "loss": 1.1518, "step": 2863 }, { "epoch": 0.97, "learning_rate": 2.641402422785599e-07, "loss": 1.0191, "step": 2864 }, { "epoch": 0.97, "learning_rate": 2.575836484364935e-07, "loss": 0.8117, "step": 2865 }, { "epoch": 0.97, "learning_rate": 2.511092455747932e-07, "loss": 0.9987, "step": 2866 }, { "epoch": 0.97, "learning_rate": 2.447170443913349e-07, "loss": 1.094, "step": 2867 }, { "epoch": 0.97, "learning_rate": 2.3840705544815323e-07, "loss": 1.0939, "step": 2868 }, { "epoch": 0.97, "learning_rate": 2.3217928917143584e-07, "loss": 0.7948, "step": 2869 }, { "epoch": 0.97, "learning_rate": 2.2603375585152909e-07, "loss": 1.0493, "step": 2870 }, { "epoch": 0.98, "learning_rate": 2.199704656428825e-07, "loss": 1.2953, "step": 2871 }, { "epoch": 0.98, "learning_rate": 2.1398942856407644e-07, "loss": 1.0591, "step": 2872 }, { "epoch": 0.98, "learning_rate": 2.080906544977612e-07, "loss": 1.1375, "step": 2873 }, { "epoch": 0.98, "learning_rate": 2.0227415319067355e-07, "loss": 0.9511, "step": 2874 }, { "epoch": 0.98, "learning_rate": 1.9653993425359784e-07, "loss": 0.849, "step": 2875 }, { "epoch": 0.98, "learning_rate": 1.9088800716137167e-07, "loss": 1.1227, "step": 2876 }, { "epoch": 0.98, "learning_rate": 1.8531838125285251e-07, "loss": 1.0131, "step": 2877 }, { "epoch": 0.98, "learning_rate": 1.798310657309177e-07, "loss": 0.7122, "step": 2878 }, { "epoch": 0.98, "learning_rate": 1.7442606966242004e-07, "loss": 0.8075, "step": 2879 }, { "epoch": 0.98, "learning_rate": 1.6910340197822117e-07, "loss": 1.2404, "step": 2880 }, { "epoch": 0.98, "learning_rate": 1.6386307147312485e-07, "loss": 0.7516, "step": 2881 }, { "epoch": 0.98, "learning_rate": 1.5870508680589924e-07, "loss": 1.1334, "step": 2882 }, { "epoch": 0.98, "learning_rate": 1.536294564992491e-07, "loss": 0.8242, "step": 2883 }, { "epoch": 0.98, "learning_rate": 1.486361889397936e-07, "loss": 1.1216, "step": 2884 }, { "epoch": 0.98, "learning_rate": 1.4372529237807742e-07, "loss": 1.2295, "step": 2885 }, { "epoch": 0.98, "learning_rate": 1.3889677492852083e-07, "loss": 0.9269, "step": 2886 }, { "epoch": 0.98, "learning_rate": 1.3415064456944736e-07, "loss": 0.7971, "step": 2887 }, { "epoch": 0.98, "learning_rate": 1.2948690914303397e-07, "loss": 0.9553, "step": 2888 }, { "epoch": 0.98, "learning_rate": 1.2490557635532197e-07, "loss": 0.8068, "step": 2889 }, { "epoch": 0.98, "learning_rate": 1.2040665377618943e-07, "loss": 0.8793, "step": 2890 }, { "epoch": 0.98, "learning_rate": 1.1599014883935112e-07, "loss": 0.6546, "step": 2891 }, { "epoch": 0.98, "learning_rate": 1.1165606884234181e-07, "loss": 1.0684, "step": 2892 }, { "epoch": 0.98, "learning_rate": 1.0740442094649972e-07, "loss": 1.3122, "step": 2893 }, { "epoch": 0.98, "learning_rate": 1.032352121769553e-07, "loss": 0.9805, "step": 2894 }, { "epoch": 0.98, "learning_rate": 9.91484494226258e-08, "loss": 1.0092, "step": 2895 }, { "epoch": 0.98, "learning_rate": 9.514413943619849e-08, "loss": 0.8867, "step": 2896 }, { "epoch": 0.98, "learning_rate": 9.12222888341252e-08, "loss": 1.0695, "step": 2897 }, { "epoch": 0.98, "learning_rate": 8.738290409660566e-08, "loss": 0.9363, "step": 2898 }, { "epoch": 0.98, "learning_rate": 8.362599156757633e-08, "loss": 0.8304, "step": 2899 }, { "epoch": 0.98, "learning_rate": 7.99515574546994e-08, "loss": 1.103, "step": 2900 }, { "epoch": 0.99, "learning_rate": 7.635960782936824e-08, "loss": 1.0456, "step": 2901 }, { "epoch": 0.99, "learning_rate": 7.285014862666862e-08, "loss": 1.1695, "step": 2902 }, { "epoch": 0.99, "learning_rate": 6.942318564538975e-08, "loss": 1.158, "step": 2903 }, { "epoch": 0.99, "learning_rate": 6.607872454801877e-08, "loss": 1.2326, "step": 2904 }, { "epoch": 0.99, "learning_rate": 6.281677086071303e-08, "loss": 1.2305, "step": 2905 }, { "epoch": 0.99, "learning_rate": 5.963732997329996e-08, "loss": 0.7424, "step": 2906 }, { "epoch": 0.99, "learning_rate": 5.654040713927722e-08, "loss": 1.1384, "step": 2907 }, { "epoch": 0.99, "learning_rate": 5.352600747577929e-08, "loss": 1.0194, "step": 2908 }, { "epoch": 0.99, "learning_rate": 5.059413596360529e-08, "loss": 0.7927, "step": 2909 }, { "epoch": 0.99, "learning_rate": 4.774479744717453e-08, "loss": 1.2247, "step": 2910 }, { "epoch": 0.99, "learning_rate": 4.4977996634543165e-08, "loss": 0.9784, "step": 2911 }, { "epoch": 0.99, "learning_rate": 4.2293738097376467e-08, "loss": 0.8128, "step": 2912 }, { "epoch": 0.99, "learning_rate": 3.969202627097102e-08, "loss": 0.8781, "step": 2913 }, { "epoch": 0.99, "learning_rate": 3.717286545421028e-08, "loss": 1.1076, "step": 2914 }, { "epoch": 0.99, "learning_rate": 3.473625980958683e-08, "loss": 1.0267, "step": 2915 }, { "epoch": 0.99, "learning_rate": 3.238221336318015e-08, "loss": 0.9949, "step": 2916 }, { "epoch": 0.99, "learning_rate": 3.011073000466213e-08, "loss": 0.9504, "step": 2917 }, { "epoch": 0.99, "learning_rate": 2.792181348726941e-08, "loss": 1.0256, "step": 2918 }, { "epoch": 0.99, "learning_rate": 2.581546742782548e-08, "loss": 1.1527, "step": 2919 }, { "epoch": 0.99, "learning_rate": 2.379169530670744e-08, "loss": 0.8505, "step": 2920 }, { "epoch": 0.99, "learning_rate": 2.185050046785708e-08, "loss": 0.8863, "step": 2921 }, { "epoch": 0.99, "learning_rate": 1.999188611878089e-08, "loss": 1.3617, "step": 2922 }, { "epoch": 0.99, "learning_rate": 1.821585533052228e-08, "loss": 1.0421, "step": 2923 }, { "epoch": 0.99, "learning_rate": 1.6522411037667164e-08, "loss": 1.129, "step": 2924 }, { "epoch": 0.99, "learning_rate": 1.4911556038360586e-08, "loss": 0.922, "step": 2925 }, { "epoch": 0.99, "learning_rate": 1.338329299425678e-08, "loss": 1.0938, "step": 2926 }, { "epoch": 0.99, "learning_rate": 1.1937624430563566e-08, "loss": 1.3024, "step": 2927 }, { "epoch": 0.99, "learning_rate": 1.0574552735997944e-08, "loss": 0.8813, "step": 2928 }, { "epoch": 0.99, "learning_rate": 9.294080162813857e-09, "loss": 1.0092, "step": 2929 }, { "epoch": 1.0, "learning_rate": 8.096208826768869e-09, "loss": 0.7946, "step": 2930 }, { "epoch": 1.0, "learning_rate": 6.980940707146389e-09, "loss": 1.1236, "step": 2931 }, { "epoch": 1.0, "learning_rate": 5.9482776467445485e-09, "loss": 1.0652, "step": 2932 }, { "epoch": 1.0, "learning_rate": 4.998221351859566e-09, "loss": 1.2393, "step": 2933 }, { "epoch": 1.0, "learning_rate": 4.13077339230794e-09, "loss": 1.0353, "step": 2934 }, { "epoch": 1.0, "learning_rate": 3.3459352013875957e-09, "loss": 0.7593, "step": 2935 }, { "epoch": 1.0, "learning_rate": 2.643708075922291e-09, "loss": 0.9176, "step": 2936 }, { "epoch": 1.0, "learning_rate": 2.024093176222763e-09, "loss": 1.0766, "step": 2937 }, { "epoch": 1.0, "learning_rate": 1.487091526097828e-09, "loss": 1.2918, "step": 2938 }, { "epoch": 1.0, "learning_rate": 1.0327040128543797e-09, "loss": 0.7919, "step": 2939 }, { "epoch": 1.0, "learning_rate": 6.609313872862899e-10, "loss": 0.9317, "step": 2940 }, { "epoch": 1.0, "learning_rate": 3.7177426368550927e-10, "loss": 1.1601, "step": 2941 }, { "epoch": 1.0, "learning_rate": 1.6523311984206757e-10, "loss": 1.132, "step": 2942 }, { "epoch": 1.0, "learning_rate": 4.1308297021869224e-11, "loss": 1.013, "step": 2943 }, { "epoch": 1.0, "learning_rate": 0.0, "loss": 0.7711, "step": 2944 }, { "epoch": 1.0, "step": 2944, "total_flos": 1.9924390016704315e+19, "train_loss": 1.0237662696644017, "train_runtime": 195431.7573, "train_samples_per_second": 0.06, "train_steps_per_second": 0.015 } ], "logging_steps": 1.0, "max_steps": 2944, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 100, "total_flos": 1.9924390016704315e+19, "train_batch_size": 1, "trial_name": null, "trial_params": null }