diff --git "a/great-13b/trainer_state.json" "b/great-13b/trainer_state.json" deleted file mode 100644--- "a/great-13b/trainer_state.json" +++ /dev/null @@ -1,19368 +0,0 @@ -{ - "best_metric": null, - "best_model_checkpoint": null, - "epoch": 0.9998448890957035, - "eval_steps": 500, - "global_step": 3223, - "is_hyper_param_search": false, - "is_local_process_zero": true, - "is_world_process_zero": true, - "log_history": [ - { - "epoch": 0.0, - "learning_rate": 1.0309278350515464e-06, - "loss": 1.7841, - "step": 1 - }, - { - "epoch": 0.0, - "learning_rate": 2.061855670103093e-06, - "loss": 1.5955, - "step": 2 - }, - { - "epoch": 0.0, - "learning_rate": 3.0927835051546395e-06, - "loss": 1.8377, - "step": 3 - }, - { - "epoch": 0.0, - "learning_rate": 4.123711340206186e-06, - "loss": 1.6467, - "step": 4 - }, - { - "epoch": 0.0, - "learning_rate": 5.154639175257732e-06, - "loss": 1.6818, - "step": 5 - }, - { - "epoch": 0.0, - "learning_rate": 6.185567010309279e-06, - "loss": 1.5233, - "step": 6 - }, - { - "epoch": 0.0, - "learning_rate": 7.216494845360824e-06, - "loss": 1.5204, - "step": 7 - }, - { - "epoch": 0.0, - "learning_rate": 8.247422680412371e-06, - "loss": 1.4938, - "step": 8 - }, - { - "epoch": 0.0, - "learning_rate": 9.278350515463918e-06, - "loss": 1.4, - "step": 9 - }, - { - "epoch": 0.0, - "learning_rate": 1.0309278350515464e-05, - "loss": 1.4213, - "step": 10 - }, - { - "epoch": 0.0, - "learning_rate": 1.134020618556701e-05, - "loss": 1.5469, - "step": 11 - }, - { - "epoch": 0.0, - "learning_rate": 1.2371134020618558e-05, - "loss": 1.4413, - "step": 12 - }, - { - "epoch": 0.0, - "learning_rate": 1.3402061855670103e-05, - "loss": 1.425, - "step": 13 - }, - { - "epoch": 0.0, - "learning_rate": 1.4432989690721649e-05, - "loss": 1.356, - "step": 14 - }, - { - "epoch": 0.0, - "learning_rate": 1.5463917525773197e-05, - "loss": 1.3763, - "step": 15 - }, - { - "epoch": 0.0, - "learning_rate": 1.6494845360824743e-05, - "loss": 1.4039, - "step": 16 - }, - { - "epoch": 0.01, - "learning_rate": 1.7525773195876288e-05, - "loss": 1.3372, - "step": 17 - }, - { - "epoch": 0.01, - "learning_rate": 1.8556701030927837e-05, - "loss": 1.2991, - "step": 18 - }, - { - "epoch": 0.01, - "learning_rate": 1.9587628865979382e-05, - "loss": 1.3417, - "step": 19 - }, - { - "epoch": 0.01, - "learning_rate": 2.0618556701030927e-05, - "loss": 1.2799, - "step": 20 - }, - { - "epoch": 0.01, - "learning_rate": 2.1649484536082476e-05, - "loss": 1.3102, - "step": 21 - }, - { - "epoch": 0.01, - "learning_rate": 2.268041237113402e-05, - "loss": 1.2534, - "step": 22 - }, - { - "epoch": 0.01, - "learning_rate": 2.3711340206185567e-05, - "loss": 1.4076, - "step": 23 - }, - { - "epoch": 0.01, - "learning_rate": 2.4742268041237116e-05, - "loss": 1.3091, - "step": 24 - }, - { - "epoch": 0.01, - "learning_rate": 2.5773195876288658e-05, - "loss": 1.3371, - "step": 25 - }, - { - "epoch": 0.01, - "learning_rate": 2.6804123711340206e-05, - "loss": 1.2364, - "step": 26 - }, - { - "epoch": 0.01, - "learning_rate": 2.7835051546391755e-05, - "loss": 1.2294, - "step": 27 - }, - { - "epoch": 0.01, - "learning_rate": 2.8865979381443297e-05, - "loss": 1.3519, - "step": 28 - }, - { - "epoch": 0.01, - "learning_rate": 2.9896907216494846e-05, - "loss": 1.3396, - "step": 29 - }, - { - "epoch": 0.01, - "learning_rate": 3.0927835051546395e-05, - "loss": 1.1992, - "step": 30 - }, - { - "epoch": 0.01, - "learning_rate": 3.1958762886597937e-05, - "loss": 1.239, - "step": 31 - }, - { - "epoch": 0.01, - "learning_rate": 3.2989690721649485e-05, - "loss": 1.2627, - "step": 32 - }, - { - "epoch": 0.01, - "learning_rate": 3.4020618556701034e-05, - "loss": 1.2391, - "step": 33 - }, - { - "epoch": 0.01, - "learning_rate": 3.5051546391752576e-05, - "loss": 1.2345, - "step": 34 - }, - { - "epoch": 0.01, - "learning_rate": 3.6082474226804125e-05, - "loss": 1.1584, - "step": 35 - }, - { - "epoch": 0.01, - "learning_rate": 3.7113402061855674e-05, - "loss": 1.2505, - "step": 36 - }, - { - "epoch": 0.01, - "learning_rate": 3.8144329896907216e-05, - "loss": 1.2455, - "step": 37 - }, - { - "epoch": 0.01, - "learning_rate": 3.9175257731958764e-05, - "loss": 1.2096, - "step": 38 - }, - { - "epoch": 0.01, - "learning_rate": 4.020618556701031e-05, - "loss": 1.2452, - "step": 39 - }, - { - "epoch": 0.01, - "learning_rate": 4.1237113402061855e-05, - "loss": 1.2914, - "step": 40 - }, - { - "epoch": 0.01, - "learning_rate": 4.2268041237113404e-05, - "loss": 1.255, - "step": 41 - }, - { - "epoch": 0.01, - "learning_rate": 4.329896907216495e-05, - "loss": 1.1605, - "step": 42 - }, - { - "epoch": 0.01, - "learning_rate": 4.4329896907216494e-05, - "loss": 1.2037, - "step": 43 - }, - { - "epoch": 0.01, - "learning_rate": 4.536082474226804e-05, - "loss": 1.2675, - "step": 44 - }, - { - "epoch": 0.01, - "learning_rate": 4.639175257731959e-05, - "loss": 1.2548, - "step": 45 - }, - { - "epoch": 0.01, - "learning_rate": 4.7422680412371134e-05, - "loss": 1.1484, - "step": 46 - }, - { - "epoch": 0.01, - "learning_rate": 4.845360824742268e-05, - "loss": 1.1658, - "step": 47 - }, - { - "epoch": 0.01, - "learning_rate": 4.948453608247423e-05, - "loss": 1.2771, - "step": 48 - }, - { - "epoch": 0.02, - "learning_rate": 5.051546391752577e-05, - "loss": 1.1713, - "step": 49 - }, - { - "epoch": 0.02, - "learning_rate": 5.1546391752577315e-05, - "loss": 1.2206, - "step": 50 - }, - { - "epoch": 0.02, - "learning_rate": 5.257731958762887e-05, - "loss": 1.1849, - "step": 51 - }, - { - "epoch": 0.02, - "learning_rate": 5.360824742268041e-05, - "loss": 1.351, - "step": 52 - }, - { - "epoch": 0.02, - "learning_rate": 5.4639175257731955e-05, - "loss": 1.3074, - "step": 53 - }, - { - "epoch": 0.02, - "learning_rate": 5.567010309278351e-05, - "loss": 1.2391, - "step": 54 - }, - { - "epoch": 0.02, - "learning_rate": 5.670103092783505e-05, - "loss": 1.1326, - "step": 55 - }, - { - "epoch": 0.02, - "learning_rate": 5.7731958762886594e-05, - "loss": 1.2005, - "step": 56 - }, - { - "epoch": 0.02, - "learning_rate": 5.876288659793815e-05, - "loss": 1.234, - "step": 57 - }, - { - "epoch": 0.02, - "learning_rate": 5.979381443298969e-05, - "loss": 1.1695, - "step": 58 - }, - { - "epoch": 0.02, - "learning_rate": 6.0824742268041234e-05, - "loss": 1.1958, - "step": 59 - }, - { - "epoch": 0.02, - "learning_rate": 6.185567010309279e-05, - "loss": 1.2024, - "step": 60 - }, - { - "epoch": 0.02, - "learning_rate": 6.288659793814433e-05, - "loss": 1.2726, - "step": 61 - }, - { - "epoch": 0.02, - "learning_rate": 6.391752577319587e-05, - "loss": 1.133, - "step": 62 - }, - { - "epoch": 0.02, - "learning_rate": 6.494845360824743e-05, - "loss": 1.1654, - "step": 63 - }, - { - "epoch": 0.02, - "learning_rate": 6.597938144329897e-05, - "loss": 1.2973, - "step": 64 - }, - { - "epoch": 0.02, - "learning_rate": 6.701030927835051e-05, - "loss": 1.1685, - "step": 65 - }, - { - "epoch": 0.02, - "learning_rate": 6.804123711340207e-05, - "loss": 1.1676, - "step": 66 - }, - { - "epoch": 0.02, - "learning_rate": 6.907216494845361e-05, - "loss": 1.2184, - "step": 67 - }, - { - "epoch": 0.02, - "learning_rate": 7.010309278350515e-05, - "loss": 1.0957, - "step": 68 - }, - { - "epoch": 0.02, - "learning_rate": 7.113402061855671e-05, - "loss": 1.1766, - "step": 69 - }, - { - "epoch": 0.02, - "learning_rate": 7.216494845360825e-05, - "loss": 1.268, - "step": 70 - }, - { - "epoch": 0.02, - "learning_rate": 7.319587628865979e-05, - "loss": 1.1881, - "step": 71 - }, - { - "epoch": 0.02, - "learning_rate": 7.422680412371135e-05, - "loss": 1.2662, - "step": 72 - }, - { - "epoch": 0.02, - "learning_rate": 7.525773195876289e-05, - "loss": 1.1308, - "step": 73 - }, - { - "epoch": 0.02, - "learning_rate": 7.628865979381443e-05, - "loss": 1.1786, - "step": 74 - }, - { - "epoch": 0.02, - "learning_rate": 7.731958762886599e-05, - "loss": 1.1462, - "step": 75 - }, - { - "epoch": 0.02, - "learning_rate": 7.835051546391753e-05, - "loss": 1.2382, - "step": 76 - }, - { - "epoch": 0.02, - "learning_rate": 7.938144329896907e-05, - "loss": 1.2569, - "step": 77 - }, - { - "epoch": 0.02, - "learning_rate": 8.041237113402063e-05, - "loss": 1.1747, - "step": 78 - }, - { - "epoch": 0.02, - "learning_rate": 8.144329896907217e-05, - "loss": 1.2007, - "step": 79 - }, - { - "epoch": 0.02, - "learning_rate": 8.247422680412371e-05, - "loss": 1.1599, - "step": 80 - }, - { - "epoch": 0.03, - "learning_rate": 8.350515463917527e-05, - "loss": 1.2151, - "step": 81 - }, - { - "epoch": 0.03, - "learning_rate": 8.453608247422681e-05, - "loss": 1.1806, - "step": 82 - }, - { - "epoch": 0.03, - "learning_rate": 8.556701030927835e-05, - "loss": 1.1954, - "step": 83 - }, - { - "epoch": 0.03, - "learning_rate": 8.65979381443299e-05, - "loss": 1.1982, - "step": 84 - }, - { - "epoch": 0.03, - "learning_rate": 8.762886597938145e-05, - "loss": 1.1911, - "step": 85 - }, - { - "epoch": 0.03, - "learning_rate": 8.865979381443299e-05, - "loss": 1.1215, - "step": 86 - }, - { - "epoch": 0.03, - "learning_rate": 8.969072164948454e-05, - "loss": 1.1614, - "step": 87 - }, - { - "epoch": 0.03, - "learning_rate": 9.072164948453609e-05, - "loss": 1.1825, - "step": 88 - }, - { - "epoch": 0.03, - "learning_rate": 9.175257731958763e-05, - "loss": 1.167, - "step": 89 - }, - { - "epoch": 0.03, - "learning_rate": 9.278350515463918e-05, - "loss": 1.2623, - "step": 90 - }, - { - "epoch": 0.03, - "learning_rate": 9.381443298969073e-05, - "loss": 1.148, - "step": 91 - }, - { - "epoch": 0.03, - "learning_rate": 9.484536082474227e-05, - "loss": 1.245, - "step": 92 - }, - { - "epoch": 0.03, - "learning_rate": 9.587628865979382e-05, - "loss": 1.1938, - "step": 93 - }, - { - "epoch": 0.03, - "learning_rate": 9.690721649484537e-05, - "loss": 1.1867, - "step": 94 - }, - { - "epoch": 0.03, - "learning_rate": 9.793814432989691e-05, - "loss": 1.2122, - "step": 95 - }, - { - "epoch": 0.03, - "learning_rate": 9.896907216494846e-05, - "loss": 1.2483, - "step": 96 - }, - { - "epoch": 0.03, - "learning_rate": 0.0001, - "loss": 1.2468, - "step": 97 - }, - { - "epoch": 0.03, - "learning_rate": 9.999997474997746e-05, - "loss": 1.2307, - "step": 98 - }, - { - "epoch": 0.03, - "learning_rate": 9.999989899993534e-05, - "loss": 1.1429, - "step": 99 - }, - { - "epoch": 0.03, - "learning_rate": 9.999977274995016e-05, - "loss": 1.2376, - "step": 100 - }, - { - "epoch": 0.03, - "learning_rate": 9.999959600014941e-05, - "loss": 1.1831, - "step": 101 - }, - { - "epoch": 0.03, - "learning_rate": 9.999936875071164e-05, - "loss": 1.1208, - "step": 102 - }, - { - "epoch": 0.03, - "learning_rate": 9.999909100186632e-05, - "loss": 1.2097, - "step": 103 - }, - { - "epoch": 0.03, - "learning_rate": 9.999876275389402e-05, - "loss": 1.2815, - "step": 104 - }, - { - "epoch": 0.03, - "learning_rate": 9.999838400712627e-05, - "loss": 1.2118, - "step": 105 - }, - { - "epoch": 0.03, - "learning_rate": 9.99979547619456e-05, - "loss": 1.1507, - "step": 106 - }, - { - "epoch": 0.03, - "learning_rate": 9.999747501878551e-05, - "loss": 1.2095, - "step": 107 - }, - { - "epoch": 0.03, - "learning_rate": 9.99969447781306e-05, - "loss": 1.1837, - "step": 108 - }, - { - "epoch": 0.03, - "learning_rate": 9.999636404051638e-05, - "loss": 1.2061, - "step": 109 - }, - { - "epoch": 0.03, - "learning_rate": 9.999573280652941e-05, - "loss": 1.2932, - "step": 110 - }, - { - "epoch": 0.03, - "learning_rate": 9.99950510768072e-05, - "loss": 1.1962, - "step": 111 - }, - { - "epoch": 0.03, - "learning_rate": 9.999431885203836e-05, - "loss": 1.1821, - "step": 112 - }, - { - "epoch": 0.04, - "learning_rate": 9.999353613296238e-05, - "loss": 1.1777, - "step": 113 - }, - { - "epoch": 0.04, - "learning_rate": 9.999270292036985e-05, - "loss": 1.1827, - "step": 114 - }, - { - "epoch": 0.04, - "learning_rate": 9.999181921510229e-05, - "loss": 1.187, - "step": 115 - }, - { - "epoch": 0.04, - "learning_rate": 9.999088501805225e-05, - "loss": 1.1777, - "step": 116 - }, - { - "epoch": 0.04, - "learning_rate": 9.998990033016327e-05, - "loss": 1.2371, - "step": 117 - }, - { - "epoch": 0.04, - "learning_rate": 9.998886515242987e-05, - "loss": 1.2704, - "step": 118 - }, - { - "epoch": 0.04, - "learning_rate": 9.998777948589761e-05, - "loss": 1.1391, - "step": 119 - }, - { - "epoch": 0.04, - "learning_rate": 9.998664333166301e-05, - "loss": 1.1448, - "step": 120 - }, - { - "epoch": 0.04, - "learning_rate": 9.998545669087355e-05, - "loss": 1.2014, - "step": 121 - }, - { - "epoch": 0.04, - "learning_rate": 9.998421956472779e-05, - "loss": 1.1768, - "step": 122 - }, - { - "epoch": 0.04, - "learning_rate": 9.998293195447519e-05, - "loss": 1.2545, - "step": 123 - }, - { - "epoch": 0.04, - "learning_rate": 9.998159386141625e-05, - "loss": 1.1089, - "step": 124 - }, - { - "epoch": 0.04, - "learning_rate": 9.998020528690246e-05, - "loss": 1.2557, - "step": 125 - }, - { - "epoch": 0.04, - "learning_rate": 9.997876623233626e-05, - "loss": 1.358, - "step": 126 - }, - { - "epoch": 0.04, - "learning_rate": 9.99772766991711e-05, - "loss": 1.1192, - "step": 127 - }, - { - "epoch": 0.04, - "learning_rate": 9.997573668891142e-05, - "loss": 1.1718, - "step": 128 - }, - { - "epoch": 0.04, - "learning_rate": 9.997414620311262e-05, - "loss": 1.1821, - "step": 129 - }, - { - "epoch": 0.04, - "learning_rate": 9.997250524338109e-05, - "loss": 1.1613, - "step": 130 - }, - { - "epoch": 0.04, - "learning_rate": 9.997081381137423e-05, - "loss": 1.2994, - "step": 131 - }, - { - "epoch": 0.04, - "learning_rate": 9.996907190880036e-05, - "loss": 1.0991, - "step": 132 - }, - { - "epoch": 0.04, - "learning_rate": 9.99672795374188e-05, - "loss": 1.2455, - "step": 133 - }, - { - "epoch": 0.04, - "learning_rate": 9.996543669903985e-05, - "loss": 1.2017, - "step": 134 - }, - { - "epoch": 0.04, - "learning_rate": 9.99635433955248e-05, - "loss": 1.2036, - "step": 135 - }, - { - "epoch": 0.04, - "learning_rate": 9.996159962878587e-05, - "loss": 1.1263, - "step": 136 - }, - { - "epoch": 0.04, - "learning_rate": 9.995960540078627e-05, - "loss": 1.2489, - "step": 137 - }, - { - "epoch": 0.04, - "learning_rate": 9.995756071354017e-05, - "loss": 1.2832, - "step": 138 - }, - { - "epoch": 0.04, - "learning_rate": 9.995546556911271e-05, - "loss": 1.2109, - "step": 139 - }, - { - "epoch": 0.04, - "learning_rate": 9.995331996961998e-05, - "loss": 1.1029, - "step": 140 - }, - { - "epoch": 0.04, - "learning_rate": 9.995112391722905e-05, - "loss": 1.2725, - "step": 141 - }, - { - "epoch": 0.04, - "learning_rate": 9.994887741415792e-05, - "loss": 1.227, - "step": 142 - }, - { - "epoch": 0.04, - "learning_rate": 9.994658046267556e-05, - "loss": 1.2024, - "step": 143 - }, - { - "epoch": 0.04, - "learning_rate": 9.994423306510192e-05, - "loss": 1.1734, - "step": 144 - }, - { - "epoch": 0.04, - "learning_rate": 9.994183522380784e-05, - "loss": 1.1743, - "step": 145 - }, - { - "epoch": 0.05, - "learning_rate": 9.993938694121515e-05, - "loss": 1.1856, - "step": 146 - }, - { - "epoch": 0.05, - "learning_rate": 9.993688821979664e-05, - "loss": 1.2957, - "step": 147 - }, - { - "epoch": 0.05, - "learning_rate": 9.993433906207599e-05, - "loss": 1.1469, - "step": 148 - }, - { - "epoch": 0.05, - "learning_rate": 9.993173947062788e-05, - "loss": 1.2544, - "step": 149 - }, - { - "epoch": 0.05, - "learning_rate": 9.992908944807789e-05, - "loss": 1.1464, - "step": 150 - }, - { - "epoch": 0.05, - "learning_rate": 9.992638899710252e-05, - "loss": 1.1222, - "step": 151 - }, - { - "epoch": 0.05, - "learning_rate": 9.992363812042926e-05, - "loss": 1.4001, - "step": 152 - }, - { - "epoch": 0.05, - "learning_rate": 9.992083682083648e-05, - "loss": 1.0865, - "step": 153 - }, - { - "epoch": 0.05, - "learning_rate": 9.991798510115351e-05, - "loss": 1.1891, - "step": 154 - }, - { - "epoch": 0.05, - "learning_rate": 9.991508296426058e-05, - "loss": 1.1551, - "step": 155 - }, - { - "epoch": 0.05, - "learning_rate": 9.991213041308884e-05, - "loss": 1.2203, - "step": 156 - }, - { - "epoch": 0.05, - "learning_rate": 9.99091274506204e-05, - "loss": 1.1705, - "step": 157 - }, - { - "epoch": 0.05, - "learning_rate": 9.990607407988824e-05, - "loss": 1.2573, - "step": 158 - }, - { - "epoch": 0.05, - "learning_rate": 9.990297030397625e-05, - "loss": 1.2933, - "step": 159 - }, - { - "epoch": 0.05, - "learning_rate": 9.989981612601926e-05, - "loss": 1.1376, - "step": 160 - }, - { - "epoch": 0.05, - "learning_rate": 9.9896611549203e-05, - "loss": 1.0865, - "step": 161 - }, - { - "epoch": 0.05, - "learning_rate": 9.989335657676409e-05, - "loss": 1.2149, - "step": 162 - }, - { - "epoch": 0.05, - "learning_rate": 9.989005121199005e-05, - "loss": 1.348, - "step": 163 - }, - { - "epoch": 0.05, - "learning_rate": 9.988669545821929e-05, - "loss": 1.1311, - "step": 164 - }, - { - "epoch": 0.05, - "learning_rate": 9.988328931884116e-05, - "loss": 1.2536, - "step": 165 - }, - { - "epoch": 0.05, - "learning_rate": 9.987983279729583e-05, - "loss": 1.1724, - "step": 166 - }, - { - "epoch": 0.05, - "learning_rate": 9.987632589707441e-05, - "loss": 1.2652, - "step": 167 - }, - { - "epoch": 0.05, - "learning_rate": 9.987276862171887e-05, - "loss": 1.0368, - "step": 168 - }, - { - "epoch": 0.05, - "learning_rate": 9.986916097482204e-05, - "loss": 1.2181, - "step": 169 - }, - { - "epoch": 0.05, - "learning_rate": 9.986550296002766e-05, - "loss": 1.2733, - "step": 170 - }, - { - "epoch": 0.05, - "learning_rate": 9.986179458103035e-05, - "loss": 1.1719, - "step": 171 - }, - { - "epoch": 0.05, - "learning_rate": 9.985803584157555e-05, - "loss": 1.2475, - "step": 172 - }, - { - "epoch": 0.05, - "learning_rate": 9.985422674545959e-05, - "loss": 1.1334, - "step": 173 - }, - { - "epoch": 0.05, - "learning_rate": 9.985036729652969e-05, - "loss": 1.2472, - "step": 174 - }, - { - "epoch": 0.05, - "learning_rate": 9.984645749868385e-05, - "loss": 1.2485, - "step": 175 - }, - { - "epoch": 0.05, - "learning_rate": 9.984249735587102e-05, - "loss": 1.1692, - "step": 176 - }, - { - "epoch": 0.05, - "learning_rate": 9.98384868720909e-05, - "loss": 1.2017, - "step": 177 - }, - { - "epoch": 0.06, - "learning_rate": 9.983442605139411e-05, - "loss": 1.2792, - "step": 178 - }, - { - "epoch": 0.06, - "learning_rate": 9.983031489788209e-05, - "loss": 1.0819, - "step": 179 - }, - { - "epoch": 0.06, - "learning_rate": 9.982615341570708e-05, - "loss": 1.2106, - "step": 180 - }, - { - "epoch": 0.06, - "learning_rate": 9.982194160907219e-05, - "loss": 1.2632, - "step": 181 - }, - { - "epoch": 0.06, - "learning_rate": 9.981767948223137e-05, - "loss": 1.1568, - "step": 182 - }, - { - "epoch": 0.06, - "learning_rate": 9.981336703948935e-05, - "loss": 1.2105, - "step": 183 - }, - { - "epoch": 0.06, - "learning_rate": 9.980900428520172e-05, - "loss": 1.1674, - "step": 184 - }, - { - "epoch": 0.06, - "learning_rate": 9.980459122377483e-05, - "loss": 1.2238, - "step": 185 - }, - { - "epoch": 0.06, - "learning_rate": 9.980012785966593e-05, - "loss": 1.0386, - "step": 186 - }, - { - "epoch": 0.06, - "learning_rate": 9.979561419738296e-05, - "loss": 1.106, - "step": 187 - }, - { - "epoch": 0.06, - "learning_rate": 9.979105024148478e-05, - "loss": 1.3142, - "step": 188 - }, - { - "epoch": 0.06, - "learning_rate": 9.978643599658096e-05, - "loss": 1.0993, - "step": 189 - }, - { - "epoch": 0.06, - "learning_rate": 9.978177146733187e-05, - "loss": 1.1615, - "step": 190 - }, - { - "epoch": 0.06, - "learning_rate": 9.977705665844874e-05, - "loss": 1.2837, - "step": 191 - }, - { - "epoch": 0.06, - "learning_rate": 9.97722915746935e-05, - "loss": 1.168, - "step": 192 - }, - { - "epoch": 0.06, - "learning_rate": 9.97674762208789e-05, - "loss": 1.1494, - "step": 193 - }, - { - "epoch": 0.06, - "learning_rate": 9.976261060186845e-05, - "loss": 1.2097, - "step": 194 - }, - { - "epoch": 0.06, - "learning_rate": 9.975769472257641e-05, - "loss": 1.2527, - "step": 195 - }, - { - "epoch": 0.06, - "learning_rate": 9.975272858796785e-05, - "loss": 1.3102, - "step": 196 - }, - { - "epoch": 0.06, - "learning_rate": 9.974771220305854e-05, - "loss": 1.1494, - "step": 197 - }, - { - "epoch": 0.06, - "learning_rate": 9.974264557291507e-05, - "loss": 1.2007, - "step": 198 - }, - { - "epoch": 0.06, - "learning_rate": 9.973752870265472e-05, - "loss": 1.2887, - "step": 199 - }, - { - "epoch": 0.06, - "learning_rate": 9.973236159744555e-05, - "loss": 1.1073, - "step": 200 - }, - { - "epoch": 0.06, - "learning_rate": 9.97271442625063e-05, - "loss": 1.2055, - "step": 201 - }, - { - "epoch": 0.06, - "learning_rate": 9.972187670310653e-05, - "loss": 1.1843, - "step": 202 - }, - { - "epoch": 0.06, - "learning_rate": 9.971655892456645e-05, - "loss": 1.1796, - "step": 203 - }, - { - "epoch": 0.06, - "learning_rate": 9.971119093225702e-05, - "loss": 1.1497, - "step": 204 - }, - { - "epoch": 0.06, - "learning_rate": 9.970577273159996e-05, - "loss": 1.1955, - "step": 205 - }, - { - "epoch": 0.06, - "learning_rate": 9.970030432806761e-05, - "loss": 1.1847, - "step": 206 - }, - { - "epoch": 0.06, - "learning_rate": 9.969478572718308e-05, - "loss": 1.2097, - "step": 207 - }, - { - "epoch": 0.06, - "learning_rate": 9.968921693452015e-05, - "loss": 1.1622, - "step": 208 - }, - { - "epoch": 0.06, - "learning_rate": 9.968359795570332e-05, - "loss": 1.2141, - "step": 209 - }, - { - "epoch": 0.07, - "learning_rate": 9.967792879640777e-05, - "loss": 1.2261, - "step": 210 - }, - { - "epoch": 0.07, - "learning_rate": 9.967220946235934e-05, - "loss": 1.2273, - "step": 211 - }, - { - "epoch": 0.07, - "learning_rate": 9.966643995933457e-05, - "loss": 1.1822, - "step": 212 - }, - { - "epoch": 0.07, - "learning_rate": 9.966062029316066e-05, - "loss": 1.1518, - "step": 213 - }, - { - "epoch": 0.07, - "learning_rate": 9.965475046971547e-05, - "loss": 1.1526, - "step": 214 - }, - { - "epoch": 0.07, - "learning_rate": 9.964883049492755e-05, - "loss": 1.2336, - "step": 215 - }, - { - "epoch": 0.07, - "learning_rate": 9.964286037477605e-05, - "loss": 1.2287, - "step": 216 - }, - { - "epoch": 0.07, - "learning_rate": 9.963684011529084e-05, - "loss": 1.1258, - "step": 217 - }, - { - "epoch": 0.07, - "learning_rate": 9.963076972255235e-05, - "loss": 1.1933, - "step": 218 - }, - { - "epoch": 0.07, - "learning_rate": 9.962464920269168e-05, - "loss": 1.2681, - "step": 219 - }, - { - "epoch": 0.07, - "learning_rate": 9.961847856189058e-05, - "loss": 1.2145, - "step": 220 - }, - { - "epoch": 0.07, - "learning_rate": 9.96122578063814e-05, - "loss": 1.1637, - "step": 221 - }, - { - "epoch": 0.07, - "learning_rate": 9.96059869424471e-05, - "loss": 1.0797, - "step": 222 - }, - { - "epoch": 0.07, - "learning_rate": 9.959966597642126e-05, - "loss": 1.2157, - "step": 223 - }, - { - "epoch": 0.07, - "learning_rate": 9.959329491468807e-05, - "loss": 1.0133, - "step": 224 - }, - { - "epoch": 0.07, - "learning_rate": 9.95868737636823e-05, - "loss": 1.2514, - "step": 225 - }, - { - "epoch": 0.07, - "learning_rate": 9.958040252988932e-05, - "loss": 1.0404, - "step": 226 - }, - { - "epoch": 0.07, - "learning_rate": 9.95738812198451e-05, - "loss": 1.2573, - "step": 227 - }, - { - "epoch": 0.07, - "learning_rate": 9.956730984013614e-05, - "loss": 1.1807, - "step": 228 - }, - { - "epoch": 0.07, - "learning_rate": 9.956068839739954e-05, - "loss": 1.2198, - "step": 229 - }, - { - "epoch": 0.07, - "learning_rate": 9.955401689832298e-05, - "loss": 1.1552, - "step": 230 - }, - { - "epoch": 0.07, - "learning_rate": 9.954729534964469e-05, - "loss": 1.2775, - "step": 231 - }, - { - "epoch": 0.07, - "learning_rate": 9.95405237581534e-05, - "loss": 1.1726, - "step": 232 - }, - { - "epoch": 0.07, - "learning_rate": 9.953370213068847e-05, - "loss": 1.0787, - "step": 233 - }, - { - "epoch": 0.07, - "learning_rate": 9.952683047413972e-05, - "loss": 1.1688, - "step": 234 - }, - { - "epoch": 0.07, - "learning_rate": 9.951990879544753e-05, - "loss": 1.1992, - "step": 235 - }, - { - "epoch": 0.07, - "learning_rate": 9.951293710160281e-05, - "loss": 1.1921, - "step": 236 - }, - { - "epoch": 0.07, - "learning_rate": 9.950591539964697e-05, - "loss": 1.1642, - "step": 237 - }, - { - "epoch": 0.07, - "learning_rate": 9.949884369667196e-05, - "loss": 1.1241, - "step": 238 - }, - { - "epoch": 0.07, - "learning_rate": 9.949172199982019e-05, - "loss": 1.1247, - "step": 239 - }, - { - "epoch": 0.07, - "learning_rate": 9.948455031628457e-05, - "loss": 1.2295, - "step": 240 - }, - { - "epoch": 0.07, - "learning_rate": 9.94773286533085e-05, - "loss": 1.228, - "step": 241 - }, - { - "epoch": 0.08, - "learning_rate": 9.94700570181859e-05, - "loss": 1.1303, - "step": 242 - }, - { - "epoch": 0.08, - "learning_rate": 9.94627354182611e-05, - "loss": 1.1309, - "step": 243 - }, - { - "epoch": 0.08, - "learning_rate": 9.945536386092892e-05, - "loss": 1.2121, - "step": 244 - }, - { - "epoch": 0.08, - "learning_rate": 9.944794235363469e-05, - "loss": 1.1657, - "step": 245 - }, - { - "epoch": 0.08, - "learning_rate": 9.944047090387408e-05, - "loss": 1.0833, - "step": 246 - }, - { - "epoch": 0.08, - "learning_rate": 9.943294951919326e-05, - "loss": 1.2155, - "step": 247 - }, - { - "epoch": 0.08, - "learning_rate": 9.942537820718888e-05, - "loss": 1.1629, - "step": 248 - }, - { - "epoch": 0.08, - "learning_rate": 9.941775697550795e-05, - "loss": 1.2027, - "step": 249 - }, - { - "epoch": 0.08, - "learning_rate": 9.94100858318479e-05, - "loss": 1.0467, - "step": 250 - }, - { - "epoch": 0.08, - "learning_rate": 9.940236478395663e-05, - "loss": 1.2663, - "step": 251 - }, - { - "epoch": 0.08, - "learning_rate": 9.939459383963238e-05, - "loss": 1.1571, - "step": 252 - }, - { - "epoch": 0.08, - "learning_rate": 9.938677300672382e-05, - "loss": 1.1168, - "step": 253 - }, - { - "epoch": 0.08, - "learning_rate": 9.937890229312998e-05, - "loss": 1.1729, - "step": 254 - }, - { - "epoch": 0.08, - "learning_rate": 9.937098170680033e-05, - "loss": 1.1244, - "step": 255 - }, - { - "epoch": 0.08, - "learning_rate": 9.936301125573462e-05, - "loss": 1.2096, - "step": 256 - }, - { - "epoch": 0.08, - "learning_rate": 9.935499094798304e-05, - "loss": 1.1407, - "step": 257 - }, - { - "epoch": 0.08, - "learning_rate": 9.934692079164612e-05, - "loss": 1.0703, - "step": 258 - }, - { - "epoch": 0.08, - "learning_rate": 9.933880079487468e-05, - "loss": 1.2032, - "step": 259 - }, - { - "epoch": 0.08, - "learning_rate": 9.933063096586999e-05, - "loss": 1.3059, - "step": 260 - }, - { - "epoch": 0.08, - "learning_rate": 9.932241131288351e-05, - "loss": 1.1324, - "step": 261 - }, - { - "epoch": 0.08, - "learning_rate": 9.931414184421716e-05, - "loss": 1.1564, - "step": 262 - }, - { - "epoch": 0.08, - "learning_rate": 9.930582256822307e-05, - "loss": 1.0901, - "step": 263 - }, - { - "epoch": 0.08, - "learning_rate": 9.929745349330375e-05, - "loss": 1.175, - "step": 264 - }, - { - "epoch": 0.08, - "learning_rate": 9.928903462791195e-05, - "loss": 1.2437, - "step": 265 - }, - { - "epoch": 0.08, - "learning_rate": 9.928056598055073e-05, - "loss": 1.0832, - "step": 266 - }, - { - "epoch": 0.08, - "learning_rate": 9.927204755977343e-05, - "loss": 1.2584, - "step": 267 - }, - { - "epoch": 0.08, - "learning_rate": 9.926347937418367e-05, - "loss": 1.1468, - "step": 268 - }, - { - "epoch": 0.08, - "learning_rate": 9.925486143243535e-05, - "loss": 1.1263, - "step": 269 - }, - { - "epoch": 0.08, - "learning_rate": 9.924619374323255e-05, - "loss": 1.2301, - "step": 270 - }, - { - "epoch": 0.08, - "learning_rate": 9.923747631532968e-05, - "loss": 1.1346, - "step": 271 - }, - { - "epoch": 0.08, - "learning_rate": 9.922870915753133e-05, - "loss": 1.1664, - "step": 272 - }, - { - "epoch": 0.08, - "learning_rate": 9.921989227869237e-05, - "loss": 1.2434, - "step": 273 - }, - { - "epoch": 0.09, - "learning_rate": 9.92110256877178e-05, - "loss": 1.0634, - "step": 274 - }, - { - "epoch": 0.09, - "learning_rate": 9.920210939356294e-05, - "loss": 1.2136, - "step": 275 - }, - { - "epoch": 0.09, - "learning_rate": 9.919314340523321e-05, - "loss": 1.0822, - "step": 276 - }, - { - "epoch": 0.09, - "learning_rate": 9.91841277317843e-05, - "loss": 1.2029, - "step": 277 - }, - { - "epoch": 0.09, - "learning_rate": 9.917506238232203e-05, - "loss": 1.1702, - "step": 278 - }, - { - "epoch": 0.09, - "learning_rate": 9.916594736600244e-05, - "loss": 1.1607, - "step": 279 - }, - { - "epoch": 0.09, - "learning_rate": 9.915678269203165e-05, - "loss": 1.1876, - "step": 280 - }, - { - "epoch": 0.09, - "learning_rate": 9.914756836966605e-05, - "loss": 1.127, - "step": 281 - }, - { - "epoch": 0.09, - "learning_rate": 9.913830440821207e-05, - "loss": 1.1379, - "step": 282 - }, - { - "epoch": 0.09, - "learning_rate": 9.912899081702633e-05, - "loss": 1.1575, - "step": 283 - }, - { - "epoch": 0.09, - "learning_rate": 9.911962760551557e-05, - "loss": 1.1544, - "step": 284 - }, - { - "epoch": 0.09, - "learning_rate": 9.911021478313665e-05, - "loss": 1.2066, - "step": 285 - }, - { - "epoch": 0.09, - "learning_rate": 9.910075235939653e-05, - "loss": 1.0937, - "step": 286 - }, - { - "epoch": 0.09, - "learning_rate": 9.909124034385225e-05, - "loss": 1.1758, - "step": 287 - }, - { - "epoch": 0.09, - "learning_rate": 9.908167874611093e-05, - "loss": 1.1281, - "step": 288 - }, - { - "epoch": 0.09, - "learning_rate": 9.907206757582986e-05, - "loss": 1.1933, - "step": 289 - }, - { - "epoch": 0.09, - "learning_rate": 9.906240684271628e-05, - "loss": 1.1989, - "step": 290 - }, - { - "epoch": 0.09, - "learning_rate": 9.905269655652757e-05, - "loss": 1.0353, - "step": 291 - }, - { - "epoch": 0.09, - "learning_rate": 9.90429367270711e-05, - "loss": 1.2066, - "step": 292 - }, - { - "epoch": 0.09, - "learning_rate": 9.903312736420432e-05, - "loss": 1.2039, - "step": 293 - }, - { - "epoch": 0.09, - "learning_rate": 9.90232684778347e-05, - "loss": 1.0584, - "step": 294 - }, - { - "epoch": 0.09, - "learning_rate": 9.90133600779197e-05, - "loss": 1.1911, - "step": 295 - }, - { - "epoch": 0.09, - "learning_rate": 9.900340217446684e-05, - "loss": 1.176, - "step": 296 - }, - { - "epoch": 0.09, - "learning_rate": 9.89933947775336e-05, - "loss": 1.0731, - "step": 297 - }, - { - "epoch": 0.09, - "learning_rate": 9.898333789722746e-05, - "loss": 1.2692, - "step": 298 - }, - { - "epoch": 0.09, - "learning_rate": 9.89732315437059e-05, - "loss": 1.0797, - "step": 299 - }, - { - "epoch": 0.09, - "learning_rate": 9.89630757271763e-05, - "loss": 1.2411, - "step": 300 - }, - { - "epoch": 0.09, - "learning_rate": 9.895287045789607e-05, - "loss": 1.1243, - "step": 301 - }, - { - "epoch": 0.09, - "learning_rate": 9.894261574617256e-05, - "loss": 1.1264, - "step": 302 - }, - { - "epoch": 0.09, - "learning_rate": 9.893231160236302e-05, - "loss": 1.1949, - "step": 303 - }, - { - "epoch": 0.09, - "learning_rate": 9.892195803687463e-05, - "loss": 1.1513, - "step": 304 - }, - { - "epoch": 0.09, - "learning_rate": 9.891155506016451e-05, - "loss": 1.0776, - "step": 305 - }, - { - "epoch": 0.09, - "learning_rate": 9.89011026827397e-05, - "loss": 1.2725, - "step": 306 - }, - { - "epoch": 0.1, - "learning_rate": 9.889060091515707e-05, - "loss": 1.0559, - "step": 307 - }, - { - "epoch": 0.1, - "learning_rate": 9.888004976802346e-05, - "loss": 1.1536, - "step": 308 - }, - { - "epoch": 0.1, - "learning_rate": 9.88694492519955e-05, - "loss": 1.077, - "step": 309 - }, - { - "epoch": 0.1, - "learning_rate": 9.885879937777973e-05, - "loss": 1.1402, - "step": 310 - }, - { - "epoch": 0.1, - "learning_rate": 9.884810015613253e-05, - "loss": 1.1638, - "step": 311 - }, - { - "epoch": 0.1, - "learning_rate": 9.883735159786015e-05, - "loss": 1.2104, - "step": 312 - }, - { - "epoch": 0.1, - "learning_rate": 9.882655371381862e-05, - "loss": 1.1863, - "step": 313 - }, - { - "epoch": 0.1, - "learning_rate": 9.88157065149138e-05, - "loss": 1.153, - "step": 314 - }, - { - "epoch": 0.1, - "learning_rate": 9.880481001210142e-05, - "loss": 1.0241, - "step": 315 - }, - { - "epoch": 0.1, - "learning_rate": 9.87938642163869e-05, - "loss": 1.2642, - "step": 316 - }, - { - "epoch": 0.1, - "learning_rate": 9.878286913882553e-05, - "loss": 1.101, - "step": 317 - }, - { - "epoch": 0.1, - "learning_rate": 9.877182479052235e-05, - "loss": 1.2226, - "step": 318 - }, - { - "epoch": 0.1, - "learning_rate": 9.876073118263216e-05, - "loss": 1.0936, - "step": 319 - }, - { - "epoch": 0.1, - "learning_rate": 9.874958832635951e-05, - "loss": 1.2763, - "step": 320 - }, - { - "epoch": 0.1, - "learning_rate": 9.873839623295869e-05, - "loss": 1.1001, - "step": 321 - }, - { - "epoch": 0.1, - "learning_rate": 9.872715491373374e-05, - "loss": 1.0486, - "step": 322 - }, - { - "epoch": 0.1, - "learning_rate": 9.87158643800384e-05, - "loss": 1.0746, - "step": 323 - }, - { - "epoch": 0.1, - "learning_rate": 9.870452464327611e-05, - "loss": 1.1881, - "step": 324 - }, - { - "epoch": 0.1, - "learning_rate": 9.869313571490002e-05, - "loss": 1.1939, - "step": 325 - }, - { - "epoch": 0.1, - "learning_rate": 9.868169760641294e-05, - "loss": 1.1575, - "step": 326 - }, - { - "epoch": 0.1, - "learning_rate": 9.867021032936741e-05, - "loss": 1.1449, - "step": 327 - }, - { - "epoch": 0.1, - "learning_rate": 9.865867389536555e-05, - "loss": 1.1184, - "step": 328 - }, - { - "epoch": 0.1, - "learning_rate": 9.864708831605918e-05, - "loss": 1.1842, - "step": 329 - }, - { - "epoch": 0.1, - "learning_rate": 9.863545360314976e-05, - "loss": 1.2106, - "step": 330 - }, - { - "epoch": 0.1, - "learning_rate": 9.862376976838835e-05, - "loss": 1.1206, - "step": 331 - }, - { - "epoch": 0.1, - "learning_rate": 9.861203682357564e-05, - "loss": 1.2947, - "step": 332 - }, - { - "epoch": 0.1, - "learning_rate": 9.86002547805619e-05, - "loss": 1.2166, - "step": 333 - }, - { - "epoch": 0.1, - "learning_rate": 9.8588423651247e-05, - "loss": 1.1135, - "step": 334 - }, - { - "epoch": 0.1, - "learning_rate": 9.857654344758043e-05, - "loss": 1.075, - "step": 335 - }, - { - "epoch": 0.1, - "learning_rate": 9.856461418156117e-05, - "loss": 1.1379, - "step": 336 - }, - { - "epoch": 0.1, - "learning_rate": 9.855263586523782e-05, - "loss": 1.2196, - "step": 337 - }, - { - "epoch": 0.1, - "learning_rate": 9.854060851070844e-05, - "loss": 1.1839, - "step": 338 - }, - { - "epoch": 0.11, - "learning_rate": 9.852853213012072e-05, - "loss": 1.1718, - "step": 339 - }, - { - "epoch": 0.11, - "learning_rate": 9.851640673567178e-05, - "loss": 1.1366, - "step": 340 - }, - { - "epoch": 0.11, - "learning_rate": 9.85042323396083e-05, - "loss": 1.1235, - "step": 341 - }, - { - "epoch": 0.11, - "learning_rate": 9.849200895422643e-05, - "loss": 1.1425, - "step": 342 - }, - { - "epoch": 0.11, - "learning_rate": 9.84797365918718e-05, - "loss": 1.2186, - "step": 343 - }, - { - "epoch": 0.11, - "learning_rate": 9.846741526493949e-05, - "loss": 1.0701, - "step": 344 - }, - { - "epoch": 0.11, - "learning_rate": 9.845504498587407e-05, - "loss": 1.2033, - "step": 345 - }, - { - "epoch": 0.11, - "learning_rate": 9.844262576716952e-05, - "loss": 1.162, - "step": 346 - }, - { - "epoch": 0.11, - "learning_rate": 9.843015762136926e-05, - "loss": 1.1386, - "step": 347 - }, - { - "epoch": 0.11, - "learning_rate": 9.841764056106617e-05, - "loss": 1.0388, - "step": 348 - }, - { - "epoch": 0.11, - "learning_rate": 9.840507459890243e-05, - "loss": 1.2529, - "step": 349 - }, - { - "epoch": 0.11, - "learning_rate": 9.839245974756972e-05, - "loss": 1.0954, - "step": 350 - }, - { - "epoch": 0.11, - "learning_rate": 9.837979601980903e-05, - "loss": 1.1257, - "step": 351 - }, - { - "epoch": 0.11, - "learning_rate": 9.836708342841075e-05, - "loss": 1.1318, - "step": 352 - }, - { - "epoch": 0.11, - "learning_rate": 9.835432198621457e-05, - "loss": 1.16, - "step": 353 - }, - { - "epoch": 0.11, - "learning_rate": 9.834151170610961e-05, - "loss": 1.0649, - "step": 354 - }, - { - "epoch": 0.11, - "learning_rate": 9.832865260103423e-05, - "loss": 1.2644, - "step": 355 - }, - { - "epoch": 0.11, - "learning_rate": 9.831574468397617e-05, - "loss": 1.0838, - "step": 356 - }, - { - "epoch": 0.11, - "learning_rate": 9.830278796797239e-05, - "loss": 1.1117, - "step": 357 - }, - { - "epoch": 0.11, - "learning_rate": 9.828978246610922e-05, - "loss": 1.2126, - "step": 358 - }, - { - "epoch": 0.11, - "learning_rate": 9.827672819152224e-05, - "loss": 1.0954, - "step": 359 - }, - { - "epoch": 0.11, - "learning_rate": 9.826362515739625e-05, - "loss": 1.2461, - "step": 360 - }, - { - "epoch": 0.11, - "learning_rate": 9.825047337696533e-05, - "loss": 1.1415, - "step": 361 - }, - { - "epoch": 0.11, - "learning_rate": 9.823727286351279e-05, - "loss": 1.201, - "step": 362 - }, - { - "epoch": 0.11, - "learning_rate": 9.822402363037118e-05, - "loss": 1.1353, - "step": 363 - }, - { - "epoch": 0.11, - "learning_rate": 9.821072569092223e-05, - "loss": 1.1519, - "step": 364 - }, - { - "epoch": 0.11, - "learning_rate": 9.819737905859684e-05, - "loss": 1.1763, - "step": 365 - }, - { - "epoch": 0.11, - "learning_rate": 9.818398374687516e-05, - "loss": 1.2091, - "step": 366 - }, - { - "epoch": 0.11, - "learning_rate": 9.817053976928645e-05, - "loss": 1.2131, - "step": 367 - }, - { - "epoch": 0.11, - "learning_rate": 9.815704713940916e-05, - "loss": 1.0092, - "step": 368 - }, - { - "epoch": 0.11, - "learning_rate": 9.814350587087083e-05, - "loss": 1.1916, - "step": 369 - }, - { - "epoch": 0.11, - "learning_rate": 9.812991597734815e-05, - "loss": 1.1964, - "step": 370 - }, - { - "epoch": 0.12, - "learning_rate": 9.811627747256694e-05, - "loss": 1.1316, - "step": 371 - }, - { - "epoch": 0.12, - "learning_rate": 9.810259037030211e-05, - "loss": 1.1463, - "step": 372 - }, - { - "epoch": 0.12, - "learning_rate": 9.808885468437763e-05, - "loss": 1.131, - "step": 373 - }, - { - "epoch": 0.12, - "learning_rate": 9.807507042866657e-05, - "loss": 1.2549, - "step": 374 - }, - { - "epoch": 0.12, - "learning_rate": 9.806123761709102e-05, - "loss": 1.2111, - "step": 375 - }, - { - "epoch": 0.12, - "learning_rate": 9.804735626362216e-05, - "loss": 1.0817, - "step": 376 - }, - { - "epoch": 0.12, - "learning_rate": 9.803342638228015e-05, - "loss": 1.1605, - "step": 377 - }, - { - "epoch": 0.12, - "learning_rate": 9.801944798713419e-05, - "loss": 1.1723, - "step": 378 - }, - { - "epoch": 0.12, - "learning_rate": 9.800542109230246e-05, - "loss": 1.2154, - "step": 379 - }, - { - "epoch": 0.12, - "learning_rate": 9.799134571195214e-05, - "loss": 1.1087, - "step": 380 - }, - { - "epoch": 0.12, - "learning_rate": 9.797722186029939e-05, - "loss": 1.1138, - "step": 381 - }, - { - "epoch": 0.12, - "learning_rate": 9.796304955160931e-05, - "loss": 1.1802, - "step": 382 - }, - { - "epoch": 0.12, - "learning_rate": 9.794882880019593e-05, - "loss": 1.1514, - "step": 383 - }, - { - "epoch": 0.12, - "learning_rate": 9.793455962042223e-05, - "loss": 1.1846, - "step": 384 - }, - { - "epoch": 0.12, - "learning_rate": 9.79202420267001e-05, - "loss": 1.1667, - "step": 385 - }, - { - "epoch": 0.12, - "learning_rate": 9.79058760334903e-05, - "loss": 1.0934, - "step": 386 - }, - { - "epoch": 0.12, - "learning_rate": 9.789146165530254e-05, - "loss": 1.165, - "step": 387 - }, - { - "epoch": 0.12, - "learning_rate": 9.787699890669533e-05, - "loss": 1.1445, - "step": 388 - }, - { - "epoch": 0.12, - "learning_rate": 9.786248780227604e-05, - "loss": 1.1355, - "step": 389 - }, - { - "epoch": 0.12, - "learning_rate": 9.784792835670091e-05, - "loss": 1.1142, - "step": 390 - }, - { - "epoch": 0.12, - "learning_rate": 9.783332058467503e-05, - "loss": 1.0242, - "step": 391 - }, - { - "epoch": 0.12, - "learning_rate": 9.78186645009522e-05, - "loss": 1.1453, - "step": 392 - }, - { - "epoch": 0.12, - "learning_rate": 9.780396012033512e-05, - "loss": 1.2821, - "step": 393 - }, - { - "epoch": 0.12, - "learning_rate": 9.778920745767523e-05, - "loss": 1.0864, - "step": 394 - }, - { - "epoch": 0.12, - "learning_rate": 9.777440652787271e-05, - "loss": 1.2502, - "step": 395 - }, - { - "epoch": 0.12, - "learning_rate": 9.775955734587655e-05, - "loss": 1.048, - "step": 396 - }, - { - "epoch": 0.12, - "learning_rate": 9.774465992668438e-05, - "loss": 1.1887, - "step": 397 - }, - { - "epoch": 0.12, - "learning_rate": 9.772971428534264e-05, - "loss": 1.1747, - "step": 398 - }, - { - "epoch": 0.12, - "learning_rate": 9.771472043694643e-05, - "loss": 1.1468, - "step": 399 - }, - { - "epoch": 0.12, - "learning_rate": 9.769967839663958e-05, - "loss": 1.1029, - "step": 400 - }, - { - "epoch": 0.12, - "learning_rate": 9.768458817961455e-05, - "loss": 1.1713, - "step": 401 - }, - { - "epoch": 0.12, - "learning_rate": 9.766944980111243e-05, - "loss": 1.1172, - "step": 402 - }, - { - "epoch": 0.13, - "learning_rate": 9.765426327642307e-05, - "loss": 1.3023, - "step": 403 - }, - { - "epoch": 0.13, - "learning_rate": 9.763902862088481e-05, - "loss": 1.1309, - "step": 404 - }, - { - "epoch": 0.13, - "learning_rate": 9.76237458498847e-05, - "loss": 1.0881, - "step": 405 - }, - { - "epoch": 0.13, - "learning_rate": 9.760841497885833e-05, - "loss": 1.177, - "step": 406 - }, - { - "epoch": 0.13, - "learning_rate": 9.759303602328993e-05, - "loss": 1.1293, - "step": 407 - }, - { - "epoch": 0.13, - "learning_rate": 9.757760899871222e-05, - "loss": 1.1728, - "step": 408 - }, - { - "epoch": 0.13, - "learning_rate": 9.756213392070653e-05, - "loss": 1.1662, - "step": 409 - }, - { - "epoch": 0.13, - "learning_rate": 9.75466108049027e-05, - "loss": 1.0512, - "step": 410 - }, - { - "epoch": 0.13, - "learning_rate": 9.753103966697909e-05, - "loss": 1.2002, - "step": 411 - }, - { - "epoch": 0.13, - "learning_rate": 9.751542052266255e-05, - "loss": 1.1217, - "step": 412 - }, - { - "epoch": 0.13, - "learning_rate": 9.749975338772846e-05, - "loss": 1.0958, - "step": 413 - }, - { - "epoch": 0.13, - "learning_rate": 9.74840382780006e-05, - "loss": 1.289, - "step": 414 - }, - { - "epoch": 0.13, - "learning_rate": 9.746827520935129e-05, - "loss": 1.1355, - "step": 415 - }, - { - "epoch": 0.13, - "learning_rate": 9.745246419770122e-05, - "loss": 1.1092, - "step": 416 - }, - { - "epoch": 0.13, - "learning_rate": 9.743660525901952e-05, - "loss": 1.0887, - "step": 417 - }, - { - "epoch": 0.13, - "learning_rate": 9.742069840932373e-05, - "loss": 1.1419, - "step": 418 - }, - { - "epoch": 0.13, - "learning_rate": 9.74047436646798e-05, - "loss": 1.2064, - "step": 419 - }, - { - "epoch": 0.13, - "learning_rate": 9.738874104120203e-05, - "loss": 1.1259, - "step": 420 - }, - { - "epoch": 0.13, - "learning_rate": 9.737269055505307e-05, - "loss": 1.125, - "step": 421 - }, - { - "epoch": 0.13, - "learning_rate": 9.735659222244396e-05, - "loss": 1.1954, - "step": 422 - }, - { - "epoch": 0.13, - "learning_rate": 9.7340446059634e-05, - "loss": 1.0378, - "step": 423 - }, - { - "epoch": 0.13, - "learning_rate": 9.732425208293083e-05, - "loss": 1.167, - "step": 424 - }, - { - "epoch": 0.13, - "learning_rate": 9.73080103086904e-05, - "loss": 1.0203, - "step": 425 - }, - { - "epoch": 0.13, - "learning_rate": 9.729172075331687e-05, - "loss": 1.1693, - "step": 426 - }, - { - "epoch": 0.13, - "learning_rate": 9.727538343326278e-05, - "loss": 1.2246, - "step": 427 - }, - { - "epoch": 0.13, - "learning_rate": 9.725899836502879e-05, - "loss": 1.1337, - "step": 428 - }, - { - "epoch": 0.13, - "learning_rate": 9.724256556516382e-05, - "loss": 1.1144, - "step": 429 - }, - { - "epoch": 0.13, - "learning_rate": 9.722608505026507e-05, - "loss": 1.1068, - "step": 430 - }, - { - "epoch": 0.13, - "learning_rate": 9.720955683697781e-05, - "loss": 1.1947, - "step": 431 - }, - { - "epoch": 0.13, - "learning_rate": 9.71929809419956e-05, - "loss": 1.3006, - "step": 432 - }, - { - "epoch": 0.13, - "learning_rate": 9.717635738206008e-05, - "loss": 1.0359, - "step": 433 - }, - { - "epoch": 0.13, - "learning_rate": 9.715968617396107e-05, - "loss": 1.1915, - "step": 434 - }, - { - "epoch": 0.13, - "learning_rate": 9.71429673345365e-05, - "loss": 1.1788, - "step": 435 - }, - { - "epoch": 0.14, - "learning_rate": 9.712620088067244e-05, - "loss": 1.1302, - "step": 436 - }, - { - "epoch": 0.14, - "learning_rate": 9.710938682930297e-05, - "loss": 1.1559, - "step": 437 - }, - { - "epoch": 0.14, - "learning_rate": 9.709252519741034e-05, - "loss": 1.0101, - "step": 438 - }, - { - "epoch": 0.14, - "learning_rate": 9.707561600202482e-05, - "loss": 1.1451, - "step": 439 - }, - { - "epoch": 0.14, - "learning_rate": 9.705865926022468e-05, - "loss": 1.2513, - "step": 440 - }, - { - "epoch": 0.14, - "learning_rate": 9.704165498913625e-05, - "loss": 1.1153, - "step": 441 - }, - { - "epoch": 0.14, - "learning_rate": 9.702460320593388e-05, - "loss": 1.1491, - "step": 442 - }, - { - "epoch": 0.14, - "learning_rate": 9.700750392783986e-05, - "loss": 1.1796, - "step": 443 - }, - { - "epoch": 0.14, - "learning_rate": 9.69903571721245e-05, - "loss": 1.1527, - "step": 444 - }, - { - "epoch": 0.14, - "learning_rate": 9.697316295610603e-05, - "loss": 1.0697, - "step": 445 - }, - { - "epoch": 0.14, - "learning_rate": 9.695592129715063e-05, - "loss": 1.0722, - "step": 446 - }, - { - "epoch": 0.14, - "learning_rate": 9.693863221267238e-05, - "loss": 1.2451, - "step": 447 - }, - { - "epoch": 0.14, - "learning_rate": 9.692129572013327e-05, - "loss": 1.2017, - "step": 448 - }, - { - "epoch": 0.14, - "learning_rate": 9.690391183704317e-05, - "loss": 1.0213, - "step": 449 - }, - { - "epoch": 0.14, - "learning_rate": 9.688648058095984e-05, - "loss": 1.2104, - "step": 450 - }, - { - "epoch": 0.14, - "learning_rate": 9.686900196948885e-05, - "loss": 1.1434, - "step": 451 - }, - { - "epoch": 0.14, - "learning_rate": 9.68514760202836e-05, - "loss": 1.132, - "step": 452 - }, - { - "epoch": 0.14, - "learning_rate": 9.683390275104533e-05, - "loss": 1.1217, - "step": 453 - }, - { - "epoch": 0.14, - "learning_rate": 9.681628217952308e-05, - "loss": 1.0739, - "step": 454 - }, - { - "epoch": 0.14, - "learning_rate": 9.67986143235136e-05, - "loss": 1.0234, - "step": 455 - }, - { - "epoch": 0.14, - "learning_rate": 9.678089920086145e-05, - "loss": 1.2176, - "step": 456 - }, - { - "epoch": 0.14, - "learning_rate": 9.676313682945895e-05, - "loss": 1.2088, - "step": 457 - }, - { - "epoch": 0.14, - "learning_rate": 9.674532722724607e-05, - "loss": 1.03, - "step": 458 - }, - { - "epoch": 0.14, - "learning_rate": 9.672747041221055e-05, - "loss": 1.1044, - "step": 459 - }, - { - "epoch": 0.14, - "learning_rate": 9.67095664023878e-05, - "loss": 1.2163, - "step": 460 - }, - { - "epoch": 0.14, - "learning_rate": 9.669161521586085e-05, - "loss": 1.1907, - "step": 461 - }, - { - "epoch": 0.14, - "learning_rate": 9.667361687076045e-05, - "loss": 1.0693, - "step": 462 - }, - { - "epoch": 0.14, - "learning_rate": 9.665557138526492e-05, - "loss": 1.2219, - "step": 463 - }, - { - "epoch": 0.14, - "learning_rate": 9.663747877760024e-05, - "loss": 1.1963, - "step": 464 - }, - { - "epoch": 0.14, - "learning_rate": 9.661933906603992e-05, - "loss": 1.0656, - "step": 465 - }, - { - "epoch": 0.14, - "learning_rate": 9.660115226890513e-05, - "loss": 1.1559, - "step": 466 - }, - { - "epoch": 0.14, - "learning_rate": 9.658291840456452e-05, - "loss": 1.173, - "step": 467 - }, - { - "epoch": 0.15, - "learning_rate": 9.656463749143433e-05, - "loss": 1.2059, - "step": 468 - }, - { - "epoch": 0.15, - "learning_rate": 9.654630954797828e-05, - "loss": 1.0934, - "step": 469 - }, - { - "epoch": 0.15, - "learning_rate": 9.652793459270763e-05, - "loss": 1.175, - "step": 470 - }, - { - "epoch": 0.15, - "learning_rate": 9.65095126441811e-05, - "loss": 1.0705, - "step": 471 - }, - { - "epoch": 0.15, - "learning_rate": 9.649104372100485e-05, - "loss": 1.0793, - "step": 472 - }, - { - "epoch": 0.15, - "learning_rate": 9.647252784183253e-05, - "loss": 1.2582, - "step": 473 - }, - { - "epoch": 0.15, - "learning_rate": 9.64539650253652e-05, - "loss": 1.128, - "step": 474 - }, - { - "epoch": 0.15, - "learning_rate": 9.64353552903513e-05, - "loss": 1.1956, - "step": 475 - }, - { - "epoch": 0.15, - "learning_rate": 9.641669865558669e-05, - "loss": 1.1685, - "step": 476 - }, - { - "epoch": 0.15, - "learning_rate": 9.63979951399146e-05, - "loss": 1.1404, - "step": 477 - }, - { - "epoch": 0.15, - "learning_rate": 9.637924476222558e-05, - "loss": 1.099, - "step": 478 - }, - { - "epoch": 0.15, - "learning_rate": 9.636044754145753e-05, - "loss": 1.2255, - "step": 479 - }, - { - "epoch": 0.15, - "learning_rate": 9.634160349659567e-05, - "loss": 1.1705, - "step": 480 - }, - { - "epoch": 0.15, - "learning_rate": 9.632271264667249e-05, - "loss": 1.1362, - "step": 481 - }, - { - "epoch": 0.15, - "learning_rate": 9.630377501076778e-05, - "loss": 1.1371, - "step": 482 - }, - { - "epoch": 0.15, - "learning_rate": 9.628479060800855e-05, - "loss": 1.1125, - "step": 483 - }, - { - "epoch": 0.15, - "learning_rate": 9.626575945756909e-05, - "loss": 1.1968, - "step": 484 - }, - { - "epoch": 0.15, - "learning_rate": 9.624668157867084e-05, - "loss": 1.104, - "step": 485 - }, - { - "epoch": 0.15, - "learning_rate": 9.622755699058251e-05, - "loss": 1.1495, - "step": 486 - }, - { - "epoch": 0.15, - "learning_rate": 9.620838571261994e-05, - "loss": 1.1113, - "step": 487 - }, - { - "epoch": 0.15, - "learning_rate": 9.618916776414615e-05, - "loss": 1.1322, - "step": 488 - }, - { - "epoch": 0.15, - "learning_rate": 9.616990316457127e-05, - "loss": 0.9552, - "step": 489 - }, - { - "epoch": 0.15, - "learning_rate": 9.615059193335256e-05, - "loss": 1.1265, - "step": 490 - }, - { - "epoch": 0.15, - "learning_rate": 9.61312340899944e-05, - "loss": 1.1283, - "step": 491 - }, - { - "epoch": 0.15, - "learning_rate": 9.61118296540482e-05, - "loss": 1.1241, - "step": 492 - }, - { - "epoch": 0.15, - "learning_rate": 9.609237864511248e-05, - "loss": 1.1444, - "step": 493 - }, - { - "epoch": 0.15, - "learning_rate": 9.607288108283278e-05, - "loss": 1.1183, - "step": 494 - }, - { - "epoch": 0.15, - "learning_rate": 9.605333698690164e-05, - "loss": 1.191, - "step": 495 - }, - { - "epoch": 0.15, - "learning_rate": 9.603374637705862e-05, - "loss": 1.1187, - "step": 496 - }, - { - "epoch": 0.15, - "learning_rate": 9.601410927309026e-05, - "loss": 1.1355, - "step": 497 - }, - { - "epoch": 0.15, - "learning_rate": 9.599442569483004e-05, - "loss": 1.1064, - "step": 498 - }, - { - "epoch": 0.15, - "learning_rate": 9.59746956621584e-05, - "loss": 1.2277, - "step": 499 - }, - { - "epoch": 0.16, - "learning_rate": 9.59549191950027e-05, - "loss": 1.2213, - "step": 500 - }, - { - "epoch": 0.16, - "learning_rate": 9.593509631333717e-05, - "loss": 1.1357, - "step": 501 - }, - { - "epoch": 0.16, - "learning_rate": 9.591522703718295e-05, - "loss": 1.0891, - "step": 502 - }, - { - "epoch": 0.16, - "learning_rate": 9.589531138660803e-05, - "loss": 1.142, - "step": 503 - }, - { - "epoch": 0.16, - "learning_rate": 9.587534938172722e-05, - "loss": 1.1499, - "step": 504 - }, - { - "epoch": 0.16, - "learning_rate": 9.585534104270219e-05, - "loss": 1.044, - "step": 505 - }, - { - "epoch": 0.16, - "learning_rate": 9.583528638974136e-05, - "loss": 1.1026, - "step": 506 - }, - { - "epoch": 0.16, - "learning_rate": 9.581518544309993e-05, - "loss": 1.2509, - "step": 507 - }, - { - "epoch": 0.16, - "learning_rate": 9.579503822307991e-05, - "loss": 1.1208, - "step": 508 - }, - { - "epoch": 0.16, - "learning_rate": 9.577484475002999e-05, - "loss": 1.1014, - "step": 509 - }, - { - "epoch": 0.16, - "learning_rate": 9.57546050443456e-05, - "loss": 1.1063, - "step": 510 - }, - { - "epoch": 0.16, - "learning_rate": 9.573431912646888e-05, - "loss": 1.1421, - "step": 511 - }, - { - "epoch": 0.16, - "learning_rate": 9.571398701688859e-05, - "loss": 1.2008, - "step": 512 - }, - { - "epoch": 0.16, - "learning_rate": 9.56936087361402e-05, - "loss": 1.1231, - "step": 513 - }, - { - "epoch": 0.16, - "learning_rate": 9.567318430480578e-05, - "loss": 1.1073, - "step": 514 - }, - { - "epoch": 0.16, - "learning_rate": 9.565271374351405e-05, - "loss": 1.0955, - "step": 515 - }, - { - "epoch": 0.16, - "learning_rate": 9.563219707294028e-05, - "loss": 1.0992, - "step": 516 - }, - { - "epoch": 0.16, - "learning_rate": 9.56116343138063e-05, - "loss": 1.0963, - "step": 517 - }, - { - "epoch": 0.16, - "learning_rate": 9.559102548688056e-05, - "loss": 1.0755, - "step": 518 - }, - { - "epoch": 0.16, - "learning_rate": 9.557037061297798e-05, - "loss": 1.183, - "step": 519 - }, - { - "epoch": 0.16, - "learning_rate": 9.554966971295998e-05, - "loss": 1.1695, - "step": 520 - }, - { - "epoch": 0.16, - "learning_rate": 9.552892280773451e-05, - "loss": 1.1796, - "step": 521 - }, - { - "epoch": 0.16, - "learning_rate": 9.550812991825596e-05, - "loss": 1.094, - "step": 522 - }, - { - "epoch": 0.16, - "learning_rate": 9.548729106552515e-05, - "loss": 1.1198, - "step": 523 - }, - { - "epoch": 0.16, - "learning_rate": 9.546640627058935e-05, - "loss": 1.0019, - "step": 524 - }, - { - "epoch": 0.16, - "learning_rate": 9.544547555454222e-05, - "loss": 1.0843, - "step": 525 - }, - { - "epoch": 0.16, - "learning_rate": 9.542449893852381e-05, - "loss": 1.1506, - "step": 526 - }, - { - "epoch": 0.16, - "learning_rate": 9.540347644372053e-05, - "loss": 1.0759, - "step": 527 - }, - { - "epoch": 0.16, - "learning_rate": 9.538240809136509e-05, - "loss": 1.2293, - "step": 528 - }, - { - "epoch": 0.16, - "learning_rate": 9.536129390273658e-05, - "loss": 1.2542, - "step": 529 - }, - { - "epoch": 0.16, - "learning_rate": 9.53401338991603e-05, - "loss": 1.0751, - "step": 530 - }, - { - "epoch": 0.16, - "learning_rate": 9.531892810200794e-05, - "loss": 1.0839, - "step": 531 - }, - { - "epoch": 0.17, - "learning_rate": 9.52976765326973e-05, - "loss": 1.1693, - "step": 532 - }, - { - "epoch": 0.17, - "learning_rate": 9.527637921269255e-05, - "loss": 1.1072, - "step": 533 - }, - { - "epoch": 0.17, - "learning_rate": 9.525503616350395e-05, - "loss": 1.1919, - "step": 534 - }, - { - "epoch": 0.17, - "learning_rate": 9.523364740668805e-05, - "loss": 1.0582, - "step": 535 - }, - { - "epoch": 0.17, - "learning_rate": 9.521221296384745e-05, - "loss": 1.1531, - "step": 536 - }, - { - "epoch": 0.17, - "learning_rate": 9.519073285663103e-05, - "loss": 1.1777, - "step": 537 - }, - { - "epoch": 0.17, - "learning_rate": 9.516920710673367e-05, - "loss": 1.1227, - "step": 538 - }, - { - "epoch": 0.17, - "learning_rate": 9.51476357358964e-05, - "loss": 1.1231, - "step": 539 - }, - { - "epoch": 0.17, - "learning_rate": 9.512601876590632e-05, - "loss": 1.1518, - "step": 540 - }, - { - "epoch": 0.17, - "learning_rate": 9.510435621859663e-05, - "loss": 1.2206, - "step": 541 - }, - { - "epoch": 0.17, - "learning_rate": 9.508264811584647e-05, - "loss": 1.1708, - "step": 542 - }, - { - "epoch": 0.17, - "learning_rate": 9.506089447958108e-05, - "loss": 1.0096, - "step": 543 - }, - { - "epoch": 0.17, - "learning_rate": 9.503909533177162e-05, - "loss": 1.0861, - "step": 544 - }, - { - "epoch": 0.17, - "learning_rate": 9.501725069443528e-05, - "loss": 1.2421, - "step": 545 - }, - { - "epoch": 0.17, - "learning_rate": 9.499536058963516e-05, - "loss": 1.0911, - "step": 546 - }, - { - "epoch": 0.17, - "learning_rate": 9.497342503948026e-05, - "loss": 1.1609, - "step": 547 - }, - { - "epoch": 0.17, - "learning_rate": 9.495144406612553e-05, - "loss": 1.096, - "step": 548 - }, - { - "epoch": 0.17, - "learning_rate": 9.492941769177175e-05, - "loss": 1.1832, - "step": 549 - }, - { - "epoch": 0.17, - "learning_rate": 9.490734593866562e-05, - "loss": 1.212, - "step": 550 - }, - { - "epoch": 0.17, - "learning_rate": 9.488522882909959e-05, - "loss": 1.0436, - "step": 551 - }, - { - "epoch": 0.17, - "learning_rate": 9.486306638541195e-05, - "loss": 1.2304, - "step": 552 - }, - { - "epoch": 0.17, - "learning_rate": 9.484085862998683e-05, - "loss": 1.0846, - "step": 553 - }, - { - "epoch": 0.17, - "learning_rate": 9.481860558525409e-05, - "loss": 1.1185, - "step": 554 - }, - { - "epoch": 0.17, - "learning_rate": 9.479630727368927e-05, - "loss": 1.2101, - "step": 555 - }, - { - "epoch": 0.17, - "learning_rate": 9.477396371781372e-05, - "loss": 0.9014, - "step": 556 - }, - { - "epoch": 0.17, - "learning_rate": 9.475157494019444e-05, - "loss": 1.1282, - "step": 557 - }, - { - "epoch": 0.17, - "learning_rate": 9.472914096344414e-05, - "loss": 1.1136, - "step": 558 - }, - { - "epoch": 0.17, - "learning_rate": 9.470666181022112e-05, - "loss": 1.1671, - "step": 559 - }, - { - "epoch": 0.17, - "learning_rate": 9.468413750322939e-05, - "loss": 1.1413, - "step": 560 - }, - { - "epoch": 0.17, - "learning_rate": 9.466156806521848e-05, - "loss": 1.1143, - "step": 561 - }, - { - "epoch": 0.17, - "learning_rate": 9.463895351898355e-05, - "loss": 1.3011, - "step": 562 - }, - { - "epoch": 0.17, - "learning_rate": 9.461629388736532e-05, - "loss": 1.0754, - "step": 563 - }, - { - "epoch": 0.17, - "learning_rate": 9.459358919325004e-05, - "loss": 1.0886, - "step": 564 - }, - { - "epoch": 0.18, - "learning_rate": 9.457083945956948e-05, - "loss": 1.1755, - "step": 565 - }, - { - "epoch": 0.18, - "learning_rate": 9.454804470930087e-05, - "loss": 1.2035, - "step": 566 - }, - { - "epoch": 0.18, - "learning_rate": 9.452520496546693e-05, - "loss": 1.0114, - "step": 567 - }, - { - "epoch": 0.18, - "learning_rate": 9.450232025113584e-05, - "loss": 1.1833, - "step": 568 - }, - { - "epoch": 0.18, - "learning_rate": 9.447939058942117e-05, - "loss": 1.0769, - "step": 569 - }, - { - "epoch": 0.18, - "learning_rate": 9.445641600348191e-05, - "loss": 1.2213, - "step": 570 - }, - { - "epoch": 0.18, - "learning_rate": 9.443339651652241e-05, - "loss": 1.0538, - "step": 571 - }, - { - "epoch": 0.18, - "learning_rate": 9.441033215179234e-05, - "loss": 1.1454, - "step": 572 - }, - { - "epoch": 0.18, - "learning_rate": 9.438722293258678e-05, - "loss": 1.1428, - "step": 573 - }, - { - "epoch": 0.18, - "learning_rate": 9.436406888224602e-05, - "loss": 1.1039, - "step": 574 - }, - { - "epoch": 0.18, - "learning_rate": 9.43408700241557e-05, - "loss": 1.2082, - "step": 575 - }, - { - "epoch": 0.18, - "learning_rate": 9.431762638174668e-05, - "loss": 1.0862, - "step": 576 - }, - { - "epoch": 0.18, - "learning_rate": 9.429433797849507e-05, - "loss": 1.1214, - "step": 577 - }, - { - "epoch": 0.18, - "learning_rate": 9.427100483792216e-05, - "loss": 1.1871, - "step": 578 - }, - { - "epoch": 0.18, - "learning_rate": 9.424762698359442e-05, - "loss": 1.1042, - "step": 579 - }, - { - "epoch": 0.18, - "learning_rate": 9.422420443912356e-05, - "loss": 1.1546, - "step": 580 - }, - { - "epoch": 0.18, - "learning_rate": 9.420073722816633e-05, - "loss": 1.1996, - "step": 581 - }, - { - "epoch": 0.18, - "learning_rate": 9.417722537442467e-05, - "loss": 1.071, - "step": 582 - }, - { - "epoch": 0.18, - "learning_rate": 9.415366890164552e-05, - "loss": 1.1528, - "step": 583 - }, - { - "epoch": 0.18, - "learning_rate": 9.413006783362099e-05, - "loss": 1.1066, - "step": 584 - }, - { - "epoch": 0.18, - "learning_rate": 9.410642219418815e-05, - "loss": 1.1, - "step": 585 - }, - { - "epoch": 0.18, - "learning_rate": 9.408273200722912e-05, - "loss": 1.165, - "step": 586 - }, - { - "epoch": 0.18, - "learning_rate": 9.405899729667104e-05, - "loss": 1.1104, - "step": 587 - }, - { - "epoch": 0.18, - "learning_rate": 9.403521808648595e-05, - "loss": 1.2281, - "step": 588 - }, - { - "epoch": 0.18, - "learning_rate": 9.40113944006909e-05, - "loss": 0.948, - "step": 589 - }, - { - "epoch": 0.18, - "learning_rate": 9.398752626334781e-05, - "loss": 1.0624, - "step": 590 - }, - { - "epoch": 0.18, - "learning_rate": 9.396361369856355e-05, - "loss": 1.152, - "step": 591 - }, - { - "epoch": 0.18, - "learning_rate": 9.393965673048981e-05, - "loss": 1.0726, - "step": 592 - }, - { - "epoch": 0.18, - "learning_rate": 9.391565538332317e-05, - "loss": 1.1482, - "step": 593 - }, - { - "epoch": 0.18, - "learning_rate": 9.389160968130497e-05, - "loss": 1.1816, - "step": 594 - }, - { - "epoch": 0.18, - "learning_rate": 9.386751964872145e-05, - "loss": 1.0783, - "step": 595 - }, - { - "epoch": 0.18, - "learning_rate": 9.38433853099035e-05, - "loss": 1.0685, - "step": 596 - }, - { - "epoch": 0.19, - "learning_rate": 9.381920668922688e-05, - "loss": 1.2108, - "step": 597 - }, - { - "epoch": 0.19, - "learning_rate": 9.3794983811112e-05, - "loss": 1.1299, - "step": 598 - }, - { - "epoch": 0.19, - "learning_rate": 9.377071670002397e-05, - "loss": 1.2934, - "step": 599 - }, - { - "epoch": 0.19, - "learning_rate": 9.374640538047262e-05, - "loss": 1.0242, - "step": 600 - }, - { - "epoch": 0.19, - "learning_rate": 9.372204987701239e-05, - "loss": 1.201, - "step": 601 - }, - { - "epoch": 0.19, - "learning_rate": 9.369765021424236e-05, - "loss": 1.0836, - "step": 602 - }, - { - "epoch": 0.19, - "learning_rate": 9.367320641680621e-05, - "loss": 1.0656, - "step": 603 - }, - { - "epoch": 0.19, - "learning_rate": 9.364871850939221e-05, - "loss": 1.1465, - "step": 604 - }, - { - "epoch": 0.19, - "learning_rate": 9.362418651673317e-05, - "loss": 1.1744, - "step": 605 - }, - { - "epoch": 0.19, - "learning_rate": 9.359961046360642e-05, - "loss": 1.0681, - "step": 606 - }, - { - "epoch": 0.19, - "learning_rate": 9.357499037483377e-05, - "loss": 1.1797, - "step": 607 - }, - { - "epoch": 0.19, - "learning_rate": 9.355032627528157e-05, - "loss": 1.0415, - "step": 608 - }, - { - "epoch": 0.19, - "learning_rate": 9.352561818986056e-05, - "loss": 1.1871, - "step": 609 - }, - { - "epoch": 0.19, - "learning_rate": 9.350086614352593e-05, - "loss": 1.1927, - "step": 610 - }, - { - "epoch": 0.19, - "learning_rate": 9.347607016127728e-05, - "loss": 1.074, - "step": 611 - }, - { - "epoch": 0.19, - "learning_rate": 9.345123026815857e-05, - "loss": 1.0608, - "step": 612 - }, - { - "epoch": 0.19, - "learning_rate": 9.342634648925813e-05, - "loss": 1.1635, - "step": 613 - }, - { - "epoch": 0.19, - "learning_rate": 9.340141884970855e-05, - "loss": 1.1154, - "step": 614 - }, - { - "epoch": 0.19, - "learning_rate": 9.337644737468681e-05, - "loss": 1.1974, - "step": 615 - }, - { - "epoch": 0.19, - "learning_rate": 9.335143208941413e-05, - "loss": 1.0577, - "step": 616 - }, - { - "epoch": 0.19, - "learning_rate": 9.332637301915594e-05, - "loss": 1.1514, - "step": 617 - }, - { - "epoch": 0.19, - "learning_rate": 9.330127018922194e-05, - "loss": 1.1706, - "step": 618 - }, - { - "epoch": 0.19, - "learning_rate": 9.327612362496601e-05, - "loss": 1.0837, - "step": 619 - }, - { - "epoch": 0.19, - "learning_rate": 9.325093335178619e-05, - "loss": 1.1252, - "step": 620 - }, - { - "epoch": 0.19, - "learning_rate": 9.322569939512472e-05, - "loss": 1.1806, - "step": 621 - }, - { - "epoch": 0.19, - "learning_rate": 9.320042178046785e-05, - "loss": 1.1407, - "step": 622 - }, - { - "epoch": 0.19, - "learning_rate": 9.317510053334604e-05, - "loss": 1.111, - "step": 623 - }, - { - "epoch": 0.19, - "learning_rate": 9.314973567933379e-05, - "loss": 1.1045, - "step": 624 - }, - { - "epoch": 0.19, - "learning_rate": 9.312432724404956e-05, - "loss": 1.0275, - "step": 625 - }, - { - "epoch": 0.19, - "learning_rate": 9.309887525315597e-05, - "loss": 1.1925, - "step": 626 - }, - { - "epoch": 0.19, - "learning_rate": 9.30733797323595e-05, - "loss": 0.9711, - "step": 627 - }, - { - "epoch": 0.19, - "learning_rate": 9.304784070741066e-05, - "loss": 1.1709, - "step": 628 - }, - { - "epoch": 0.2, - "learning_rate": 9.302225820410391e-05, - "loss": 0.9641, - "step": 629 - }, - { - "epoch": 0.2, - "learning_rate": 9.299663224827757e-05, - "loss": 1.2511, - "step": 630 - }, - { - "epoch": 0.2, - "learning_rate": 9.29709628658139e-05, - "loss": 1.0882, - "step": 631 - }, - { - "epoch": 0.2, - "learning_rate": 9.294525008263899e-05, - "loss": 1.2824, - "step": 632 - }, - { - "epoch": 0.2, - "learning_rate": 9.291949392472277e-05, - "loss": 1.0559, - "step": 633 - }, - { - "epoch": 0.2, - "learning_rate": 9.289369441807901e-05, - "loss": 1.1686, - "step": 634 - }, - { - "epoch": 0.2, - "learning_rate": 9.28678515887652e-05, - "loss": 1.1198, - "step": 635 - }, - { - "epoch": 0.2, - "learning_rate": 9.284196546288262e-05, - "loss": 1.0549, - "step": 636 - }, - { - "epoch": 0.2, - "learning_rate": 9.281603606657632e-05, - "loss": 1.1663, - "step": 637 - }, - { - "epoch": 0.2, - "learning_rate": 9.279006342603498e-05, - "loss": 1.1131, - "step": 638 - }, - { - "epoch": 0.2, - "learning_rate": 9.2764047567491e-05, - "loss": 1.1392, - "step": 639 - }, - { - "epoch": 0.2, - "learning_rate": 9.273798851722042e-05, - "loss": 1.1119, - "step": 640 - }, - { - "epoch": 0.2, - "learning_rate": 9.27118863015429e-05, - "loss": 1.0885, - "step": 641 - }, - { - "epoch": 0.2, - "learning_rate": 9.268574094682172e-05, - "loss": 1.0987, - "step": 642 - }, - { - "epoch": 0.2, - "learning_rate": 9.265955247946369e-05, - "loss": 1.1357, - "step": 643 - }, - { - "epoch": 0.2, - "learning_rate": 9.263332092591919e-05, - "loss": 1.1357, - "step": 644 - }, - { - "epoch": 0.2, - "learning_rate": 9.260704631268211e-05, - "loss": 1.0362, - "step": 645 - }, - { - "epoch": 0.2, - "learning_rate": 9.258072866628986e-05, - "loss": 1.1042, - "step": 646 - }, - { - "epoch": 0.2, - "learning_rate": 9.255436801332325e-05, - "loss": 1.1302, - "step": 647 - }, - { - "epoch": 0.2, - "learning_rate": 9.252796438040659e-05, - "loss": 1.1947, - "step": 648 - }, - { - "epoch": 0.2, - "learning_rate": 9.250151779420755e-05, - "loss": 1.0233, - "step": 649 - }, - { - "epoch": 0.2, - "learning_rate": 9.247502828143722e-05, - "loss": 1.1773, - "step": 650 - }, - { - "epoch": 0.2, - "learning_rate": 9.244849586885004e-05, - "loss": 1.1171, - "step": 651 - }, - { - "epoch": 0.2, - "learning_rate": 9.242192058324377e-05, - "loss": 1.1412, - "step": 652 - }, - { - "epoch": 0.2, - "learning_rate": 9.239530245145944e-05, - "loss": 1.0613, - "step": 653 - }, - { - "epoch": 0.2, - "learning_rate": 9.236864150038141e-05, - "loss": 1.2004, - "step": 654 - }, - { - "epoch": 0.2, - "learning_rate": 9.234193775693726e-05, - "loss": 1.0896, - "step": 655 - }, - { - "epoch": 0.2, - "learning_rate": 9.231519124809782e-05, - "loss": 1.1509, - "step": 656 - }, - { - "epoch": 0.2, - "learning_rate": 9.228840200087707e-05, - "loss": 1.0956, - "step": 657 - }, - { - "epoch": 0.2, - "learning_rate": 9.226157004233216e-05, - "loss": 1.0598, - "step": 658 - }, - { - "epoch": 0.2, - "learning_rate": 9.223469539956341e-05, - "loss": 1.2002, - "step": 659 - }, - { - "epoch": 0.2, - "learning_rate": 9.220777809971421e-05, - "loss": 1.0367, - "step": 660 - }, - { - "epoch": 0.21, - "learning_rate": 9.218081816997109e-05, - "loss": 1.0436, - "step": 661 - }, - { - "epoch": 0.21, - "learning_rate": 9.21538156375636e-05, - "loss": 1.1255, - "step": 662 - }, - { - "epoch": 0.21, - "learning_rate": 9.212677052976428e-05, - "loss": 1.2042, - "step": 663 - }, - { - "epoch": 0.21, - "learning_rate": 9.209968287388877e-05, - "loss": 1.1677, - "step": 664 - }, - { - "epoch": 0.21, - "learning_rate": 9.207255269729559e-05, - "loss": 1.1331, - "step": 665 - }, - { - "epoch": 0.21, - "learning_rate": 9.204538002738625e-05, - "loss": 1.1212, - "step": 666 - }, - { - "epoch": 0.21, - "learning_rate": 9.201816489160517e-05, - "loss": 1.0892, - "step": 667 - }, - { - "epoch": 0.21, - "learning_rate": 9.199090731743966e-05, - "loss": 1.224, - "step": 668 - }, - { - "epoch": 0.21, - "learning_rate": 9.196360733241992e-05, - "loss": 1.0401, - "step": 669 - }, - { - "epoch": 0.21, - "learning_rate": 9.193626496411893e-05, - "loss": 1.2047, - "step": 670 - }, - { - "epoch": 0.21, - "learning_rate": 9.190888024015251e-05, - "loss": 1.0717, - "step": 671 - }, - { - "epoch": 0.21, - "learning_rate": 9.188145318817928e-05, - "loss": 1.1365, - "step": 672 - }, - { - "epoch": 0.21, - "learning_rate": 9.185398383590055e-05, - "loss": 1.1807, - "step": 673 - }, - { - "epoch": 0.21, - "learning_rate": 9.182647221106043e-05, - "loss": 1.0971, - "step": 674 - }, - { - "epoch": 0.21, - "learning_rate": 9.179891834144564e-05, - "loss": 1.1288, - "step": 675 - }, - { - "epoch": 0.21, - "learning_rate": 9.177132225488566e-05, - "loss": 1.1583, - "step": 676 - }, - { - "epoch": 0.21, - "learning_rate": 9.174368397925254e-05, - "loss": 1.0236, - "step": 677 - }, - { - "epoch": 0.21, - "learning_rate": 9.171600354246095e-05, - "loss": 1.1553, - "step": 678 - }, - { - "epoch": 0.21, - "learning_rate": 9.168828097246817e-05, - "loss": 1.0784, - "step": 679 - }, - { - "epoch": 0.21, - "learning_rate": 9.166051629727404e-05, - "loss": 1.1895, - "step": 680 - }, - { - "epoch": 0.21, - "learning_rate": 9.163270954492089e-05, - "loss": 1.1025, - "step": 681 - }, - { - "epoch": 0.21, - "learning_rate": 9.160486074349354e-05, - "loss": 1.1023, - "step": 682 - }, - { - "epoch": 0.21, - "learning_rate": 9.157696992111935e-05, - "loss": 1.1552, - "step": 683 - }, - { - "epoch": 0.21, - "learning_rate": 9.154903710596804e-05, - "loss": 1.1295, - "step": 684 - }, - { - "epoch": 0.21, - "learning_rate": 9.152106232625179e-05, - "loss": 1.1438, - "step": 685 - }, - { - "epoch": 0.21, - "learning_rate": 9.149304561022516e-05, - "loss": 1.1131, - "step": 686 - }, - { - "epoch": 0.21, - "learning_rate": 9.146498698618506e-05, - "loss": 1.0969, - "step": 687 - }, - { - "epoch": 0.21, - "learning_rate": 9.14368864824707e-05, - "loss": 1.1531, - "step": 688 - }, - { - "epoch": 0.21, - "learning_rate": 9.140874412746366e-05, - "loss": 1.1027, - "step": 689 - }, - { - "epoch": 0.21, - "learning_rate": 9.13805599495877e-05, - "loss": 1.0608, - "step": 690 - }, - { - "epoch": 0.21, - "learning_rate": 9.135233397730888e-05, - "loss": 1.1659, - "step": 691 - }, - { - "epoch": 0.21, - "learning_rate": 9.132406623913546e-05, - "loss": 1.1372, - "step": 692 - }, - { - "epoch": 0.21, - "learning_rate": 9.129575676361789e-05, - "loss": 1.0994, - "step": 693 - }, - { - "epoch": 0.22, - "learning_rate": 9.126740557934874e-05, - "loss": 0.9553, - "step": 694 - }, - { - "epoch": 0.22, - "learning_rate": 9.123901271496276e-05, - "loss": 1.1595, - "step": 695 - }, - { - "epoch": 0.22, - "learning_rate": 9.121057819913677e-05, - "loss": 1.2654, - "step": 696 - }, - { - "epoch": 0.22, - "learning_rate": 9.118210206058962e-05, - "loss": 1.0364, - "step": 697 - }, - { - "epoch": 0.22, - "learning_rate": 9.115358432808226e-05, - "loss": 1.095, - "step": 698 - }, - { - "epoch": 0.22, - "learning_rate": 9.112502503041763e-05, - "loss": 1.2365, - "step": 699 - }, - { - "epoch": 0.22, - "learning_rate": 9.109642419644064e-05, - "loss": 1.0246, - "step": 700 - }, - { - "epoch": 0.22, - "learning_rate": 9.106778185503816e-05, - "loss": 1.2762, - "step": 701 - }, - { - "epoch": 0.22, - "learning_rate": 9.103909803513899e-05, - "loss": 1.0667, - "step": 702 - }, - { - "epoch": 0.22, - "learning_rate": 9.101037276571377e-05, - "loss": 1.1419, - "step": 703 - }, - { - "epoch": 0.22, - "learning_rate": 9.098160607577511e-05, - "loss": 1.1169, - "step": 704 - }, - { - "epoch": 0.22, - "learning_rate": 9.095279799437737e-05, - "loss": 1.0742, - "step": 705 - }, - { - "epoch": 0.22, - "learning_rate": 9.092394855061671e-05, - "loss": 1.1304, - "step": 706 - }, - { - "epoch": 0.22, - "learning_rate": 9.089505777363114e-05, - "loss": 1.1737, - "step": 707 - }, - { - "epoch": 0.22, - "learning_rate": 9.086612569260032e-05, - "loss": 1.0898, - "step": 708 - }, - { - "epoch": 0.22, - "learning_rate": 9.083715233674573e-05, - "loss": 1.2055, - "step": 709 - }, - { - "epoch": 0.22, - "learning_rate": 9.080813773533044e-05, - "loss": 1.0452, - "step": 710 - }, - { - "epoch": 0.22, - "learning_rate": 9.077908191765925e-05, - "loss": 1.0942, - "step": 711 - }, - { - "epoch": 0.22, - "learning_rate": 9.074998491307857e-05, - "loss": 1.1594, - "step": 712 - }, - { - "epoch": 0.22, - "learning_rate": 9.072084675097638e-05, - "loss": 1.0725, - "step": 713 - }, - { - "epoch": 0.22, - "learning_rate": 9.069166746078224e-05, - "loss": 1.0598, - "step": 714 - }, - { - "epoch": 0.22, - "learning_rate": 9.06624470719673e-05, - "loss": 1.1818, - "step": 715 - }, - { - "epoch": 0.22, - "learning_rate": 9.063318561404415e-05, - "loss": 1.1098, - "step": 716 - }, - { - "epoch": 0.22, - "learning_rate": 9.06038831165669e-05, - "loss": 1.1605, - "step": 717 - }, - { - "epoch": 0.22, - "learning_rate": 9.057453960913108e-05, - "loss": 1.0036, - "step": 718 - }, - { - "epoch": 0.22, - "learning_rate": 9.054515512137367e-05, - "loss": 1.2363, - "step": 719 - }, - { - "epoch": 0.22, - "learning_rate": 9.051572968297304e-05, - "loss": 1.0783, - "step": 720 - }, - { - "epoch": 0.22, - "learning_rate": 9.048626332364891e-05, - "loss": 1.0675, - "step": 721 - }, - { - "epoch": 0.22, - "learning_rate": 9.04567560731623e-05, - "loss": 1.1147, - "step": 722 - }, - { - "epoch": 0.22, - "learning_rate": 9.04272079613156e-05, - "loss": 1.1861, - "step": 723 - }, - { - "epoch": 0.22, - "learning_rate": 9.039761901795241e-05, - "loss": 1.0968, - "step": 724 - }, - { - "epoch": 0.22, - "learning_rate": 9.036798927295758e-05, - "loss": 1.0528, - "step": 725 - }, - { - "epoch": 0.23, - "learning_rate": 9.033831875625718e-05, - "loss": 0.9985, - "step": 726 - }, - { - "epoch": 0.23, - "learning_rate": 9.030860749781848e-05, - "loss": 1.1879, - "step": 727 - }, - { - "epoch": 0.23, - "learning_rate": 9.027885552764985e-05, - "loss": 1.0973, - "step": 728 - }, - { - "epoch": 0.23, - "learning_rate": 9.024906287580083e-05, - "loss": 1.0962, - "step": 729 - }, - { - "epoch": 0.23, - "learning_rate": 9.021922957236201e-05, - "loss": 1.1603, - "step": 730 - }, - { - "epoch": 0.23, - "learning_rate": 9.018935564746509e-05, - "loss": 1.098, - "step": 731 - }, - { - "epoch": 0.23, - "learning_rate": 9.01594411312827e-05, - "loss": 1.182, - "step": 732 - }, - { - "epoch": 0.23, - "learning_rate": 9.012948605402857e-05, - "loss": 1.1617, - "step": 733 - }, - { - "epoch": 0.23, - "learning_rate": 9.009949044595733e-05, - "loss": 1.0745, - "step": 734 - }, - { - "epoch": 0.23, - "learning_rate": 9.006945433736461e-05, - "loss": 1.2751, - "step": 735 - }, - { - "epoch": 0.23, - "learning_rate": 9.003937775858686e-05, - "loss": 1.1439, - "step": 736 - }, - { - "epoch": 0.23, - "learning_rate": 9.000926074000149e-05, - "loss": 1.0863, - "step": 737 - }, - { - "epoch": 0.23, - "learning_rate": 8.99791033120267e-05, - "loss": 1.0703, - "step": 738 - }, - { - "epoch": 0.23, - "learning_rate": 8.994890550512151e-05, - "loss": 1.0206, - "step": 739 - }, - { - "epoch": 0.23, - "learning_rate": 8.991866734978576e-05, - "loss": 1.2499, - "step": 740 - }, - { - "epoch": 0.23, - "learning_rate": 8.988838887655997e-05, - "loss": 1.1178, - "step": 741 - }, - { - "epoch": 0.23, - "learning_rate": 8.985807011602546e-05, - "loss": 1.1206, - "step": 742 - }, - { - "epoch": 0.23, - "learning_rate": 8.98277110988042e-05, - "loss": 1.0889, - "step": 743 - }, - { - "epoch": 0.23, - "learning_rate": 8.979731185555881e-05, - "loss": 1.1099, - "step": 744 - }, - { - "epoch": 0.23, - "learning_rate": 8.976687241699258e-05, - "loss": 1.1173, - "step": 745 - }, - { - "epoch": 0.23, - "learning_rate": 8.973639281384936e-05, - "loss": 1.0818, - "step": 746 - }, - { - "epoch": 0.23, - "learning_rate": 8.970587307691356e-05, - "loss": 1.0979, - "step": 747 - }, - { - "epoch": 0.23, - "learning_rate": 8.967531323701015e-05, - "loss": 1.2023, - "step": 748 - }, - { - "epoch": 0.23, - "learning_rate": 8.96447133250046e-05, - "loss": 1.1245, - "step": 749 - }, - { - "epoch": 0.23, - "learning_rate": 8.961407337180285e-05, - "loss": 1.0253, - "step": 750 - }, - { - "epoch": 0.23, - "learning_rate": 8.958339340835128e-05, - "loss": 1.0924, - "step": 751 - }, - { - "epoch": 0.23, - "learning_rate": 8.955267346563668e-05, - "loss": 1.2144, - "step": 752 - }, - { - "epoch": 0.23, - "learning_rate": 8.952191357468621e-05, - "loss": 1.0286, - "step": 753 - }, - { - "epoch": 0.23, - "learning_rate": 8.94911137665674e-05, - "loss": 1.0278, - "step": 754 - }, - { - "epoch": 0.23, - "learning_rate": 8.946027407238809e-05, - "loss": 1.1479, - "step": 755 - }, - { - "epoch": 0.23, - "learning_rate": 8.942939452329637e-05, - "loss": 1.1385, - "step": 756 - }, - { - "epoch": 0.23, - "learning_rate": 8.939847515048065e-05, - "loss": 1.014, - "step": 757 - }, - { - "epoch": 0.24, - "learning_rate": 8.936751598516951e-05, - "loss": 1.1821, - "step": 758 - }, - { - "epoch": 0.24, - "learning_rate": 8.933651705863171e-05, - "loss": 1.0394, - "step": 759 - }, - { - "epoch": 0.24, - "learning_rate": 8.930547840217622e-05, - "loss": 1.1014, - "step": 760 - }, - { - "epoch": 0.24, - "learning_rate": 8.92744000471521e-05, - "loss": 1.16, - "step": 761 - }, - { - "epoch": 0.24, - "learning_rate": 8.924328202494855e-05, - "loss": 1.1081, - "step": 762 - }, - { - "epoch": 0.24, - "learning_rate": 8.921212436699476e-05, - "loss": 1.1887, - "step": 763 - }, - { - "epoch": 0.24, - "learning_rate": 8.918092710476e-05, - "loss": 1.0995, - "step": 764 - }, - { - "epoch": 0.24, - "learning_rate": 8.914969026975354e-05, - "loss": 1.1567, - "step": 765 - }, - { - "epoch": 0.24, - "learning_rate": 8.911841389352459e-05, - "loss": 1.1359, - "step": 766 - }, - { - "epoch": 0.24, - "learning_rate": 8.908709800766238e-05, - "loss": 1.167, - "step": 767 - }, - { - "epoch": 0.24, - "learning_rate": 8.905574264379591e-05, - "loss": 1.1247, - "step": 768 - }, - { - "epoch": 0.24, - "learning_rate": 8.902434783359417e-05, - "loss": 1.1468, - "step": 769 - }, - { - "epoch": 0.24, - "learning_rate": 8.899291360876593e-05, - "loss": 1.1, - "step": 770 - }, - { - "epoch": 0.24, - "learning_rate": 8.896144000105977e-05, - "loss": 1.0481, - "step": 771 - }, - { - "epoch": 0.24, - "learning_rate": 8.892992704226411e-05, - "loss": 1.1552, - "step": 772 - }, - { - "epoch": 0.24, - "learning_rate": 8.889837476420703e-05, - "loss": 1.0867, - "step": 773 - }, - { - "epoch": 0.24, - "learning_rate": 8.886678319875636e-05, - "loss": 1.2016, - "step": 774 - }, - { - "epoch": 0.24, - "learning_rate": 8.883515237781963e-05, - "loss": 1.072, - "step": 775 - }, - { - "epoch": 0.24, - "learning_rate": 8.880348233334397e-05, - "loss": 1.0646, - "step": 776 - }, - { - "epoch": 0.24, - "learning_rate": 8.877177309731617e-05, - "loss": 1.2408, - "step": 777 - }, - { - "epoch": 0.24, - "learning_rate": 8.874002470176259e-05, - "loss": 1.1925, - "step": 778 - }, - { - "epoch": 0.24, - "learning_rate": 8.870823717874913e-05, - "loss": 1.0241, - "step": 779 - }, - { - "epoch": 0.24, - "learning_rate": 8.86764105603812e-05, - "loss": 1.0958, - "step": 780 - }, - { - "epoch": 0.24, - "learning_rate": 8.864454487880377e-05, - "loss": 1.136, - "step": 781 - }, - { - "epoch": 0.24, - "learning_rate": 8.861264016620116e-05, - "loss": 1.1532, - "step": 782 - }, - { - "epoch": 0.24, - "learning_rate": 8.858069645479715e-05, - "loss": 1.0573, - "step": 783 - }, - { - "epoch": 0.24, - "learning_rate": 8.854871377685496e-05, - "loss": 1.0491, - "step": 784 - }, - { - "epoch": 0.24, - "learning_rate": 8.851669216467709e-05, - "loss": 1.1522, - "step": 785 - }, - { - "epoch": 0.24, - "learning_rate": 8.848463165060538e-05, - "loss": 1.0855, - "step": 786 - }, - { - "epoch": 0.24, - "learning_rate": 8.845253226702104e-05, - "loss": 1.1046, - "step": 787 - }, - { - "epoch": 0.24, - "learning_rate": 8.842039404634443e-05, - "loss": 1.1626, - "step": 788 - }, - { - "epoch": 0.24, - "learning_rate": 8.83882170210352e-05, - "loss": 1.0896, - "step": 789 - }, - { - "epoch": 0.25, - "learning_rate": 8.835600122359216e-05, - "loss": 1.1043, - "step": 790 - }, - { - "epoch": 0.25, - "learning_rate": 8.83237466865533e-05, - "loss": 1.1077, - "step": 791 - }, - { - "epoch": 0.25, - "learning_rate": 8.829145344249574e-05, - "loss": 1.1916, - "step": 792 - }, - { - "epoch": 0.25, - "learning_rate": 8.825912152403568e-05, - "loss": 0.9196, - "step": 793 - }, - { - "epoch": 0.25, - "learning_rate": 8.822675096382837e-05, - "loss": 1.1574, - "step": 794 - }, - { - "epoch": 0.25, - "learning_rate": 8.819434179456814e-05, - "loss": 1.1273, - "step": 795 - }, - { - "epoch": 0.25, - "learning_rate": 8.816189404898824e-05, - "loss": 1.2279, - "step": 796 - }, - { - "epoch": 0.25, - "learning_rate": 8.812940775986097e-05, - "loss": 1.0474, - "step": 797 - }, - { - "epoch": 0.25, - "learning_rate": 8.809688295999747e-05, - "loss": 1.1645, - "step": 798 - }, - { - "epoch": 0.25, - "learning_rate": 8.806431968224784e-05, - "loss": 1.0177, - "step": 799 - }, - { - "epoch": 0.25, - "learning_rate": 8.8031717959501e-05, - "loss": 1.1987, - "step": 800 - }, - { - "epoch": 0.25, - "learning_rate": 8.799907782468473e-05, - "loss": 1.188, - "step": 801 - }, - { - "epoch": 0.25, - "learning_rate": 8.796639931076562e-05, - "loss": 1.0571, - "step": 802 - }, - { - "epoch": 0.25, - "learning_rate": 8.793368245074896e-05, - "loss": 1.1295, - "step": 803 - }, - { - "epoch": 0.25, - "learning_rate": 8.790092727767882e-05, - "loss": 1.0628, - "step": 804 - }, - { - "epoch": 0.25, - "learning_rate": 8.786813382463796e-05, - "loss": 1.1379, - "step": 805 - }, - { - "epoch": 0.25, - "learning_rate": 8.78353021247478e-05, - "loss": 1.1849, - "step": 806 - }, - { - "epoch": 0.25, - "learning_rate": 8.780243221116837e-05, - "loss": 1.1441, - "step": 807 - }, - { - "epoch": 0.25, - "learning_rate": 8.776952411709833e-05, - "loss": 1.0448, - "step": 808 - }, - { - "epoch": 0.25, - "learning_rate": 8.773657787577489e-05, - "loss": 1.1393, - "step": 809 - }, - { - "epoch": 0.25, - "learning_rate": 8.770359352047375e-05, - "loss": 1.131, - "step": 810 - }, - { - "epoch": 0.25, - "learning_rate": 8.767057108450918e-05, - "loss": 1.1514, - "step": 811 - }, - { - "epoch": 0.25, - "learning_rate": 8.763751060123384e-05, - "loss": 1.1161, - "step": 812 - }, - { - "epoch": 0.25, - "learning_rate": 8.760441210403885e-05, - "loss": 1.0312, - "step": 813 - }, - { - "epoch": 0.25, - "learning_rate": 8.757127562635374e-05, - "loss": 1.1686, - "step": 814 - }, - { - "epoch": 0.25, - "learning_rate": 8.753810120164639e-05, - "loss": 1.0437, - "step": 815 - }, - { - "epoch": 0.25, - "learning_rate": 8.750488886342296e-05, - "loss": 1.0706, - "step": 816 - }, - { - "epoch": 0.25, - "learning_rate": 8.747163864522796e-05, - "loss": 1.2581, - "step": 817 - }, - { - "epoch": 0.25, - "learning_rate": 8.743835058064416e-05, - "loss": 1.0722, - "step": 818 - }, - { - "epoch": 0.25, - "learning_rate": 8.74050247032925e-05, - "loss": 1.0332, - "step": 819 - }, - { - "epoch": 0.25, - "learning_rate": 8.737166104683218e-05, - "loss": 1.1181, - "step": 820 - }, - { - "epoch": 0.25, - "learning_rate": 8.733825964496052e-05, - "loss": 1.1291, - "step": 821 - }, - { - "epoch": 0.26, - "learning_rate": 8.730482053141293e-05, - "loss": 1.1923, - "step": 822 - }, - { - "epoch": 0.26, - "learning_rate": 8.727134373996298e-05, - "loss": 1.0391, - "step": 823 - }, - { - "epoch": 0.26, - "learning_rate": 8.723782930442224e-05, - "loss": 1.0822, - "step": 824 - }, - { - "epoch": 0.26, - "learning_rate": 8.720427725864035e-05, - "loss": 1.1918, - "step": 825 - }, - { - "epoch": 0.26, - "learning_rate": 8.717068763650486e-05, - "loss": 1.0085, - "step": 826 - }, - { - "epoch": 0.26, - "learning_rate": 8.713706047194135e-05, - "loss": 1.2013, - "step": 827 - }, - { - "epoch": 0.26, - "learning_rate": 8.71033957989133e-05, - "loss": 0.9375, - "step": 828 - }, - { - "epoch": 0.26, - "learning_rate": 8.706969365142202e-05, - "loss": 1.1904, - "step": 829 - }, - { - "epoch": 0.26, - "learning_rate": 8.703595406350673e-05, - "loss": 1.1946, - "step": 830 - }, - { - "epoch": 0.26, - "learning_rate": 8.700217706924444e-05, - "loss": 1.1235, - "step": 831 - }, - { - "epoch": 0.26, - "learning_rate": 8.696836270274997e-05, - "loss": 1.0613, - "step": 832 - }, - { - "epoch": 0.26, - "learning_rate": 8.693451099817583e-05, - "loss": 1.0645, - "step": 833 - }, - { - "epoch": 0.26, - "learning_rate": 8.690062198971227e-05, - "loss": 1.1548, - "step": 834 - }, - { - "epoch": 0.26, - "learning_rate": 8.686669571158724e-05, - "loss": 1.2281, - "step": 835 - }, - { - "epoch": 0.26, - "learning_rate": 8.68327321980663e-05, - "loss": 1.0373, - "step": 836 - }, - { - "epoch": 0.26, - "learning_rate": 8.679873148345262e-05, - "loss": 1.102, - "step": 837 - }, - { - "epoch": 0.26, - "learning_rate": 8.676469360208699e-05, - "loss": 1.1676, - "step": 838 - }, - { - "epoch": 0.26, - "learning_rate": 8.673061858834765e-05, - "loss": 1.0516, - "step": 839 - }, - { - "epoch": 0.26, - "learning_rate": 8.669650647665043e-05, - "loss": 1.087, - "step": 840 - }, - { - "epoch": 0.26, - "learning_rate": 8.666235730144858e-05, - "loss": 0.9992, - "step": 841 - }, - { - "epoch": 0.26, - "learning_rate": 8.66281710972328e-05, - "loss": 1.1037, - "step": 842 - }, - { - "epoch": 0.26, - "learning_rate": 8.65939478985312e-05, - "loss": 1.2136, - "step": 843 - }, - { - "epoch": 0.26, - "learning_rate": 8.655968773990921e-05, - "loss": 1.0428, - "step": 844 - }, - { - "epoch": 0.26, - "learning_rate": 8.652539065596965e-05, - "loss": 1.1528, - "step": 845 - }, - { - "epoch": 0.26, - "learning_rate": 8.64910566813526e-05, - "loss": 1.1443, - "step": 846 - }, - { - "epoch": 0.26, - "learning_rate": 8.645668585073539e-05, - "loss": 1.0706, - "step": 847 - }, - { - "epoch": 0.26, - "learning_rate": 8.64222781988326e-05, - "loss": 1.1008, - "step": 848 - }, - { - "epoch": 0.26, - "learning_rate": 8.638783376039601e-05, - "loss": 1.1081, - "step": 849 - }, - { - "epoch": 0.26, - "learning_rate": 8.63533525702145e-05, - "loss": 1.1785, - "step": 850 - }, - { - "epoch": 0.26, - "learning_rate": 8.631883466311412e-05, - "loss": 1.1593, - "step": 851 - }, - { - "epoch": 0.26, - "learning_rate": 8.628428007395799e-05, - "loss": 0.9908, - "step": 852 - }, - { - "epoch": 0.26, - "learning_rate": 8.624968883764626e-05, - "loss": 1.189, - "step": 853 - }, - { - "epoch": 0.26, - "learning_rate": 8.621506098911612e-05, - "loss": 1.137, - "step": 854 - }, - { - "epoch": 0.27, - "learning_rate": 8.618039656334174e-05, - "loss": 1.0051, - "step": 855 - }, - { - "epoch": 0.27, - "learning_rate": 8.61456955953342e-05, - "loss": 1.1229, - "step": 856 - }, - { - "epoch": 0.27, - "learning_rate": 8.611095812014153e-05, - "loss": 1.1238, - "step": 857 - }, - { - "epoch": 0.27, - "learning_rate": 8.60761841728486e-05, - "loss": 0.976, - "step": 858 - }, - { - "epoch": 0.27, - "learning_rate": 8.604137378857713e-05, - "loss": 1.1828, - "step": 859 - }, - { - "epoch": 0.27, - "learning_rate": 8.600652700248563e-05, - "loss": 1.1125, - "step": 860 - }, - { - "epoch": 0.27, - "learning_rate": 8.597164384976939e-05, - "loss": 1.0253, - "step": 861 - }, - { - "epoch": 0.27, - "learning_rate": 8.593672436566044e-05, - "loss": 1.0923, - "step": 862 - }, - { - "epoch": 0.27, - "learning_rate": 8.590176858542748e-05, - "loss": 1.1958, - "step": 863 - }, - { - "epoch": 0.27, - "learning_rate": 8.586677654437587e-05, - "loss": 1.0795, - "step": 864 - }, - { - "epoch": 0.27, - "learning_rate": 8.583174827784763e-05, - "loss": 1.1218, - "step": 865 - }, - { - "epoch": 0.27, - "learning_rate": 8.57966838212213e-05, - "loss": 1.1635, - "step": 866 - }, - { - "epoch": 0.27, - "learning_rate": 8.576158320991205e-05, - "loss": 1.1573, - "step": 867 - }, - { - "epoch": 0.27, - "learning_rate": 8.572644647937151e-05, - "loss": 1.0275, - "step": 868 - }, - { - "epoch": 0.27, - "learning_rate": 8.569127366508783e-05, - "loss": 1.1243, - "step": 869 - }, - { - "epoch": 0.27, - "learning_rate": 8.565606480258556e-05, - "loss": 1.1535, - "step": 870 - }, - { - "epoch": 0.27, - "learning_rate": 8.562081992742567e-05, - "loss": 1.1751, - "step": 871 - }, - { - "epoch": 0.27, - "learning_rate": 8.558553907520556e-05, - "loss": 1.1067, - "step": 872 - }, - { - "epoch": 0.27, - "learning_rate": 8.55502222815589e-05, - "loss": 1.0938, - "step": 873 - }, - { - "epoch": 0.27, - "learning_rate": 8.55148695821557e-05, - "loss": 1.1152, - "step": 874 - }, - { - "epoch": 0.27, - "learning_rate": 8.547948101270216e-05, - "loss": 1.0378, - "step": 875 - }, - { - "epoch": 0.27, - "learning_rate": 8.544405660894082e-05, - "loss": 1.283, - "step": 876 - }, - { - "epoch": 0.27, - "learning_rate": 8.540859640665036e-05, - "loss": 1.0726, - "step": 877 - }, - { - "epoch": 0.27, - "learning_rate": 8.537310044164561e-05, - "loss": 1.121, - "step": 878 - }, - { - "epoch": 0.27, - "learning_rate": 8.533756874977751e-05, - "loss": 1.1803, - "step": 879 - }, - { - "epoch": 0.27, - "learning_rate": 8.530200136693311e-05, - "loss": 1.0762, - "step": 880 - }, - { - "epoch": 0.27, - "learning_rate": 8.526639832903551e-05, - "loss": 1.1034, - "step": 881 - }, - { - "epoch": 0.27, - "learning_rate": 8.52307596720438e-05, - "loss": 1.1592, - "step": 882 - }, - { - "epoch": 0.27, - "learning_rate": 8.519508543195304e-05, - "loss": 1.0896, - "step": 883 - }, - { - "epoch": 0.27, - "learning_rate": 8.515937564479428e-05, - "loss": 1.0594, - "step": 884 - }, - { - "epoch": 0.27, - "learning_rate": 8.512363034663442e-05, - "loss": 1.1297, - "step": 885 - }, - { - "epoch": 0.27, - "learning_rate": 8.508784957357624e-05, - "loss": 1.1011, - "step": 886 - }, - { - "epoch": 0.28, - "learning_rate": 8.505203336175835e-05, - "loss": 1.1849, - "step": 887 - }, - { - "epoch": 0.28, - "learning_rate": 8.501618174735518e-05, - "loss": 1.0362, - "step": 888 - }, - { - "epoch": 0.28, - "learning_rate": 8.498029476657685e-05, - "loss": 1.0878, - "step": 889 - }, - { - "epoch": 0.28, - "learning_rate": 8.494437245566926e-05, - "loss": 1.147, - "step": 890 - }, - { - "epoch": 0.28, - "learning_rate": 8.4908414850914e-05, - "loss": 1.0574, - "step": 891 - }, - { - "epoch": 0.28, - "learning_rate": 8.487242198862827e-05, - "loss": 1.0075, - "step": 892 - }, - { - "epoch": 0.28, - "learning_rate": 8.483639390516487e-05, - "loss": 1.107, - "step": 893 - }, - { - "epoch": 0.28, - "learning_rate": 8.480033063691225e-05, - "loss": 1.0307, - "step": 894 - }, - { - "epoch": 0.28, - "learning_rate": 8.47642322202943e-05, - "loss": 1.1288, - "step": 895 - }, - { - "epoch": 0.28, - "learning_rate": 8.472809869177045e-05, - "loss": 1.0997, - "step": 896 - }, - { - "epoch": 0.28, - "learning_rate": 8.469193008783561e-05, - "loss": 1.1288, - "step": 897 - }, - { - "epoch": 0.28, - "learning_rate": 8.465572644502011e-05, - "loss": 1.1114, - "step": 898 - }, - { - "epoch": 0.28, - "learning_rate": 8.461948779988966e-05, - "loss": 1.0861, - "step": 899 - }, - { - "epoch": 0.28, - "learning_rate": 8.458321418904531e-05, - "loss": 1.115, - "step": 900 - }, - { - "epoch": 0.28, - "learning_rate": 8.454690564912347e-05, - "loss": 1.0512, - "step": 901 - }, - { - "epoch": 0.28, - "learning_rate": 8.451056221679574e-05, - "loss": 1.228, - "step": 902 - }, - { - "epoch": 0.28, - "learning_rate": 8.447418392876908e-05, - "loss": 1.1387, - "step": 903 - }, - { - "epoch": 0.28, - "learning_rate": 8.443777082178557e-05, - "loss": 1.1208, - "step": 904 - }, - { - "epoch": 0.28, - "learning_rate": 8.440132293262246e-05, - "loss": 1.1185, - "step": 905 - }, - { - "epoch": 0.28, - "learning_rate": 8.43648402980922e-05, - "loss": 1.1102, - "step": 906 - }, - { - "epoch": 0.28, - "learning_rate": 8.432832295504224e-05, - "loss": 1.1003, - "step": 907 - }, - { - "epoch": 0.28, - "learning_rate": 8.429177094035514e-05, - "loss": 1.1086, - "step": 908 - }, - { - "epoch": 0.28, - "learning_rate": 8.425518429094848e-05, - "loss": 1.066, - "step": 909 - }, - { - "epoch": 0.28, - "learning_rate": 8.42185630437748e-05, - "loss": 1.1204, - "step": 910 - }, - { - "epoch": 0.28, - "learning_rate": 8.41819072358216e-05, - "loss": 1.1102, - "step": 911 - }, - { - "epoch": 0.28, - "learning_rate": 8.414521690411126e-05, - "loss": 1.0707, - "step": 912 - }, - { - "epoch": 0.28, - "learning_rate": 8.410849208570109e-05, - "loss": 1.0746, - "step": 913 - }, - { - "epoch": 0.28, - "learning_rate": 8.407173281768312e-05, - "loss": 1.1544, - "step": 914 - }, - { - "epoch": 0.28, - "learning_rate": 8.403493913718431e-05, - "loss": 1.1079, - "step": 915 - }, - { - "epoch": 0.28, - "learning_rate": 8.399811108136628e-05, - "loss": 1.1104, - "step": 916 - }, - { - "epoch": 0.28, - "learning_rate": 8.396124868742541e-05, - "loss": 1.0561, - "step": 917 - }, - { - "epoch": 0.28, - "learning_rate": 8.392435199259273e-05, - "loss": 1.1431, - "step": 918 - }, - { - "epoch": 0.29, - "learning_rate": 8.388742103413397e-05, - "loss": 1.0406, - "step": 919 - }, - { - "epoch": 0.29, - "learning_rate": 8.38504558493494e-05, - "loss": 1.112, - "step": 920 - }, - { - "epoch": 0.29, - "learning_rate": 8.38134564755739e-05, - "loss": 1.0406, - "step": 921 - }, - { - "epoch": 0.29, - "learning_rate": 8.37764229501769e-05, - "loss": 1.1357, - "step": 922 - }, - { - "epoch": 0.29, - "learning_rate": 8.373935531056223e-05, - "loss": 1.1402, - "step": 923 - }, - { - "epoch": 0.29, - "learning_rate": 8.37022535941683e-05, - "loss": 1.1371, - "step": 924 - }, - { - "epoch": 0.29, - "learning_rate": 8.366511783846784e-05, - "loss": 1.0988, - "step": 925 - }, - { - "epoch": 0.29, - "learning_rate": 8.362794808096804e-05, - "loss": 0.9106, - "step": 926 - }, - { - "epoch": 0.29, - "learning_rate": 8.359074435921033e-05, - "loss": 1.1245, - "step": 927 - }, - { - "epoch": 0.29, - "learning_rate": 8.355350671077053e-05, - "loss": 1.0741, - "step": 928 - }, - { - "epoch": 0.29, - "learning_rate": 8.35162351732587e-05, - "loss": 1.0607, - "step": 929 - }, - { - "epoch": 0.29, - "learning_rate": 8.347892978431915e-05, - "loss": 1.0459, - "step": 930 - }, - { - "epoch": 0.29, - "learning_rate": 8.344159058163031e-05, - "loss": 1.1376, - "step": 931 - }, - { - "epoch": 0.29, - "learning_rate": 8.340421760290484e-05, - "loss": 1.2477, - "step": 932 - }, - { - "epoch": 0.29, - "learning_rate": 8.336681088588948e-05, - "loss": 1.0221, - "step": 933 - }, - { - "epoch": 0.29, - "learning_rate": 8.332937046836503e-05, - "loss": 1.1031, - "step": 934 - }, - { - "epoch": 0.29, - "learning_rate": 8.329189638814637e-05, - "loss": 1.1321, - "step": 935 - }, - { - "epoch": 0.29, - "learning_rate": 8.325438868308233e-05, - "loss": 1.0823, - "step": 936 - }, - { - "epoch": 0.29, - "learning_rate": 8.321684739105573e-05, - "loss": 1.1105, - "step": 937 - }, - { - "epoch": 0.29, - "learning_rate": 8.317927254998334e-05, - "loss": 1.0459, - "step": 938 - }, - { - "epoch": 0.29, - "learning_rate": 8.314166419781575e-05, - "loss": 1.0896, - "step": 939 - }, - { - "epoch": 0.29, - "learning_rate": 8.310402237253743e-05, - "loss": 1.1653, - "step": 940 - }, - { - "epoch": 0.29, - "learning_rate": 8.306634711216668e-05, - "loss": 1.0968, - "step": 941 - }, - { - "epoch": 0.29, - "learning_rate": 8.302863845475553e-05, - "loss": 1.0222, - "step": 942 - }, - { - "epoch": 0.29, - "learning_rate": 8.299089643838975e-05, - "loss": 1.1577, - "step": 943 - }, - { - "epoch": 0.29, - "learning_rate": 8.295312110118883e-05, - "loss": 1.1644, - "step": 944 - }, - { - "epoch": 0.29, - "learning_rate": 8.291531248130589e-05, - "loss": 1.1609, - "step": 945 - }, - { - "epoch": 0.29, - "learning_rate": 8.287747061692767e-05, - "loss": 1.0086, - "step": 946 - }, - { - "epoch": 0.29, - "learning_rate": 8.283959554627447e-05, - "loss": 1.1276, - "step": 947 - }, - { - "epoch": 0.29, - "learning_rate": 8.280168730760018e-05, - "loss": 1.2107, - "step": 948 - }, - { - "epoch": 0.29, - "learning_rate": 8.276374593919212e-05, - "loss": 1.0448, - "step": 949 - }, - { - "epoch": 0.29, - "learning_rate": 8.272577147937113e-05, - "loss": 1.073, - "step": 950 - }, - { - "epoch": 0.3, - "learning_rate": 8.268776396649144e-05, - "loss": 1.1336, - "step": 951 - }, - { - "epoch": 0.3, - "learning_rate": 8.264972343894066e-05, - "loss": 1.1517, - "step": 952 - }, - { - "epoch": 0.3, - "learning_rate": 8.261164993513978e-05, - "loss": 1.1294, - "step": 953 - }, - { - "epoch": 0.3, - "learning_rate": 8.257354349354305e-05, - "loss": 0.9744, - "step": 954 - }, - { - "epoch": 0.3, - "learning_rate": 8.253540415263805e-05, - "loss": 1.1589, - "step": 955 - }, - { - "epoch": 0.3, - "learning_rate": 8.249723195094549e-05, - "loss": 1.1449, - "step": 956 - }, - { - "epoch": 0.3, - "learning_rate": 8.245902692701939e-05, - "loss": 1.0513, - "step": 957 - }, - { - "epoch": 0.3, - "learning_rate": 8.242078911944682e-05, - "loss": 1.2111, - "step": 958 - }, - { - "epoch": 0.3, - "learning_rate": 8.2382518566848e-05, - "loss": 0.9102, - "step": 959 - }, - { - "epoch": 0.3, - "learning_rate": 8.234421530787623e-05, - "loss": 1.121, - "step": 960 - }, - { - "epoch": 0.3, - "learning_rate": 8.230587938121783e-05, - "loss": 1.1186, - "step": 961 - }, - { - "epoch": 0.3, - "learning_rate": 8.226751082559214e-05, - "loss": 1.0686, - "step": 962 - }, - { - "epoch": 0.3, - "learning_rate": 8.222910967975143e-05, - "loss": 1.1959, - "step": 963 - }, - { - "epoch": 0.3, - "learning_rate": 8.219067598248087e-05, - "loss": 1.0606, - "step": 964 - }, - { - "epoch": 0.3, - "learning_rate": 8.215220977259855e-05, - "loss": 1.2575, - "step": 965 - }, - { - "epoch": 0.3, - "learning_rate": 8.211371108895538e-05, - "loss": 1.0601, - "step": 966 - }, - { - "epoch": 0.3, - "learning_rate": 8.207517997043504e-05, - "loss": 1.0833, - "step": 967 - }, - { - "epoch": 0.3, - "learning_rate": 8.203661645595402e-05, - "loss": 1.1843, - "step": 968 - }, - { - "epoch": 0.3, - "learning_rate": 8.19980205844615e-05, - "loss": 1.0544, - "step": 969 - }, - { - "epoch": 0.3, - "learning_rate": 8.195939239493934e-05, - "loss": 1.006, - "step": 970 - }, - { - "epoch": 0.3, - "learning_rate": 8.192073192640204e-05, - "loss": 1.1431, - "step": 971 - }, - { - "epoch": 0.3, - "learning_rate": 8.188203921789671e-05, - "loss": 1.0281, - "step": 972 - }, - { - "epoch": 0.3, - "learning_rate": 8.184331430850302e-05, - "loss": 1.1452, - "step": 973 - }, - { - "epoch": 0.3, - "learning_rate": 8.180455723733319e-05, - "loss": 1.0788, - "step": 974 - }, - { - "epoch": 0.3, - "learning_rate": 8.176576804353186e-05, - "loss": 1.0615, - "step": 975 - }, - { - "epoch": 0.3, - "learning_rate": 8.172694676627617e-05, - "loss": 1.2017, - "step": 976 - }, - { - "epoch": 0.3, - "learning_rate": 8.168809344477563e-05, - "loss": 1.0662, - "step": 977 - }, - { - "epoch": 0.3, - "learning_rate": 8.164920811827217e-05, - "loss": 1.1445, - "step": 978 - }, - { - "epoch": 0.3, - "learning_rate": 8.161029082603995e-05, - "loss": 1.1072, - "step": 979 - }, - { - "epoch": 0.3, - "learning_rate": 8.15713416073855e-05, - "loss": 1.054, - "step": 980 - }, - { - "epoch": 0.3, - "learning_rate": 8.153236050164757e-05, - "loss": 1.157, - "step": 981 - }, - { - "epoch": 0.3, - "learning_rate": 8.149334754819709e-05, - "loss": 1.0399, - "step": 982 - }, - { - "epoch": 0.3, - "learning_rate": 8.14543027864372e-05, - "loss": 1.1319, - "step": 983 - }, - { - "epoch": 0.31, - "learning_rate": 8.141522625580313e-05, - "loss": 1.2145, - "step": 984 - }, - { - "epoch": 0.31, - "learning_rate": 8.137611799576222e-05, - "loss": 1.082, - "step": 985 - }, - { - "epoch": 0.31, - "learning_rate": 8.133697804581385e-05, - "loss": 1.1077, - "step": 986 - }, - { - "epoch": 0.31, - "learning_rate": 8.129780644548938e-05, - "loss": 1.1003, - "step": 987 - }, - { - "epoch": 0.31, - "learning_rate": 8.125860323435221e-05, - "loss": 1.0505, - "step": 988 - }, - { - "epoch": 0.31, - "learning_rate": 8.121936845199759e-05, - "loss": 1.1413, - "step": 989 - }, - { - "epoch": 0.31, - "learning_rate": 8.118010213805267e-05, - "loss": 1.1008, - "step": 990 - }, - { - "epoch": 0.31, - "learning_rate": 8.114080433217649e-05, - "loss": 1.1511, - "step": 991 - }, - { - "epoch": 0.31, - "learning_rate": 8.110147507405985e-05, - "loss": 0.9599, - "step": 992 - }, - { - "epoch": 0.31, - "learning_rate": 8.106211440342534e-05, - "loss": 1.0308, - "step": 993 - }, - { - "epoch": 0.31, - "learning_rate": 8.102272236002727e-05, - "loss": 1.0401, - "step": 994 - }, - { - "epoch": 0.31, - "learning_rate": 8.098329898365167e-05, - "loss": 1.1921, - "step": 995 - }, - { - "epoch": 0.31, - "learning_rate": 8.094384431411615e-05, - "loss": 1.0706, - "step": 996 - }, - { - "epoch": 0.31, - "learning_rate": 8.090435839126997e-05, - "loss": 1.1422, - "step": 997 - }, - { - "epoch": 0.31, - "learning_rate": 8.086484125499396e-05, - "loss": 1.0488, - "step": 998 - }, - { - "epoch": 0.31, - "learning_rate": 8.082529294520045e-05, - "loss": 1.0454, - "step": 999 - }, - { - "epoch": 0.31, - "learning_rate": 8.078571350183327e-05, - "loss": 1.1437, - "step": 1000 - }, - { - "epoch": 0.31, - "learning_rate": 8.074610296486771e-05, - "loss": 1.1528, - "step": 1001 - }, - { - "epoch": 0.31, - "learning_rate": 8.070646137431042e-05, - "loss": 1.257, - "step": 1002 - }, - { - "epoch": 0.31, - "learning_rate": 8.066678877019945e-05, - "loss": 0.963, - "step": 1003 - }, - { - "epoch": 0.31, - "learning_rate": 8.062708519260418e-05, - "loss": 1.2494, - "step": 1004 - }, - { - "epoch": 0.31, - "learning_rate": 8.058735068162526e-05, - "loss": 1.0667, - "step": 1005 - }, - { - "epoch": 0.31, - "learning_rate": 8.054758527739457e-05, - "loss": 1.0839, - "step": 1006 - }, - { - "epoch": 0.31, - "learning_rate": 8.050778902007521e-05, - "loss": 1.1458, - "step": 1007 - }, - { - "epoch": 0.31, - "learning_rate": 8.046796194986143e-05, - "loss": 1.0515, - "step": 1008 - }, - { - "epoch": 0.31, - "learning_rate": 8.042810410697861e-05, - "loss": 1.0738, - "step": 1009 - }, - { - "epoch": 0.31, - "learning_rate": 8.03882155316832e-05, - "loss": 1.1043, - "step": 1010 - }, - { - "epoch": 0.31, - "learning_rate": 8.034829626426273e-05, - "loss": 1.0272, - "step": 1011 - }, - { - "epoch": 0.31, - "learning_rate": 8.030834634503564e-05, - "loss": 1.2197, - "step": 1012 - }, - { - "epoch": 0.31, - "learning_rate": 8.026836581435142e-05, - "loss": 1.1332, - "step": 1013 - }, - { - "epoch": 0.31, - "learning_rate": 8.022835471259044e-05, - "loss": 1.0495, - "step": 1014 - }, - { - "epoch": 0.31, - "learning_rate": 8.018831308016397e-05, - "loss": 1.0849, - "step": 1015 - }, - { - "epoch": 0.32, - "learning_rate": 8.014824095751405e-05, - "loss": 1.1947, - "step": 1016 - }, - { - "epoch": 0.32, - "learning_rate": 8.010813838511358e-05, - "loss": 1.0744, - "step": 1017 - }, - { - "epoch": 0.32, - "learning_rate": 8.00680054034662e-05, - "loss": 1.1484, - "step": 1018 - }, - { - "epoch": 0.32, - "learning_rate": 8.002784205310625e-05, - "loss": 0.9874, - "step": 1019 - }, - { - "epoch": 0.32, - "learning_rate": 7.998764837459877e-05, - "loss": 1.1228, - "step": 1020 - }, - { - "epoch": 0.32, - "learning_rate": 7.994742440853937e-05, - "loss": 1.1003, - "step": 1021 - }, - { - "epoch": 0.32, - "learning_rate": 7.990717019555432e-05, - "loss": 1.1042, - "step": 1022 - }, - { - "epoch": 0.32, - "learning_rate": 7.986688577630041e-05, - "loss": 1.1093, - "step": 1023 - }, - { - "epoch": 0.32, - "learning_rate": 7.982657119146495e-05, - "loss": 1.1441, - "step": 1024 - }, - { - "epoch": 0.32, - "learning_rate": 7.978622648176569e-05, - "loss": 1.0953, - "step": 1025 - }, - { - "epoch": 0.32, - "learning_rate": 7.974585168795084e-05, - "loss": 1.1735, - "step": 1026 - }, - { - "epoch": 0.32, - "learning_rate": 7.970544685079895e-05, - "loss": 1.0228, - "step": 1027 - }, - { - "epoch": 0.32, - "learning_rate": 7.966501201111895e-05, - "loss": 1.0635, - "step": 1028 - }, - { - "epoch": 0.32, - "learning_rate": 7.962454720975008e-05, - "loss": 1.1527, - "step": 1029 - }, - { - "epoch": 0.32, - "learning_rate": 7.958405248756183e-05, - "loss": 0.9675, - "step": 1030 - }, - { - "epoch": 0.32, - "learning_rate": 7.954352788545388e-05, - "loss": 1.1061, - "step": 1031 - }, - { - "epoch": 0.32, - "learning_rate": 7.950297344435613e-05, - "loss": 0.9936, - "step": 1032 - }, - { - "epoch": 0.32, - "learning_rate": 7.94623892052286e-05, - "loss": 1.2207, - "step": 1033 - }, - { - "epoch": 0.32, - "learning_rate": 7.942177520906142e-05, - "loss": 1.1246, - "step": 1034 - }, - { - "epoch": 0.32, - "learning_rate": 7.938113149687474e-05, - "loss": 1.1313, - "step": 1035 - }, - { - "epoch": 0.32, - "learning_rate": 7.934045810971878e-05, - "loss": 1.0501, - "step": 1036 - }, - { - "epoch": 0.32, - "learning_rate": 7.929975508867365e-05, - "loss": 1.1114, - "step": 1037 - }, - { - "epoch": 0.32, - "learning_rate": 7.925902247484946e-05, - "loss": 1.2061, - "step": 1038 - }, - { - "epoch": 0.32, - "learning_rate": 7.921826030938621e-05, - "loss": 1.0546, - "step": 1039 - }, - { - "epoch": 0.32, - "learning_rate": 7.91774686334537e-05, - "loss": 1.1242, - "step": 1040 - }, - { - "epoch": 0.32, - "learning_rate": 7.913664748825156e-05, - "loss": 1.0658, - "step": 1041 - }, - { - "epoch": 0.32, - "learning_rate": 7.909579691500919e-05, - "loss": 1.0564, - "step": 1042 - }, - { - "epoch": 0.32, - "learning_rate": 7.90549169549857e-05, - "loss": 1.0999, - "step": 1043 - }, - { - "epoch": 0.32, - "learning_rate": 7.901400764946988e-05, - "loss": 1.0744, - "step": 1044 - }, - { - "epoch": 0.32, - "learning_rate": 7.89730690397802e-05, - "loss": 1.0945, - "step": 1045 - }, - { - "epoch": 0.32, - "learning_rate": 7.893210116726467e-05, - "loss": 1.1084, - "step": 1046 - }, - { - "epoch": 0.32, - "learning_rate": 7.889110407330084e-05, - "loss": 1.1295, - "step": 1047 - }, - { - "epoch": 0.33, - "learning_rate": 7.885007779929586e-05, - "loss": 1.0291, - "step": 1048 - }, - { - "epoch": 0.33, - "learning_rate": 7.88090223866863e-05, - "loss": 1.0983, - "step": 1049 - }, - { - "epoch": 0.33, - "learning_rate": 7.876793787693815e-05, - "loss": 1.1338, - "step": 1050 - }, - { - "epoch": 0.33, - "learning_rate": 7.872682431154681e-05, - "loss": 1.1693, - "step": 1051 - }, - { - "epoch": 0.33, - "learning_rate": 7.8685681732037e-05, - "loss": 0.9928, - "step": 1052 - }, - { - "epoch": 0.33, - "learning_rate": 7.86445101799628e-05, - "loss": 1.0734, - "step": 1053 - }, - { - "epoch": 0.33, - "learning_rate": 7.860330969690749e-05, - "loss": 1.1227, - "step": 1054 - }, - { - "epoch": 0.33, - "learning_rate": 7.85620803244836e-05, - "loss": 1.0694, - "step": 1055 - }, - { - "epoch": 0.33, - "learning_rate": 7.85208221043328e-05, - "loss": 1.0886, - "step": 1056 - }, - { - "epoch": 0.33, - "learning_rate": 7.8479535078126e-05, - "loss": 1.1104, - "step": 1057 - }, - { - "epoch": 0.33, - "learning_rate": 7.843821928756308e-05, - "loss": 1.0678, - "step": 1058 - }, - { - "epoch": 0.33, - "learning_rate": 7.839687477437303e-05, - "loss": 1.1399, - "step": 1059 - }, - { - "epoch": 0.33, - "learning_rate": 7.835550158031387e-05, - "loss": 0.9955, - "step": 1060 - }, - { - "epoch": 0.33, - "learning_rate": 7.831409974717253e-05, - "loss": 1.0841, - "step": 1061 - }, - { - "epoch": 0.33, - "learning_rate": 7.82726693167649e-05, - "loss": 1.1054, - "step": 1062 - }, - { - "epoch": 0.33, - "learning_rate": 7.823121033093582e-05, - "loss": 0.986, - "step": 1063 - }, - { - "epoch": 0.33, - "learning_rate": 7.818972283155882e-05, - "loss": 1.0351, - "step": 1064 - }, - { - "epoch": 0.33, - "learning_rate": 7.814820686053635e-05, - "loss": 1.0489, - "step": 1065 - }, - { - "epoch": 0.33, - "learning_rate": 7.810666245979957e-05, - "loss": 1.2216, - "step": 1066 - }, - { - "epoch": 0.33, - "learning_rate": 7.806508967130836e-05, - "loss": 1.1113, - "step": 1067 - }, - { - "epoch": 0.33, - "learning_rate": 7.802348853705128e-05, - "loss": 1.05, - "step": 1068 - }, - { - "epoch": 0.33, - "learning_rate": 7.798185909904552e-05, - "loss": 1.0893, - "step": 1069 - }, - { - "epoch": 0.33, - "learning_rate": 7.794020139933684e-05, - "loss": 1.0174, - "step": 1070 - }, - { - "epoch": 0.33, - "learning_rate": 7.789851547999955e-05, - "loss": 1.1769, - "step": 1071 - }, - { - "epoch": 0.33, - "learning_rate": 7.78568013831365e-05, - "loss": 1.0149, - "step": 1072 - }, - { - "epoch": 0.33, - "learning_rate": 7.78150591508789e-05, - "loss": 1.1973, - "step": 1073 - }, - { - "epoch": 0.33, - "learning_rate": 7.777328882538647e-05, - "loss": 1.0636, - "step": 1074 - }, - { - "epoch": 0.33, - "learning_rate": 7.773149044884732e-05, - "loss": 1.1296, - "step": 1075 - }, - { - "epoch": 0.33, - "learning_rate": 7.768966406347781e-05, - "loss": 1.1631, - "step": 1076 - }, - { - "epoch": 0.33, - "learning_rate": 7.764780971152261e-05, - "loss": 1.0661, - "step": 1077 - }, - { - "epoch": 0.33, - "learning_rate": 7.76059274352547e-05, - "loss": 1.069, - "step": 1078 - }, - { - "epoch": 0.33, - "learning_rate": 7.756401727697517e-05, - "loss": 1.1446, - "step": 1079 - }, - { - "epoch": 0.34, - "learning_rate": 7.752207927901335e-05, - "loss": 1.0277, - "step": 1080 - }, - { - "epoch": 0.34, - "learning_rate": 7.748011348372663e-05, - "loss": 1.0837, - "step": 1081 - }, - { - "epoch": 0.34, - "learning_rate": 7.743811993350054e-05, - "loss": 1.0755, - "step": 1082 - }, - { - "epoch": 0.34, - "learning_rate": 7.739609867074855e-05, - "loss": 1.1526, - "step": 1083 - }, - { - "epoch": 0.34, - "learning_rate": 7.735404973791223e-05, - "loss": 1.0555, - "step": 1084 - }, - { - "epoch": 0.34, - "learning_rate": 7.7311973177461e-05, - "loss": 1.1434, - "step": 1085 - }, - { - "epoch": 0.34, - "learning_rate": 7.726986903189222e-05, - "loss": 1.0602, - "step": 1086 - }, - { - "epoch": 0.34, - "learning_rate": 7.722773734373114e-05, - "loss": 1.0089, - "step": 1087 - }, - { - "epoch": 0.34, - "learning_rate": 7.71855781555308e-05, - "loss": 1.142, - "step": 1088 - }, - { - "epoch": 0.34, - "learning_rate": 7.714339150987202e-05, - "loss": 1.1015, - "step": 1089 - }, - { - "epoch": 0.34, - "learning_rate": 7.710117744936333e-05, - "loss": 1.0397, - "step": 1090 - }, - { - "epoch": 0.34, - "learning_rate": 7.705893601664098e-05, - "loss": 1.1417, - "step": 1091 - }, - { - "epoch": 0.34, - "learning_rate": 7.701666725436886e-05, - "loss": 1.121, - "step": 1092 - }, - { - "epoch": 0.34, - "learning_rate": 7.697437120523845e-05, - "loss": 1.0492, - "step": 1093 - }, - { - "epoch": 0.34, - "learning_rate": 7.693204791196881e-05, - "loss": 1.06, - "step": 1094 - }, - { - "epoch": 0.34, - "learning_rate": 7.68896974173065e-05, - "loss": 1.1093, - "step": 1095 - }, - { - "epoch": 0.34, - "learning_rate": 7.684731976402555e-05, - "loss": 1.0731, - "step": 1096 - }, - { - "epoch": 0.34, - "learning_rate": 7.680491499492743e-05, - "loss": 0.9238, - "step": 1097 - }, - { - "epoch": 0.34, - "learning_rate": 7.6762483152841e-05, - "loss": 1.1392, - "step": 1098 - }, - { - "epoch": 0.34, - "learning_rate": 7.672002428062245e-05, - "loss": 1.1884, - "step": 1099 - }, - { - "epoch": 0.34, - "learning_rate": 7.667753842115529e-05, - "loss": 0.9794, - "step": 1100 - }, - { - "epoch": 0.34, - "learning_rate": 7.663502561735027e-05, - "loss": 1.068, - "step": 1101 - }, - { - "epoch": 0.34, - "learning_rate": 7.659248591214537e-05, - "loss": 1.1499, - "step": 1102 - }, - { - "epoch": 0.34, - "learning_rate": 7.654991934850573e-05, - "loss": 1.02, - "step": 1103 - }, - { - "epoch": 0.34, - "learning_rate": 7.650732596942359e-05, - "loss": 1.183, - "step": 1104 - }, - { - "epoch": 0.34, - "learning_rate": 7.646470581791833e-05, - "loss": 1.0111, - "step": 1105 - }, - { - "epoch": 0.34, - "learning_rate": 7.642205893703633e-05, - "loss": 1.0578, - "step": 1106 - }, - { - "epoch": 0.34, - "learning_rate": 7.6379385369851e-05, - "loss": 1.1461, - "step": 1107 - }, - { - "epoch": 0.34, - "learning_rate": 7.633668515946264e-05, - "loss": 1.0554, - "step": 1108 - }, - { - "epoch": 0.34, - "learning_rate": 7.629395834899853e-05, - "loss": 1.077, - "step": 1109 - }, - { - "epoch": 0.34, - "learning_rate": 7.625120498161278e-05, - "loss": 1.151, - "step": 1110 - }, - { - "epoch": 0.34, - "learning_rate": 7.620842510048633e-05, - "loss": 1.0364, - "step": 1111 - }, - { - "epoch": 0.34, - "learning_rate": 7.61656187488269e-05, - "loss": 1.1049, - "step": 1112 - }, - { - "epoch": 0.35, - "learning_rate": 7.612278596986894e-05, - "loss": 1.0673, - "step": 1113 - }, - { - "epoch": 0.35, - "learning_rate": 7.607992680687361e-05, - "loss": 1.0778, - "step": 1114 - }, - { - "epoch": 0.35, - "learning_rate": 7.603704130312867e-05, - "loss": 1.1002, - "step": 1115 - }, - { - "epoch": 0.35, - "learning_rate": 7.599412950194856e-05, - "loss": 1.0594, - "step": 1116 - }, - { - "epoch": 0.35, - "learning_rate": 7.59511914466742e-05, - "loss": 1.0163, - "step": 1117 - }, - { - "epoch": 0.35, - "learning_rate": 7.59082271806731e-05, - "loss": 1.2105, - "step": 1118 - }, - { - "epoch": 0.35, - "learning_rate": 7.586523674733917e-05, - "loss": 1.0458, - "step": 1119 - }, - { - "epoch": 0.35, - "learning_rate": 7.582222019009282e-05, - "loss": 1.1778, - "step": 1120 - }, - { - "epoch": 0.35, - "learning_rate": 7.577917755238079e-05, - "loss": 1.017, - "step": 1121 - }, - { - "epoch": 0.35, - "learning_rate": 7.57361088776762e-05, - "loss": 1.1242, - "step": 1122 - }, - { - "epoch": 0.35, - "learning_rate": 7.569301420947841e-05, - "loss": 1.1106, - "step": 1123 - }, - { - "epoch": 0.35, - "learning_rate": 7.564989359131312e-05, - "loss": 1.0635, - "step": 1124 - }, - { - "epoch": 0.35, - "learning_rate": 7.560674706673219e-05, - "loss": 1.1263, - "step": 1125 - }, - { - "epoch": 0.35, - "learning_rate": 7.556357467931363e-05, - "loss": 1.2093, - "step": 1126 - }, - { - "epoch": 0.35, - "learning_rate": 7.552037647266157e-05, - "loss": 0.9839, - "step": 1127 - }, - { - "epoch": 0.35, - "learning_rate": 7.547715249040628e-05, - "loss": 1.0284, - "step": 1128 - }, - { - "epoch": 0.35, - "learning_rate": 7.5433902776204e-05, - "loss": 0.9885, - "step": 1129 - }, - { - "epoch": 0.35, - "learning_rate": 7.5390627373737e-05, - "loss": 1.1134, - "step": 1130 - }, - { - "epoch": 0.35, - "learning_rate": 7.534732632671345e-05, - "loss": 1.123, - "step": 1131 - }, - { - "epoch": 0.35, - "learning_rate": 7.530399967886747e-05, - "loss": 1.0894, - "step": 1132 - }, - { - "epoch": 0.35, - "learning_rate": 7.526064747395898e-05, - "loss": 1.0601, - "step": 1133 - }, - { - "epoch": 0.35, - "learning_rate": 7.521726975577377e-05, - "loss": 1.0383, - "step": 1134 - }, - { - "epoch": 0.35, - "learning_rate": 7.517386656812338e-05, - "loss": 1.1852, - "step": 1135 - }, - { - "epoch": 0.35, - "learning_rate": 7.513043795484505e-05, - "loss": 1.2031, - "step": 1136 - }, - { - "epoch": 0.35, - "learning_rate": 7.508698395980173e-05, - "loss": 1.0569, - "step": 1137 - }, - { - "epoch": 0.35, - "learning_rate": 7.5043504626882e-05, - "loss": 1.2081, - "step": 1138 - }, - { - "epoch": 0.35, - "learning_rate": 7.500000000000001e-05, - "loss": 1.122, - "step": 1139 - }, - { - "epoch": 0.35, - "learning_rate": 7.495647012309547e-05, - "loss": 1.0475, - "step": 1140 - }, - { - "epoch": 0.35, - "learning_rate": 7.491291504013362e-05, - "loss": 1.1032, - "step": 1141 - }, - { - "epoch": 0.35, - "learning_rate": 7.486933479510512e-05, - "loss": 1.0068, - "step": 1142 - }, - { - "epoch": 0.35, - "learning_rate": 7.482572943202604e-05, - "loss": 1.1721, - "step": 1143 - }, - { - "epoch": 0.35, - "learning_rate": 7.478209899493786e-05, - "loss": 1.1007, - "step": 1144 - }, - { - "epoch": 0.36, - "learning_rate": 7.473844352790736e-05, - "loss": 1.0531, - "step": 1145 - }, - { - "epoch": 0.36, - "learning_rate": 7.469476307502657e-05, - "loss": 1.1025, - "step": 1146 - }, - { - "epoch": 0.36, - "learning_rate": 7.465105768041283e-05, - "loss": 1.0116, - "step": 1147 - }, - { - "epoch": 0.36, - "learning_rate": 7.46073273882086e-05, - "loss": 1.17, - "step": 1148 - }, - { - "epoch": 0.36, - "learning_rate": 7.45635722425815e-05, - "loss": 1.02, - "step": 1149 - }, - { - "epoch": 0.36, - "learning_rate": 7.451979228772433e-05, - "loss": 1.0379, - "step": 1150 - }, - { - "epoch": 0.36, - "learning_rate": 7.447598756785483e-05, - "loss": 1.2216, - "step": 1151 - }, - { - "epoch": 0.36, - "learning_rate": 7.44321581272158e-05, - "loss": 1.0507, - "step": 1152 - }, - { - "epoch": 0.36, - "learning_rate": 7.438830401007504e-05, - "loss": 1.0413, - "step": 1153 - }, - { - "epoch": 0.36, - "learning_rate": 7.434442526072525e-05, - "loss": 1.1168, - "step": 1154 - }, - { - "epoch": 0.36, - "learning_rate": 7.430052192348398e-05, - "loss": 1.131, - "step": 1155 - }, - { - "epoch": 0.36, - "learning_rate": 7.425659404269366e-05, - "loss": 1.0316, - "step": 1156 - }, - { - "epoch": 0.36, - "learning_rate": 7.421264166272152e-05, - "loss": 1.0529, - "step": 1157 - }, - { - "epoch": 0.36, - "learning_rate": 7.416866482795943e-05, - "loss": 1.0828, - "step": 1158 - }, - { - "epoch": 0.36, - "learning_rate": 7.412466358282409e-05, - "loss": 1.1564, - "step": 1159 - }, - { - "epoch": 0.36, - "learning_rate": 7.408063797175678e-05, - "loss": 0.9196, - "step": 1160 - }, - { - "epoch": 0.36, - "learning_rate": 7.403658803922341e-05, - "loss": 1.1521, - "step": 1161 - }, - { - "epoch": 0.36, - "learning_rate": 7.399251382971443e-05, - "loss": 1.0204, - "step": 1162 - }, - { - "epoch": 0.36, - "learning_rate": 7.394841538774489e-05, - "loss": 1.0427, - "step": 1163 - }, - { - "epoch": 0.36, - "learning_rate": 7.390429275785419e-05, - "loss": 1.1662, - "step": 1164 - }, - { - "epoch": 0.36, - "learning_rate": 7.386014598460625e-05, - "loss": 1.0894, - "step": 1165 - }, - { - "epoch": 0.36, - "learning_rate": 7.381597511258936e-05, - "loss": 1.1885, - "step": 1166 - }, - { - "epoch": 0.36, - "learning_rate": 7.377178018641613e-05, - "loss": 1.0942, - "step": 1167 - }, - { - "epoch": 0.36, - "learning_rate": 7.372756125072348e-05, - "loss": 1.1087, - "step": 1168 - }, - { - "epoch": 0.36, - "learning_rate": 7.368331835017256e-05, - "loss": 1.1565, - "step": 1169 - }, - { - "epoch": 0.36, - "learning_rate": 7.363905152944879e-05, - "loss": 1.1037, - "step": 1170 - }, - { - "epoch": 0.36, - "learning_rate": 7.359476083326163e-05, - "loss": 1.1014, - "step": 1171 - }, - { - "epoch": 0.36, - "learning_rate": 7.355044630634477e-05, - "loss": 1.1177, - "step": 1172 - }, - { - "epoch": 0.36, - "learning_rate": 7.350610799345589e-05, - "loss": 1.1012, - "step": 1173 - }, - { - "epoch": 0.36, - "learning_rate": 7.346174593937676e-05, - "loss": 1.0725, - "step": 1174 - }, - { - "epoch": 0.36, - "learning_rate": 7.341736018891307e-05, - "loss": 1.0731, - "step": 1175 - }, - { - "epoch": 0.36, - "learning_rate": 7.33729507868945e-05, - "loss": 1.0597, - "step": 1176 - }, - { - "epoch": 0.37, - "learning_rate": 7.332851777817453e-05, - "loss": 1.1371, - "step": 1177 - }, - { - "epoch": 0.37, - "learning_rate": 7.328406120763058e-05, - "loss": 0.9958, - "step": 1178 - }, - { - "epoch": 0.37, - "learning_rate": 7.323958112016382e-05, - "loss": 1.1012, - "step": 1179 - }, - { - "epoch": 0.37, - "learning_rate": 7.319507756069918e-05, - "loss": 1.1972, - "step": 1180 - }, - { - "epoch": 0.37, - "learning_rate": 7.315055057418527e-05, - "loss": 1.1106, - "step": 1181 - }, - { - "epoch": 0.37, - "learning_rate": 7.310600020559445e-05, - "loss": 1.0227, - "step": 1182 - }, - { - "epoch": 0.37, - "learning_rate": 7.306142649992256e-05, - "loss": 1.0171, - "step": 1183 - }, - { - "epoch": 0.37, - "learning_rate": 7.301682950218911e-05, - "loss": 1.1073, - "step": 1184 - }, - { - "epoch": 0.37, - "learning_rate": 7.297220925743712e-05, - "loss": 1.1222, - "step": 1185 - }, - { - "epoch": 0.37, - "learning_rate": 7.292756581073305e-05, - "loss": 1.1178, - "step": 1186 - }, - { - "epoch": 0.37, - "learning_rate": 7.288289920716685e-05, - "loss": 1.0749, - "step": 1187 - }, - { - "epoch": 0.37, - "learning_rate": 7.283820949185185e-05, - "loss": 1.0537, - "step": 1188 - }, - { - "epoch": 0.37, - "learning_rate": 7.279349670992463e-05, - "loss": 1.0469, - "step": 1189 - }, - { - "epoch": 0.37, - "learning_rate": 7.27487609065452e-05, - "loss": 1.0716, - "step": 1190 - }, - { - "epoch": 0.37, - "learning_rate": 7.270400212689674e-05, - "loss": 1.0856, - "step": 1191 - }, - { - "epoch": 0.37, - "learning_rate": 7.265922041618567e-05, - "loss": 1.0918, - "step": 1192 - }, - { - "epoch": 0.37, - "learning_rate": 7.261441581964153e-05, - "loss": 1.033, - "step": 1193 - }, - { - "epoch": 0.37, - "learning_rate": 7.256958838251704e-05, - "loss": 1.0935, - "step": 1194 - }, - { - "epoch": 0.37, - "learning_rate": 7.252473815008794e-05, - "loss": 1.1567, - "step": 1195 - }, - { - "epoch": 0.37, - "learning_rate": 7.2479865167653e-05, - "loss": 0.9851, - "step": 1196 - }, - { - "epoch": 0.37, - "learning_rate": 7.243496948053394e-05, - "loss": 1.1416, - "step": 1197 - }, - { - "epoch": 0.37, - "learning_rate": 7.23900511340755e-05, - "loss": 1.0346, - "step": 1198 - }, - { - "epoch": 0.37, - "learning_rate": 7.234511017364523e-05, - "loss": 1.2183, - "step": 1199 - }, - { - "epoch": 0.37, - "learning_rate": 7.230014664463355e-05, - "loss": 1.0443, - "step": 1200 - }, - { - "epoch": 0.37, - "learning_rate": 7.225516059245365e-05, - "loss": 1.1441, - "step": 1201 - }, - { - "epoch": 0.37, - "learning_rate": 7.221015206254146e-05, - "loss": 1.0128, - "step": 1202 - }, - { - "epoch": 0.37, - "learning_rate": 7.216512110035567e-05, - "loss": 1.123, - "step": 1203 - }, - { - "epoch": 0.37, - "learning_rate": 7.212006775137761e-05, - "loss": 1.1405, - "step": 1204 - }, - { - "epoch": 0.37, - "learning_rate": 7.207499206111115e-05, - "loss": 1.0539, - "step": 1205 - }, - { - "epoch": 0.37, - "learning_rate": 7.20298940750828e-05, - "loss": 1.0244, - "step": 1206 - }, - { - "epoch": 0.37, - "learning_rate": 7.198477383884162e-05, - "loss": 1.1154, - "step": 1207 - }, - { - "epoch": 0.37, - "learning_rate": 7.1939631397959e-05, - "loss": 1.1106, - "step": 1208 - }, - { - "epoch": 0.38, - "learning_rate": 7.189446679802889e-05, - "loss": 1.1324, - "step": 1209 - }, - { - "epoch": 0.38, - "learning_rate": 7.184928008466759e-05, - "loss": 1.1538, - "step": 1210 - }, - { - "epoch": 0.38, - "learning_rate": 7.180407130351368e-05, - "loss": 1.036, - "step": 1211 - }, - { - "epoch": 0.38, - "learning_rate": 7.175884050022809e-05, - "loss": 1.0116, - "step": 1212 - }, - { - "epoch": 0.38, - "learning_rate": 7.171358772049401e-05, - "loss": 1.1409, - "step": 1213 - }, - { - "epoch": 0.38, - "learning_rate": 7.166831301001674e-05, - "loss": 1.088, - "step": 1214 - }, - { - "epoch": 0.38, - "learning_rate": 7.16230164145238e-05, - "loss": 1.0431, - "step": 1215 - }, - { - "epoch": 0.38, - "learning_rate": 7.157769797976477e-05, - "loss": 1.1133, - "step": 1216 - }, - { - "epoch": 0.38, - "learning_rate": 7.153235775151134e-05, - "loss": 1.0341, - "step": 1217 - }, - { - "epoch": 0.38, - "learning_rate": 7.148699577555716e-05, - "loss": 1.0631, - "step": 1218 - }, - { - "epoch": 0.38, - "learning_rate": 7.144161209771788e-05, - "loss": 1.0595, - "step": 1219 - }, - { - "epoch": 0.38, - "learning_rate": 7.139620676383105e-05, - "loss": 1.1521, - "step": 1220 - }, - { - "epoch": 0.38, - "learning_rate": 7.135077981975609e-05, - "loss": 1.0506, - "step": 1221 - }, - { - "epoch": 0.38, - "learning_rate": 7.130533131137426e-05, - "loss": 0.9847, - "step": 1222 - }, - { - "epoch": 0.38, - "learning_rate": 7.12598612845886e-05, - "loss": 1.1357, - "step": 1223 - }, - { - "epoch": 0.38, - "learning_rate": 7.121436978532388e-05, - "loss": 1.139, - "step": 1224 - }, - { - "epoch": 0.38, - "learning_rate": 7.116885685952655e-05, - "loss": 1.1738, - "step": 1225 - }, - { - "epoch": 0.38, - "learning_rate": 7.112332255316471e-05, - "loss": 1.0209, - "step": 1226 - }, - { - "epoch": 0.38, - "learning_rate": 7.107776691222803e-05, - "loss": 1.0531, - "step": 1227 - }, - { - "epoch": 0.38, - "learning_rate": 7.103218998272777e-05, - "loss": 1.1262, - "step": 1228 - }, - { - "epoch": 0.38, - "learning_rate": 7.098659181069667e-05, - "loss": 1.0303, - "step": 1229 - }, - { - "epoch": 0.38, - "learning_rate": 7.094097244218892e-05, - "loss": 1.1585, - "step": 1230 - }, - { - "epoch": 0.38, - "learning_rate": 7.089533192328011e-05, - "loss": 0.8435, - "step": 1231 - }, - { - "epoch": 0.38, - "learning_rate": 7.084967030006724e-05, - "loss": 1.2024, - "step": 1232 - }, - { - "epoch": 0.38, - "learning_rate": 7.080398761866856e-05, - "loss": 1.1986, - "step": 1233 - }, - { - "epoch": 0.38, - "learning_rate": 7.075828392522363e-05, - "loss": 1.0486, - "step": 1234 - }, - { - "epoch": 0.38, - "learning_rate": 7.071255926589321e-05, - "loss": 1.0554, - "step": 1235 - }, - { - "epoch": 0.38, - "learning_rate": 7.066681368685926e-05, - "loss": 1.0687, - "step": 1236 - }, - { - "epoch": 0.38, - "learning_rate": 7.062104723432485e-05, - "loss": 1.114, - "step": 1237 - }, - { - "epoch": 0.38, - "learning_rate": 7.057525995451415e-05, - "loss": 1.2131, - "step": 1238 - }, - { - "epoch": 0.38, - "learning_rate": 7.052945189367235e-05, - "loss": 1.0188, - "step": 1239 - }, - { - "epoch": 0.38, - "learning_rate": 7.048362309806561e-05, - "loss": 1.0604, - "step": 1240 - }, - { - "epoch": 0.38, - "learning_rate": 7.043777361398108e-05, - "loss": 1.1842, - "step": 1241 - }, - { - "epoch": 0.39, - "learning_rate": 7.039190348772677e-05, - "loss": 1.0629, - "step": 1242 - }, - { - "epoch": 0.39, - "learning_rate": 7.034601276563155e-05, - "loss": 1.0302, - "step": 1243 - }, - { - "epoch": 0.39, - "learning_rate": 7.030010149404512e-05, - "loss": 1.0119, - "step": 1244 - }, - { - "epoch": 0.39, - "learning_rate": 7.025416971933786e-05, - "loss": 1.0847, - "step": 1245 - }, - { - "epoch": 0.39, - "learning_rate": 7.02082174879009e-05, - "loss": 1.2169, - "step": 1246 - }, - { - "epoch": 0.39, - "learning_rate": 7.016224484614608e-05, - "loss": 0.9513, - "step": 1247 - }, - { - "epoch": 0.39, - "learning_rate": 7.011625184050578e-05, - "loss": 1.1664, - "step": 1248 - }, - { - "epoch": 0.39, - "learning_rate": 7.007023851743299e-05, - "loss": 1.0571, - "step": 1249 - }, - { - "epoch": 0.39, - "learning_rate": 7.00242049234012e-05, - "loss": 1.1474, - "step": 1250 - }, - { - "epoch": 0.39, - "learning_rate": 6.99781511049044e-05, - "loss": 1.0735, - "step": 1251 - }, - { - "epoch": 0.39, - "learning_rate": 6.993207710845696e-05, - "loss": 1.0056, - "step": 1252 - }, - { - "epoch": 0.39, - "learning_rate": 6.988598298059365e-05, - "loss": 1.1654, - "step": 1253 - }, - { - "epoch": 0.39, - "learning_rate": 6.983986876786962e-05, - "loss": 1.0523, - "step": 1254 - }, - { - "epoch": 0.39, - "learning_rate": 6.979373451686024e-05, - "loss": 1.0321, - "step": 1255 - }, - { - "epoch": 0.39, - "learning_rate": 6.974758027416115e-05, - "loss": 1.1516, - "step": 1256 - }, - { - "epoch": 0.39, - "learning_rate": 6.970140608638818e-05, - "loss": 1.0625, - "step": 1257 - }, - { - "epoch": 0.39, - "learning_rate": 6.965521200017731e-05, - "loss": 1.02, - "step": 1258 - }, - { - "epoch": 0.39, - "learning_rate": 6.960899806218458e-05, - "loss": 1.0998, - "step": 1259 - }, - { - "epoch": 0.39, - "learning_rate": 6.956276431908615e-05, - "loss": 1.0759, - "step": 1260 - }, - { - "epoch": 0.39, - "learning_rate": 6.95165108175781e-05, - "loss": 0.9135, - "step": 1261 - }, - { - "epoch": 0.39, - "learning_rate": 6.947023760437653e-05, - "loss": 1.2203, - "step": 1262 - }, - { - "epoch": 0.39, - "learning_rate": 6.942394472621744e-05, - "loss": 1.0323, - "step": 1263 - }, - { - "epoch": 0.39, - "learning_rate": 6.937763222985665e-05, - "loss": 1.0026, - "step": 1264 - }, - { - "epoch": 0.39, - "learning_rate": 6.933130016206985e-05, - "loss": 1.0887, - "step": 1265 - }, - { - "epoch": 0.39, - "learning_rate": 6.928494856965244e-05, - "loss": 1.1215, - "step": 1266 - }, - { - "epoch": 0.39, - "learning_rate": 6.92385774994196e-05, - "loss": 1.0308, - "step": 1267 - }, - { - "epoch": 0.39, - "learning_rate": 6.919218699820613e-05, - "loss": 1.1549, - "step": 1268 - }, - { - "epoch": 0.39, - "learning_rate": 6.914577711286649e-05, - "loss": 1.1668, - "step": 1269 - }, - { - "epoch": 0.39, - "learning_rate": 6.909934789027472e-05, - "loss": 1.0189, - "step": 1270 - }, - { - "epoch": 0.39, - "learning_rate": 6.905289937732434e-05, - "loss": 1.056, - "step": 1271 - }, - { - "epoch": 0.39, - "learning_rate": 6.900643162092841e-05, - "loss": 1.1345, - "step": 1272 - }, - { - "epoch": 0.39, - "learning_rate": 6.895994466801943e-05, - "loss": 1.0732, - "step": 1273 - }, - { - "epoch": 0.4, - "learning_rate": 6.891343856554922e-05, - "loss": 1.066, - "step": 1274 - }, - { - "epoch": 0.4, - "learning_rate": 6.886691336048901e-05, - "loss": 1.0549, - "step": 1275 - }, - { - "epoch": 0.4, - "learning_rate": 6.882036909982932e-05, - "loss": 1.0186, - "step": 1276 - }, - { - "epoch": 0.4, - "learning_rate": 6.877380583057984e-05, - "loss": 1.1096, - "step": 1277 - }, - { - "epoch": 0.4, - "learning_rate": 6.872722359976954e-05, - "loss": 1.0301, - "step": 1278 - }, - { - "epoch": 0.4, - "learning_rate": 6.868062245444655e-05, - "loss": 1.2081, - "step": 1279 - }, - { - "epoch": 0.4, - "learning_rate": 6.863400244167801e-05, - "loss": 1.0592, - "step": 1280 - }, - { - "epoch": 0.4, - "learning_rate": 6.858736360855021e-05, - "loss": 1.0506, - "step": 1281 - }, - { - "epoch": 0.4, - "learning_rate": 6.854070600216842e-05, - "loss": 1.1238, - "step": 1282 - }, - { - "epoch": 0.4, - "learning_rate": 6.849402966965684e-05, - "loss": 1.0195, - "step": 1283 - }, - { - "epoch": 0.4, - "learning_rate": 6.844733465815862e-05, - "loss": 1.1133, - "step": 1284 - }, - { - "epoch": 0.4, - "learning_rate": 6.840062101483577e-05, - "loss": 1.0968, - "step": 1285 - }, - { - "epoch": 0.4, - "learning_rate": 6.83538887868691e-05, - "loss": 1.096, - "step": 1286 - }, - { - "epoch": 0.4, - "learning_rate": 6.830713802145819e-05, - "loss": 1.0655, - "step": 1287 - }, - { - "epoch": 0.4, - "learning_rate": 6.82603687658214e-05, - "loss": 1.0618, - "step": 1288 - }, - { - "epoch": 0.4, - "learning_rate": 6.821358106719568e-05, - "loss": 1.0954, - "step": 1289 - }, - { - "epoch": 0.4, - "learning_rate": 6.816677497283666e-05, - "loss": 1.11, - "step": 1290 - }, - { - "epoch": 0.4, - "learning_rate": 6.811995053001854e-05, - "loss": 1.0274, - "step": 1291 - }, - { - "epoch": 0.4, - "learning_rate": 6.807310778603407e-05, - "loss": 1.0994, - "step": 1292 - }, - { - "epoch": 0.4, - "learning_rate": 6.80262467881944e-05, - "loss": 1.1045, - "step": 1293 - }, - { - "epoch": 0.4, - "learning_rate": 6.797936758382924e-05, - "loss": 1.0779, - "step": 1294 - }, - { - "epoch": 0.4, - "learning_rate": 6.793247022028663e-05, - "loss": 0.9296, - "step": 1295 - }, - { - "epoch": 0.4, - "learning_rate": 6.78855547449329e-05, - "loss": 1.038, - "step": 1296 - }, - { - "epoch": 0.4, - "learning_rate": 6.783862120515276e-05, - "loss": 1.0576, - "step": 1297 - }, - { - "epoch": 0.4, - "learning_rate": 6.779166964834913e-05, - "loss": 1.1607, - "step": 1298 - }, - { - "epoch": 0.4, - "learning_rate": 6.77447001219431e-05, - "loss": 1.0649, - "step": 1299 - }, - { - "epoch": 0.4, - "learning_rate": 6.769771267337394e-05, - "loss": 1.055, - "step": 1300 - }, - { - "epoch": 0.4, - "learning_rate": 6.765070735009906e-05, - "loss": 1.0904, - "step": 1301 - }, - { - "epoch": 0.4, - "learning_rate": 6.760368419959381e-05, - "loss": 1.0802, - "step": 1302 - }, - { - "epoch": 0.4, - "learning_rate": 6.755664326935166e-05, - "loss": 1.0701, - "step": 1303 - }, - { - "epoch": 0.4, - "learning_rate": 6.750958460688399e-05, - "loss": 0.9815, - "step": 1304 - }, - { - "epoch": 0.4, - "learning_rate": 6.746250825972005e-05, - "loss": 1.2739, - "step": 1305 - }, - { - "epoch": 0.41, - "learning_rate": 6.741541427540703e-05, - "loss": 1.1276, - "step": 1306 - }, - { - "epoch": 0.41, - "learning_rate": 6.736830270150992e-05, - "loss": 1.0494, - "step": 1307 - }, - { - "epoch": 0.41, - "learning_rate": 6.73211735856114e-05, - "loss": 1.1057, - "step": 1308 - }, - { - "epoch": 0.41, - "learning_rate": 6.727402697531194e-05, - "loss": 1.073, - "step": 1309 - }, - { - "epoch": 0.41, - "learning_rate": 6.722686291822965e-05, - "loss": 1.1338, - "step": 1310 - }, - { - "epoch": 0.41, - "learning_rate": 6.71796814620003e-05, - "loss": 0.9941, - "step": 1311 - }, - { - "epoch": 0.41, - "learning_rate": 6.713248265427718e-05, - "loss": 1.0214, - "step": 1312 - }, - { - "epoch": 0.41, - "learning_rate": 6.708526654273112e-05, - "loss": 1.1411, - "step": 1313 - }, - { - "epoch": 0.41, - "learning_rate": 6.703803317505048e-05, - "loss": 1.1199, - "step": 1314 - }, - { - "epoch": 0.41, - "learning_rate": 6.699078259894094e-05, - "loss": 1.0843, - "step": 1315 - }, - { - "epoch": 0.41, - "learning_rate": 6.694351486212567e-05, - "loss": 1.0319, - "step": 1316 - }, - { - "epoch": 0.41, - "learning_rate": 6.689623001234511e-05, - "loss": 1.0864, - "step": 1317 - }, - { - "epoch": 0.41, - "learning_rate": 6.684892809735699e-05, - "loss": 1.1058, - "step": 1318 - }, - { - "epoch": 0.41, - "learning_rate": 6.680160916493631e-05, - "loss": 1.1032, - "step": 1319 - }, - { - "epoch": 0.41, - "learning_rate": 6.675427326287525e-05, - "loss": 1.0272, - "step": 1320 - }, - { - "epoch": 0.41, - "learning_rate": 6.670692043898306e-05, - "loss": 1.1505, - "step": 1321 - }, - { - "epoch": 0.41, - "learning_rate": 6.665955074108616e-05, - "loss": 1.0334, - "step": 1322 - }, - { - "epoch": 0.41, - "learning_rate": 6.6612164217028e-05, - "loss": 1.0665, - "step": 1323 - }, - { - "epoch": 0.41, - "learning_rate": 6.656476091466901e-05, - "loss": 1.0167, - "step": 1324 - }, - { - "epoch": 0.41, - "learning_rate": 6.651734088188655e-05, - "loss": 1.0777, - "step": 1325 - }, - { - "epoch": 0.41, - "learning_rate": 6.646990416657492e-05, - "loss": 1.1597, - "step": 1326 - }, - { - "epoch": 0.41, - "learning_rate": 6.642245081664523e-05, - "loss": 1.1133, - "step": 1327 - }, - { - "epoch": 0.41, - "learning_rate": 6.63749808800254e-05, - "loss": 1.1043, - "step": 1328 - }, - { - "epoch": 0.41, - "learning_rate": 6.632749440466013e-05, - "loss": 0.925, - "step": 1329 - }, - { - "epoch": 0.41, - "learning_rate": 6.627999143851076e-05, - "loss": 1.0468, - "step": 1330 - }, - { - "epoch": 0.41, - "learning_rate": 6.623247202955537e-05, - "loss": 1.0598, - "step": 1331 - }, - { - "epoch": 0.41, - "learning_rate": 6.618493622578863e-05, - "loss": 1.0862, - "step": 1332 - }, - { - "epoch": 0.41, - "learning_rate": 6.613738407522168e-05, - "loss": 1.0596, - "step": 1333 - }, - { - "epoch": 0.41, - "learning_rate": 6.608981562588227e-05, - "loss": 1.1501, - "step": 1334 - }, - { - "epoch": 0.41, - "learning_rate": 6.604223092581456e-05, - "loss": 1.1605, - "step": 1335 - }, - { - "epoch": 0.41, - "learning_rate": 6.599463002307918e-05, - "loss": 1.0129, - "step": 1336 - }, - { - "epoch": 0.41, - "learning_rate": 6.594701296575303e-05, - "loss": 1.0794, - "step": 1337 - }, - { - "epoch": 0.42, - "learning_rate": 6.589937980192942e-05, - "loss": 1.1616, - "step": 1338 - }, - { - "epoch": 0.42, - "learning_rate": 6.585173057971787e-05, - "loss": 1.0348, - "step": 1339 - }, - { - "epoch": 0.42, - "learning_rate": 6.580406534724413e-05, - "loss": 1.0463, - "step": 1340 - }, - { - "epoch": 0.42, - "learning_rate": 6.575638415265015e-05, - "loss": 1.1008, - "step": 1341 - }, - { - "epoch": 0.42, - "learning_rate": 6.570868704409398e-05, - "loss": 1.0391, - "step": 1342 - }, - { - "epoch": 0.42, - "learning_rate": 6.566097406974971e-05, - "loss": 1.0925, - "step": 1343 - }, - { - "epoch": 0.42, - "learning_rate": 6.561324527780753e-05, - "loss": 1.065, - "step": 1344 - }, - { - "epoch": 0.42, - "learning_rate": 6.556550071647355e-05, - "loss": 1.0288, - "step": 1345 - }, - { - "epoch": 0.42, - "learning_rate": 6.551774043396978e-05, - "loss": 1.1108, - "step": 1346 - }, - { - "epoch": 0.42, - "learning_rate": 6.546996447853421e-05, - "loss": 1.1544, - "step": 1347 - }, - { - "epoch": 0.42, - "learning_rate": 6.542217289842057e-05, - "loss": 1.0997, - "step": 1348 - }, - { - "epoch": 0.42, - "learning_rate": 6.537436574189837e-05, - "loss": 1.0074, - "step": 1349 - }, - { - "epoch": 0.42, - "learning_rate": 6.532654305725293e-05, - "loss": 1.0633, - "step": 1350 - }, - { - "epoch": 0.42, - "learning_rate": 6.527870489278518e-05, - "loss": 1.116, - "step": 1351 - }, - { - "epoch": 0.42, - "learning_rate": 6.523085129681171e-05, - "loss": 1.0914, - "step": 1352 - }, - { - "epoch": 0.42, - "learning_rate": 6.51829823176647e-05, - "loss": 1.0286, - "step": 1353 - }, - { - "epoch": 0.42, - "learning_rate": 6.513509800369186e-05, - "loss": 1.048, - "step": 1354 - }, - { - "epoch": 0.42, - "learning_rate": 6.508719840325638e-05, - "loss": 1.1335, - "step": 1355 - }, - { - "epoch": 0.42, - "learning_rate": 6.503928356473692e-05, - "loss": 1.1141, - "step": 1356 - }, - { - "epoch": 0.42, - "learning_rate": 6.49913535365275e-05, - "loss": 0.9523, - "step": 1357 - }, - { - "epoch": 0.42, - "learning_rate": 6.494340836703751e-05, - "loss": 1.1301, - "step": 1358 - }, - { - "epoch": 0.42, - "learning_rate": 6.489544810469157e-05, - "loss": 1.0237, - "step": 1359 - }, - { - "epoch": 0.42, - "learning_rate": 6.484747279792962e-05, - "loss": 1.1206, - "step": 1360 - }, - { - "epoch": 0.42, - "learning_rate": 6.479948249520675e-05, - "loss": 1.1378, - "step": 1361 - }, - { - "epoch": 0.42, - "learning_rate": 6.475147724499323e-05, - "loss": 0.8629, - "step": 1362 - }, - { - "epoch": 0.42, - "learning_rate": 6.470345709577439e-05, - "loss": 1.1176, - "step": 1363 - }, - { - "epoch": 0.42, - "learning_rate": 6.465542209605063e-05, - "loss": 1.0523, - "step": 1364 - }, - { - "epoch": 0.42, - "learning_rate": 6.460737229433733e-05, - "loss": 1.0848, - "step": 1365 - }, - { - "epoch": 0.42, - "learning_rate": 6.455930773916484e-05, - "loss": 1.1092, - "step": 1366 - }, - { - "epoch": 0.42, - "learning_rate": 6.451122847907842e-05, - "loss": 1.1011, - "step": 1367 - }, - { - "epoch": 0.42, - "learning_rate": 6.446313456263815e-05, - "loss": 1.1554, - "step": 1368 - }, - { - "epoch": 0.42, - "learning_rate": 6.441502603841891e-05, - "loss": 1.0492, - "step": 1369 - }, - { - "epoch": 0.43, - "learning_rate": 6.436690295501042e-05, - "loss": 1.0963, - "step": 1370 - }, - { - "epoch": 0.43, - "learning_rate": 6.431876536101696e-05, - "loss": 1.0624, - "step": 1371 - }, - { - "epoch": 0.43, - "learning_rate": 6.427061330505757e-05, - "loss": 1.0777, - "step": 1372 - }, - { - "epoch": 0.43, - "learning_rate": 6.42224468357659e-05, - "loss": 0.9925, - "step": 1373 - }, - { - "epoch": 0.43, - "learning_rate": 6.417426600179008e-05, - "loss": 1.1108, - "step": 1374 - }, - { - "epoch": 0.43, - "learning_rate": 6.412607085179282e-05, - "loss": 1.0183, - "step": 1375 - }, - { - "epoch": 0.43, - "learning_rate": 6.407786143445128e-05, - "loss": 1.1758, - "step": 1376 - }, - { - "epoch": 0.43, - "learning_rate": 6.402963779845699e-05, - "loss": 1.0106, - "step": 1377 - }, - { - "epoch": 0.43, - "learning_rate": 6.398139999251587e-05, - "loss": 1.0366, - "step": 1378 - }, - { - "epoch": 0.43, - "learning_rate": 6.393314806534815e-05, - "loss": 1.0953, - "step": 1379 - }, - { - "epoch": 0.43, - "learning_rate": 6.388488206568834e-05, - "loss": 1.0701, - "step": 1380 - }, - { - "epoch": 0.43, - "learning_rate": 6.38366020422851e-05, - "loss": 1.0957, - "step": 1381 - }, - { - "epoch": 0.43, - "learning_rate": 6.378830804390135e-05, - "loss": 1.0888, - "step": 1382 - }, - { - "epoch": 0.43, - "learning_rate": 6.374000011931402e-05, - "loss": 1.0278, - "step": 1383 - }, - { - "epoch": 0.43, - "learning_rate": 6.369167831731418e-05, - "loss": 1.1085, - "step": 1384 - }, - { - "epoch": 0.43, - "learning_rate": 6.36433426867069e-05, - "loss": 1.0525, - "step": 1385 - }, - { - "epoch": 0.43, - "learning_rate": 6.359499327631122e-05, - "loss": 1.103, - "step": 1386 - }, - { - "epoch": 0.43, - "learning_rate": 6.354663013496006e-05, - "loss": 1.2243, - "step": 1387 - }, - { - "epoch": 0.43, - "learning_rate": 6.349825331150026e-05, - "loss": 0.9998, - "step": 1388 - }, - { - "epoch": 0.43, - "learning_rate": 6.344986285479244e-05, - "loss": 1.0595, - "step": 1389 - }, - { - "epoch": 0.43, - "learning_rate": 6.340145881371101e-05, - "loss": 1.1301, - "step": 1390 - }, - { - "epoch": 0.43, - "learning_rate": 6.33530412371441e-05, - "loss": 1.0371, - "step": 1391 - }, - { - "epoch": 0.43, - "learning_rate": 6.33046101739935e-05, - "loss": 1.1433, - "step": 1392 - }, - { - "epoch": 0.43, - "learning_rate": 6.325616567317461e-05, - "loss": 1.0328, - "step": 1393 - }, - { - "epoch": 0.43, - "learning_rate": 6.320770778361644e-05, - "loss": 1.0941, - "step": 1394 - }, - { - "epoch": 0.43, - "learning_rate": 6.315923655426153e-05, - "loss": 0.8811, - "step": 1395 - }, - { - "epoch": 0.43, - "learning_rate": 6.311075203406582e-05, - "loss": 1.1369, - "step": 1396 - }, - { - "epoch": 0.43, - "learning_rate": 6.306225427199873e-05, - "loss": 0.922, - "step": 1397 - }, - { - "epoch": 0.43, - "learning_rate": 6.301374331704306e-05, - "loss": 1.2074, - "step": 1398 - }, - { - "epoch": 0.43, - "learning_rate": 6.29652192181949e-05, - "loss": 0.9893, - "step": 1399 - }, - { - "epoch": 0.43, - "learning_rate": 6.291668202446363e-05, - "loss": 1.187, - "step": 1400 - }, - { - "epoch": 0.43, - "learning_rate": 6.286813178487187e-05, - "loss": 1.0265, - "step": 1401 - }, - { - "epoch": 0.43, - "learning_rate": 6.281956854845544e-05, - "loss": 1.0021, - "step": 1402 - }, - { - "epoch": 0.44, - "learning_rate": 6.277099236426318e-05, - "loss": 1.1968, - "step": 1403 - }, - { - "epoch": 0.44, - "learning_rate": 6.272240328135711e-05, - "loss": 1.0971, - "step": 1404 - }, - { - "epoch": 0.44, - "learning_rate": 6.267380134881229e-05, - "loss": 1.2338, - "step": 1405 - }, - { - "epoch": 0.44, - "learning_rate": 6.262518661571666e-05, - "loss": 0.961, - "step": 1406 - }, - { - "epoch": 0.44, - "learning_rate": 6.257655913117118e-05, - "loss": 1.1601, - "step": 1407 - }, - { - "epoch": 0.44, - "learning_rate": 6.252791894428962e-05, - "loss": 1.0023, - "step": 1408 - }, - { - "epoch": 0.44, - "learning_rate": 6.247926610419864e-05, - "loss": 1.0745, - "step": 1409 - }, - { - "epoch": 0.44, - "learning_rate": 6.243060066003763e-05, - "loss": 1.139, - "step": 1410 - }, - { - "epoch": 0.44, - "learning_rate": 6.238192266095877e-05, - "loss": 1.0159, - "step": 1411 - }, - { - "epoch": 0.44, - "learning_rate": 6.233323215612684e-05, - "loss": 0.9795, - "step": 1412 - }, - { - "epoch": 0.44, - "learning_rate": 6.228452919471932e-05, - "loss": 1.1776, - "step": 1413 - }, - { - "epoch": 0.44, - "learning_rate": 6.223581382592625e-05, - "loss": 0.968, - "step": 1414 - }, - { - "epoch": 0.44, - "learning_rate": 6.218708609895016e-05, - "loss": 1.2115, - "step": 1415 - }, - { - "epoch": 0.44, - "learning_rate": 6.213834606300614e-05, - "loss": 1.1399, - "step": 1416 - }, - { - "epoch": 0.44, - "learning_rate": 6.208959376732164e-05, - "loss": 1.0058, - "step": 1417 - }, - { - "epoch": 0.44, - "learning_rate": 6.204082926113655e-05, - "loss": 1.0235, - "step": 1418 - }, - { - "epoch": 0.44, - "learning_rate": 6.199205259370303e-05, - "loss": 1.169, - "step": 1419 - }, - { - "epoch": 0.44, - "learning_rate": 6.194326381428559e-05, - "loss": 1.058, - "step": 1420 - }, - { - "epoch": 0.44, - "learning_rate": 6.189446297216093e-05, - "loss": 1.1151, - "step": 1421 - }, - { - "epoch": 0.44, - "learning_rate": 6.184565011661794e-05, - "loss": 0.9766, - "step": 1422 - }, - { - "epoch": 0.44, - "learning_rate": 6.179682529695766e-05, - "loss": 1.1478, - "step": 1423 - }, - { - "epoch": 0.44, - "learning_rate": 6.174798856249318e-05, - "loss": 1.0092, - "step": 1424 - }, - { - "epoch": 0.44, - "learning_rate": 6.169913996254967e-05, - "loss": 1.1638, - "step": 1425 - }, - { - "epoch": 0.44, - "learning_rate": 6.165027954646425e-05, - "loss": 1.09, - "step": 1426 - }, - { - "epoch": 0.44, - "learning_rate": 6.1601407363586e-05, - "loss": 1.0879, - "step": 1427 - }, - { - "epoch": 0.44, - "learning_rate": 6.155252346327583e-05, - "loss": 1.0587, - "step": 1428 - }, - { - "epoch": 0.44, - "learning_rate": 6.150362789490653e-05, - "loss": 1.1122, - "step": 1429 - }, - { - "epoch": 0.44, - "learning_rate": 6.145472070786272e-05, - "loss": 1.0043, - "step": 1430 - }, - { - "epoch": 0.44, - "learning_rate": 6.140580195154064e-05, - "loss": 1.0458, - "step": 1431 - }, - { - "epoch": 0.44, - "learning_rate": 6.135687167534834e-05, - "loss": 1.0565, - "step": 1432 - }, - { - "epoch": 0.44, - "learning_rate": 6.130792992870538e-05, - "loss": 1.0394, - "step": 1433 - }, - { - "epoch": 0.44, - "learning_rate": 6.125897676104301e-05, - "loss": 1.0257, - "step": 1434 - }, - { - "epoch": 0.45, - "learning_rate": 6.121001222180394e-05, - "loss": 1.0146, - "step": 1435 - }, - { - "epoch": 0.45, - "learning_rate": 6.116103636044244e-05, - "loss": 1.185, - "step": 1436 - }, - { - "epoch": 0.45, - "learning_rate": 6.111204922642415e-05, - "loss": 1.0341, - "step": 1437 - }, - { - "epoch": 0.45, - "learning_rate": 6.10630508692261e-05, - "loss": 1.1085, - "step": 1438 - }, - { - "epoch": 0.45, - "learning_rate": 6.1014041338336734e-05, - "loss": 1.0279, - "step": 1439 - }, - { - "epoch": 0.45, - "learning_rate": 6.096502068325567e-05, - "loss": 1.0812, - "step": 1440 - }, - { - "epoch": 0.45, - "learning_rate": 6.091598895349382e-05, - "loss": 1.1089, - "step": 1441 - }, - { - "epoch": 0.45, - "learning_rate": 6.086694619857329e-05, - "loss": 1.0064, - "step": 1442 - }, - { - "epoch": 0.45, - "learning_rate": 6.081789246802731e-05, - "loss": 1.0934, - "step": 1443 - }, - { - "epoch": 0.45, - "learning_rate": 6.076882781140016e-05, - "loss": 1.0657, - "step": 1444 - }, - { - "epoch": 0.45, - "learning_rate": 6.071975227824724e-05, - "loss": 1.0465, - "step": 1445 - }, - { - "epoch": 0.45, - "learning_rate": 6.067066591813485e-05, - "loss": 0.9807, - "step": 1446 - }, - { - "epoch": 0.45, - "learning_rate": 6.062156878064026e-05, - "loss": 1.0969, - "step": 1447 - }, - { - "epoch": 0.45, - "learning_rate": 6.0572460915351617e-05, - "loss": 1.0349, - "step": 1448 - }, - { - "epoch": 0.45, - "learning_rate": 6.052334237186792e-05, - "loss": 1.1451, - "step": 1449 - }, - { - "epoch": 0.45, - "learning_rate": 6.047421319979894e-05, - "loss": 1.0553, - "step": 1450 - }, - { - "epoch": 0.45, - "learning_rate": 6.042507344876518e-05, - "loss": 1.0038, - "step": 1451 - }, - { - "epoch": 0.45, - "learning_rate": 6.037592316839785e-05, - "loss": 1.1214, - "step": 1452 - }, - { - "epoch": 0.45, - "learning_rate": 6.032676240833876e-05, - "loss": 1.0868, - "step": 1453 - }, - { - "epoch": 0.45, - "learning_rate": 6.027759121824031e-05, - "loss": 1.1294, - "step": 1454 - }, - { - "epoch": 0.45, - "learning_rate": 6.0228409647765484e-05, - "loss": 0.9412, - "step": 1455 - }, - { - "epoch": 0.45, - "learning_rate": 6.017921774658768e-05, - "loss": 1.0321, - "step": 1456 - }, - { - "epoch": 0.45, - "learning_rate": 6.013001556439078e-05, - "loss": 1.1177, - "step": 1457 - }, - { - "epoch": 0.45, - "learning_rate": 6.0080803150869034e-05, - "loss": 1.0457, - "step": 1458 - }, - { - "epoch": 0.45, - "learning_rate": 6.003158055572701e-05, - "loss": 1.0299, - "step": 1459 - }, - { - "epoch": 0.45, - "learning_rate": 5.998234782867958e-05, - "loss": 1.0761, - "step": 1460 - }, - { - "epoch": 0.45, - "learning_rate": 5.993310501945184e-05, - "loss": 1.049, - "step": 1461 - }, - { - "epoch": 0.45, - "learning_rate": 5.9883852177779076e-05, - "loss": 1.1296, - "step": 1462 - }, - { - "epoch": 0.45, - "learning_rate": 5.983458935340669e-05, - "loss": 1.0462, - "step": 1463 - }, - { - "epoch": 0.45, - "learning_rate": 5.978531659609021e-05, - "loss": 0.9925, - "step": 1464 - }, - { - "epoch": 0.45, - "learning_rate": 5.9736033955595136e-05, - "loss": 1.1032, - "step": 1465 - }, - { - "epoch": 0.45, - "learning_rate": 5.9686741481696996e-05, - "loss": 0.885, - "step": 1466 - }, - { - "epoch": 0.46, - "learning_rate": 5.9637439224181214e-05, - "loss": 1.0365, - "step": 1467 - }, - { - "epoch": 0.46, - "learning_rate": 5.958812723284313e-05, - "loss": 1.0278, - "step": 1468 - }, - { - "epoch": 0.46, - "learning_rate": 5.95388055574879e-05, - "loss": 1.181, - "step": 1469 - }, - { - "epoch": 0.46, - "learning_rate": 5.9489474247930456e-05, - "loss": 1.0497, - "step": 1470 - }, - { - "epoch": 0.46, - "learning_rate": 5.944013335399546e-05, - "loss": 1.0372, - "step": 1471 - }, - { - "epoch": 0.46, - "learning_rate": 5.939078292551728e-05, - "loss": 1.0776, - "step": 1472 - }, - { - "epoch": 0.46, - "learning_rate": 5.934142301233986e-05, - "loss": 1.0149, - "step": 1473 - }, - { - "epoch": 0.46, - "learning_rate": 5.929205366431678e-05, - "loss": 1.1776, - "step": 1474 - }, - { - "epoch": 0.46, - "learning_rate": 5.924267493131112e-05, - "loss": 0.9404, - "step": 1475 - }, - { - "epoch": 0.46, - "learning_rate": 5.9193286863195464e-05, - "loss": 1.1595, - "step": 1476 - }, - { - "epoch": 0.46, - "learning_rate": 5.914388950985178e-05, - "loss": 1.0123, - "step": 1477 - }, - { - "epoch": 0.46, - "learning_rate": 5.9094482921171436e-05, - "loss": 1.1527, - "step": 1478 - }, - { - "epoch": 0.46, - "learning_rate": 5.9045067147055136e-05, - "loss": 1.1118, - "step": 1479 - }, - { - "epoch": 0.46, - "learning_rate": 5.899564223741288e-05, - "loss": 0.999, - "step": 1480 - }, - { - "epoch": 0.46, - "learning_rate": 5.894620824216385e-05, - "loss": 1.0564, - "step": 1481 - }, - { - "epoch": 0.46, - "learning_rate": 5.889676521123643e-05, - "loss": 1.1024, - "step": 1482 - }, - { - "epoch": 0.46, - "learning_rate": 5.884731319456812e-05, - "loss": 0.9801, - "step": 1483 - }, - { - "epoch": 0.46, - "learning_rate": 5.879785224210551e-05, - "loss": 1.0525, - "step": 1484 - }, - { - "epoch": 0.46, - "learning_rate": 5.874838240380419e-05, - "loss": 1.0977, - "step": 1485 - }, - { - "epoch": 0.46, - "learning_rate": 5.8698903729628776e-05, - "loss": 1.0908, - "step": 1486 - }, - { - "epoch": 0.46, - "learning_rate": 5.8649416269552736e-05, - "loss": 1.0135, - "step": 1487 - }, - { - "epoch": 0.46, - "learning_rate": 5.8599920073558465e-05, - "loss": 1.1219, - "step": 1488 - }, - { - "epoch": 0.46, - "learning_rate": 5.8550415191637175e-05, - "loss": 1.071, - "step": 1489 - }, - { - "epoch": 0.46, - "learning_rate": 5.850090167378884e-05, - "loss": 1.0222, - "step": 1490 - }, - { - "epoch": 0.46, - "learning_rate": 5.8451379570022135e-05, - "loss": 1.0857, - "step": 1491 - }, - { - "epoch": 0.46, - "learning_rate": 5.840184893035444e-05, - "loss": 1.0749, - "step": 1492 - }, - { - "epoch": 0.46, - "learning_rate": 5.835230980481177e-05, - "loss": 0.9725, - "step": 1493 - }, - { - "epoch": 0.46, - "learning_rate": 5.830276224342865e-05, - "loss": 1.1136, - "step": 1494 - }, - { - "epoch": 0.46, - "learning_rate": 5.82532062962482e-05, - "loss": 1.0351, - "step": 1495 - }, - { - "epoch": 0.46, - "learning_rate": 5.820364201332193e-05, - "loss": 1.1141, - "step": 1496 - }, - { - "epoch": 0.46, - "learning_rate": 5.815406944470982e-05, - "loss": 1.0516, - "step": 1497 - }, - { - "epoch": 0.46, - "learning_rate": 5.8104488640480236e-05, - "loss": 1.0858, - "step": 1498 - }, - { - "epoch": 0.47, - "learning_rate": 5.805489965070983e-05, - "loss": 1.0643, - "step": 1499 - }, - { - "epoch": 0.47, - "learning_rate": 5.800530252548348e-05, - "loss": 0.8735, - "step": 1500 - }, - { - "epoch": 0.47, - "learning_rate": 5.7955697314894395e-05, - "loss": 1.192, - "step": 1501 - }, - { - "epoch": 0.47, - "learning_rate": 5.7906084069043845e-05, - "loss": 1.0715, - "step": 1502 - }, - { - "epoch": 0.47, - "learning_rate": 5.785646283804124e-05, - "loss": 1.0043, - "step": 1503 - }, - { - "epoch": 0.47, - "learning_rate": 5.780683367200409e-05, - "loss": 1.0637, - "step": 1504 - }, - { - "epoch": 0.47, - "learning_rate": 5.77571966210579e-05, - "loss": 1.1067, - "step": 1505 - }, - { - "epoch": 0.47, - "learning_rate": 5.770755173533612e-05, - "loss": 1.0509, - "step": 1506 - }, - { - "epoch": 0.47, - "learning_rate": 5.7657899064980146e-05, - "loss": 1.1263, - "step": 1507 - }, - { - "epoch": 0.47, - "learning_rate": 5.760823866013921e-05, - "loss": 1.0449, - "step": 1508 - }, - { - "epoch": 0.47, - "learning_rate": 5.755857057097036e-05, - "loss": 0.9776, - "step": 1509 - }, - { - "epoch": 0.47, - "learning_rate": 5.750889484763842e-05, - "loss": 1.1377, - "step": 1510 - }, - { - "epoch": 0.47, - "learning_rate": 5.7459211540315924e-05, - "loss": 1.0459, - "step": 1511 - }, - { - "epoch": 0.47, - "learning_rate": 5.740952069918304e-05, - "loss": 1.0419, - "step": 1512 - }, - { - "epoch": 0.47, - "learning_rate": 5.7359822374427566e-05, - "loss": 1.0654, - "step": 1513 - }, - { - "epoch": 0.47, - "learning_rate": 5.731011661624487e-05, - "loss": 1.1102, - "step": 1514 - }, - { - "epoch": 0.47, - "learning_rate": 5.7260403474837786e-05, - "loss": 1.0561, - "step": 1515 - }, - { - "epoch": 0.47, - "learning_rate": 5.721068300041666e-05, - "loss": 0.979, - "step": 1516 - }, - { - "epoch": 0.47, - "learning_rate": 5.7160955243199186e-05, - "loss": 1.1241, - "step": 1517 - }, - { - "epoch": 0.47, - "learning_rate": 5.711122025341048e-05, - "loss": 1.0547, - "step": 1518 - }, - { - "epoch": 0.47, - "learning_rate": 5.706147808128288e-05, - "loss": 0.9508, - "step": 1519 - }, - { - "epoch": 0.47, - "learning_rate": 5.7011728777056084e-05, - "loss": 1.0399, - "step": 1520 - }, - { - "epoch": 0.47, - "learning_rate": 5.6961972390976894e-05, - "loss": 1.2358, - "step": 1521 - }, - { - "epoch": 0.47, - "learning_rate": 5.69122089732993e-05, - "loss": 0.9599, - "step": 1522 - }, - { - "epoch": 0.47, - "learning_rate": 5.6862438574284405e-05, - "loss": 1.1832, - "step": 1523 - }, - { - "epoch": 0.47, - "learning_rate": 5.681266124420037e-05, - "loss": 0.9775, - "step": 1524 - }, - { - "epoch": 0.47, - "learning_rate": 5.676287703332234e-05, - "loss": 1.0929, - "step": 1525 - }, - { - "epoch": 0.47, - "learning_rate": 5.671308599193241e-05, - "loss": 1.0498, - "step": 1526 - }, - { - "epoch": 0.47, - "learning_rate": 5.666328817031957e-05, - "loss": 1.0218, - "step": 1527 - }, - { - "epoch": 0.47, - "learning_rate": 5.6613483618779675e-05, - "loss": 1.1086, - "step": 1528 - }, - { - "epoch": 0.47, - "learning_rate": 5.6563672387615353e-05, - "loss": 1.0663, - "step": 1529 - }, - { - "epoch": 0.47, - "learning_rate": 5.6513854527136015e-05, - "loss": 1.0098, - "step": 1530 - }, - { - "epoch": 0.47, - "learning_rate": 5.6464030087657714e-05, - "loss": 1.0273, - "step": 1531 - }, - { - "epoch": 0.48, - "learning_rate": 5.641419911950322e-05, - "loss": 0.9237, - "step": 1532 - }, - { - "epoch": 0.48, - "learning_rate": 5.636436167300181e-05, - "loss": 1.1191, - "step": 1533 - }, - { - "epoch": 0.48, - "learning_rate": 5.63145177984894e-05, - "loss": 1.0855, - "step": 1534 - }, - { - "epoch": 0.48, - "learning_rate": 5.626466754630829e-05, - "loss": 1.0543, - "step": 1535 - }, - { - "epoch": 0.48, - "learning_rate": 5.621481096680733e-05, - "loss": 1.0782, - "step": 1536 - }, - { - "epoch": 0.48, - "learning_rate": 5.6164948110341674e-05, - "loss": 0.9916, - "step": 1537 - }, - { - "epoch": 0.48, - "learning_rate": 5.6115079027272865e-05, - "loss": 1.2126, - "step": 1538 - }, - { - "epoch": 0.48, - "learning_rate": 5.6065203767968745e-05, - "loss": 1.0017, - "step": 1539 - }, - { - "epoch": 0.48, - "learning_rate": 5.6015322382803346e-05, - "loss": 1.0166, - "step": 1540 - }, - { - "epoch": 0.48, - "learning_rate": 5.5965434922156913e-05, - "loss": 1.2036, - "step": 1541 - }, - { - "epoch": 0.48, - "learning_rate": 5.591554143641583e-05, - "loss": 1.0151, - "step": 1542 - }, - { - "epoch": 0.48, - "learning_rate": 5.586564197597256e-05, - "loss": 1.0672, - "step": 1543 - }, - { - "epoch": 0.48, - "learning_rate": 5.581573659122561e-05, - "loss": 1.0447, - "step": 1544 - }, - { - "epoch": 0.48, - "learning_rate": 5.576582533257946e-05, - "loss": 1.0109, - "step": 1545 - }, - { - "epoch": 0.48, - "learning_rate": 5.571590825044454e-05, - "loss": 1.0926, - "step": 1546 - }, - { - "epoch": 0.48, - "learning_rate": 5.566598539523711e-05, - "loss": 1.1034, - "step": 1547 - }, - { - "epoch": 0.48, - "learning_rate": 5.561605681737932e-05, - "loss": 0.9765, - "step": 1548 - }, - { - "epoch": 0.48, - "learning_rate": 5.556612256729909e-05, - "loss": 1.1152, - "step": 1549 - }, - { - "epoch": 0.48, - "learning_rate": 5.5516182695430055e-05, - "loss": 1.0149, - "step": 1550 - }, - { - "epoch": 0.48, - "learning_rate": 5.5466237252211516e-05, - "loss": 1.0497, - "step": 1551 - }, - { - "epoch": 0.48, - "learning_rate": 5.5416286288088436e-05, - "loss": 0.9913, - "step": 1552 - }, - { - "epoch": 0.48, - "learning_rate": 5.536632985351131e-05, - "loss": 1.0149, - "step": 1553 - }, - { - "epoch": 0.48, - "learning_rate": 5.531636799893619e-05, - "loss": 1.1818, - "step": 1554 - }, - { - "epoch": 0.48, - "learning_rate": 5.526640077482461e-05, - "loss": 1.0078, - "step": 1555 - }, - { - "epoch": 0.48, - "learning_rate": 5.521642823164348e-05, - "loss": 1.0089, - "step": 1556 - }, - { - "epoch": 0.48, - "learning_rate": 5.5166450419865156e-05, - "loss": 1.1171, - "step": 1557 - }, - { - "epoch": 0.48, - "learning_rate": 5.511646738996723e-05, - "loss": 1.0258, - "step": 1558 - }, - { - "epoch": 0.48, - "learning_rate": 5.506647919243265e-05, - "loss": 1.0836, - "step": 1559 - }, - { - "epoch": 0.48, - "learning_rate": 5.501648587774949e-05, - "loss": 1.0116, - "step": 1560 - }, - { - "epoch": 0.48, - "learning_rate": 5.496648749641109e-05, - "loss": 1.0632, - "step": 1561 - }, - { - "epoch": 0.48, - "learning_rate": 5.4916484098915835e-05, - "loss": 1.1281, - "step": 1562 - }, - { - "epoch": 0.48, - "learning_rate": 5.4866475735767206e-05, - "loss": 0.8992, - "step": 1563 - }, - { - "epoch": 0.49, - "learning_rate": 5.481646245747371e-05, - "loss": 1.1116, - "step": 1564 - }, - { - "epoch": 0.49, - "learning_rate": 5.476644431454878e-05, - "loss": 1.0025, - "step": 1565 - }, - { - "epoch": 0.49, - "learning_rate": 5.471642135751078e-05, - "loss": 0.9988, - "step": 1566 - }, - { - "epoch": 0.49, - "learning_rate": 5.4666393636882965e-05, - "loss": 1.1579, - "step": 1567 - }, - { - "epoch": 0.49, - "learning_rate": 5.461636120319338e-05, - "loss": 1.0558, - "step": 1568 - }, - { - "epoch": 0.49, - "learning_rate": 5.456632410697481e-05, - "loss": 1.1121, - "step": 1569 - }, - { - "epoch": 0.49, - "learning_rate": 5.4516282398764784e-05, - "loss": 1.1108, - "step": 1570 - }, - { - "epoch": 0.49, - "learning_rate": 5.4466236129105455e-05, - "loss": 1.0557, - "step": 1571 - }, - { - "epoch": 0.49, - "learning_rate": 5.4416185348543604e-05, - "loss": 1.1495, - "step": 1572 - }, - { - "epoch": 0.49, - "learning_rate": 5.436613010763056e-05, - "loss": 1.0193, - "step": 1573 - }, - { - "epoch": 0.49, - "learning_rate": 5.43160704569222e-05, - "loss": 1.05, - "step": 1574 - }, - { - "epoch": 0.49, - "learning_rate": 5.4266006446978766e-05, - "loss": 1.1222, - "step": 1575 - }, - { - "epoch": 0.49, - "learning_rate": 5.4215938128364985e-05, - "loss": 1.1137, - "step": 1576 - }, - { - "epoch": 0.49, - "learning_rate": 5.4165865551649885e-05, - "loss": 1.0363, - "step": 1577 - }, - { - "epoch": 0.49, - "learning_rate": 5.411578876740684e-05, - "loss": 1.0234, - "step": 1578 - }, - { - "epoch": 0.49, - "learning_rate": 5.4065707826213405e-05, - "loss": 0.9753, - "step": 1579 - }, - { - "epoch": 0.49, - "learning_rate": 5.401562277865141e-05, - "loss": 1.1618, - "step": 1580 - }, - { - "epoch": 0.49, - "learning_rate": 5.396553367530679e-05, - "loss": 0.9607, - "step": 1581 - }, - { - "epoch": 0.49, - "learning_rate": 5.3915440566769584e-05, - "loss": 1.0258, - "step": 1582 - }, - { - "epoch": 0.49, - "learning_rate": 5.386534350363387e-05, - "loss": 1.2567, - "step": 1583 - }, - { - "epoch": 0.49, - "learning_rate": 5.3815242536497734e-05, - "loss": 1.0375, - "step": 1584 - }, - { - "epoch": 0.49, - "learning_rate": 5.376513771596319e-05, - "loss": 1.0324, - "step": 1585 - }, - { - "epoch": 0.49, - "learning_rate": 5.371502909263616e-05, - "loss": 1.0168, - "step": 1586 - }, - { - "epoch": 0.49, - "learning_rate": 5.366491671712641e-05, - "loss": 1.0511, - "step": 1587 - }, - { - "epoch": 0.49, - "learning_rate": 5.361480064004746e-05, - "loss": 1.1576, - "step": 1588 - }, - { - "epoch": 0.49, - "learning_rate": 5.356468091201661e-05, - "loss": 1.036, - "step": 1589 - }, - { - "epoch": 0.49, - "learning_rate": 5.351455758365482e-05, - "loss": 1.0462, - "step": 1590 - }, - { - "epoch": 0.49, - "learning_rate": 5.346443070558671e-05, - "loss": 1.0077, - "step": 1591 - }, - { - "epoch": 0.49, - "learning_rate": 5.341430032844045e-05, - "loss": 1.0021, - "step": 1592 - }, - { - "epoch": 0.49, - "learning_rate": 5.336416650284779e-05, - "loss": 1.1115, - "step": 1593 - }, - { - "epoch": 0.49, - "learning_rate": 5.3314029279443926e-05, - "loss": 1.1105, - "step": 1594 - }, - { - "epoch": 0.49, - "learning_rate": 5.3263888708867505e-05, - "loss": 1.0453, - "step": 1595 - }, - { - "epoch": 0.5, - "learning_rate": 5.321374484176055e-05, - "loss": 1.0599, - "step": 1596 - }, - { - "epoch": 0.5, - "learning_rate": 5.316359772876839e-05, - "loss": 0.9979, - "step": 1597 - }, - { - "epoch": 0.5, - "learning_rate": 5.3113447420539675e-05, - "loss": 1.0965, - "step": 1598 - }, - { - "epoch": 0.5, - "learning_rate": 5.306329396772627e-05, - "loss": 1.0084, - "step": 1599 - }, - { - "epoch": 0.5, - "learning_rate": 5.301313742098319e-05, - "loss": 1.0502, - "step": 1600 - }, - { - "epoch": 0.5, - "learning_rate": 5.2962977830968606e-05, - "loss": 0.9534, - "step": 1601 - }, - { - "epoch": 0.5, - "learning_rate": 5.2912815248343725e-05, - "loss": 1.1807, - "step": 1602 - }, - { - "epoch": 0.5, - "learning_rate": 5.2862649723772826e-05, - "loss": 1.0619, - "step": 1603 - }, - { - "epoch": 0.5, - "learning_rate": 5.2812481307923125e-05, - "loss": 1.0976, - "step": 1604 - }, - { - "epoch": 0.5, - "learning_rate": 5.276231005146477e-05, - "loss": 0.9588, - "step": 1605 - }, - { - "epoch": 0.5, - "learning_rate": 5.2712136005070764e-05, - "loss": 1.12, - "step": 1606 - }, - { - "epoch": 0.5, - "learning_rate": 5.2661959219416966e-05, - "loss": 1.054, - "step": 1607 - }, - { - "epoch": 0.5, - "learning_rate": 5.261177974518193e-05, - "loss": 0.9853, - "step": 1608 - }, - { - "epoch": 0.5, - "learning_rate": 5.256159763304702e-05, - "loss": 1.0817, - "step": 1609 - }, - { - "epoch": 0.5, - "learning_rate": 5.251141293369618e-05, - "loss": 1.0504, - "step": 1610 - }, - { - "epoch": 0.5, - "learning_rate": 5.2461225697816005e-05, - "loss": 1.0662, - "step": 1611 - }, - { - "epoch": 0.5, - "learning_rate": 5.241103597609567e-05, - "loss": 1.0553, - "step": 1612 - }, - { - "epoch": 0.5, - "learning_rate": 5.2360843819226813e-05, - "loss": 1.0326, - "step": 1613 - }, - { - "epoch": 0.5, - "learning_rate": 5.231064927790359e-05, - "loss": 1.0642, - "step": 1614 - }, - { - "epoch": 0.5, - "learning_rate": 5.2260452402822504e-05, - "loss": 1.0595, - "step": 1615 - }, - { - "epoch": 0.5, - "learning_rate": 5.2210253244682437e-05, - "loss": 1.1402, - "step": 1616 - }, - { - "epoch": 0.5, - "learning_rate": 5.216005185418461e-05, - "loss": 0.9774, - "step": 1617 - }, - { - "epoch": 0.5, - "learning_rate": 5.210984828203247e-05, - "loss": 1.0811, - "step": 1618 - }, - { - "epoch": 0.5, - "learning_rate": 5.205964257893166e-05, - "loss": 1.0628, - "step": 1619 - }, - { - "epoch": 0.5, - "learning_rate": 5.2009434795589984e-05, - "loss": 1.0432, - "step": 1620 - }, - { - "epoch": 0.5, - "learning_rate": 5.195922498271737e-05, - "loss": 1.0771, - "step": 1621 - }, - { - "epoch": 0.5, - "learning_rate": 5.190901319102573e-05, - "loss": 1.0217, - "step": 1622 - }, - { - "epoch": 0.5, - "learning_rate": 5.1858799471229056e-05, - "loss": 1.0499, - "step": 1623 - }, - { - "epoch": 0.5, - "learning_rate": 5.1808583874043246e-05, - "loss": 1.0673, - "step": 1624 - }, - { - "epoch": 0.5, - "learning_rate": 5.175836645018608e-05, - "loss": 0.9597, - "step": 1625 - }, - { - "epoch": 0.5, - "learning_rate": 5.1708147250377236e-05, - "loss": 1.06, - "step": 1626 - }, - { - "epoch": 0.5, - "learning_rate": 5.1657926325338115e-05, - "loss": 1.0882, - "step": 1627 - }, - { - "epoch": 0.51, - "learning_rate": 5.160770372579191e-05, - "loss": 1.0981, - "step": 1628 - }, - { - "epoch": 0.51, - "learning_rate": 5.155747950246349e-05, - "loss": 1.0688, - "step": 1629 - }, - { - "epoch": 0.51, - "learning_rate": 5.150725370607936e-05, - "loss": 1.0074, - "step": 1630 - }, - { - "epoch": 0.51, - "learning_rate": 5.145702638736766e-05, - "loss": 1.104, - "step": 1631 - }, - { - "epoch": 0.51, - "learning_rate": 5.1406797597057975e-05, - "loss": 1.0094, - "step": 1632 - }, - { - "epoch": 0.51, - "learning_rate": 5.1356567385881436e-05, - "loss": 1.0568, - "step": 1633 - }, - { - "epoch": 0.51, - "learning_rate": 5.130633580457064e-05, - "loss": 0.8473, - "step": 1634 - }, - { - "epoch": 0.51, - "learning_rate": 5.1256102903859484e-05, - "loss": 1.1072, - "step": 1635 - }, - { - "epoch": 0.51, - "learning_rate": 5.1205868734483264e-05, - "loss": 1.1439, - "step": 1636 - }, - { - "epoch": 0.51, - "learning_rate": 5.115563334717854e-05, - "loss": 1.0702, - "step": 1637 - }, - { - "epoch": 0.51, - "learning_rate": 5.1105396792683094e-05, - "loss": 1.0126, - "step": 1638 - }, - { - "epoch": 0.51, - "learning_rate": 5.1055159121735894e-05, - "loss": 1.0505, - "step": 1639 - }, - { - "epoch": 0.51, - "learning_rate": 5.100492038507704e-05, - "loss": 1.0542, - "step": 1640 - }, - { - "epoch": 0.51, - "learning_rate": 5.095468063344766e-05, - "loss": 1.1278, - "step": 1641 - }, - { - "epoch": 0.51, - "learning_rate": 5.090443991759e-05, - "loss": 1.009, - "step": 1642 - }, - { - "epoch": 0.51, - "learning_rate": 5.08541982882472e-05, - "loss": 1.1206, - "step": 1643 - }, - { - "epoch": 0.51, - "learning_rate": 5.080395579616336e-05, - "loss": 1.1219, - "step": 1644 - }, - { - "epoch": 0.51, - "learning_rate": 5.0753712492083446e-05, - "loss": 1.0388, - "step": 1645 - }, - { - "epoch": 0.51, - "learning_rate": 5.070346842675323e-05, - "loss": 0.9256, - "step": 1646 - }, - { - "epoch": 0.51, - "learning_rate": 5.065322365091928e-05, - "loss": 1.0411, - "step": 1647 - }, - { - "epoch": 0.51, - "learning_rate": 5.060297821532884e-05, - "loss": 1.0742, - "step": 1648 - }, - { - "epoch": 0.51, - "learning_rate": 5.0552732170729874e-05, - "loss": 1.1278, - "step": 1649 - }, - { - "epoch": 0.51, - "learning_rate": 5.0502485567870904e-05, - "loss": 0.9111, - "step": 1650 - }, - { - "epoch": 0.51, - "learning_rate": 5.0452238457501064e-05, - "loss": 1.057, - "step": 1651 - }, - { - "epoch": 0.51, - "learning_rate": 5.040199089036997e-05, - "loss": 1.0757, - "step": 1652 - }, - { - "epoch": 0.51, - "learning_rate": 5.0351742917227715e-05, - "loss": 1.1176, - "step": 1653 - }, - { - "epoch": 0.51, - "learning_rate": 5.0301494588824795e-05, - "loss": 1.0543, - "step": 1654 - }, - { - "epoch": 0.51, - "learning_rate": 5.025124595591206e-05, - "loss": 0.993, - "step": 1655 - }, - { - "epoch": 0.51, - "learning_rate": 5.020099706924071e-05, - "loss": 1.1372, - "step": 1656 - }, - { - "epoch": 0.51, - "learning_rate": 5.015074797956212e-05, - "loss": 1.0612, - "step": 1657 - }, - { - "epoch": 0.51, - "learning_rate": 5.010049873762793e-05, - "loss": 1.04, - "step": 1658 - }, - { - "epoch": 0.51, - "learning_rate": 5.005024939418994e-05, - "loss": 1.095, - "step": 1659 - }, - { - "epoch": 0.51, - "learning_rate": 5e-05, - "loss": 0.9997, - "step": 1660 - }, - { - "epoch": 0.52, - "learning_rate": 4.994975060581007e-05, - "loss": 1.0042, - "step": 1661 - }, - { - "epoch": 0.52, - "learning_rate": 4.9899501262372076e-05, - "loss": 1.0686, - "step": 1662 - }, - { - "epoch": 0.52, - "learning_rate": 4.984925202043789e-05, - "loss": 0.9744, - "step": 1663 - }, - { - "epoch": 0.52, - "learning_rate": 4.979900293075929e-05, - "loss": 0.9701, - "step": 1664 - }, - { - "epoch": 0.52, - "learning_rate": 4.974875404408794e-05, - "loss": 1.0818, - "step": 1665 - }, - { - "epoch": 0.52, - "learning_rate": 4.96985054111752e-05, - "loss": 1.0462, - "step": 1666 - }, - { - "epoch": 0.52, - "learning_rate": 4.96482570827723e-05, - "loss": 1.0386, - "step": 1667 - }, - { - "epoch": 0.52, - "learning_rate": 4.9598009109630044e-05, - "loss": 1.0218, - "step": 1668 - }, - { - "epoch": 0.52, - "learning_rate": 4.9547761542498955e-05, - "loss": 1.0989, - "step": 1669 - }, - { - "epoch": 0.52, - "learning_rate": 4.949751443212911e-05, - "loss": 1.0762, - "step": 1670 - }, - { - "epoch": 0.52, - "learning_rate": 4.944726782927014e-05, - "loss": 1.0988, - "step": 1671 - }, - { - "epoch": 0.52, - "learning_rate": 4.939702178467117e-05, - "loss": 1.1051, - "step": 1672 - }, - { - "epoch": 0.52, - "learning_rate": 4.9346776349080723e-05, - "loss": 1.0285, - "step": 1673 - }, - { - "epoch": 0.52, - "learning_rate": 4.9296531573246775e-05, - "loss": 1.0088, - "step": 1674 - }, - { - "epoch": 0.52, - "learning_rate": 4.924628750791656e-05, - "loss": 1.1698, - "step": 1675 - }, - { - "epoch": 0.52, - "learning_rate": 4.919604420383664e-05, - "loss": 1.0159, - "step": 1676 - }, - { - "epoch": 0.52, - "learning_rate": 4.91458017117528e-05, - "loss": 1.0455, - "step": 1677 - }, - { - "epoch": 0.52, - "learning_rate": 4.9095560082410006e-05, - "loss": 1.0515, - "step": 1678 - }, - { - "epoch": 0.52, - "learning_rate": 4.904531936655236e-05, - "loss": 0.9703, - "step": 1679 - }, - { - "epoch": 0.52, - "learning_rate": 4.899507961492299e-05, - "loss": 1.093, - "step": 1680 - }, - { - "epoch": 0.52, - "learning_rate": 4.8944840878264125e-05, - "loss": 1.0391, - "step": 1681 - }, - { - "epoch": 0.52, - "learning_rate": 4.889460320731692e-05, - "loss": 1.2014, - "step": 1682 - }, - { - "epoch": 0.52, - "learning_rate": 4.884436665282147e-05, - "loss": 1.0509, - "step": 1683 - }, - { - "epoch": 0.52, - "learning_rate": 4.879413126551675e-05, - "loss": 1.0406, - "step": 1684 - }, - { - "epoch": 0.52, - "learning_rate": 4.874389709614053e-05, - "loss": 1.1208, - "step": 1685 - }, - { - "epoch": 0.52, - "learning_rate": 4.869366419542937e-05, - "loss": 1.0205, - "step": 1686 - }, - { - "epoch": 0.52, - "learning_rate": 4.864343261411857e-05, - "loss": 1.0206, - "step": 1687 - }, - { - "epoch": 0.52, - "learning_rate": 4.859320240294204e-05, - "loss": 1.1111, - "step": 1688 - }, - { - "epoch": 0.52, - "learning_rate": 4.8542973612632346e-05, - "loss": 0.9985, - "step": 1689 - }, - { - "epoch": 0.52, - "learning_rate": 4.849274629392063e-05, - "loss": 1.0366, - "step": 1690 - }, - { - "epoch": 0.52, - "learning_rate": 4.844252049753651e-05, - "loss": 1.0509, - "step": 1691 - }, - { - "epoch": 0.52, - "learning_rate": 4.839229627420811e-05, - "loss": 1.0412, - "step": 1692 - }, - { - "epoch": 0.53, - "learning_rate": 4.8342073674661904e-05, - "loss": 1.0771, - "step": 1693 - }, - { - "epoch": 0.53, - "learning_rate": 4.829185274962279e-05, - "loss": 0.9753, - "step": 1694 - }, - { - "epoch": 0.53, - "learning_rate": 4.8241633549813924e-05, - "loss": 1.1297, - "step": 1695 - }, - { - "epoch": 0.53, - "learning_rate": 4.8191416125956765e-05, - "loss": 1.0481, - "step": 1696 - }, - { - "epoch": 0.53, - "learning_rate": 4.8141200528770955e-05, - "loss": 1.0759, - "step": 1697 - }, - { - "epoch": 0.53, - "learning_rate": 4.809098680897427e-05, - "loss": 0.8345, - "step": 1698 - }, - { - "epoch": 0.53, - "learning_rate": 4.8040775017282644e-05, - "loss": 0.941, - "step": 1699 - }, - { - "epoch": 0.53, - "learning_rate": 4.799056520441002e-05, - "loss": 1.0976, - "step": 1700 - }, - { - "epoch": 0.53, - "learning_rate": 4.794035742106835e-05, - "loss": 1.1086, - "step": 1701 - }, - { - "epoch": 0.53, - "learning_rate": 4.789015171796753e-05, - "loss": 1.0801, - "step": 1702 - }, - { - "epoch": 0.53, - "learning_rate": 4.783994814581539e-05, - "loss": 1.0392, - "step": 1703 - }, - { - "epoch": 0.53, - "learning_rate": 4.7789746755317575e-05, - "loss": 1.087, - "step": 1704 - }, - { - "epoch": 0.53, - "learning_rate": 4.773954759717752e-05, - "loss": 0.986, - "step": 1705 - }, - { - "epoch": 0.53, - "learning_rate": 4.7689350722096436e-05, - "loss": 1.1188, - "step": 1706 - }, - { - "epoch": 0.53, - "learning_rate": 4.763915618077319e-05, - "loss": 0.9672, - "step": 1707 - }, - { - "epoch": 0.53, - "learning_rate": 4.7588964023904336e-05, - "loss": 1.2058, - "step": 1708 - }, - { - "epoch": 0.53, - "learning_rate": 4.7538774302184e-05, - "loss": 1.0989, - "step": 1709 - }, - { - "epoch": 0.53, - "learning_rate": 4.748858706630384e-05, - "loss": 0.9856, - "step": 1710 - }, - { - "epoch": 0.53, - "learning_rate": 4.743840236695299e-05, - "loss": 1.0966, - "step": 1711 - }, - { - "epoch": 0.53, - "learning_rate": 4.7388220254818074e-05, - "loss": 0.9761, - "step": 1712 - }, - { - "epoch": 0.53, - "learning_rate": 4.7338040780583046e-05, - "loss": 1.1309, - "step": 1713 - }, - { - "epoch": 0.53, - "learning_rate": 4.7287863994929234e-05, - "loss": 0.8926, - "step": 1714 - }, - { - "epoch": 0.53, - "learning_rate": 4.7237689948535236e-05, - "loss": 0.9751, - "step": 1715 - }, - { - "epoch": 0.53, - "learning_rate": 4.718751869207687e-05, - "loss": 1.125, - "step": 1716 - }, - { - "epoch": 0.53, - "learning_rate": 4.7137350276227185e-05, - "loss": 1.0449, - "step": 1717 - }, - { - "epoch": 0.53, - "learning_rate": 4.708718475165628e-05, - "loss": 1.1161, - "step": 1718 - }, - { - "epoch": 0.53, - "learning_rate": 4.703702216903142e-05, - "loss": 0.9932, - "step": 1719 - }, - { - "epoch": 0.53, - "learning_rate": 4.698686257901682e-05, - "loss": 1.0031, - "step": 1720 - }, - { - "epoch": 0.53, - "learning_rate": 4.6936706032273735e-05, - "loss": 1.1352, - "step": 1721 - }, - { - "epoch": 0.53, - "learning_rate": 4.688655257946033e-05, - "loss": 1.0747, - "step": 1722 - }, - { - "epoch": 0.53, - "learning_rate": 4.6836402271231615e-05, - "loss": 0.9282, - "step": 1723 - }, - { - "epoch": 0.53, - "learning_rate": 4.678625515823946e-05, - "loss": 1.1627, - "step": 1724 - }, - { - "epoch": 0.54, - "learning_rate": 4.6736111291132506e-05, - "loss": 0.9728, - "step": 1725 - }, - { - "epoch": 0.54, - "learning_rate": 4.668597072055608e-05, - "loss": 1.0569, - "step": 1726 - }, - { - "epoch": 0.54, - "learning_rate": 4.663583349715222e-05, - "loss": 1.0081, - "step": 1727 - }, - { - "epoch": 0.54, - "learning_rate": 4.658569967155955e-05, - "loss": 1.0095, - "step": 1728 - }, - { - "epoch": 0.54, - "learning_rate": 4.6535569294413315e-05, - "loss": 1.089, - "step": 1729 - }, - { - "epoch": 0.54, - "learning_rate": 4.648544241634519e-05, - "loss": 1.09, - "step": 1730 - }, - { - "epoch": 0.54, - "learning_rate": 4.6435319087983404e-05, - "loss": 1.0382, - "step": 1731 - }, - { - "epoch": 0.54, - "learning_rate": 4.6385199359952555e-05, - "loss": 0.9015, - "step": 1732 - }, - { - "epoch": 0.54, - "learning_rate": 4.63350832828736e-05, - "loss": 1.0053, - "step": 1733 - }, - { - "epoch": 0.54, - "learning_rate": 4.628497090736385e-05, - "loss": 1.0317, - "step": 1734 - }, - { - "epoch": 0.54, - "learning_rate": 4.6234862284036815e-05, - "loss": 1.1194, - "step": 1735 - }, - { - "epoch": 0.54, - "learning_rate": 4.618475746350228e-05, - "loss": 1.0677, - "step": 1736 - }, - { - "epoch": 0.54, - "learning_rate": 4.613465649636615e-05, - "loss": 1.1223, - "step": 1737 - }, - { - "epoch": 0.54, - "learning_rate": 4.608455943323043e-05, - "loss": 1.1283, - "step": 1738 - }, - { - "epoch": 0.54, - "learning_rate": 4.6034466324693216e-05, - "loss": 0.9509, - "step": 1739 - }, - { - "epoch": 0.54, - "learning_rate": 4.59843772213486e-05, - "loss": 1.061, - "step": 1740 - }, - { - "epoch": 0.54, - "learning_rate": 4.59342921737866e-05, - "loss": 1.0916, - "step": 1741 - }, - { - "epoch": 0.54, - "learning_rate": 4.588421123259319e-05, - "loss": 1.0646, - "step": 1742 - }, - { - "epoch": 0.54, - "learning_rate": 4.5834134448350126e-05, - "loss": 1.013, - "step": 1743 - }, - { - "epoch": 0.54, - "learning_rate": 4.5784061871635034e-05, - "loss": 1.0571, - "step": 1744 - }, - { - "epoch": 0.54, - "learning_rate": 4.5733993553021245e-05, - "loss": 1.005, - "step": 1745 - }, - { - "epoch": 0.54, - "learning_rate": 4.568392954307781e-05, - "loss": 1.1024, - "step": 1746 - }, - { - "epoch": 0.54, - "learning_rate": 4.563386989236944e-05, - "loss": 0.9681, - "step": 1747 - }, - { - "epoch": 0.54, - "learning_rate": 4.558381465145641e-05, - "loss": 1.0241, - "step": 1748 - }, - { - "epoch": 0.54, - "learning_rate": 4.553376387089455e-05, - "loss": 1.0424, - "step": 1749 - }, - { - "epoch": 0.54, - "learning_rate": 4.5483717601235234e-05, - "loss": 1.1365, - "step": 1750 - }, - { - "epoch": 0.54, - "learning_rate": 4.543367589302519e-05, - "loss": 1.0894, - "step": 1751 - }, - { - "epoch": 0.54, - "learning_rate": 4.538363879680662e-05, - "loss": 0.931, - "step": 1752 - }, - { - "epoch": 0.54, - "learning_rate": 4.533360636311703e-05, - "loss": 1.0421, - "step": 1753 - }, - { - "epoch": 0.54, - "learning_rate": 4.528357864248924e-05, - "loss": 1.0471, - "step": 1754 - }, - { - "epoch": 0.54, - "learning_rate": 4.5233555685451245e-05, - "loss": 1.0641, - "step": 1755 - }, - { - "epoch": 0.54, - "learning_rate": 4.518353754252631e-05, - "loss": 1.0577, - "step": 1756 - }, - { - "epoch": 0.55, - "learning_rate": 4.5133524264232805e-05, - "loss": 1.0566, - "step": 1757 - }, - { - "epoch": 0.55, - "learning_rate": 4.508351590108417e-05, - "loss": 1.0762, - "step": 1758 - }, - { - "epoch": 0.55, - "learning_rate": 4.5033512503588925e-05, - "loss": 1.0389, - "step": 1759 - }, - { - "epoch": 0.55, - "learning_rate": 4.4983514122250516e-05, - "loss": 0.9764, - "step": 1760 - }, - { - "epoch": 0.55, - "learning_rate": 4.4933520807567365e-05, - "loss": 1.0971, - "step": 1761 - }, - { - "epoch": 0.55, - "learning_rate": 4.4883532610032775e-05, - "loss": 1.0318, - "step": 1762 - }, - { - "epoch": 0.55, - "learning_rate": 4.4833549580134855e-05, - "loss": 1.1065, - "step": 1763 - }, - { - "epoch": 0.55, - "learning_rate": 4.478357176835651e-05, - "loss": 1.0246, - "step": 1764 - }, - { - "epoch": 0.55, - "learning_rate": 4.473359922517541e-05, - "loss": 0.8166, - "step": 1765 - }, - { - "epoch": 0.55, - "learning_rate": 4.4683632001063814e-05, - "loss": 1.0914, - "step": 1766 - }, - { - "epoch": 0.55, - "learning_rate": 4.4633670146488714e-05, - "loss": 1.029, - "step": 1767 - }, - { - "epoch": 0.55, - "learning_rate": 4.458371371191158e-05, - "loss": 1.0451, - "step": 1768 - }, - { - "epoch": 0.55, - "learning_rate": 4.45337627477885e-05, - "loss": 1.0787, - "step": 1769 - }, - { - "epoch": 0.55, - "learning_rate": 4.448381730456996e-05, - "loss": 1.0353, - "step": 1770 - }, - { - "epoch": 0.55, - "learning_rate": 4.443387743270092e-05, - "loss": 1.1426, - "step": 1771 - }, - { - "epoch": 0.55, - "learning_rate": 4.438394318262069e-05, - "loss": 0.9909, - "step": 1772 - }, - { - "epoch": 0.55, - "learning_rate": 4.433401460476291e-05, - "loss": 1.096, - "step": 1773 - }, - { - "epoch": 0.55, - "learning_rate": 4.428409174955548e-05, - "loss": 1.0168, - "step": 1774 - }, - { - "epoch": 0.55, - "learning_rate": 4.4234174667420546e-05, - "loss": 0.9977, - "step": 1775 - }, - { - "epoch": 0.55, - "learning_rate": 4.418426340877439e-05, - "loss": 0.9406, - "step": 1776 - }, - { - "epoch": 0.55, - "learning_rate": 4.413435802402743e-05, - "loss": 1.1334, - "step": 1777 - }, - { - "epoch": 0.55, - "learning_rate": 4.408445856358417e-05, - "loss": 1.0487, - "step": 1778 - }, - { - "epoch": 0.55, - "learning_rate": 4.403456507784311e-05, - "loss": 1.1031, - "step": 1779 - }, - { - "epoch": 0.55, - "learning_rate": 4.3984677617196665e-05, - "loss": 0.9708, - "step": 1780 - }, - { - "epoch": 0.55, - "learning_rate": 4.393479623203126e-05, - "loss": 0.9817, - "step": 1781 - }, - { - "epoch": 0.55, - "learning_rate": 4.388492097272714e-05, - "loss": 1.1035, - "step": 1782 - }, - { - "epoch": 0.55, - "learning_rate": 4.383505188965834e-05, - "loss": 1.055, - "step": 1783 - }, - { - "epoch": 0.55, - "learning_rate": 4.3785189033192686e-05, - "loss": 1.0616, - "step": 1784 - }, - { - "epoch": 0.55, - "learning_rate": 4.3735332453691726e-05, - "loss": 0.9983, - "step": 1785 - }, - { - "epoch": 0.55, - "learning_rate": 4.368548220151062e-05, - "loss": 1.0408, - "step": 1786 - }, - { - "epoch": 0.55, - "learning_rate": 4.36356383269982e-05, - "loss": 1.0209, - "step": 1787 - }, - { - "epoch": 0.55, - "learning_rate": 4.358580088049679e-05, - "loss": 0.9952, - "step": 1788 - }, - { - "epoch": 0.55, - "learning_rate": 4.353596991234228e-05, - "loss": 1.1549, - "step": 1789 - }, - { - "epoch": 0.56, - "learning_rate": 4.3486145472863997e-05, - "loss": 1.1207, - "step": 1790 - }, - { - "epoch": 0.56, - "learning_rate": 4.3436327612384645e-05, - "loss": 0.9575, - "step": 1791 - }, - { - "epoch": 0.56, - "learning_rate": 4.338651638122035e-05, - "loss": 1.025, - "step": 1792 - }, - { - "epoch": 0.56, - "learning_rate": 4.3336711829680445e-05, - "loss": 1.0972, - "step": 1793 - }, - { - "epoch": 0.56, - "learning_rate": 4.3286914008067615e-05, - "loss": 1.0064, - "step": 1794 - }, - { - "epoch": 0.56, - "learning_rate": 4.323712296667768e-05, - "loss": 1.0623, - "step": 1795 - }, - { - "epoch": 0.56, - "learning_rate": 4.318733875579964e-05, - "loss": 1.1058, - "step": 1796 - }, - { - "epoch": 0.56, - "learning_rate": 4.3137561425715614e-05, - "loss": 1.0214, - "step": 1797 - }, - { - "epoch": 0.56, - "learning_rate": 4.308779102670072e-05, - "loss": 0.8793, - "step": 1798 - }, - { - "epoch": 0.56, - "learning_rate": 4.3038027609023124e-05, - "loss": 1.103, - "step": 1799 - }, - { - "epoch": 0.56, - "learning_rate": 4.2988271222943934e-05, - "loss": 0.9624, - "step": 1800 - }, - { - "epoch": 0.56, - "learning_rate": 4.2938521918717115e-05, - "loss": 1.1441, - "step": 1801 - }, - { - "epoch": 0.56, - "learning_rate": 4.288877974658952e-05, - "loss": 1.0215, - "step": 1802 - }, - { - "epoch": 0.56, - "learning_rate": 4.283904475680082e-05, - "loss": 1.1671, - "step": 1803 - }, - { - "epoch": 0.56, - "learning_rate": 4.278931699958337e-05, - "loss": 0.9763, - "step": 1804 - }, - { - "epoch": 0.56, - "learning_rate": 4.273959652516223e-05, - "loss": 1.0401, - "step": 1805 - }, - { - "epoch": 0.56, - "learning_rate": 4.268988338375515e-05, - "loss": 1.1606, - "step": 1806 - }, - { - "epoch": 0.56, - "learning_rate": 4.2640177625572445e-05, - "loss": 1.0446, - "step": 1807 - }, - { - "epoch": 0.56, - "learning_rate": 4.2590479300816976e-05, - "loss": 1.1691, - "step": 1808 - }, - { - "epoch": 0.56, - "learning_rate": 4.254078845968409e-05, - "loss": 0.9638, - "step": 1809 - }, - { - "epoch": 0.56, - "learning_rate": 4.249110515236159e-05, - "loss": 1.0731, - "step": 1810 - }, - { - "epoch": 0.56, - "learning_rate": 4.244142942902965e-05, - "loss": 1.0079, - "step": 1811 - }, - { - "epoch": 0.56, - "learning_rate": 4.23917613398608e-05, - "loss": 1.0389, - "step": 1812 - }, - { - "epoch": 0.56, - "learning_rate": 4.2342100935019866e-05, - "loss": 1.0366, - "step": 1813 - }, - { - "epoch": 0.56, - "learning_rate": 4.229244826466389e-05, - "loss": 1.0414, - "step": 1814 - }, - { - "epoch": 0.56, - "learning_rate": 4.224280337894211e-05, - "loss": 0.9676, - "step": 1815 - }, - { - "epoch": 0.56, - "learning_rate": 4.219316632799591e-05, - "loss": 1.1602, - "step": 1816 - }, - { - "epoch": 0.56, - "learning_rate": 4.2143537161958786e-05, - "loss": 0.9597, - "step": 1817 - }, - { - "epoch": 0.56, - "learning_rate": 4.209391593095618e-05, - "loss": 1.0831, - "step": 1818 - }, - { - "epoch": 0.56, - "learning_rate": 4.204430268510563e-05, - "loss": 1.135, - "step": 1819 - }, - { - "epoch": 0.56, - "learning_rate": 4.199469747451652e-05, - "loss": 0.9555, - "step": 1820 - }, - { - "epoch": 0.56, - "learning_rate": 4.194510034929019e-05, - "loss": 0.9763, - "step": 1821 - }, - { - "epoch": 0.57, - "learning_rate": 4.1895511359519776e-05, - "loss": 1.1593, - "step": 1822 - }, - { - "epoch": 0.57, - "learning_rate": 4.184593055529018e-05, - "loss": 0.9782, - "step": 1823 - }, - { - "epoch": 0.57, - "learning_rate": 4.179635798667808e-05, - "loss": 1.0776, - "step": 1824 - }, - { - "epoch": 0.57, - "learning_rate": 4.174679370375182e-05, - "loss": 1.0057, - "step": 1825 - }, - { - "epoch": 0.57, - "learning_rate": 4.169723775657135e-05, - "loss": 1.015, - "step": 1826 - }, - { - "epoch": 0.57, - "learning_rate": 4.164769019518823e-05, - "loss": 1.0589, - "step": 1827 - }, - { - "epoch": 0.57, - "learning_rate": 4.159815106964555e-05, - "loss": 1.0741, - "step": 1828 - }, - { - "epoch": 0.57, - "learning_rate": 4.1548620429977884e-05, - "loss": 1.0738, - "step": 1829 - }, - { - "epoch": 0.57, - "learning_rate": 4.1499098326211184e-05, - "loss": 1.0358, - "step": 1830 - }, - { - "epoch": 0.57, - "learning_rate": 4.144958480836283e-05, - "loss": 0.9943, - "step": 1831 - }, - { - "epoch": 0.57, - "learning_rate": 4.140007992644154e-05, - "loss": 1.1327, - "step": 1832 - }, - { - "epoch": 0.57, - "learning_rate": 4.1350583730447275e-05, - "loss": 0.9927, - "step": 1833 - }, - { - "epoch": 0.57, - "learning_rate": 4.1301096270371235e-05, - "loss": 1.0059, - "step": 1834 - }, - { - "epoch": 0.57, - "learning_rate": 4.125161759619582e-05, - "loss": 1.0032, - "step": 1835 - }, - { - "epoch": 0.57, - "learning_rate": 4.1202147757894505e-05, - "loss": 0.9529, - "step": 1836 - }, - { - "epoch": 0.57, - "learning_rate": 4.115268680543189e-05, - "loss": 1.0169, - "step": 1837 - }, - { - "epoch": 0.57, - "learning_rate": 4.110323478876358e-05, - "loss": 1.0025, - "step": 1838 - }, - { - "epoch": 0.57, - "learning_rate": 4.1053791757836155e-05, - "loss": 1.1633, - "step": 1839 - }, - { - "epoch": 0.57, - "learning_rate": 4.100435776258713e-05, - "loss": 1.022, - "step": 1840 - }, - { - "epoch": 0.57, - "learning_rate": 4.095493285294486e-05, - "loss": 1.0589, - "step": 1841 - }, - { - "epoch": 0.57, - "learning_rate": 4.090551707882859e-05, - "loss": 1.0579, - "step": 1842 - }, - { - "epoch": 0.57, - "learning_rate": 4.085611049014825e-05, - "loss": 1.0182, - "step": 1843 - }, - { - "epoch": 0.57, - "learning_rate": 4.080671313680456e-05, - "loss": 1.1262, - "step": 1844 - }, - { - "epoch": 0.57, - "learning_rate": 4.075732506868888e-05, - "loss": 1.0007, - "step": 1845 - }, - { - "epoch": 0.57, - "learning_rate": 4.0707946335683226e-05, - "loss": 1.0333, - "step": 1846 - }, - { - "epoch": 0.57, - "learning_rate": 4.0658576987660154e-05, - "loss": 1.0552, - "step": 1847 - }, - { - "epoch": 0.57, - "learning_rate": 4.0609217074482735e-05, - "loss": 1.0191, - "step": 1848 - }, - { - "epoch": 0.57, - "learning_rate": 4.055986664600454e-05, - "loss": 1.0204, - "step": 1849 - }, - { - "epoch": 0.57, - "learning_rate": 4.0510525752069556e-05, - "loss": 1.0424, - "step": 1850 - }, - { - "epoch": 0.57, - "learning_rate": 4.0461194442512104e-05, - "loss": 0.9754, - "step": 1851 - }, - { - "epoch": 0.57, - "learning_rate": 4.041187276715687e-05, - "loss": 1.0725, - "step": 1852 - }, - { - "epoch": 0.57, - "learning_rate": 4.036256077581879e-05, - "loss": 1.011, - "step": 1853 - }, - { - "epoch": 0.58, - "learning_rate": 4.031325851830303e-05, - "loss": 0.9734, - "step": 1854 - }, - { - "epoch": 0.58, - "learning_rate": 4.026396604440488e-05, - "loss": 1.1485, - "step": 1855 - }, - { - "epoch": 0.58, - "learning_rate": 4.02146834039098e-05, - "loss": 1.0169, - "step": 1856 - }, - { - "epoch": 0.58, - "learning_rate": 4.016541064659332e-05, - "loss": 1.0129, - "step": 1857 - }, - { - "epoch": 0.58, - "learning_rate": 4.011614782222094e-05, - "loss": 1.0303, - "step": 1858 - }, - { - "epoch": 0.58, - "learning_rate": 4.006689498054817e-05, - "loss": 0.9646, - "step": 1859 - }, - { - "epoch": 0.58, - "learning_rate": 4.001765217132044e-05, - "loss": 1.1236, - "step": 1860 - }, - { - "epoch": 0.58, - "learning_rate": 3.9968419444273e-05, - "loss": 0.9679, - "step": 1861 - }, - { - "epoch": 0.58, - "learning_rate": 3.991919684913097e-05, - "loss": 1.0505, - "step": 1862 - }, - { - "epoch": 0.58, - "learning_rate": 3.986998443560922e-05, - "loss": 1.0313, - "step": 1863 - }, - { - "epoch": 0.58, - "learning_rate": 3.9820782253412316e-05, - "loss": 1.0302, - "step": 1864 - }, - { - "epoch": 0.58, - "learning_rate": 3.9771590352234514e-05, - "loss": 1.0503, - "step": 1865 - }, - { - "epoch": 0.58, - "learning_rate": 3.972240878175969e-05, - "loss": 1.0611, - "step": 1866 - }, - { - "epoch": 0.58, - "learning_rate": 3.967323759166127e-05, - "loss": 1.0161, - "step": 1867 - }, - { - "epoch": 0.58, - "learning_rate": 3.962407683160217e-05, - "loss": 1.1032, - "step": 1868 - }, - { - "epoch": 0.58, - "learning_rate": 3.957492655123482e-05, - "loss": 0.8141, - "step": 1869 - }, - { - "epoch": 0.58, - "learning_rate": 3.9525786800201074e-05, - "loss": 1.0291, - "step": 1870 - }, - { - "epoch": 0.58, - "learning_rate": 3.947665762813209e-05, - "loss": 1.0324, - "step": 1871 - }, - { - "epoch": 0.58, - "learning_rate": 3.94275390846484e-05, - "loss": 1.1333, - "step": 1872 - }, - { - "epoch": 0.58, - "learning_rate": 3.9378431219359755e-05, - "loss": 1.0084, - "step": 1873 - }, - { - "epoch": 0.58, - "learning_rate": 3.9329334081865154e-05, - "loss": 0.9958, - "step": 1874 - }, - { - "epoch": 0.58, - "learning_rate": 3.928024772175277e-05, - "loss": 1.1307, - "step": 1875 - }, - { - "epoch": 0.58, - "learning_rate": 3.923117218859983e-05, - "loss": 1.057, - "step": 1876 - }, - { - "epoch": 0.58, - "learning_rate": 3.91821075319727e-05, - "loss": 1.0642, - "step": 1877 - }, - { - "epoch": 0.58, - "learning_rate": 3.913305380142671e-05, - "loss": 0.9951, - "step": 1878 - }, - { - "epoch": 0.58, - "learning_rate": 3.9084011046506206e-05, - "loss": 1.0911, - "step": 1879 - }, - { - "epoch": 0.58, - "learning_rate": 3.9034979316744356e-05, - "loss": 1.0012, - "step": 1880 - }, - { - "epoch": 0.58, - "learning_rate": 3.8985958661663284e-05, - "loss": 1.1095, - "step": 1881 - }, - { - "epoch": 0.58, - "learning_rate": 3.89369491307739e-05, - "loss": 1.055, - "step": 1882 - }, - { - "epoch": 0.58, - "learning_rate": 3.888795077357587e-05, - "loss": 0.9883, - "step": 1883 - }, - { - "epoch": 0.58, - "learning_rate": 3.883896363955757e-05, - "loss": 1.0701, - "step": 1884 - }, - { - "epoch": 0.58, - "learning_rate": 3.878998777819607e-05, - "loss": 0.9708, - "step": 1885 - }, - { - "epoch": 0.59, - "learning_rate": 3.874102323895701e-05, - "loss": 0.9952, - "step": 1886 - }, - { - "epoch": 0.59, - "learning_rate": 3.869207007129463e-05, - "loss": 1.059, - "step": 1887 - }, - { - "epoch": 0.59, - "learning_rate": 3.864312832465168e-05, - "loss": 1.0182, - "step": 1888 - }, - { - "epoch": 0.59, - "learning_rate": 3.859419804845935e-05, - "loss": 1.0907, - "step": 1889 - }, - { - "epoch": 0.59, - "learning_rate": 3.854527929213728e-05, - "loss": 0.9693, - "step": 1890 - }, - { - "epoch": 0.59, - "learning_rate": 3.849637210509346e-05, - "loss": 1.064, - "step": 1891 - }, - { - "epoch": 0.59, - "learning_rate": 3.84474765367242e-05, - "loss": 1.044, - "step": 1892 - }, - { - "epoch": 0.59, - "learning_rate": 3.839859263641403e-05, - "loss": 1.0102, - "step": 1893 - }, - { - "epoch": 0.59, - "learning_rate": 3.834972045353575e-05, - "loss": 1.0787, - "step": 1894 - }, - { - "epoch": 0.59, - "learning_rate": 3.830086003745034e-05, - "loss": 0.9799, - "step": 1895 - }, - { - "epoch": 0.59, - "learning_rate": 3.8252011437506824e-05, - "loss": 0.9689, - "step": 1896 - }, - { - "epoch": 0.59, - "learning_rate": 3.820317470304234e-05, - "loss": 1.0766, - "step": 1897 - }, - { - "epoch": 0.59, - "learning_rate": 3.815434988338207e-05, - "loss": 1.0319, - "step": 1898 - }, - { - "epoch": 0.59, - "learning_rate": 3.810553702783908e-05, - "loss": 1.0516, - "step": 1899 - }, - { - "epoch": 0.59, - "learning_rate": 3.805673618571442e-05, - "loss": 0.9673, - "step": 1900 - }, - { - "epoch": 0.59, - "learning_rate": 3.800794740629697e-05, - "loss": 1.086, - "step": 1901 - }, - { - "epoch": 0.59, - "learning_rate": 3.795917073886346e-05, - "loss": 1.0156, - "step": 1902 - }, - { - "epoch": 0.59, - "learning_rate": 3.7910406232678363e-05, - "loss": 0.9563, - "step": 1903 - }, - { - "epoch": 0.59, - "learning_rate": 3.7861653936993886e-05, - "loss": 1.1737, - "step": 1904 - }, - { - "epoch": 0.59, - "learning_rate": 3.781291390104986e-05, - "loss": 1.0666, - "step": 1905 - }, - { - "epoch": 0.59, - "learning_rate": 3.776418617407377e-05, - "loss": 0.9535, - "step": 1906 - }, - { - "epoch": 0.59, - "learning_rate": 3.7715470805280694e-05, - "loss": 1.0637, - "step": 1907 - }, - { - "epoch": 0.59, - "learning_rate": 3.766676784387317e-05, - "loss": 0.9802, - "step": 1908 - }, - { - "epoch": 0.59, - "learning_rate": 3.761807733904124e-05, - "loss": 1.0507, - "step": 1909 - }, - { - "epoch": 0.59, - "learning_rate": 3.7569399339962376e-05, - "loss": 1.0493, - "step": 1910 - }, - { - "epoch": 0.59, - "learning_rate": 3.7520733895801376e-05, - "loss": 1.0824, - "step": 1911 - }, - { - "epoch": 0.59, - "learning_rate": 3.7472081055710385e-05, - "loss": 0.9295, - "step": 1912 - }, - { - "epoch": 0.59, - "learning_rate": 3.742344086882884e-05, - "loss": 1.0463, - "step": 1913 - }, - { - "epoch": 0.59, - "learning_rate": 3.7374813384283346e-05, - "loss": 1.08, - "step": 1914 - }, - { - "epoch": 0.59, - "learning_rate": 3.732619865118772e-05, - "loss": 0.9968, - "step": 1915 - }, - { - "epoch": 0.59, - "learning_rate": 3.7277596718642884e-05, - "loss": 0.982, - "step": 1916 - }, - { - "epoch": 0.59, - "learning_rate": 3.722900763573685e-05, - "loss": 1.0746, - "step": 1917 - }, - { - "epoch": 0.6, - "learning_rate": 3.7180431451544596e-05, - "loss": 1.0211, - "step": 1918 - }, - { - "epoch": 0.6, - "learning_rate": 3.7131868215128136e-05, - "loss": 1.0198, - "step": 1919 - }, - { - "epoch": 0.6, - "learning_rate": 3.7083317975536395e-05, - "loss": 1.0736, - "step": 1920 - }, - { - "epoch": 0.6, - "learning_rate": 3.703478078180512e-05, - "loss": 1.0443, - "step": 1921 - }, - { - "epoch": 0.6, - "learning_rate": 3.6986256682956955e-05, - "loss": 0.9455, - "step": 1922 - }, - { - "epoch": 0.6, - "learning_rate": 3.6937745728001284e-05, - "loss": 1.042, - "step": 1923 - }, - { - "epoch": 0.6, - "learning_rate": 3.6889247965934195e-05, - "loss": 1.0776, - "step": 1924 - }, - { - "epoch": 0.6, - "learning_rate": 3.6840763445738485e-05, - "loss": 0.9963, - "step": 1925 - }, - { - "epoch": 0.6, - "learning_rate": 3.679229221638355e-05, - "loss": 1.0754, - "step": 1926 - }, - { - "epoch": 0.6, - "learning_rate": 3.674383432682539e-05, - "loss": 0.9999, - "step": 1927 - }, - { - "epoch": 0.6, - "learning_rate": 3.6695389826006524e-05, - "loss": 1.0672, - "step": 1928 - }, - { - "epoch": 0.6, - "learning_rate": 3.664695876285592e-05, - "loss": 1.0073, - "step": 1929 - }, - { - "epoch": 0.6, - "learning_rate": 3.659854118628902e-05, - "loss": 1.0068, - "step": 1930 - }, - { - "epoch": 0.6, - "learning_rate": 3.655013714520757e-05, - "loss": 1.0499, - "step": 1931 - }, - { - "epoch": 0.6, - "learning_rate": 3.650174668849976e-05, - "loss": 1.0072, - "step": 1932 - }, - { - "epoch": 0.6, - "learning_rate": 3.645336986503995e-05, - "loss": 1.014, - "step": 1933 - }, - { - "epoch": 0.6, - "learning_rate": 3.6405006723688786e-05, - "loss": 0.9484, - "step": 1934 - }, - { - "epoch": 0.6, - "learning_rate": 3.63566573132931e-05, - "loss": 0.9176, - "step": 1935 - }, - { - "epoch": 0.6, - "learning_rate": 3.630832168268582e-05, - "loss": 1.1233, - "step": 1936 - }, - { - "epoch": 0.6, - "learning_rate": 3.6259999880685985e-05, - "loss": 1.122, - "step": 1937 - }, - { - "epoch": 0.6, - "learning_rate": 3.621169195609867e-05, - "loss": 0.9774, - "step": 1938 - }, - { - "epoch": 0.6, - "learning_rate": 3.61633979577149e-05, - "loss": 0.9985, - "step": 1939 - }, - { - "epoch": 0.6, - "learning_rate": 3.6115117934311665e-05, - "loss": 1.0285, - "step": 1940 - }, - { - "epoch": 0.6, - "learning_rate": 3.606685193465185e-05, - "loss": 1.1621, - "step": 1941 - }, - { - "epoch": 0.6, - "learning_rate": 3.601860000748415e-05, - "loss": 0.9988, - "step": 1942 - }, - { - "epoch": 0.6, - "learning_rate": 3.5970362201543026e-05, - "loss": 1.0154, - "step": 1943 - }, - { - "epoch": 0.6, - "learning_rate": 3.592213856554874e-05, - "loss": 1.1788, - "step": 1944 - }, - { - "epoch": 0.6, - "learning_rate": 3.5873929148207186e-05, - "loss": 0.9544, - "step": 1945 - }, - { - "epoch": 0.6, - "learning_rate": 3.582573399820993e-05, - "loss": 1.0285, - "step": 1946 - }, - { - "epoch": 0.6, - "learning_rate": 3.577755316423411e-05, - "loss": 1.0823, - "step": 1947 - }, - { - "epoch": 0.6, - "learning_rate": 3.5729386694942434e-05, - "loss": 0.9548, - "step": 1948 - }, - { - "epoch": 0.6, - "learning_rate": 3.5681234638983054e-05, - "loss": 1.0926, - "step": 1949 - }, - { - "epoch": 0.6, - "learning_rate": 3.5633097044989594e-05, - "loss": 1.089, - "step": 1950 - }, - { - "epoch": 0.61, - "learning_rate": 3.558497396158108e-05, - "loss": 0.9487, - "step": 1951 - }, - { - "epoch": 0.61, - "learning_rate": 3.553686543736186e-05, - "loss": 1.078, - "step": 1952 - }, - { - "epoch": 0.61, - "learning_rate": 3.548877152092158e-05, - "loss": 1.0238, - "step": 1953 - }, - { - "epoch": 0.61, - "learning_rate": 3.544069226083516e-05, - "loss": 0.999, - "step": 1954 - }, - { - "epoch": 0.61, - "learning_rate": 3.5392627705662685e-05, - "loss": 0.9767, - "step": 1955 - }, - { - "epoch": 0.61, - "learning_rate": 3.534457790394939e-05, - "loss": 0.9552, - "step": 1956 - }, - { - "epoch": 0.61, - "learning_rate": 3.5296542904225626e-05, - "loss": 1.1694, - "step": 1957 - }, - { - "epoch": 0.61, - "learning_rate": 3.524852275500679e-05, - "loss": 1.0571, - "step": 1958 - }, - { - "epoch": 0.61, - "learning_rate": 3.520051750479325e-05, - "loss": 0.9397, - "step": 1959 - }, - { - "epoch": 0.61, - "learning_rate": 3.5152527202070396e-05, - "loss": 1.0948, - "step": 1960 - }, - { - "epoch": 0.61, - "learning_rate": 3.510455189530845e-05, - "loss": 1.0132, - "step": 1961 - }, - { - "epoch": 0.61, - "learning_rate": 3.505659163296251e-05, - "loss": 1.0859, - "step": 1962 - }, - { - "epoch": 0.61, - "learning_rate": 3.50086464634725e-05, - "loss": 1.0436, - "step": 1963 - }, - { - "epoch": 0.61, - "learning_rate": 3.496071643526308e-05, - "loss": 0.9904, - "step": 1964 - }, - { - "epoch": 0.61, - "learning_rate": 3.4912801596743614e-05, - "loss": 1.1362, - "step": 1965 - }, - { - "epoch": 0.61, - "learning_rate": 3.4864901996308154e-05, - "loss": 0.9097, - "step": 1966 - }, - { - "epoch": 0.61, - "learning_rate": 3.481701768233532e-05, - "loss": 0.9682, - "step": 1967 - }, - { - "epoch": 0.61, - "learning_rate": 3.476914870318831e-05, - "loss": 1.0343, - "step": 1968 - }, - { - "epoch": 0.61, - "learning_rate": 3.472129510721484e-05, - "loss": 0.9914, - "step": 1969 - }, - { - "epoch": 0.61, - "learning_rate": 3.467345694274709e-05, - "loss": 1.1228, - "step": 1970 - }, - { - "epoch": 0.61, - "learning_rate": 3.4625634258101634e-05, - "loss": 0.9594, - "step": 1971 - }, - { - "epoch": 0.61, - "learning_rate": 3.457782710157945e-05, - "loss": 1.1143, - "step": 1972 - }, - { - "epoch": 0.61, - "learning_rate": 3.4530035521465804e-05, - "loss": 1.0766, - "step": 1973 - }, - { - "epoch": 0.61, - "learning_rate": 3.4482259566030216e-05, - "loss": 1.0347, - "step": 1974 - }, - { - "epoch": 0.61, - "learning_rate": 3.443449928352646e-05, - "loss": 1.1026, - "step": 1975 - }, - { - "epoch": 0.61, - "learning_rate": 3.438675472219247e-05, - "loss": 1.0442, - "step": 1976 - }, - { - "epoch": 0.61, - "learning_rate": 3.433902593025028e-05, - "loss": 0.9564, - "step": 1977 - }, - { - "epoch": 0.61, - "learning_rate": 3.429131295590603e-05, - "loss": 1.1288, - "step": 1978 - }, - { - "epoch": 0.61, - "learning_rate": 3.424361584734985e-05, - "loss": 1.0563, - "step": 1979 - }, - { - "epoch": 0.61, - "learning_rate": 3.419593465275589e-05, - "loss": 1.012, - "step": 1980 - }, - { - "epoch": 0.61, - "learning_rate": 3.414826942028215e-05, - "loss": 0.9805, - "step": 1981 - }, - { - "epoch": 0.61, - "learning_rate": 3.4100620198070596e-05, - "loss": 0.9857, - "step": 1982 - }, - { - "epoch": 0.62, - "learning_rate": 3.4052987034246984e-05, - "loss": 1.1018, - "step": 1983 - }, - { - "epoch": 0.62, - "learning_rate": 3.4005369976920835e-05, - "loss": 0.9303, - "step": 1984 - }, - { - "epoch": 0.62, - "learning_rate": 3.395776907418544e-05, - "loss": 1.0989, - "step": 1985 - }, - { - "epoch": 0.62, - "learning_rate": 3.391018437411774e-05, - "loss": 1.2114, - "step": 1986 - }, - { - "epoch": 0.62, - "learning_rate": 3.3862615924778325e-05, - "loss": 1.0179, - "step": 1987 - }, - { - "epoch": 0.62, - "learning_rate": 3.381506377421138e-05, - "loss": 1.0163, - "step": 1988 - }, - { - "epoch": 0.62, - "learning_rate": 3.376752797044461e-05, - "loss": 1.0461, - "step": 1989 - }, - { - "epoch": 0.62, - "learning_rate": 3.372000856148923e-05, - "loss": 1.0117, - "step": 1990 - }, - { - "epoch": 0.62, - "learning_rate": 3.3672505595339885e-05, - "loss": 1.0364, - "step": 1991 - }, - { - "epoch": 0.62, - "learning_rate": 3.3625019119974624e-05, - "loss": 0.9527, - "step": 1992 - }, - { - "epoch": 0.62, - "learning_rate": 3.357754918335479e-05, - "loss": 1.0359, - "step": 1993 - }, - { - "epoch": 0.62, - "learning_rate": 3.353009583342509e-05, - "loss": 1.0395, - "step": 1994 - }, - { - "epoch": 0.62, - "learning_rate": 3.348265911811346e-05, - "loss": 0.9428, - "step": 1995 - }, - { - "epoch": 0.62, - "learning_rate": 3.3435239085331e-05, - "loss": 1.1036, - "step": 1996 - }, - { - "epoch": 0.62, - "learning_rate": 3.3387835782971996e-05, - "loss": 1.0832, - "step": 1997 - }, - { - "epoch": 0.62, - "learning_rate": 3.3340449258913846e-05, - "loss": 0.964, - "step": 1998 - }, - { - "epoch": 0.62, - "learning_rate": 3.3293079561016956e-05, - "loss": 1.0535, - "step": 1999 - }, - { - "epoch": 0.62, - "learning_rate": 3.324572673712477e-05, - "loss": 0.979, - "step": 2000 - }, - { - "epoch": 0.62, - "learning_rate": 3.3198390835063684e-05, - "loss": 1.1143, - "step": 2001 - }, - { - "epoch": 0.62, - "learning_rate": 3.315107190264301e-05, - "loss": 0.9437, - "step": 2002 - }, - { - "epoch": 0.62, - "learning_rate": 3.31037699876549e-05, - "loss": 1.0079, - "step": 2003 - }, - { - "epoch": 0.62, - "learning_rate": 3.305648513787435e-05, - "loss": 0.997, - "step": 2004 - }, - { - "epoch": 0.62, - "learning_rate": 3.300921740105908e-05, - "loss": 1.1745, - "step": 2005 - }, - { - "epoch": 0.62, - "learning_rate": 3.296196682494954e-05, - "loss": 0.9657, - "step": 2006 - }, - { - "epoch": 0.62, - "learning_rate": 3.2914733457268875e-05, - "loss": 1.0581, - "step": 2007 - }, - { - "epoch": 0.62, - "learning_rate": 3.286751734572283e-05, - "loss": 0.9216, - "step": 2008 - }, - { - "epoch": 0.62, - "learning_rate": 3.28203185379997e-05, - "loss": 1.1332, - "step": 2009 - }, - { - "epoch": 0.62, - "learning_rate": 3.277313708177035e-05, - "loss": 1.025, - "step": 2010 - }, - { - "epoch": 0.62, - "learning_rate": 3.2725973024688074e-05, - "loss": 0.9584, - "step": 2011 - }, - { - "epoch": 0.62, - "learning_rate": 3.267882641438862e-05, - "loss": 1.0476, - "step": 2012 - }, - { - "epoch": 0.62, - "learning_rate": 3.263169729849011e-05, - "loss": 0.9859, - "step": 2013 - }, - { - "epoch": 0.62, - "learning_rate": 3.2584585724592965e-05, - "loss": 1.0953, - "step": 2014 - }, - { - "epoch": 0.63, - "learning_rate": 3.253749174027995e-05, - "loss": 1.0088, - "step": 2015 - }, - { - "epoch": 0.63, - "learning_rate": 3.249041539311603e-05, - "loss": 1.001, - "step": 2016 - }, - { - "epoch": 0.63, - "learning_rate": 3.244335673064836e-05, - "loss": 1.0873, - "step": 2017 - }, - { - "epoch": 0.63, - "learning_rate": 3.239631580040621e-05, - "loss": 1.0032, - "step": 2018 - }, - { - "epoch": 0.63, - "learning_rate": 3.234929264990096e-05, - "loss": 1.1448, - "step": 2019 - }, - { - "epoch": 0.63, - "learning_rate": 3.2302287326626064e-05, - "loss": 0.9712, - "step": 2020 - }, - { - "epoch": 0.63, - "learning_rate": 3.225529987805691e-05, - "loss": 1.0108, - "step": 2021 - }, - { - "epoch": 0.63, - "learning_rate": 3.220833035165088e-05, - "loss": 1.0312, - "step": 2022 - }, - { - "epoch": 0.63, - "learning_rate": 3.2161378794847254e-05, - "loss": 1.0316, - "step": 2023 - }, - { - "epoch": 0.63, - "learning_rate": 3.211444525506711e-05, - "loss": 1.0014, - "step": 2024 - }, - { - "epoch": 0.63, - "learning_rate": 3.206752977971339e-05, - "loss": 1.0755, - "step": 2025 - }, - { - "epoch": 0.63, - "learning_rate": 3.202063241617076e-05, - "loss": 0.9904, - "step": 2026 - }, - { - "epoch": 0.63, - "learning_rate": 3.1973753211805596e-05, - "loss": 1.0055, - "step": 2027 - }, - { - "epoch": 0.63, - "learning_rate": 3.192689221396594e-05, - "loss": 0.9823, - "step": 2028 - }, - { - "epoch": 0.63, - "learning_rate": 3.188004946998146e-05, - "loss": 1.0527, - "step": 2029 - }, - { - "epoch": 0.63, - "learning_rate": 3.183322502716335e-05, - "loss": 1.0249, - "step": 2030 - }, - { - "epoch": 0.63, - "learning_rate": 3.178641893280433e-05, - "loss": 1.0829, - "step": 2031 - }, - { - "epoch": 0.63, - "learning_rate": 3.173963123417861e-05, - "loss": 1.0419, - "step": 2032 - }, - { - "epoch": 0.63, - "learning_rate": 3.169286197854181e-05, - "loss": 0.956, - "step": 2033 - }, - { - "epoch": 0.63, - "learning_rate": 3.1646111213130917e-05, - "loss": 1.1172, - "step": 2034 - }, - { - "epoch": 0.63, - "learning_rate": 3.159937898516424e-05, - "loss": 1.0092, - "step": 2035 - }, - { - "epoch": 0.63, - "learning_rate": 3.1552665341841394e-05, - "loss": 1.016, - "step": 2036 - }, - { - "epoch": 0.63, - "learning_rate": 3.150597033034317e-05, - "loss": 0.8428, - "step": 2037 - }, - { - "epoch": 0.63, - "learning_rate": 3.14592939978316e-05, - "loss": 1.0538, - "step": 2038 - }, - { - "epoch": 0.63, - "learning_rate": 3.141263639144979e-05, - "loss": 1.086, - "step": 2039 - }, - { - "epoch": 0.63, - "learning_rate": 3.1365997558321994e-05, - "loss": 1.0647, - "step": 2040 - }, - { - "epoch": 0.63, - "learning_rate": 3.131937754555347e-05, - "loss": 0.9437, - "step": 2041 - }, - { - "epoch": 0.63, - "learning_rate": 3.127277640023047e-05, - "loss": 1.0704, - "step": 2042 - }, - { - "epoch": 0.63, - "learning_rate": 3.122619416942019e-05, - "loss": 1.0151, - "step": 2043 - }, - { - "epoch": 0.63, - "learning_rate": 3.117963090017071e-05, - "loss": 1.0671, - "step": 2044 - }, - { - "epoch": 0.63, - "learning_rate": 3.1133086639511e-05, - "loss": 1.0219, - "step": 2045 - }, - { - "epoch": 0.63, - "learning_rate": 3.108656143445079e-05, - "loss": 1.0056, - "step": 2046 - }, - { - "epoch": 0.64, - "learning_rate": 3.1040055331980576e-05, - "loss": 1.1006, - "step": 2047 - }, - { - "epoch": 0.64, - "learning_rate": 3.099356837907159e-05, - "loss": 1.004, - "step": 2048 - }, - { - "epoch": 0.64, - "learning_rate": 3.0947100622675665e-05, - "loss": 0.9119, - "step": 2049 - }, - { - "epoch": 0.64, - "learning_rate": 3.0900652109725294e-05, - "loss": 0.9912, - "step": 2050 - }, - { - "epoch": 0.64, - "learning_rate": 3.0854222887133513e-05, - "loss": 1.0631, - "step": 2051 - }, - { - "epoch": 0.64, - "learning_rate": 3.0807813001793876e-05, - "loss": 1.1364, - "step": 2052 - }, - { - "epoch": 0.64, - "learning_rate": 3.07614225005804e-05, - "loss": 0.8872, - "step": 2053 - }, - { - "epoch": 0.64, - "learning_rate": 3.071505143034758e-05, - "loss": 1.0027, - "step": 2054 - }, - { - "epoch": 0.64, - "learning_rate": 3.066869983793017e-05, - "loss": 1.0318, - "step": 2055 - }, - { - "epoch": 0.64, - "learning_rate": 3.0622367770143365e-05, - "loss": 1.1478, - "step": 2056 - }, - { - "epoch": 0.64, - "learning_rate": 3.0576055273782574e-05, - "loss": 0.9752, - "step": 2057 - }, - { - "epoch": 0.64, - "learning_rate": 3.052976239562348e-05, - "loss": 1.0348, - "step": 2058 - }, - { - "epoch": 0.64, - "learning_rate": 3.048348918242191e-05, - "loss": 1.063, - "step": 2059 - }, - { - "epoch": 0.64, - "learning_rate": 3.0437235680913862e-05, - "loss": 1.0095, - "step": 2060 - }, - { - "epoch": 0.64, - "learning_rate": 3.039100193781542e-05, - "loss": 1.0541, - "step": 2061 - }, - { - "epoch": 0.64, - "learning_rate": 3.03447879998227e-05, - "loss": 1.0185, - "step": 2062 - }, - { - "epoch": 0.64, - "learning_rate": 3.0298593913611826e-05, - "loss": 0.9832, - "step": 2063 - }, - { - "epoch": 0.64, - "learning_rate": 3.0252419725838854e-05, - "loss": 1.0302, - "step": 2064 - }, - { - "epoch": 0.64, - "learning_rate": 3.0206265483139763e-05, - "loss": 0.9835, - "step": 2065 - }, - { - "epoch": 0.64, - "learning_rate": 3.016013123213039e-05, - "loss": 0.9384, - "step": 2066 - }, - { - "epoch": 0.64, - "learning_rate": 3.0114017019406358e-05, - "loss": 0.9227, - "step": 2067 - }, - { - "epoch": 0.64, - "learning_rate": 3.0067922891543072e-05, - "loss": 1.1246, - "step": 2068 - }, - { - "epoch": 0.64, - "learning_rate": 3.0021848895095615e-05, - "loss": 0.9827, - "step": 2069 - }, - { - "epoch": 0.64, - "learning_rate": 2.997579507659881e-05, - "loss": 0.988, - "step": 2070 - }, - { - "epoch": 0.64, - "learning_rate": 2.9929761482567016e-05, - "loss": 1.0425, - "step": 2071 - }, - { - "epoch": 0.64, - "learning_rate": 2.9883748159494218e-05, - "loss": 1.1026, - "step": 2072 - }, - { - "epoch": 0.64, - "learning_rate": 2.983775515385393e-05, - "loss": 1.0096, - "step": 2073 - }, - { - "epoch": 0.64, - "learning_rate": 2.9791782512099097e-05, - "loss": 1.0972, - "step": 2074 - }, - { - "epoch": 0.64, - "learning_rate": 2.9745830280662156e-05, - "loss": 0.9776, - "step": 2075 - }, - { - "epoch": 0.64, - "learning_rate": 2.96998985059549e-05, - "loss": 1.035, - "step": 2076 - }, - { - "epoch": 0.64, - "learning_rate": 2.9653987234368442e-05, - "loss": 0.9307, - "step": 2077 - }, - { - "epoch": 0.64, - "learning_rate": 2.9608096512273225e-05, - "loss": 1.1854, - "step": 2078 - }, - { - "epoch": 0.64, - "learning_rate": 2.9562226386018932e-05, - "loss": 0.9909, - "step": 2079 - }, - { - "epoch": 0.65, - "learning_rate": 2.9516376901934413e-05, - "loss": 0.997, - "step": 2080 - }, - { - "epoch": 0.65, - "learning_rate": 2.9470548106327676e-05, - "loss": 1.033, - "step": 2081 - }, - { - "epoch": 0.65, - "learning_rate": 2.9424740045485865e-05, - "loss": 0.9612, - "step": 2082 - }, - { - "epoch": 0.65, - "learning_rate": 2.937895276567516e-05, - "loss": 1.1447, - "step": 2083 - }, - { - "epoch": 0.65, - "learning_rate": 2.9333186313140753e-05, - "loss": 0.9868, - "step": 2084 - }, - { - "epoch": 0.65, - "learning_rate": 2.9287440734106802e-05, - "loss": 1.1077, - "step": 2085 - }, - { - "epoch": 0.65, - "learning_rate": 2.9241716074776392e-05, - "loss": 1.0386, - "step": 2086 - }, - { - "epoch": 0.65, - "learning_rate": 2.919601238133145e-05, - "loss": 1.0341, - "step": 2087 - }, - { - "epoch": 0.65, - "learning_rate": 2.9150329699932764e-05, - "loss": 1.0793, - "step": 2088 - }, - { - "epoch": 0.65, - "learning_rate": 2.9104668076719876e-05, - "loss": 0.9605, - "step": 2089 - }, - { - "epoch": 0.65, - "learning_rate": 2.9059027557811092e-05, - "loss": 1.0006, - "step": 2090 - }, - { - "epoch": 0.65, - "learning_rate": 2.9013408189303337e-05, - "loss": 1.1, - "step": 2091 - }, - { - "epoch": 0.65, - "learning_rate": 2.896781001727225e-05, - "loss": 0.9923, - "step": 2092 - }, - { - "epoch": 0.65, - "learning_rate": 2.892223308777199e-05, - "loss": 0.9746, - "step": 2093 - }, - { - "epoch": 0.65, - "learning_rate": 2.887667744683531e-05, - "loss": 1.078, - "step": 2094 - }, - { - "epoch": 0.65, - "learning_rate": 2.883114314047346e-05, - "loss": 0.9636, - "step": 2095 - }, - { - "epoch": 0.65, - "learning_rate": 2.878563021467612e-05, - "loss": 1.0349, - "step": 2096 - }, - { - "epoch": 0.65, - "learning_rate": 2.8740138715411413e-05, - "loss": 0.9845, - "step": 2097 - }, - { - "epoch": 0.65, - "learning_rate": 2.869466868862575e-05, - "loss": 1.071, - "step": 2098 - }, - { - "epoch": 0.65, - "learning_rate": 2.864922018024392e-05, - "loss": 1.1492, - "step": 2099 - }, - { - "epoch": 0.65, - "learning_rate": 2.8603793236168964e-05, - "loss": 1.0274, - "step": 2100 - }, - { - "epoch": 0.65, - "learning_rate": 2.8558387902282123e-05, - "loss": 0.8952, - "step": 2101 - }, - { - "epoch": 0.65, - "learning_rate": 2.851300422444283e-05, - "loss": 0.9093, - "step": 2102 - }, - { - "epoch": 0.65, - "learning_rate": 2.846764224848867e-05, - "loss": 1.0252, - "step": 2103 - }, - { - "epoch": 0.65, - "learning_rate": 2.8422302020235254e-05, - "loss": 1.1699, - "step": 2104 - }, - { - "epoch": 0.65, - "learning_rate": 2.8376983585476224e-05, - "loss": 1.0313, - "step": 2105 - }, - { - "epoch": 0.65, - "learning_rate": 2.8331686989983276e-05, - "loss": 0.9561, - "step": 2106 - }, - { - "epoch": 0.65, - "learning_rate": 2.8286412279506002e-05, - "loss": 1.0562, - "step": 2107 - }, - { - "epoch": 0.65, - "learning_rate": 2.8241159499771903e-05, - "loss": 0.9698, - "step": 2108 - }, - { - "epoch": 0.65, - "learning_rate": 2.8195928696486345e-05, - "loss": 1.0625, - "step": 2109 - }, - { - "epoch": 0.65, - "learning_rate": 2.8150719915332446e-05, - "loss": 0.9644, - "step": 2110 - }, - { - "epoch": 0.65, - "learning_rate": 2.8105533201971128e-05, - "loss": 1.1688, - "step": 2111 - }, - { - "epoch": 0.66, - "learning_rate": 2.806036860204102e-05, - "loss": 1.0692, - "step": 2112 - }, - { - "epoch": 0.66, - "learning_rate": 2.8015226161158403e-05, - "loss": 0.94, - "step": 2113 - }, - { - "epoch": 0.66, - "learning_rate": 2.7970105924917185e-05, - "loss": 1.0402, - "step": 2114 - }, - { - "epoch": 0.66, - "learning_rate": 2.7925007938888863e-05, - "loss": 1.017, - "step": 2115 - }, - { - "epoch": 0.66, - "learning_rate": 2.787993224862241e-05, - "loss": 1.0617, - "step": 2116 - }, - { - "epoch": 0.66, - "learning_rate": 2.7834878899644345e-05, - "loss": 0.9203, - "step": 2117 - }, - { - "epoch": 0.66, - "learning_rate": 2.7789847937458557e-05, - "loss": 0.9068, - "step": 2118 - }, - { - "epoch": 0.66, - "learning_rate": 2.7744839407546375e-05, - "loss": 1.0854, - "step": 2119 - }, - { - "epoch": 0.66, - "learning_rate": 2.7699853355366466e-05, - "loss": 1.1024, - "step": 2120 - }, - { - "epoch": 0.66, - "learning_rate": 2.7654889826354767e-05, - "loss": 1.0699, - "step": 2121 - }, - { - "epoch": 0.66, - "learning_rate": 2.7609948865924505e-05, - "loss": 0.987, - "step": 2122 - }, - { - "epoch": 0.66, - "learning_rate": 2.7565030519466062e-05, - "loss": 1.0163, - "step": 2123 - }, - { - "epoch": 0.66, - "learning_rate": 2.7520134832347023e-05, - "loss": 1.0114, - "step": 2124 - }, - { - "epoch": 0.66, - "learning_rate": 2.747526184991207e-05, - "loss": 1.0883, - "step": 2125 - }, - { - "epoch": 0.66, - "learning_rate": 2.7430411617482954e-05, - "loss": 0.963, - "step": 2126 - }, - { - "epoch": 0.66, - "learning_rate": 2.7385584180358458e-05, - "loss": 1.0907, - "step": 2127 - }, - { - "epoch": 0.66, - "learning_rate": 2.7340779583814345e-05, - "loss": 0.9096, - "step": 2128 - }, - { - "epoch": 0.66, - "learning_rate": 2.7295997873103286e-05, - "loss": 1.1097, - "step": 2129 - }, - { - "epoch": 0.66, - "learning_rate": 2.7251239093454823e-05, - "loss": 0.9732, - "step": 2130 - }, - { - "epoch": 0.66, - "learning_rate": 2.7206503290075387e-05, - "loss": 0.9912, - "step": 2131 - }, - { - "epoch": 0.66, - "learning_rate": 2.7161790508148178e-05, - "loss": 1.1384, - "step": 2132 - }, - { - "epoch": 0.66, - "learning_rate": 2.7117100792833144e-05, - "loss": 1.0715, - "step": 2133 - }, - { - "epoch": 0.66, - "learning_rate": 2.7072434189266942e-05, - "loss": 0.9783, - "step": 2134 - }, - { - "epoch": 0.66, - "learning_rate": 2.7027790742562904e-05, - "loss": 0.8524, - "step": 2135 - }, - { - "epoch": 0.66, - "learning_rate": 2.6983170497810906e-05, - "loss": 0.9433, - "step": 2136 - }, - { - "epoch": 0.66, - "learning_rate": 2.6938573500077457e-05, - "loss": 1.0516, - "step": 2137 - }, - { - "epoch": 0.66, - "learning_rate": 2.6893999794405568e-05, - "loss": 1.0051, - "step": 2138 - }, - { - "epoch": 0.66, - "learning_rate": 2.684944942581471e-05, - "loss": 1.0129, - "step": 2139 - }, - { - "epoch": 0.66, - "learning_rate": 2.6804922439300833e-05, - "loss": 1.1715, - "step": 2140 - }, - { - "epoch": 0.66, - "learning_rate": 2.6760418879836187e-05, - "loss": 1.0577, - "step": 2141 - }, - { - "epoch": 0.66, - "learning_rate": 2.6715938792369442e-05, - "loss": 0.9068, - "step": 2142 - }, - { - "epoch": 0.66, - "learning_rate": 2.6671482221825482e-05, - "loss": 1.079, - "step": 2143 - }, - { - "epoch": 0.67, - "learning_rate": 2.6627049213105526e-05, - "loss": 0.9678, - "step": 2144 - }, - { - "epoch": 0.67, - "learning_rate": 2.6582639811086928e-05, - "loss": 1.1199, - "step": 2145 - }, - { - "epoch": 0.67, - "learning_rate": 2.6538254060623236e-05, - "loss": 1.0204, - "step": 2146 - }, - { - "epoch": 0.67, - "learning_rate": 2.6493892006544118e-05, - "loss": 0.9822, - "step": 2147 - }, - { - "epoch": 0.67, - "learning_rate": 2.644955369365525e-05, - "loss": 1.087, - "step": 2148 - }, - { - "epoch": 0.67, - "learning_rate": 2.6405239166738377e-05, - "loss": 0.9691, - "step": 2149 - }, - { - "epoch": 0.67, - "learning_rate": 2.6360948470551222e-05, - "loss": 1.0148, - "step": 2150 - }, - { - "epoch": 0.67, - "learning_rate": 2.6316681649827424e-05, - "loss": 1.0113, - "step": 2151 - }, - { - "epoch": 0.67, - "learning_rate": 2.627243874927652e-05, - "loss": 0.9939, - "step": 2152 - }, - { - "epoch": 0.67, - "learning_rate": 2.6228219813583876e-05, - "loss": 1.1307, - "step": 2153 - }, - { - "epoch": 0.67, - "learning_rate": 2.6184024887410663e-05, - "loss": 1.0776, - "step": 2154 - }, - { - "epoch": 0.67, - "learning_rate": 2.613985401539377e-05, - "loss": 0.9138, - "step": 2155 - }, - { - "epoch": 0.67, - "learning_rate": 2.609570724214583e-05, - "loss": 1.0787, - "step": 2156 - }, - { - "epoch": 0.67, - "learning_rate": 2.6051584612255132e-05, - "loss": 0.9745, - "step": 2157 - }, - { - "epoch": 0.67, - "learning_rate": 2.6007486170285555e-05, - "loss": 1.0601, - "step": 2158 - }, - { - "epoch": 0.67, - "learning_rate": 2.596341196077659e-05, - "loss": 1.0884, - "step": 2159 - }, - { - "epoch": 0.67, - "learning_rate": 2.5919362028243233e-05, - "loss": 0.9639, - "step": 2160 - }, - { - "epoch": 0.67, - "learning_rate": 2.587533641717592e-05, - "loss": 1.0239, - "step": 2161 - }, - { - "epoch": 0.67, - "learning_rate": 2.583133517204057e-05, - "loss": 0.9632, - "step": 2162 - }, - { - "epoch": 0.67, - "learning_rate": 2.5787358337278496e-05, - "loss": 0.9891, - "step": 2163 - }, - { - "epoch": 0.67, - "learning_rate": 2.5743405957306325e-05, - "loss": 1.0358, - "step": 2164 - }, - { - "epoch": 0.67, - "learning_rate": 2.569947807651603e-05, - "loss": 1.0354, - "step": 2165 - }, - { - "epoch": 0.67, - "learning_rate": 2.565557473927477e-05, - "loss": 1.0567, - "step": 2166 - }, - { - "epoch": 0.67, - "learning_rate": 2.561169598992499e-05, - "loss": 0.9337, - "step": 2167 - }, - { - "epoch": 0.67, - "learning_rate": 2.5567841872784228e-05, - "loss": 0.8094, - "step": 2168 - }, - { - "epoch": 0.67, - "learning_rate": 2.55240124321452e-05, - "loss": 1.0848, - "step": 2169 - }, - { - "epoch": 0.67, - "learning_rate": 2.5480207712275683e-05, - "loss": 1.0559, - "step": 2170 - }, - { - "epoch": 0.67, - "learning_rate": 2.543642775741849e-05, - "loss": 0.9681, - "step": 2171 - }, - { - "epoch": 0.67, - "learning_rate": 2.5392672611791425e-05, - "loss": 1.0605, - "step": 2172 - }, - { - "epoch": 0.67, - "learning_rate": 2.5348942319587187e-05, - "loss": 1.0661, - "step": 2173 - }, - { - "epoch": 0.67, - "learning_rate": 2.530523692497344e-05, - "loss": 1.0868, - "step": 2174 - }, - { - "epoch": 0.67, - "learning_rate": 2.5261556472092653e-05, - "loss": 1.0166, - "step": 2175 - }, - { - "epoch": 0.68, - "learning_rate": 2.521790100506214e-05, - "loss": 1.0252, - "step": 2176 - }, - { - "epoch": 0.68, - "learning_rate": 2.5174270567973957e-05, - "loss": 0.9921, - "step": 2177 - }, - { - "epoch": 0.68, - "learning_rate": 2.51306652048949e-05, - "loss": 0.9584, - "step": 2178 - }, - { - "epoch": 0.68, - "learning_rate": 2.50870849598664e-05, - "loss": 0.9478, - "step": 2179 - }, - { - "epoch": 0.68, - "learning_rate": 2.5043529876904548e-05, - "loss": 1.0848, - "step": 2180 - }, - { - "epoch": 0.68, - "learning_rate": 2.500000000000001e-05, - "loss": 1.0241, - "step": 2181 - }, - { - "epoch": 0.68, - "learning_rate": 2.4956495373118017e-05, - "loss": 1.0841, - "step": 2182 - }, - { - "epoch": 0.68, - "learning_rate": 2.4913016040198277e-05, - "loss": 0.9863, - "step": 2183 - }, - { - "epoch": 0.68, - "learning_rate": 2.4869562045154953e-05, - "loss": 0.9696, - "step": 2184 - }, - { - "epoch": 0.68, - "learning_rate": 2.482613343187664e-05, - "loss": 1.0829, - "step": 2185 - }, - { - "epoch": 0.68, - "learning_rate": 2.4782730244226244e-05, - "loss": 1.0295, - "step": 2186 - }, - { - "epoch": 0.68, - "learning_rate": 2.473935252604103e-05, - "loss": 1.0277, - "step": 2187 - }, - { - "epoch": 0.68, - "learning_rate": 2.4696000321132544e-05, - "loss": 0.962, - "step": 2188 - }, - { - "epoch": 0.68, - "learning_rate": 2.465267367328655e-05, - "loss": 0.9863, - "step": 2189 - }, - { - "epoch": 0.68, - "learning_rate": 2.460937262626301e-05, - "loss": 1.0777, - "step": 2190 - }, - { - "epoch": 0.68, - "learning_rate": 2.4566097223795998e-05, - "loss": 0.9305, - "step": 2191 - }, - { - "epoch": 0.68, - "learning_rate": 2.4522847509593737e-05, - "loss": 1.1682, - "step": 2192 - }, - { - "epoch": 0.68, - "learning_rate": 2.4479623527338445e-05, - "loss": 1.0756, - "step": 2193 - }, - { - "epoch": 0.68, - "learning_rate": 2.44364253206864e-05, - "loss": 0.9775, - "step": 2194 - }, - { - "epoch": 0.68, - "learning_rate": 2.439325293326783e-05, - "loss": 0.9786, - "step": 2195 - }, - { - "epoch": 0.68, - "learning_rate": 2.4350106408686884e-05, - "loss": 0.9839, - "step": 2196 - }, - { - "epoch": 0.68, - "learning_rate": 2.4306985790521608e-05, - "loss": 1.0631, - "step": 2197 - }, - { - "epoch": 0.68, - "learning_rate": 2.4263891122323828e-05, - "loss": 1.0204, - "step": 2198 - }, - { - "epoch": 0.68, - "learning_rate": 2.4220822447619223e-05, - "loss": 1.0077, - "step": 2199 - }, - { - "epoch": 0.68, - "learning_rate": 2.4177779809907185e-05, - "loss": 0.9982, - "step": 2200 - }, - { - "epoch": 0.68, - "learning_rate": 2.4134763252660824e-05, - "loss": 0.8863, - "step": 2201 - }, - { - "epoch": 0.68, - "learning_rate": 2.4091772819326898e-05, - "loss": 1.0521, - "step": 2202 - }, - { - "epoch": 0.68, - "learning_rate": 2.4048808553325802e-05, - "loss": 0.8839, - "step": 2203 - }, - { - "epoch": 0.68, - "learning_rate": 2.4005870498051464e-05, - "loss": 1.0248, - "step": 2204 - }, - { - "epoch": 0.68, - "learning_rate": 2.3962958696871342e-05, - "loss": 1.0521, - "step": 2205 - }, - { - "epoch": 0.68, - "learning_rate": 2.3920073193126406e-05, - "loss": 1.0641, - "step": 2206 - }, - { - "epoch": 0.68, - "learning_rate": 2.3877214030131067e-05, - "loss": 0.9415, - "step": 2207 - }, - { - "epoch": 0.68, - "learning_rate": 2.3834381251173104e-05, - "loss": 1.0037, - "step": 2208 - }, - { - "epoch": 0.69, - "learning_rate": 2.379157489951367e-05, - "loss": 1.1329, - "step": 2209 - }, - { - "epoch": 0.69, - "learning_rate": 2.3748795018387237e-05, - "loss": 1.0855, - "step": 2210 - }, - { - "epoch": 0.69, - "learning_rate": 2.370604165100148e-05, - "loss": 1.1152, - "step": 2211 - }, - { - "epoch": 0.69, - "learning_rate": 2.366331484053737e-05, - "loss": 0.9463, - "step": 2212 - }, - { - "epoch": 0.69, - "learning_rate": 2.3620614630149013e-05, - "loss": 1.0631, - "step": 2213 - }, - { - "epoch": 0.69, - "learning_rate": 2.3577941062963664e-05, - "loss": 0.9898, - "step": 2214 - }, - { - "epoch": 0.69, - "learning_rate": 2.3535294182081663e-05, - "loss": 1.0282, - "step": 2215 - }, - { - "epoch": 0.69, - "learning_rate": 2.3492674030576417e-05, - "loss": 0.9877, - "step": 2216 - }, - { - "epoch": 0.69, - "learning_rate": 2.34500806514943e-05, - "loss": 0.963, - "step": 2217 - }, - { - "epoch": 0.69, - "learning_rate": 2.3407514087854644e-05, - "loss": 1.0525, - "step": 2218 - }, - { - "epoch": 0.69, - "learning_rate": 2.3364974382649736e-05, - "loss": 1.1235, - "step": 2219 - }, - { - "epoch": 0.69, - "learning_rate": 2.332246157884471e-05, - "loss": 0.9157, - "step": 2220 - }, - { - "epoch": 0.69, - "learning_rate": 2.327997571937755e-05, - "loss": 1.0199, - "step": 2221 - }, - { - "epoch": 0.69, - "learning_rate": 2.3237516847159026e-05, - "loss": 1.07, - "step": 2222 - }, - { - "epoch": 0.69, - "learning_rate": 2.3195085005072585e-05, - "loss": 0.9614, - "step": 2223 - }, - { - "epoch": 0.69, - "learning_rate": 2.3152680235974466e-05, - "loss": 1.0309, - "step": 2224 - }, - { - "epoch": 0.69, - "learning_rate": 2.311030258269351e-05, - "loss": 1.129, - "step": 2225 - }, - { - "epoch": 0.69, - "learning_rate": 2.306795208803119e-05, - "loss": 0.9901, - "step": 2226 - }, - { - "epoch": 0.69, - "learning_rate": 2.3025628794761543e-05, - "loss": 1.0505, - "step": 2227 - }, - { - "epoch": 0.69, - "learning_rate": 2.298333274563115e-05, - "loss": 0.9899, - "step": 2228 - }, - { - "epoch": 0.69, - "learning_rate": 2.2941063983359046e-05, - "loss": 1.0684, - "step": 2229 - }, - { - "epoch": 0.69, - "learning_rate": 2.28988225506367e-05, - "loss": 0.9826, - "step": 2230 - }, - { - "epoch": 0.69, - "learning_rate": 2.2856608490128008e-05, - "loss": 0.9996, - "step": 2231 - }, - { - "epoch": 0.69, - "learning_rate": 2.2814421844469213e-05, - "loss": 1.1003, - "step": 2232 - }, - { - "epoch": 0.69, - "learning_rate": 2.2772262656268867e-05, - "loss": 1.0221, - "step": 2233 - }, - { - "epoch": 0.69, - "learning_rate": 2.273013096810779e-05, - "loss": 0.9564, - "step": 2234 - }, - { - "epoch": 0.69, - "learning_rate": 2.2688026822539033e-05, - "loss": 1.1034, - "step": 2235 - }, - { - "epoch": 0.69, - "learning_rate": 2.2645950262087796e-05, - "loss": 0.8953, - "step": 2236 - }, - { - "epoch": 0.69, - "learning_rate": 2.2603901329251452e-05, - "loss": 1.0391, - "step": 2237 - }, - { - "epoch": 0.69, - "learning_rate": 2.2561880066499476e-05, - "loss": 1.0778, - "step": 2238 - }, - { - "epoch": 0.69, - "learning_rate": 2.2519886516273364e-05, - "loss": 0.8935, - "step": 2239 - }, - { - "epoch": 0.69, - "learning_rate": 2.247792072098665e-05, - "loss": 1.0258, - "step": 2240 - }, - { - "epoch": 0.7, - "learning_rate": 2.243598272302484e-05, - "loss": 1.0229, - "step": 2241 - }, - { - "epoch": 0.7, - "learning_rate": 2.2394072564745326e-05, - "loss": 1.0741, - "step": 2242 - }, - { - "epoch": 0.7, - "learning_rate": 2.2352190288477397e-05, - "loss": 1.0282, - "step": 2243 - }, - { - "epoch": 0.7, - "learning_rate": 2.231033593652221e-05, - "loss": 1.0567, - "step": 2244 - }, - { - "epoch": 0.7, - "learning_rate": 2.2268509551152684e-05, - "loss": 1.0212, - "step": 2245 - }, - { - "epoch": 0.7, - "learning_rate": 2.222671117461352e-05, - "loss": 1.0165, - "step": 2246 - }, - { - "epoch": 0.7, - "learning_rate": 2.2184940849121122e-05, - "loss": 1.0447, - "step": 2247 - }, - { - "epoch": 0.7, - "learning_rate": 2.214319861686353e-05, - "loss": 1.0083, - "step": 2248 - }, - { - "epoch": 0.7, - "learning_rate": 2.2101484520000453e-05, - "loss": 0.9572, - "step": 2249 - }, - { - "epoch": 0.7, - "learning_rate": 2.205979860066316e-05, - "loss": 1.0024, - "step": 2250 - }, - { - "epoch": 0.7, - "learning_rate": 2.2018140900954476e-05, - "loss": 1.0567, - "step": 2251 - }, - { - "epoch": 0.7, - "learning_rate": 2.1976511462948706e-05, - "loss": 0.9861, - "step": 2252 - }, - { - "epoch": 0.7, - "learning_rate": 2.193491032869165e-05, - "loss": 0.9724, - "step": 2253 - }, - { - "epoch": 0.7, - "learning_rate": 2.189333754020046e-05, - "loss": 1.0098, - "step": 2254 - }, - { - "epoch": 0.7, - "learning_rate": 2.1851793139463677e-05, - "loss": 1.0531, - "step": 2255 - }, - { - "epoch": 0.7, - "learning_rate": 2.18102771684412e-05, - "loss": 0.9762, - "step": 2256 - }, - { - "epoch": 0.7, - "learning_rate": 2.1768789669064204e-05, - "loss": 0.9403, - "step": 2257 - }, - { - "epoch": 0.7, - "learning_rate": 2.172733068323509e-05, - "loss": 1.1055, - "step": 2258 - }, - { - "epoch": 0.7, - "learning_rate": 2.1685900252827485e-05, - "loss": 0.9783, - "step": 2259 - }, - { - "epoch": 0.7, - "learning_rate": 2.1644498419686164e-05, - "loss": 0.963, - "step": 2260 - }, - { - "epoch": 0.7, - "learning_rate": 2.160312522562699e-05, - "loss": 1.0364, - "step": 2261 - }, - { - "epoch": 0.7, - "learning_rate": 2.156178071243694e-05, - "loss": 1.0036, - "step": 2262 - }, - { - "epoch": 0.7, - "learning_rate": 2.152046492187401e-05, - "loss": 1.0456, - "step": 2263 - }, - { - "epoch": 0.7, - "learning_rate": 2.1479177895667187e-05, - "loss": 0.9279, - "step": 2264 - }, - { - "epoch": 0.7, - "learning_rate": 2.1437919675516404e-05, - "loss": 1.0146, - "step": 2265 - }, - { - "epoch": 0.7, - "learning_rate": 2.139669030309252e-05, - "loss": 1.0267, - "step": 2266 - }, - { - "epoch": 0.7, - "learning_rate": 2.1355489820037217e-05, - "loss": 1.0301, - "step": 2267 - }, - { - "epoch": 0.7, - "learning_rate": 2.1314318267963002e-05, - "loss": 1.0245, - "step": 2268 - }, - { - "epoch": 0.7, - "learning_rate": 2.12731756884532e-05, - "loss": 1.0288, - "step": 2269 - }, - { - "epoch": 0.7, - "learning_rate": 2.1232062123061858e-05, - "loss": 0.9359, - "step": 2270 - }, - { - "epoch": 0.7, - "learning_rate": 2.1190977613313705e-05, - "loss": 1.127, - "step": 2271 - }, - { - "epoch": 0.7, - "learning_rate": 2.1149922200704137e-05, - "loss": 0.8067, - "step": 2272 - }, - { - "epoch": 0.71, - "learning_rate": 2.1108895926699178e-05, - "loss": 1.053, - "step": 2273 - }, - { - "epoch": 0.71, - "learning_rate": 2.106789883273536e-05, - "loss": 1.03, - "step": 2274 - }, - { - "epoch": 0.71, - "learning_rate": 2.102693096021981e-05, - "loss": 1.0686, - "step": 2275 - }, - { - "epoch": 0.71, - "learning_rate": 2.0985992350530108e-05, - "loss": 1.0836, - "step": 2276 - }, - { - "epoch": 0.71, - "learning_rate": 2.0945083045014296e-05, - "loss": 0.9664, - "step": 2277 - }, - { - "epoch": 0.71, - "learning_rate": 2.090420308499082e-05, - "loss": 1.0581, - "step": 2278 - }, - { - "epoch": 0.71, - "learning_rate": 2.0863352511748463e-05, - "loss": 1.087, - "step": 2279 - }, - { - "epoch": 0.71, - "learning_rate": 2.0822531366546322e-05, - "loss": 1.0235, - "step": 2280 - }, - { - "epoch": 0.71, - "learning_rate": 2.07817396906138e-05, - "loss": 1.0215, - "step": 2281 - }, - { - "epoch": 0.71, - "learning_rate": 2.074097752515054e-05, - "loss": 1.0368, - "step": 2282 - }, - { - "epoch": 0.71, - "learning_rate": 2.0700244911326362e-05, - "loss": 0.945, - "step": 2283 - }, - { - "epoch": 0.71, - "learning_rate": 2.0659541890281236e-05, - "loss": 1.1416, - "step": 2284 - }, - { - "epoch": 0.71, - "learning_rate": 2.061886850312527e-05, - "loss": 1.013, - "step": 2285 - }, - { - "epoch": 0.71, - "learning_rate": 2.0578224790938595e-05, - "loss": 0.9511, - "step": 2286 - }, - { - "epoch": 0.71, - "learning_rate": 2.0537610794771406e-05, - "loss": 1.0564, - "step": 2287 - }, - { - "epoch": 0.71, - "learning_rate": 2.049702655564387e-05, - "loss": 1.0071, - "step": 2288 - }, - { - "epoch": 0.71, - "learning_rate": 2.0456472114546125e-05, - "loss": 1.0504, - "step": 2289 - }, - { - "epoch": 0.71, - "learning_rate": 2.0415947512438176e-05, - "loss": 0.9498, - "step": 2290 - }, - { - "epoch": 0.71, - "learning_rate": 2.0375452790249926e-05, - "loss": 1.0428, - "step": 2291 - }, - { - "epoch": 0.71, - "learning_rate": 2.0334987988881078e-05, - "loss": 1.0499, - "step": 2292 - }, - { - "epoch": 0.71, - "learning_rate": 2.0294553149201078e-05, - "loss": 0.9501, - "step": 2293 - }, - { - "epoch": 0.71, - "learning_rate": 2.025414831204919e-05, - "loss": 1.0331, - "step": 2294 - }, - { - "epoch": 0.71, - "learning_rate": 2.021377351823432e-05, - "loss": 1.0901, - "step": 2295 - }, - { - "epoch": 0.71, - "learning_rate": 2.017342880853505e-05, - "loss": 0.9609, - "step": 2296 - }, - { - "epoch": 0.71, - "learning_rate": 2.013311422369958e-05, - "loss": 1.0412, - "step": 2297 - }, - { - "epoch": 0.71, - "learning_rate": 2.0092829804445684e-05, - "loss": 0.988, - "step": 2298 - }, - { - "epoch": 0.71, - "learning_rate": 2.0052575591460637e-05, - "loss": 0.9473, - "step": 2299 - }, - { - "epoch": 0.71, - "learning_rate": 2.0012351625401245e-05, - "loss": 1.0873, - "step": 2300 - }, - { - "epoch": 0.71, - "learning_rate": 1.9972157946893748e-05, - "loss": 0.9601, - "step": 2301 - }, - { - "epoch": 0.71, - "learning_rate": 1.9931994596533794e-05, - "loss": 0.9861, - "step": 2302 - }, - { - "epoch": 0.71, - "learning_rate": 1.9891861614886433e-05, - "loss": 1.0351, - "step": 2303 - }, - { - "epoch": 0.71, - "learning_rate": 1.9851759042485956e-05, - "loss": 1.0046, - "step": 2304 - }, - { - "epoch": 0.72, - "learning_rate": 1.9811686919836053e-05, - "loss": 0.9454, - "step": 2305 - }, - { - "epoch": 0.72, - "learning_rate": 1.9771645287409564e-05, - "loss": 0.9279, - "step": 2306 - }, - { - "epoch": 0.72, - "learning_rate": 1.9731634185648584e-05, - "loss": 1.1479, - "step": 2307 - }, - { - "epoch": 0.72, - "learning_rate": 1.9691653654964372e-05, - "loss": 1.0279, - "step": 2308 - }, - { - "epoch": 0.72, - "learning_rate": 1.9651703735737292e-05, - "loss": 0.9915, - "step": 2309 - }, - { - "epoch": 0.72, - "learning_rate": 1.961178446831682e-05, - "loss": 1.0319, - "step": 2310 - }, - { - "epoch": 0.72, - "learning_rate": 1.957189589302141e-05, - "loss": 1.0201, - "step": 2311 - }, - { - "epoch": 0.72, - "learning_rate": 1.9532038050138583e-05, - "loss": 1.0037, - "step": 2312 - }, - { - "epoch": 0.72, - "learning_rate": 1.9492210979924804e-05, - "loss": 1.0347, - "step": 2313 - }, - { - "epoch": 0.72, - "learning_rate": 1.9452414722605432e-05, - "loss": 0.9945, - "step": 2314 - }, - { - "epoch": 0.72, - "learning_rate": 1.9412649318374737e-05, - "loss": 0.9499, - "step": 2315 - }, - { - "epoch": 0.72, - "learning_rate": 1.9372914807395824e-05, - "loss": 1.1185, - "step": 2316 - }, - { - "epoch": 0.72, - "learning_rate": 1.9333211229800567e-05, - "loss": 1.0414, - "step": 2317 - }, - { - "epoch": 0.72, - "learning_rate": 1.9293538625689606e-05, - "loss": 0.9911, - "step": 2318 - }, - { - "epoch": 0.72, - "learning_rate": 1.925389703513232e-05, - "loss": 0.9486, - "step": 2319 - }, - { - "epoch": 0.72, - "learning_rate": 1.921428649816674e-05, - "loss": 1.125, - "step": 2320 - }, - { - "epoch": 0.72, - "learning_rate": 1.9174707054799556e-05, - "loss": 0.9233, - "step": 2321 - }, - { - "epoch": 0.72, - "learning_rate": 1.9135158745006044e-05, - "loss": 1.0183, - "step": 2322 - }, - { - "epoch": 0.72, - "learning_rate": 1.9095641608730042e-05, - "loss": 1.0566, - "step": 2323 - }, - { - "epoch": 0.72, - "learning_rate": 1.9056155685883858e-05, - "loss": 1.0243, - "step": 2324 - }, - { - "epoch": 0.72, - "learning_rate": 1.901670101634834e-05, - "loss": 0.93, - "step": 2325 - }, - { - "epoch": 0.72, - "learning_rate": 1.8977277639972717e-05, - "loss": 1.0392, - "step": 2326 - }, - { - "epoch": 0.72, - "learning_rate": 1.8937885596574655e-05, - "loss": 1.0595, - "step": 2327 - }, - { - "epoch": 0.72, - "learning_rate": 1.8898524925940163e-05, - "loss": 1.0019, - "step": 2328 - }, - { - "epoch": 0.72, - "learning_rate": 1.885919566782352e-05, - "loss": 0.9896, - "step": 2329 - }, - { - "epoch": 0.72, - "learning_rate": 1.881989786194735e-05, - "loss": 1.0167, - "step": 2330 - }, - { - "epoch": 0.72, - "learning_rate": 1.8780631548002432e-05, - "loss": 1.0554, - "step": 2331 - }, - { - "epoch": 0.72, - "learning_rate": 1.8741396765647795e-05, - "loss": 0.9469, - "step": 2332 - }, - { - "epoch": 0.72, - "learning_rate": 1.870219355451061e-05, - "loss": 1.0001, - "step": 2333 - }, - { - "epoch": 0.72, - "learning_rate": 1.8663021954186154e-05, - "loss": 1.0285, - "step": 2334 - }, - { - "epoch": 0.72, - "learning_rate": 1.862388200423779e-05, - "loss": 1.0138, - "step": 2335 - }, - { - "epoch": 0.72, - "learning_rate": 1.858477374419688e-05, - "loss": 1.0468, - "step": 2336 - }, - { - "epoch": 0.72, - "learning_rate": 1.8545697213562813e-05, - "loss": 0.8552, - "step": 2337 - }, - { - "epoch": 0.73, - "learning_rate": 1.8506652451802914e-05, - "loss": 0.931, - "step": 2338 - }, - { - "epoch": 0.73, - "learning_rate": 1.846763949835244e-05, - "loss": 1.1053, - "step": 2339 - }, - { - "epoch": 0.73, - "learning_rate": 1.8428658392614496e-05, - "loss": 1.0467, - "step": 2340 - }, - { - "epoch": 0.73, - "learning_rate": 1.838970917396006e-05, - "loss": 0.9446, - "step": 2341 - }, - { - "epoch": 0.73, - "learning_rate": 1.8350791881727863e-05, - "loss": 0.9478, - "step": 2342 - }, - { - "epoch": 0.73, - "learning_rate": 1.8311906555224377e-05, - "loss": 1.0948, - "step": 2343 - }, - { - "epoch": 0.73, - "learning_rate": 1.8273053233723842e-05, - "loss": 1.1499, - "step": 2344 - }, - { - "epoch": 0.73, - "learning_rate": 1.823423195646815e-05, - "loss": 0.9667, - "step": 2345 - }, - { - "epoch": 0.73, - "learning_rate": 1.819544276266682e-05, - "loss": 0.977, - "step": 2346 - }, - { - "epoch": 0.73, - "learning_rate": 1.815668569149697e-05, - "loss": 1.1432, - "step": 2347 - }, - { - "epoch": 0.73, - "learning_rate": 1.8117960782103304e-05, - "loss": 0.9312, - "step": 2348 - }, - { - "epoch": 0.73, - "learning_rate": 1.807926807359798e-05, - "loss": 1.0281, - "step": 2349 - }, - { - "epoch": 0.73, - "learning_rate": 1.8040607605060673e-05, - "loss": 1.0331, - "step": 2350 - }, - { - "epoch": 0.73, - "learning_rate": 1.8001979415538505e-05, - "loss": 0.9621, - "step": 2351 - }, - { - "epoch": 0.73, - "learning_rate": 1.7963383544045974e-05, - "loss": 1.042, - "step": 2352 - }, - { - "epoch": 0.73, - "learning_rate": 1.792482002956495e-05, - "loss": 1.1049, - "step": 2353 - }, - { - "epoch": 0.73, - "learning_rate": 1.788628891104463e-05, - "loss": 0.9287, - "step": 2354 - }, - { - "epoch": 0.73, - "learning_rate": 1.7847790227401466e-05, - "loss": 1.1015, - "step": 2355 - }, - { - "epoch": 0.73, - "learning_rate": 1.7809324017519147e-05, - "loss": 0.98, - "step": 2356 - }, - { - "epoch": 0.73, - "learning_rate": 1.777089032024859e-05, - "loss": 1.0029, - "step": 2357 - }, - { - "epoch": 0.73, - "learning_rate": 1.773248917440786e-05, - "loss": 0.9322, - "step": 2358 - }, - { - "epoch": 0.73, - "learning_rate": 1.7694120618782168e-05, - "loss": 0.9708, - "step": 2359 - }, - { - "epoch": 0.73, - "learning_rate": 1.7655784692123795e-05, - "loss": 1.1588, - "step": 2360 - }, - { - "epoch": 0.73, - "learning_rate": 1.7617481433152022e-05, - "loss": 0.9426, - "step": 2361 - }, - { - "epoch": 0.73, - "learning_rate": 1.75792108805532e-05, - "loss": 0.9284, - "step": 2362 - }, - { - "epoch": 0.73, - "learning_rate": 1.754097307298062e-05, - "loss": 1.041, - "step": 2363 - }, - { - "epoch": 0.73, - "learning_rate": 1.75027680490545e-05, - "loss": 1.0231, - "step": 2364 - }, - { - "epoch": 0.73, - "learning_rate": 1.7464595847361954e-05, - "loss": 1.0114, - "step": 2365 - }, - { - "epoch": 0.73, - "learning_rate": 1.742645650645695e-05, - "loss": 1.0569, - "step": 2366 - }, - { - "epoch": 0.73, - "learning_rate": 1.738835006486024e-05, - "loss": 0.9652, - "step": 2367 - }, - { - "epoch": 0.73, - "learning_rate": 1.7350276561059357e-05, - "loss": 1.0787, - "step": 2368 - }, - { - "epoch": 0.73, - "learning_rate": 1.7312236033508585e-05, - "loss": 0.8884, - "step": 2369 - }, - { - "epoch": 0.74, - "learning_rate": 1.7274228520628884e-05, - "loss": 0.9006, - "step": 2370 - }, - { - "epoch": 0.74, - "learning_rate": 1.723625406080789e-05, - "loss": 1.0707, - "step": 2371 - }, - { - "epoch": 0.74, - "learning_rate": 1.7198312692399826e-05, - "loss": 0.9894, - "step": 2372 - }, - { - "epoch": 0.74, - "learning_rate": 1.716040445372554e-05, - "loss": 1.0915, - "step": 2373 - }, - { - "epoch": 0.74, - "learning_rate": 1.7122529383072345e-05, - "loss": 0.9089, - "step": 2374 - }, - { - "epoch": 0.74, - "learning_rate": 1.708468751869412e-05, - "loss": 1.1739, - "step": 2375 - }, - { - "epoch": 0.74, - "learning_rate": 1.7046878898811174e-05, - "loss": 1.0762, - "step": 2376 - }, - { - "epoch": 0.74, - "learning_rate": 1.700910356161025e-05, - "loss": 1.0056, - "step": 2377 - }, - { - "epoch": 0.74, - "learning_rate": 1.6971361545244474e-05, - "loss": 1.1038, - "step": 2378 - }, - { - "epoch": 0.74, - "learning_rate": 1.6933652887833334e-05, - "loss": 1.0003, - "step": 2379 - }, - { - "epoch": 0.74, - "learning_rate": 1.6895977627462596e-05, - "loss": 0.9163, - "step": 2380 - }, - { - "epoch": 0.74, - "learning_rate": 1.6858335802184277e-05, - "loss": 1.0627, - "step": 2381 - }, - { - "epoch": 0.74, - "learning_rate": 1.6820727450016676e-05, - "loss": 1.0983, - "step": 2382 - }, - { - "epoch": 0.74, - "learning_rate": 1.678315260894427e-05, - "loss": 0.9512, - "step": 2383 - }, - { - "epoch": 0.74, - "learning_rate": 1.674561131691768e-05, - "loss": 0.9597, - "step": 2384 - }, - { - "epoch": 0.74, - "learning_rate": 1.6708103611853638e-05, - "loss": 0.9991, - "step": 2385 - }, - { - "epoch": 0.74, - "learning_rate": 1.6670629531634983e-05, - "loss": 1.0139, - "step": 2386 - }, - { - "epoch": 0.74, - "learning_rate": 1.6633189114110532e-05, - "loss": 0.9523, - "step": 2387 - }, - { - "epoch": 0.74, - "learning_rate": 1.659578239709516e-05, - "loss": 1.0126, - "step": 2388 - }, - { - "epoch": 0.74, - "learning_rate": 1.6558409418369686e-05, - "loss": 1.1906, - "step": 2389 - }, - { - "epoch": 0.74, - "learning_rate": 1.652107021568085e-05, - "loss": 0.9445, - "step": 2390 - }, - { - "epoch": 0.74, - "learning_rate": 1.64837648267413e-05, - "loss": 1.0217, - "step": 2391 - }, - { - "epoch": 0.74, - "learning_rate": 1.6446493289229493e-05, - "loss": 1.0111, - "step": 2392 - }, - { - "epoch": 0.74, - "learning_rate": 1.6409255640789694e-05, - "loss": 0.9898, - "step": 2393 - }, - { - "epoch": 0.74, - "learning_rate": 1.637205191903199e-05, - "loss": 1.0789, - "step": 2394 - }, - { - "epoch": 0.74, - "learning_rate": 1.633488216153216e-05, - "loss": 0.9535, - "step": 2395 - }, - { - "epoch": 0.74, - "learning_rate": 1.6297746405831704e-05, - "loss": 0.9482, - "step": 2396 - }, - { - "epoch": 0.74, - "learning_rate": 1.6260644689437767e-05, - "loss": 0.9922, - "step": 2397 - }, - { - "epoch": 0.74, - "learning_rate": 1.6223577049823124e-05, - "loss": 1.002, - "step": 2398 - }, - { - "epoch": 0.74, - "learning_rate": 1.61865435244261e-05, - "loss": 1.0227, - "step": 2399 - }, - { - "epoch": 0.74, - "learning_rate": 1.6149544150650602e-05, - "loss": 1.0127, - "step": 2400 - }, - { - "epoch": 0.74, - "learning_rate": 1.6112578965866037e-05, - "loss": 0.9361, - "step": 2401 - }, - { - "epoch": 0.75, - "learning_rate": 1.6075648007407263e-05, - "loss": 1.0968, - "step": 2402 - }, - { - "epoch": 0.75, - "learning_rate": 1.603875131257459e-05, - "loss": 0.9042, - "step": 2403 - }, - { - "epoch": 0.75, - "learning_rate": 1.6001888918633727e-05, - "loss": 1.0451, - "step": 2404 - }, - { - "epoch": 0.75, - "learning_rate": 1.596506086281571e-05, - "loss": 0.9088, - "step": 2405 - }, - { - "epoch": 0.75, - "learning_rate": 1.592826718231689e-05, - "loss": 0.9858, - "step": 2406 - }, - { - "epoch": 0.75, - "learning_rate": 1.5891507914298935e-05, - "loss": 0.9884, - "step": 2407 - }, - { - "epoch": 0.75, - "learning_rate": 1.5854783095888744e-05, - "loss": 1.1072, - "step": 2408 - }, - { - "epoch": 0.75, - "learning_rate": 1.5818092764178405e-05, - "loss": 0.9853, - "step": 2409 - }, - { - "epoch": 0.75, - "learning_rate": 1.5781436956225197e-05, - "loss": 0.9847, - "step": 2410 - }, - { - "epoch": 0.75, - "learning_rate": 1.574481570905153e-05, - "loss": 0.9126, - "step": 2411 - }, - { - "epoch": 0.75, - "learning_rate": 1.5708229059644868e-05, - "loss": 1.0986, - "step": 2412 - }, - { - "epoch": 0.75, - "learning_rate": 1.5671677044957773e-05, - "loss": 0.9982, - "step": 2413 - }, - { - "epoch": 0.75, - "learning_rate": 1.5635159701907808e-05, - "loss": 0.9889, - "step": 2414 - }, - { - "epoch": 0.75, - "learning_rate": 1.559867706737753e-05, - "loss": 0.9798, - "step": 2415 - }, - { - "epoch": 0.75, - "learning_rate": 1.556222917821445e-05, - "loss": 0.9397, - "step": 2416 - }, - { - "epoch": 0.75, - "learning_rate": 1.552581607123093e-05, - "loss": 1.065, - "step": 2417 - }, - { - "epoch": 0.75, - "learning_rate": 1.5489437783204274e-05, - "loss": 1.0057, - "step": 2418 - }, - { - "epoch": 0.75, - "learning_rate": 1.545309435087656e-05, - "loss": 0.9495, - "step": 2419 - }, - { - "epoch": 0.75, - "learning_rate": 1.5416785810954697e-05, - "loss": 1.1264, - "step": 2420 - }, - { - "epoch": 0.75, - "learning_rate": 1.5380512200110347e-05, - "loss": 0.9836, - "step": 2421 - }, - { - "epoch": 0.75, - "learning_rate": 1.5344273554979888e-05, - "loss": 0.9982, - "step": 2422 - }, - { - "epoch": 0.75, - "learning_rate": 1.5308069912164402e-05, - "loss": 0.945, - "step": 2423 - }, - { - "epoch": 0.75, - "learning_rate": 1.5271901308229565e-05, - "loss": 0.9996, - "step": 2424 - }, - { - "epoch": 0.75, - "learning_rate": 1.5235767779705717e-05, - "loss": 1.0473, - "step": 2425 - }, - { - "epoch": 0.75, - "learning_rate": 1.5199669363087754e-05, - "loss": 1.0389, - "step": 2426 - }, - { - "epoch": 0.75, - "learning_rate": 1.5163606094835115e-05, - "loss": 0.9302, - "step": 2427 - }, - { - "epoch": 0.75, - "learning_rate": 1.512757801137173e-05, - "loss": 1.087, - "step": 2428 - }, - { - "epoch": 0.75, - "learning_rate": 1.5091585149086007e-05, - "loss": 0.9328, - "step": 2429 - }, - { - "epoch": 0.75, - "learning_rate": 1.5055627544330752e-05, - "loss": 1.0661, - "step": 2430 - }, - { - "epoch": 0.75, - "learning_rate": 1.5019705233423176e-05, - "loss": 0.9954, - "step": 2431 - }, - { - "epoch": 0.75, - "learning_rate": 1.4983818252644849e-05, - "loss": 0.9842, - "step": 2432 - }, - { - "epoch": 0.75, - "learning_rate": 1.4947966638241662e-05, - "loss": 1.0, - "step": 2433 - }, - { - "epoch": 0.76, - "learning_rate": 1.4912150426423766e-05, - "loss": 1.0627, - "step": 2434 - }, - { - "epoch": 0.76, - "learning_rate": 1.487636965336558e-05, - "loss": 1.0407, - "step": 2435 - }, - { - "epoch": 0.76, - "learning_rate": 1.4840624355205728e-05, - "loss": 0.9714, - "step": 2436 - }, - { - "epoch": 0.76, - "learning_rate": 1.4804914568046957e-05, - "loss": 1.0309, - "step": 2437 - }, - { - "epoch": 0.76, - "learning_rate": 1.4769240327956207e-05, - "loss": 1.0241, - "step": 2438 - }, - { - "epoch": 0.76, - "learning_rate": 1.4733601670964491e-05, - "loss": 1.0158, - "step": 2439 - }, - { - "epoch": 0.76, - "learning_rate": 1.4697998633066884e-05, - "loss": 0.9386, - "step": 2440 - }, - { - "epoch": 0.76, - "learning_rate": 1.4662431250222503e-05, - "loss": 1.0245, - "step": 2441 - }, - { - "epoch": 0.76, - "learning_rate": 1.4626899558354401e-05, - "loss": 1.04, - "step": 2442 - }, - { - "epoch": 0.76, - "learning_rate": 1.4591403593349651e-05, - "loss": 0.9473, - "step": 2443 - }, - { - "epoch": 0.76, - "learning_rate": 1.4555943391059184e-05, - "loss": 0.9428, - "step": 2444 - }, - { - "epoch": 0.76, - "learning_rate": 1.4520518987297855e-05, - "loss": 0.9977, - "step": 2445 - }, - { - "epoch": 0.76, - "learning_rate": 1.4485130417844328e-05, - "loss": 1.0046, - "step": 2446 - }, - { - "epoch": 0.76, - "learning_rate": 1.4449777718441105e-05, - "loss": 1.1024, - "step": 2447 - }, - { - "epoch": 0.76, - "learning_rate": 1.4414460924794449e-05, - "loss": 0.9536, - "step": 2448 - }, - { - "epoch": 0.76, - "learning_rate": 1.4379180072574333e-05, - "loss": 0.9728, - "step": 2449 - }, - { - "epoch": 0.76, - "learning_rate": 1.4343935197414455e-05, - "loss": 1.014, - "step": 2450 - }, - { - "epoch": 0.76, - "learning_rate": 1.4308726334912176e-05, - "loss": 1.0086, - "step": 2451 - }, - { - "epoch": 0.76, - "learning_rate": 1.427355352062848e-05, - "loss": 0.867, - "step": 2452 - }, - { - "epoch": 0.76, - "learning_rate": 1.4238416790087943e-05, - "loss": 0.9962, - "step": 2453 - }, - { - "epoch": 0.76, - "learning_rate": 1.4203316178778703e-05, - "loss": 1.0758, - "step": 2454 - }, - { - "epoch": 0.76, - "learning_rate": 1.4168251722152399e-05, - "loss": 1.0066, - "step": 2455 - }, - { - "epoch": 0.76, - "learning_rate": 1.413322345562414e-05, - "loss": 0.9111, - "step": 2456 - }, - { - "epoch": 0.76, - "learning_rate": 1.4098231414572532e-05, - "loss": 1.0263, - "step": 2457 - }, - { - "epoch": 0.76, - "learning_rate": 1.4063275634339567e-05, - "loss": 1.014, - "step": 2458 - }, - { - "epoch": 0.76, - "learning_rate": 1.402835615023061e-05, - "loss": 1.0636, - "step": 2459 - }, - { - "epoch": 0.76, - "learning_rate": 1.3993472997514374e-05, - "loss": 0.948, - "step": 2460 - }, - { - "epoch": 0.76, - "learning_rate": 1.3958626211422888e-05, - "loss": 1.0261, - "step": 2461 - }, - { - "epoch": 0.76, - "learning_rate": 1.392381582715141e-05, - "loss": 1.0119, - "step": 2462 - }, - { - "epoch": 0.76, - "learning_rate": 1.3889041879858472e-05, - "loss": 0.9577, - "step": 2463 - }, - { - "epoch": 0.76, - "learning_rate": 1.3854304404665796e-05, - "loss": 1.034, - "step": 2464 - }, - { - "epoch": 0.76, - "learning_rate": 1.3819603436658256e-05, - "loss": 1.005, - "step": 2465 - }, - { - "epoch": 0.77, - "learning_rate": 1.3784939010883885e-05, - "loss": 0.9765, - "step": 2466 - }, - { - "epoch": 0.77, - "learning_rate": 1.3750311162353752e-05, - "loss": 1.017, - "step": 2467 - }, - { - "epoch": 0.77, - "learning_rate": 1.3715719926042036e-05, - "loss": 0.9909, - "step": 2468 - }, - { - "epoch": 0.77, - "learning_rate": 1.3681165336885899e-05, - "loss": 0.9623, - "step": 2469 - }, - { - "epoch": 0.77, - "learning_rate": 1.3646647429785515e-05, - "loss": 0.8122, - "step": 2470 - }, - { - "epoch": 0.77, - "learning_rate": 1.3612166239604001e-05, - "loss": 1.1148, - "step": 2471 - }, - { - "epoch": 0.77, - "learning_rate": 1.3577721801167399e-05, - "loss": 0.9529, - "step": 2472 - }, - { - "epoch": 0.77, - "learning_rate": 1.3543314149264625e-05, - "loss": 0.9752, - "step": 2473 - }, - { - "epoch": 0.77, - "learning_rate": 1.3508943318647416e-05, - "loss": 1.0196, - "step": 2474 - }, - { - "epoch": 0.77, - "learning_rate": 1.3474609344030359e-05, - "loss": 1.0962, - "step": 2475 - }, - { - "epoch": 0.77, - "learning_rate": 1.3440312260090792e-05, - "loss": 0.9214, - "step": 2476 - }, - { - "epoch": 0.77, - "learning_rate": 1.3406052101468808e-05, - "loss": 1.0669, - "step": 2477 - }, - { - "epoch": 0.77, - "learning_rate": 1.3371828902767198e-05, - "loss": 0.9715, - "step": 2478 - }, - { - "epoch": 0.77, - "learning_rate": 1.3337642698551428e-05, - "loss": 1.0119, - "step": 2479 - }, - { - "epoch": 0.77, - "learning_rate": 1.3303493523349591e-05, - "loss": 1.0077, - "step": 2480 - }, - { - "epoch": 0.77, - "learning_rate": 1.3269381411652364e-05, - "loss": 1.108, - "step": 2481 - }, - { - "epoch": 0.77, - "learning_rate": 1.3235306397913032e-05, - "loss": 0.894, - "step": 2482 - }, - { - "epoch": 0.77, - "learning_rate": 1.3201268516547382e-05, - "loss": 1.0698, - "step": 2483 - }, - { - "epoch": 0.77, - "learning_rate": 1.3167267801933713e-05, - "loss": 0.9792, - "step": 2484 - }, - { - "epoch": 0.77, - "learning_rate": 1.3133304288412768e-05, - "loss": 0.9466, - "step": 2485 - }, - { - "epoch": 0.77, - "learning_rate": 1.3099378010287749e-05, - "loss": 1.026, - "step": 2486 - }, - { - "epoch": 0.77, - "learning_rate": 1.3065489001824194e-05, - "loss": 1.0, - "step": 2487 - }, - { - "epoch": 0.77, - "learning_rate": 1.3031637297250043e-05, - "loss": 1.0239, - "step": 2488 - }, - { - "epoch": 0.77, - "learning_rate": 1.299782293075556e-05, - "loss": 1.0901, - "step": 2489 - }, - { - "epoch": 0.77, - "learning_rate": 1.2964045936493274e-05, - "loss": 0.9969, - "step": 2490 - }, - { - "epoch": 0.77, - "learning_rate": 1.293030634857798e-05, - "loss": 1.0295, - "step": 2491 - }, - { - "epoch": 0.77, - "learning_rate": 1.289660420108672e-05, - "loss": 0.9844, - "step": 2492 - }, - { - "epoch": 0.77, - "learning_rate": 1.2862939528058666e-05, - "loss": 1.0438, - "step": 2493 - }, - { - "epoch": 0.77, - "learning_rate": 1.2829312363495155e-05, - "loss": 1.0723, - "step": 2494 - }, - { - "epoch": 0.77, - "learning_rate": 1.2795722741359672e-05, - "loss": 0.9681, - "step": 2495 - }, - { - "epoch": 0.77, - "learning_rate": 1.2762170695577763e-05, - "loss": 0.9907, - "step": 2496 - }, - { - "epoch": 0.77, - "learning_rate": 1.2728656260037025e-05, - "loss": 1.0576, - "step": 2497 - }, - { - "epoch": 0.77, - "learning_rate": 1.2695179468587082e-05, - "loss": 0.9508, - "step": 2498 - }, - { - "epoch": 0.78, - "learning_rate": 1.2661740355039498e-05, - "loss": 1.0212, - "step": 2499 - }, - { - "epoch": 0.78, - "learning_rate": 1.2628338953167817e-05, - "loss": 1.005, - "step": 2500 - }, - { - "epoch": 0.78, - "learning_rate": 1.2594975296707495e-05, - "loss": 1.0632, - "step": 2501 - }, - { - "epoch": 0.78, - "learning_rate": 1.2561649419355842e-05, - "loss": 1.0478, - "step": 2502 - }, - { - "epoch": 0.78, - "learning_rate": 1.2528361354772029e-05, - "loss": 0.984, - "step": 2503 - }, - { - "epoch": 0.78, - "learning_rate": 1.2495111136577048e-05, - "loss": 0.8312, - "step": 2504 - }, - { - "epoch": 0.78, - "learning_rate": 1.2461898798353638e-05, - "loss": 0.8974, - "step": 2505 - }, - { - "epoch": 0.78, - "learning_rate": 1.2428724373646266e-05, - "loss": 1.0508, - "step": 2506 - }, - { - "epoch": 0.78, - "learning_rate": 1.2395587895961158e-05, - "loss": 1.1006, - "step": 2507 - }, - { - "epoch": 0.78, - "learning_rate": 1.2362489398766175e-05, - "loss": 0.9298, - "step": 2508 - }, - { - "epoch": 0.78, - "learning_rate": 1.2329428915490832e-05, - "loss": 0.9131, - "step": 2509 - }, - { - "epoch": 0.78, - "learning_rate": 1.229640647952625e-05, - "loss": 1.0822, - "step": 2510 - }, - { - "epoch": 0.78, - "learning_rate": 1.2263422124225132e-05, - "loss": 0.9711, - "step": 2511 - }, - { - "epoch": 0.78, - "learning_rate": 1.2230475882901676e-05, - "loss": 1.0902, - "step": 2512 - }, - { - "epoch": 0.78, - "learning_rate": 1.2197567788831638e-05, - "loss": 0.9104, - "step": 2513 - }, - { - "epoch": 0.78, - "learning_rate": 1.2164697875252206e-05, - "loss": 1.1086, - "step": 2514 - }, - { - "epoch": 0.78, - "learning_rate": 1.213186617536204e-05, - "loss": 1.0917, - "step": 2515 - }, - { - "epoch": 0.78, - "learning_rate": 1.209907272232118e-05, - "loss": 0.9463, - "step": 2516 - }, - { - "epoch": 0.78, - "learning_rate": 1.2066317549251056e-05, - "loss": 0.9778, - "step": 2517 - }, - { - "epoch": 0.78, - "learning_rate": 1.2033600689234403e-05, - "loss": 1.0592, - "step": 2518 - }, - { - "epoch": 0.78, - "learning_rate": 1.2000922175315277e-05, - "loss": 1.0095, - "step": 2519 - }, - { - "epoch": 0.78, - "learning_rate": 1.1968282040499008e-05, - "loss": 0.8649, - "step": 2520 - }, - { - "epoch": 0.78, - "learning_rate": 1.1935680317752174e-05, - "loss": 0.914, - "step": 2521 - }, - { - "epoch": 0.78, - "learning_rate": 1.1903117040002531e-05, - "loss": 0.9975, - "step": 2522 - }, - { - "epoch": 0.78, - "learning_rate": 1.1870592240139034e-05, - "loss": 1.1819, - "step": 2523 - }, - { - "epoch": 0.78, - "learning_rate": 1.1838105951011757e-05, - "loss": 0.9564, - "step": 2524 - }, - { - "epoch": 0.78, - "learning_rate": 1.180565820543187e-05, - "loss": 0.9519, - "step": 2525 - }, - { - "epoch": 0.78, - "learning_rate": 1.1773249036171629e-05, - "loss": 1.007, - "step": 2526 - }, - { - "epoch": 0.78, - "learning_rate": 1.1740878475964328e-05, - "loss": 0.9695, - "step": 2527 - }, - { - "epoch": 0.78, - "learning_rate": 1.1708546557504262e-05, - "loss": 1.1303, - "step": 2528 - }, - { - "epoch": 0.78, - "learning_rate": 1.1676253313446706e-05, - "loss": 0.898, - "step": 2529 - }, - { - "epoch": 0.78, - "learning_rate": 1.164399877640786e-05, - "loss": 1.0256, - "step": 2530 - }, - { - "epoch": 0.79, - "learning_rate": 1.1611782978964819e-05, - "loss": 0.8622, - "step": 2531 - }, - { - "epoch": 0.79, - "learning_rate": 1.1579605953655581e-05, - "loss": 1.0266, - "step": 2532 - }, - { - "epoch": 0.79, - "learning_rate": 1.1547467732978967e-05, - "loss": 1.007, - "step": 2533 - }, - { - "epoch": 0.79, - "learning_rate": 1.1515368349394612e-05, - "loss": 0.9716, - "step": 2534 - }, - { - "epoch": 0.79, - "learning_rate": 1.148330783532292e-05, - "loss": 1.1472, - "step": 2535 - }, - { - "epoch": 0.79, - "learning_rate": 1.1451286223145063e-05, - "loss": 1.0458, - "step": 2536 - }, - { - "epoch": 0.79, - "learning_rate": 1.1419303545202852e-05, - "loss": 0.9544, - "step": 2537 - }, - { - "epoch": 0.79, - "learning_rate": 1.1387359833798855e-05, - "loss": 0.8486, - "step": 2538 - }, - { - "epoch": 0.79, - "learning_rate": 1.1355455121196234e-05, - "loss": 0.9712, - "step": 2539 - }, - { - "epoch": 0.79, - "learning_rate": 1.1323589439618781e-05, - "loss": 1.0814, - "step": 2540 - }, - { - "epoch": 0.79, - "learning_rate": 1.1291762821250867e-05, - "loss": 0.9302, - "step": 2541 - }, - { - "epoch": 0.79, - "learning_rate": 1.1259975298237418e-05, - "loss": 1.008, - "step": 2542 - }, - { - "epoch": 0.79, - "learning_rate": 1.1228226902683847e-05, - "loss": 1.1444, - "step": 2543 - }, - { - "epoch": 0.79, - "learning_rate": 1.1196517666656048e-05, - "loss": 1.055, - "step": 2544 - }, - { - "epoch": 0.79, - "learning_rate": 1.1164847622180391e-05, - "loss": 0.9243, - "step": 2545 - }, - { - "epoch": 0.79, - "learning_rate": 1.1133216801243646e-05, - "loss": 1.0438, - "step": 2546 - }, - { - "epoch": 0.79, - "learning_rate": 1.1101625235792979e-05, - "loss": 0.9365, - "step": 2547 - }, - { - "epoch": 0.79, - "learning_rate": 1.1070072957735889e-05, - "loss": 1.112, - "step": 2548 - }, - { - "epoch": 0.79, - "learning_rate": 1.1038559998940228e-05, - "loss": 0.9799, - "step": 2549 - }, - { - "epoch": 0.79, - "learning_rate": 1.1007086391234083e-05, - "loss": 0.9868, - "step": 2550 - }, - { - "epoch": 0.79, - "learning_rate": 1.0975652166405837e-05, - "loss": 1.0413, - "step": 2551 - }, - { - "epoch": 0.79, - "learning_rate": 1.094425735620409e-05, - "loss": 0.9935, - "step": 2552 - }, - { - "epoch": 0.79, - "learning_rate": 1.0912901992337627e-05, - "loss": 0.9646, - "step": 2553 - }, - { - "epoch": 0.79, - "learning_rate": 1.0881586106475405e-05, - "loss": 1.0346, - "step": 2554 - }, - { - "epoch": 0.79, - "learning_rate": 1.0850309730246472e-05, - "loss": 0.9411, - "step": 2555 - }, - { - "epoch": 0.79, - "learning_rate": 1.0819072895240018e-05, - "loss": 1.1245, - "step": 2556 - }, - { - "epoch": 0.79, - "learning_rate": 1.0787875633005257e-05, - "loss": 0.984, - "step": 2557 - }, - { - "epoch": 0.79, - "learning_rate": 1.0756717975051462e-05, - "loss": 0.9052, - "step": 2558 - }, - { - "epoch": 0.79, - "learning_rate": 1.0725599952847893e-05, - "loss": 1.0152, - "step": 2559 - }, - { - "epoch": 0.79, - "learning_rate": 1.0694521597823775e-05, - "loss": 0.941, - "step": 2560 - }, - { - "epoch": 0.79, - "learning_rate": 1.0663482941368302e-05, - "loss": 1.1072, - "step": 2561 - }, - { - "epoch": 0.79, - "learning_rate": 1.0632484014830513e-05, - "loss": 1.019, - "step": 2562 - }, - { - "epoch": 0.8, - "learning_rate": 1.0601524849519356e-05, - "loss": 0.9732, - "step": 2563 - }, - { - "epoch": 0.8, - "learning_rate": 1.0570605476703626e-05, - "loss": 1.0307, - "step": 2564 - }, - { - "epoch": 0.8, - "learning_rate": 1.0539725927611915e-05, - "loss": 0.9583, - "step": 2565 - }, - { - "epoch": 0.8, - "learning_rate": 1.0508886233432597e-05, - "loss": 0.9866, - "step": 2566 - }, - { - "epoch": 0.8, - "learning_rate": 1.0478086425313798e-05, - "loss": 1.0202, - "step": 2567 - }, - { - "epoch": 0.8, - "learning_rate": 1.0447326534363344e-05, - "loss": 0.9543, - "step": 2568 - }, - { - "epoch": 0.8, - "learning_rate": 1.0416606591648737e-05, - "loss": 1.1062, - "step": 2569 - }, - { - "epoch": 0.8, - "learning_rate": 1.0385926628197162e-05, - "loss": 0.9479, - "step": 2570 - }, - { - "epoch": 0.8, - "learning_rate": 1.0355286674995413e-05, - "loss": 0.7697, - "step": 2571 - }, - { - "epoch": 0.8, - "learning_rate": 1.032468676298986e-05, - "loss": 1.0742, - "step": 2572 - }, - { - "epoch": 0.8, - "learning_rate": 1.029412692308645e-05, - "loss": 0.9692, - "step": 2573 - }, - { - "epoch": 0.8, - "learning_rate": 1.0263607186150659e-05, - "loss": 0.9919, - "step": 2574 - }, - { - "epoch": 0.8, - "learning_rate": 1.0233127583007424e-05, - "loss": 1.0358, - "step": 2575 - }, - { - "epoch": 0.8, - "learning_rate": 1.0202688144441186e-05, - "loss": 0.9576, - "step": 2576 - }, - { - "epoch": 0.8, - "learning_rate": 1.0172288901195803e-05, - "loss": 1.1, - "step": 2577 - }, - { - "epoch": 0.8, - "learning_rate": 1.0141929883974539e-05, - "loss": 0.9781, - "step": 2578 - }, - { - "epoch": 0.8, - "learning_rate": 1.0111611123440041e-05, - "loss": 1.0115, - "step": 2579 - }, - { - "epoch": 0.8, - "learning_rate": 1.008133265021426e-05, - "loss": 0.9368, - "step": 2580 - }, - { - "epoch": 0.8, - "learning_rate": 1.0051094494878504e-05, - "loss": 0.9368, - "step": 2581 - }, - { - "epoch": 0.8, - "learning_rate": 1.0020896687973314e-05, - "loss": 0.961, - "step": 2582 - }, - { - "epoch": 0.8, - "learning_rate": 9.990739259998516e-06, - "loss": 1.1248, - "step": 2583 - }, - { - "epoch": 0.8, - "learning_rate": 9.960622241413137e-06, - "loss": 0.9703, - "step": 2584 - }, - { - "epoch": 0.8, - "learning_rate": 9.930545662635393e-06, - "loss": 1.0324, - "step": 2585 - }, - { - "epoch": 0.8, - "learning_rate": 9.90050955404267e-06, - "loss": 0.9385, - "step": 2586 - }, - { - "epoch": 0.8, - "learning_rate": 9.870513945971444e-06, - "loss": 0.9587, - "step": 2587 - }, - { - "epoch": 0.8, - "learning_rate": 9.840558868717303e-06, - "loss": 1.0916, - "step": 2588 - }, - { - "epoch": 0.8, - "learning_rate": 9.81064435253492e-06, - "loss": 0.9862, - "step": 2589 - }, - { - "epoch": 0.8, - "learning_rate": 9.780770427637976e-06, - "loss": 0.9296, - "step": 2590 - }, - { - "epoch": 0.8, - "learning_rate": 9.750937124199162e-06, - "loss": 0.9667, - "step": 2591 - }, - { - "epoch": 0.8, - "learning_rate": 9.721144472350152e-06, - "loss": 0.9665, - "step": 2592 - }, - { - "epoch": 0.8, - "learning_rate": 9.69139250218154e-06, - "loss": 1.0265, - "step": 2593 - }, - { - "epoch": 0.8, - "learning_rate": 9.661681243742832e-06, - "loss": 1.0088, - "step": 2594 - }, - { - "epoch": 0.81, - "learning_rate": 9.63201072704244e-06, - "loss": 1.1002, - "step": 2595 - }, - { - "epoch": 0.81, - "learning_rate": 9.602380982047604e-06, - "loss": 1.0233, - "step": 2596 - }, - { - "epoch": 0.81, - "learning_rate": 9.572792038684402e-06, - "loss": 0.9873, - "step": 2597 - }, - { - "epoch": 0.81, - "learning_rate": 9.543243926837691e-06, - "loss": 0.925, - "step": 2598 - }, - { - "epoch": 0.81, - "learning_rate": 9.513736676351104e-06, - "loss": 1.0244, - "step": 2599 - }, - { - "epoch": 0.81, - "learning_rate": 9.48427031702696e-06, - "loss": 1.0183, - "step": 2600 - }, - { - "epoch": 0.81, - "learning_rate": 9.45484487862633e-06, - "loss": 1.0024, - "step": 2601 - }, - { - "epoch": 0.81, - "learning_rate": 9.425460390868924e-06, - "loss": 0.9706, - "step": 2602 - }, - { - "epoch": 0.81, - "learning_rate": 9.396116883433104e-06, - "loss": 1.0161, - "step": 2603 - }, - { - "epoch": 0.81, - "learning_rate": 9.366814385955846e-06, - "loss": 0.8706, - "step": 2604 - }, - { - "epoch": 0.81, - "learning_rate": 9.337552928032706e-06, - "loss": 1.0616, - "step": 2605 - }, - { - "epoch": 0.81, - "learning_rate": 9.308332539217769e-06, - "loss": 0.9169, - "step": 2606 - }, - { - "epoch": 0.81, - "learning_rate": 9.279153249023637e-06, - "loss": 0.9611, - "step": 2607 - }, - { - "epoch": 0.81, - "learning_rate": 9.250015086921443e-06, - "loss": 1.091, - "step": 2608 - }, - { - "epoch": 0.81, - "learning_rate": 9.220918082340751e-06, - "loss": 1.026, - "step": 2609 - }, - { - "epoch": 0.81, - "learning_rate": 9.19186226466956e-06, - "loss": 1.0134, - "step": 2610 - }, - { - "epoch": 0.81, - "learning_rate": 9.162847663254292e-06, - "loss": 0.9542, - "step": 2611 - }, - { - "epoch": 0.81, - "learning_rate": 9.133874307399686e-06, - "loss": 1.0948, - "step": 2612 - }, - { - "epoch": 0.81, - "learning_rate": 9.104942226368879e-06, - "loss": 1.0914, - "step": 2613 - }, - { - "epoch": 0.81, - "learning_rate": 9.076051449383293e-06, - "loss": 1.0169, - "step": 2614 - }, - { - "epoch": 0.81, - "learning_rate": 9.047202005622641e-06, - "loss": 0.9775, - "step": 2615 - }, - { - "epoch": 0.81, - "learning_rate": 9.018393924224883e-06, - "loss": 1.0169, - "step": 2616 - }, - { - "epoch": 0.81, - "learning_rate": 8.989627234286225e-06, - "loss": 0.9349, - "step": 2617 - }, - { - "epoch": 0.81, - "learning_rate": 8.960901964861041e-06, - "loss": 0.9635, - "step": 2618 - }, - { - "epoch": 0.81, - "learning_rate": 8.932218144961857e-06, - "loss": 0.9678, - "step": 2619 - }, - { - "epoch": 0.81, - "learning_rate": 8.903575803559371e-06, - "loss": 0.9589, - "step": 2620 - }, - { - "epoch": 0.81, - "learning_rate": 8.874974969582378e-06, - "loss": 1.0104, - "step": 2621 - }, - { - "epoch": 0.81, - "learning_rate": 8.846415671917745e-06, - "loss": 0.9842, - "step": 2622 - }, - { - "epoch": 0.81, - "learning_rate": 8.817897939410392e-06, - "loss": 0.9526, - "step": 2623 - }, - { - "epoch": 0.81, - "learning_rate": 8.789421800863257e-06, - "loss": 1.0268, - "step": 2624 - }, - { - "epoch": 0.81, - "learning_rate": 8.760987285037248e-06, - "loss": 1.0102, - "step": 2625 - }, - { - "epoch": 0.81, - "learning_rate": 8.732594420651263e-06, - "loss": 0.99, - "step": 2626 - }, - { - "epoch": 0.81, - "learning_rate": 8.704243236382115e-06, - "loss": 0.9596, - "step": 2627 - }, - { - "epoch": 0.82, - "learning_rate": 8.675933760864536e-06, - "loss": 1.0986, - "step": 2628 - }, - { - "epoch": 0.82, - "learning_rate": 8.64766602269112e-06, - "loss": 0.9356, - "step": 2629 - }, - { - "epoch": 0.82, - "learning_rate": 8.619440050412315e-06, - "loss": 0.9727, - "step": 2630 - }, - { - "epoch": 0.82, - "learning_rate": 8.591255872536363e-06, - "loss": 0.9927, - "step": 2631 - }, - { - "epoch": 0.82, - "learning_rate": 8.563113517529302e-06, - "loss": 1.0421, - "step": 2632 - }, - { - "epoch": 0.82, - "learning_rate": 8.535013013814953e-06, - "loss": 0.9569, - "step": 2633 - }, - { - "epoch": 0.82, - "learning_rate": 8.506954389774846e-06, - "loss": 1.0774, - "step": 2634 - }, - { - "epoch": 0.82, - "learning_rate": 8.478937673748211e-06, - "loss": 1.037, - "step": 2635 - }, - { - "epoch": 0.82, - "learning_rate": 8.450962894031967e-06, - "loss": 0.9413, - "step": 2636 - }, - { - "epoch": 0.82, - "learning_rate": 8.423030078880667e-06, - "loss": 1.0159, - "step": 2637 - }, - { - "epoch": 0.82, - "learning_rate": 8.395139256506463e-06, - "loss": 1.0142, - "step": 2638 - }, - { - "epoch": 0.82, - "learning_rate": 8.367290455079125e-06, - "loss": 0.8928, - "step": 2639 - }, - { - "epoch": 0.82, - "learning_rate": 8.339483702725959e-06, - "loss": 1.0134, - "step": 2640 - }, - { - "epoch": 0.82, - "learning_rate": 8.311719027531817e-06, - "loss": 1.0007, - "step": 2641 - }, - { - "epoch": 0.82, - "learning_rate": 8.283996457539056e-06, - "loss": 0.8726, - "step": 2642 - }, - { - "epoch": 0.82, - "learning_rate": 8.256316020747479e-06, - "loss": 0.9647, - "step": 2643 - }, - { - "epoch": 0.82, - "learning_rate": 8.22867774511435e-06, - "loss": 0.9594, - "step": 2644 - }, - { - "epoch": 0.82, - "learning_rate": 8.20108165855436e-06, - "loss": 1.0204, - "step": 2645 - }, - { - "epoch": 0.82, - "learning_rate": 8.173527788939583e-06, - "loss": 1.0196, - "step": 2646 - }, - { - "epoch": 0.82, - "learning_rate": 8.146016164099445e-06, - "loss": 1.0144, - "step": 2647 - }, - { - "epoch": 0.82, - "learning_rate": 8.118546811820722e-06, - "loss": 1.0554, - "step": 2648 - }, - { - "epoch": 0.82, - "learning_rate": 8.091119759847493e-06, - "loss": 0.9876, - "step": 2649 - }, - { - "epoch": 0.82, - "learning_rate": 8.063735035881076e-06, - "loss": 0.9643, - "step": 2650 - }, - { - "epoch": 0.82, - "learning_rate": 8.036392667580084e-06, - "loss": 0.996, - "step": 2651 - }, - { - "epoch": 0.82, - "learning_rate": 8.009092682560332e-06, - "loss": 0.9119, - "step": 2652 - }, - { - "epoch": 0.82, - "learning_rate": 7.981835108394824e-06, - "loss": 0.9855, - "step": 2653 - }, - { - "epoch": 0.82, - "learning_rate": 7.954619972613747e-06, - "loss": 1.0487, - "step": 2654 - }, - { - "epoch": 0.82, - "learning_rate": 7.927447302704422e-06, - "loss": 0.896, - "step": 2655 - }, - { - "epoch": 0.82, - "learning_rate": 7.900317126111245e-06, - "loss": 1.0027, - "step": 2656 - }, - { - "epoch": 0.82, - "learning_rate": 7.873229470235721e-06, - "loss": 0.9694, - "step": 2657 - }, - { - "epoch": 0.82, - "learning_rate": 7.846184362436415e-06, - "loss": 1.0158, - "step": 2658 - }, - { - "epoch": 0.82, - "learning_rate": 7.81918183002891e-06, - "loss": 1.0402, - "step": 2659 - }, - { - "epoch": 0.83, - "learning_rate": 7.792221900285784e-06, - "loss": 0.9587, - "step": 2660 - }, - { - "epoch": 0.83, - "learning_rate": 7.765304600436595e-06, - "loss": 1.0912, - "step": 2661 - }, - { - "epoch": 0.83, - "learning_rate": 7.738429957667847e-06, - "loss": 0.9637, - "step": 2662 - }, - { - "epoch": 0.83, - "learning_rate": 7.711597999122938e-06, - "loss": 1.0122, - "step": 2663 - }, - { - "epoch": 0.83, - "learning_rate": 7.684808751902179e-06, - "loss": 0.9561, - "step": 2664 - }, - { - "epoch": 0.83, - "learning_rate": 7.658062243062724e-06, - "loss": 0.9816, - "step": 2665 - }, - { - "epoch": 0.83, - "learning_rate": 7.631358499618585e-06, - "loss": 1.0481, - "step": 2666 - }, - { - "epoch": 0.83, - "learning_rate": 7.604697548540573e-06, - "loss": 0.9399, - "step": 2667 - }, - { - "epoch": 0.83, - "learning_rate": 7.578079416756262e-06, - "loss": 0.9936, - "step": 2668 - }, - { - "epoch": 0.83, - "learning_rate": 7.5515041311499726e-06, - "loss": 0.9904, - "step": 2669 - }, - { - "epoch": 0.83, - "learning_rate": 7.524971718562784e-06, - "loss": 1.0482, - "step": 2670 - }, - { - "epoch": 0.83, - "learning_rate": 7.498482205792456e-06, - "loss": 0.9533, - "step": 2671 - }, - { - "epoch": 0.83, - "learning_rate": 7.472035619593421e-06, - "loss": 1.0191, - "step": 2672 - }, - { - "epoch": 0.83, - "learning_rate": 7.445631986676754e-06, - "loss": 0.9592, - "step": 2673 - }, - { - "epoch": 0.83, - "learning_rate": 7.419271333710154e-06, - "loss": 1.0259, - "step": 2674 - }, - { - "epoch": 0.83, - "learning_rate": 7.392953687317894e-06, - "loss": 0.8035, - "step": 2675 - }, - { - "epoch": 0.83, - "learning_rate": 7.366679074080818e-06, - "loss": 0.9976, - "step": 2676 - }, - { - "epoch": 0.83, - "learning_rate": 7.34044752053632e-06, - "loss": 1.0728, - "step": 2677 - }, - { - "epoch": 0.83, - "learning_rate": 7.31425905317829e-06, - "loss": 1.0629, - "step": 2678 - }, - { - "epoch": 0.83, - "learning_rate": 7.2881136984571e-06, - "loss": 1.0404, - "step": 2679 - }, - { - "epoch": 0.83, - "learning_rate": 7.2620114827795905e-06, - "loss": 0.8985, - "step": 2680 - }, - { - "epoch": 0.83, - "learning_rate": 7.2359524325090245e-06, - "loss": 1.043, - "step": 2681 - }, - { - "epoch": 0.83, - "learning_rate": 7.209936573965042e-06, - "loss": 1.0595, - "step": 2682 - }, - { - "epoch": 0.83, - "learning_rate": 7.183963933423698e-06, - "loss": 0.9619, - "step": 2683 - }, - { - "epoch": 0.83, - "learning_rate": 7.158034537117386e-06, - "loss": 1.0222, - "step": 2684 - }, - { - "epoch": 0.83, - "learning_rate": 7.132148411234818e-06, - "loss": 1.0148, - "step": 2685 - }, - { - "epoch": 0.83, - "learning_rate": 7.106305581921008e-06, - "loss": 0.9722, - "step": 2686 - }, - { - "epoch": 0.83, - "learning_rate": 7.0805060752772435e-06, - "loss": 1.1094, - "step": 2687 - }, - { - "epoch": 0.83, - "learning_rate": 7.054749917361025e-06, - "loss": 0.9791, - "step": 2688 - }, - { - "epoch": 0.83, - "learning_rate": 7.029037134186112e-06, - "loss": 0.9795, - "step": 2689 - }, - { - "epoch": 0.83, - "learning_rate": 7.003367751722434e-06, - "loss": 0.9872, - "step": 2690 - }, - { - "epoch": 0.83, - "learning_rate": 6.9777417958960976e-06, - "loss": 0.9643, - "step": 2691 - }, - { - "epoch": 0.84, - "learning_rate": 6.952159292589344e-06, - "loss": 1.0195, - "step": 2692 - }, - { - "epoch": 0.84, - "learning_rate": 6.92662026764051e-06, - "loss": 0.9373, - "step": 2693 - }, - { - "epoch": 0.84, - "learning_rate": 6.901124746844045e-06, - "loss": 0.9829, - "step": 2694 - }, - { - "epoch": 0.84, - "learning_rate": 6.875672755950441e-06, - "loss": 1.0686, - "step": 2695 - }, - { - "epoch": 0.84, - "learning_rate": 6.8502643206662275e-06, - "loss": 0.8972, - "step": 2696 - }, - { - "epoch": 0.84, - "learning_rate": 6.824899466653961e-06, - "loss": 1.1086, - "step": 2697 - }, - { - "epoch": 0.84, - "learning_rate": 6.799578219532154e-06, - "loss": 1.0279, - "step": 2698 - }, - { - "epoch": 0.84, - "learning_rate": 6.774300604875311e-06, - "loss": 0.9068, - "step": 2699 - }, - { - "epoch": 0.84, - "learning_rate": 6.7490666482138144e-06, - "loss": 0.9729, - "step": 2700 - }, - { - "epoch": 0.84, - "learning_rate": 6.7238763750339996e-06, - "loss": 0.9899, - "step": 2701 - }, - { - "epoch": 0.84, - "learning_rate": 6.698729810778065e-06, - "loss": 0.9995, - "step": 2702 - }, - { - "epoch": 0.84, - "learning_rate": 6.673626980844067e-06, - "loss": 1.086, - "step": 2703 - }, - { - "epoch": 0.84, - "learning_rate": 6.648567910585873e-06, - "loss": 0.9002, - "step": 2704 - }, - { - "epoch": 0.84, - "learning_rate": 6.6235526253131896e-06, - "loss": 0.9832, - "step": 2705 - }, - { - "epoch": 0.84, - "learning_rate": 6.598581150291466e-06, - "loss": 1.0299, - "step": 2706 - }, - { - "epoch": 0.84, - "learning_rate": 6.5736535107419005e-06, - "loss": 0.984, - "step": 2707 - }, - { - "epoch": 0.84, - "learning_rate": 6.548769731841437e-06, - "loss": 0.991, - "step": 2708 - }, - { - "epoch": 0.84, - "learning_rate": 6.523929838722725e-06, - "loss": 0.8902, - "step": 2709 - }, - { - "epoch": 0.84, - "learning_rate": 6.499133856474077e-06, - "loss": 1.1251, - "step": 2710 - }, - { - "epoch": 0.84, - "learning_rate": 6.474381810139446e-06, - "loss": 0.974, - "step": 2711 - }, - { - "epoch": 0.84, - "learning_rate": 6.449673724718447e-06, - "loss": 0.9369, - "step": 2712 - }, - { - "epoch": 0.84, - "learning_rate": 6.4250096251662395e-06, - "loss": 1.0529, - "step": 2713 - }, - { - "epoch": 0.84, - "learning_rate": 6.4003895363935975e-06, - "loss": 0.9582, - "step": 2714 - }, - { - "epoch": 0.84, - "learning_rate": 6.375813483266835e-06, - "loss": 0.9654, - "step": 2715 - }, - { - "epoch": 0.84, - "learning_rate": 6.351281490607785e-06, - "loss": 1.0073, - "step": 2716 - }, - { - "epoch": 0.84, - "learning_rate": 6.326793583193796e-06, - "loss": 0.9944, - "step": 2717 - }, - { - "epoch": 0.84, - "learning_rate": 6.30234978575765e-06, - "loss": 0.8801, - "step": 2718 - }, - { - "epoch": 0.84, - "learning_rate": 6.27795012298763e-06, - "loss": 1.0767, - "step": 2719 - }, - { - "epoch": 0.84, - "learning_rate": 6.253594619527397e-06, - "loss": 1.0061, - "step": 2720 - }, - { - "epoch": 0.84, - "learning_rate": 6.229283299976041e-06, - "loss": 0.9879, - "step": 2721 - }, - { - "epoch": 0.84, - "learning_rate": 6.205016188888013e-06, - "loss": 0.9533, - "step": 2722 - }, - { - "epoch": 0.84, - "learning_rate": 6.18079331077312e-06, - "loss": 1.0642, - "step": 2723 - }, - { - "epoch": 0.85, - "learning_rate": 6.156614690096502e-06, - "loss": 0.995, - "step": 2724 - }, - { - "epoch": 0.85, - "learning_rate": 6.1324803512785685e-06, - "loss": 0.9839, - "step": 2725 - }, - { - "epoch": 0.85, - "learning_rate": 6.108390318695028e-06, - "loss": 0.99, - "step": 2726 - }, - { - "epoch": 0.85, - "learning_rate": 6.084344616676841e-06, - "loss": 1.0102, - "step": 2727 - }, - { - "epoch": 0.85, - "learning_rate": 6.060343269510188e-06, - "loss": 0.8768, - "step": 2728 - }, - { - "epoch": 0.85, - "learning_rate": 6.036386301436448e-06, - "loss": 1.0625, - "step": 2729 - }, - { - "epoch": 0.85, - "learning_rate": 6.012473736652191e-06, - "loss": 1.0519, - "step": 2730 - }, - { - "epoch": 0.85, - "learning_rate": 5.9886055993091164e-06, - "loss": 0.9707, - "step": 2731 - }, - { - "epoch": 0.85, - "learning_rate": 5.964781913514062e-06, - "loss": 1.0213, - "step": 2732 - }, - { - "epoch": 0.85, - "learning_rate": 5.941002703328974e-06, - "loss": 0.938, - "step": 2733 - }, - { - "epoch": 0.85, - "learning_rate": 5.9172679927708805e-06, - "loss": 1.0281, - "step": 2734 - }, - { - "epoch": 0.85, - "learning_rate": 5.893577805811856e-06, - "loss": 0.975, - "step": 2735 - }, - { - "epoch": 0.85, - "learning_rate": 5.869932166379022e-06, - "loss": 0.9819, - "step": 2736 - }, - { - "epoch": 0.85, - "learning_rate": 5.84633109835449e-06, - "loss": 0.9495, - "step": 2737 - }, - { - "epoch": 0.85, - "learning_rate": 5.822774625575356e-06, - "loss": 0.9948, - "step": 2738 - }, - { - "epoch": 0.85, - "learning_rate": 5.799262771833675e-06, - "loss": 1.0037, - "step": 2739 - }, - { - "epoch": 0.85, - "learning_rate": 5.775795560876446e-06, - "loss": 0.856, - "step": 2740 - }, - { - "epoch": 0.85, - "learning_rate": 5.752373016405577e-06, - "loss": 0.9328, - "step": 2741 - }, - { - "epoch": 0.85, - "learning_rate": 5.728995162077866e-06, - "loss": 1.0464, - "step": 2742 - }, - { - "epoch": 0.85, - "learning_rate": 5.705662021504943e-06, - "loss": 1.0752, - "step": 2743 - }, - { - "epoch": 0.85, - "learning_rate": 5.682373618253323e-06, - "loss": 0.9482, - "step": 2744 - }, - { - "epoch": 0.85, - "learning_rate": 5.659129975844302e-06, - "loss": 0.9647, - "step": 2745 - }, - { - "epoch": 0.85, - "learning_rate": 5.6359311177539786e-06, - "loss": 1.0088, - "step": 2746 - }, - { - "epoch": 0.85, - "learning_rate": 5.612777067413227e-06, - "loss": 1.0954, - "step": 2747 - }, - { - "epoch": 0.85, - "learning_rate": 5.589667848207653e-06, - "loss": 0.9705, - "step": 2748 - }, - { - "epoch": 0.85, - "learning_rate": 5.566603483477606e-06, - "loss": 0.9182, - "step": 2749 - }, - { - "epoch": 0.85, - "learning_rate": 5.5435839965180925e-06, - "loss": 1.0801, - "step": 2750 - }, - { - "epoch": 0.85, - "learning_rate": 5.520609410578825e-06, - "loss": 0.9192, - "step": 2751 - }, - { - "epoch": 0.85, - "learning_rate": 5.49767974886416e-06, - "loss": 1.0412, - "step": 2752 - }, - { - "epoch": 0.85, - "learning_rate": 5.474795034533064e-06, - "loss": 0.9637, - "step": 2753 - }, - { - "epoch": 0.85, - "learning_rate": 5.451955290699134e-06, - "loss": 0.9778, - "step": 2754 - }, - { - "epoch": 0.85, - "learning_rate": 5.429160540430533e-06, - "loss": 1.0622, - "step": 2755 - }, - { - "epoch": 0.85, - "learning_rate": 5.406410806749967e-06, - "loss": 1.0122, - "step": 2756 - }, - { - "epoch": 0.86, - "learning_rate": 5.38370611263469e-06, - "loss": 0.9322, - "step": 2757 - }, - { - "epoch": 0.86, - "learning_rate": 5.361046481016463e-06, - "loss": 1.0188, - "step": 2758 - }, - { - "epoch": 0.86, - "learning_rate": 5.338431934781535e-06, - "loss": 0.9679, - "step": 2759 - }, - { - "epoch": 0.86, - "learning_rate": 5.315862496770618e-06, - "loss": 0.9979, - "step": 2760 - }, - { - "epoch": 0.86, - "learning_rate": 5.293338189778874e-06, - "loss": 0.8978, - "step": 2761 - }, - { - "epoch": 0.86, - "learning_rate": 5.2708590365558695e-06, - "loss": 0.9844, - "step": 2762 - }, - { - "epoch": 0.86, - "learning_rate": 5.248425059805556e-06, - "loss": 1.0891, - "step": 2763 - }, - { - "epoch": 0.86, - "learning_rate": 5.226036282186286e-06, - "loss": 0.931, - "step": 2764 - }, - { - "epoch": 0.86, - "learning_rate": 5.203692726310738e-06, - "loss": 1.0216, - "step": 2765 - }, - { - "epoch": 0.86, - "learning_rate": 5.181394414745921e-06, - "loss": 0.9797, - "step": 2766 - }, - { - "epoch": 0.86, - "learning_rate": 5.159141370013165e-06, - "loss": 1.0209, - "step": 2767 - }, - { - "epoch": 0.86, - "learning_rate": 5.136933614588047e-06, - "loss": 1.0332, - "step": 2768 - }, - { - "epoch": 0.86, - "learning_rate": 5.114771170900434e-06, - "loss": 1.0191, - "step": 2769 - }, - { - "epoch": 0.86, - "learning_rate": 5.092654061334401e-06, - "loss": 0.9486, - "step": 2770 - }, - { - "epoch": 0.86, - "learning_rate": 5.070582308228256e-06, - "loss": 0.9956, - "step": 2771 - }, - { - "epoch": 0.86, - "learning_rate": 5.048555933874482e-06, - "loss": 0.8923, - "step": 2772 - }, - { - "epoch": 0.86, - "learning_rate": 5.026574960519748e-06, - "loss": 0.9731, - "step": 2773 - }, - { - "epoch": 0.86, - "learning_rate": 5.0046394103648534e-06, - "loss": 0.9816, - "step": 2774 - }, - { - "epoch": 0.86, - "learning_rate": 4.982749305564726e-06, - "loss": 0.9904, - "step": 2775 - }, - { - "epoch": 0.86, - "learning_rate": 4.960904668228384e-06, - "loss": 0.9962, - "step": 2776 - }, - { - "epoch": 0.86, - "learning_rate": 4.939105520418929e-06, - "loss": 0.9438, - "step": 2777 - }, - { - "epoch": 0.86, - "learning_rate": 4.917351884153532e-06, - "loss": 1.1228, - "step": 2778 - }, - { - "epoch": 0.86, - "learning_rate": 4.895643781403375e-06, - "loss": 1.1131, - "step": 2779 - }, - { - "epoch": 0.86, - "learning_rate": 4.873981234093677e-06, - "loss": 0.9251, - "step": 2780 - }, - { - "epoch": 0.86, - "learning_rate": 4.85236426410362e-06, - "loss": 1.0846, - "step": 2781 - }, - { - "epoch": 0.86, - "learning_rate": 4.830792893266345e-06, - "loss": 1.0059, - "step": 2782 - }, - { - "epoch": 0.86, - "learning_rate": 4.809267143368978e-06, - "loss": 0.9462, - "step": 2783 - }, - { - "epoch": 0.86, - "learning_rate": 4.7877870361525414e-06, - "loss": 1.0583, - "step": 2784 - }, - { - "epoch": 0.86, - "learning_rate": 4.766352593311962e-06, - "loss": 1.031, - "step": 2785 - }, - { - "epoch": 0.86, - "learning_rate": 4.744963836496041e-06, - "loss": 0.9167, - "step": 2786 - }, - { - "epoch": 0.86, - "learning_rate": 4.723620787307465e-06, - "loss": 0.9436, - "step": 2787 - }, - { - "epoch": 0.86, - "learning_rate": 4.7023234673026995e-06, - "loss": 0.9929, - "step": 2788 - }, - { - "epoch": 0.87, - "learning_rate": 4.681071897992073e-06, - "loss": 0.9924, - "step": 2789 - }, - { - "epoch": 0.87, - "learning_rate": 4.659866100839694e-06, - "loss": 0.9369, - "step": 2790 - }, - { - "epoch": 0.87, - "learning_rate": 4.638706097263429e-06, - "loss": 1.0333, - "step": 2791 - }, - { - "epoch": 0.87, - "learning_rate": 4.617591908634911e-06, - "loss": 1.1222, - "step": 2792 - }, - { - "epoch": 0.87, - "learning_rate": 4.596523556279475e-06, - "loss": 0.918, - "step": 2793 - }, - { - "epoch": 0.87, - "learning_rate": 4.575501061476195e-06, - "loss": 1.0381, - "step": 2794 - }, - { - "epoch": 0.87, - "learning_rate": 4.554524445457786e-06, - "loss": 1.0933, - "step": 2795 - }, - { - "epoch": 0.87, - "learning_rate": 4.5335937294106624e-06, - "loss": 0.9153, - "step": 2796 - }, - { - "epoch": 0.87, - "learning_rate": 4.512708934474863e-06, - "loss": 1.0132, - "step": 2797 - }, - { - "epoch": 0.87, - "learning_rate": 4.4918700817440536e-06, - "loss": 0.8788, - "step": 2798 - }, - { - "epoch": 0.87, - "learning_rate": 4.471077192265494e-06, - "loss": 1.0035, - "step": 2799 - }, - { - "epoch": 0.87, - "learning_rate": 4.450330287040027e-06, - "loss": 0.9913, - "step": 2800 - }, - { - "epoch": 0.87, - "learning_rate": 4.429629387022027e-06, - "loss": 0.9681, - "step": 2801 - }, - { - "epoch": 0.87, - "learning_rate": 4.408974513119435e-06, - "loss": 0.9818, - "step": 2802 - }, - { - "epoch": 0.87, - "learning_rate": 4.388365686193685e-06, - "loss": 1.0394, - "step": 2803 - }, - { - "epoch": 0.87, - "learning_rate": 4.367802927059722e-06, - "loss": 0.9104, - "step": 2804 - }, - { - "epoch": 0.87, - "learning_rate": 4.3472862564859496e-06, - "loss": 1.0858, - "step": 2805 - }, - { - "epoch": 0.87, - "learning_rate": 4.326815695194214e-06, - "loss": 0.9177, - "step": 2806 - }, - { - "epoch": 0.87, - "learning_rate": 4.306391263859811e-06, - "loss": 0.9733, - "step": 2807 - }, - { - "epoch": 0.87, - "learning_rate": 4.286012983111421e-06, - "loss": 0.9321, - "step": 2808 - }, - { - "epoch": 0.87, - "learning_rate": 4.265680873531136e-06, - "loss": 0.9354, - "step": 2809 - }, - { - "epoch": 0.87, - "learning_rate": 4.245394955654403e-06, - "loss": 0.9978, - "step": 2810 - }, - { - "epoch": 0.87, - "learning_rate": 4.225155249970014e-06, - "loss": 1.1107, - "step": 2811 - }, - { - "epoch": 0.87, - "learning_rate": 4.204961776920102e-06, - "loss": 0.9783, - "step": 2812 - }, - { - "epoch": 0.87, - "learning_rate": 4.184814556900079e-06, - "loss": 0.9028, - "step": 2813 - }, - { - "epoch": 0.87, - "learning_rate": 4.164713610258664e-06, - "loss": 0.9375, - "step": 2814 - }, - { - "epoch": 0.87, - "learning_rate": 4.14465895729782e-06, - "loss": 1.081, - "step": 2815 - }, - { - "epoch": 0.87, - "learning_rate": 4.124650618272779e-06, - "loss": 1.0121, - "step": 2816 - }, - { - "epoch": 0.87, - "learning_rate": 4.104688613391982e-06, - "loss": 0.9292, - "step": 2817 - }, - { - "epoch": 0.87, - "learning_rate": 4.0847729628170575e-06, - "loss": 0.9999, - "step": 2818 - }, - { - "epoch": 0.87, - "learning_rate": 4.064903686662841e-06, - "loss": 0.9774, - "step": 2819 - }, - { - "epoch": 0.87, - "learning_rate": 4.045080804997314e-06, - "loss": 1.028, - "step": 2820 - }, - { - "epoch": 0.88, - "learning_rate": 4.025304337841607e-06, - "loss": 0.9908, - "step": 2821 - }, - { - "epoch": 0.88, - "learning_rate": 4.005574305169968e-06, - "loss": 0.9488, - "step": 2822 - }, - { - "epoch": 0.88, - "learning_rate": 3.985890726909747e-06, - "loss": 1.0824, - "step": 2823 - }, - { - "epoch": 0.88, - "learning_rate": 3.966253622941385e-06, - "loss": 1.0342, - "step": 2824 - }, - { - "epoch": 0.88, - "learning_rate": 3.946663013098373e-06, - "loss": 0.9765, - "step": 2825 - }, - { - "epoch": 0.88, - "learning_rate": 3.927118917167227e-06, - "loss": 0.938, - "step": 2826 - }, - { - "epoch": 0.88, - "learning_rate": 3.907621354887519e-06, - "loss": 1.0297, - "step": 2827 - }, - { - "epoch": 0.88, - "learning_rate": 3.888170345951802e-06, - "loss": 0.9841, - "step": 2828 - }, - { - "epoch": 0.88, - "learning_rate": 3.868765910005606e-06, - "loss": 0.9806, - "step": 2829 - }, - { - "epoch": 0.88, - "learning_rate": 3.849408066647448e-06, - "loss": 0.8701, - "step": 2830 - }, - { - "epoch": 0.88, - "learning_rate": 3.830096835428737e-06, - "loss": 1.1291, - "step": 2831 - }, - { - "epoch": 0.88, - "learning_rate": 3.810832235853862e-06, - "loss": 0.8757, - "step": 2832 - }, - { - "epoch": 0.88, - "learning_rate": 3.791614287380063e-06, - "loss": 1.0602, - "step": 2833 - }, - { - "epoch": 0.88, - "learning_rate": 3.772443009417492e-06, - "loss": 0.9876, - "step": 2834 - }, - { - "epoch": 0.88, - "learning_rate": 3.7533184213291663e-06, - "loss": 0.9172, - "step": 2835 - }, - { - "epoch": 0.88, - "learning_rate": 3.734240542430928e-06, - "loss": 1.0225, - "step": 2836 - }, - { - "epoch": 0.88, - "learning_rate": 3.7152093919914642e-06, - "loss": 0.9986, - "step": 2837 - }, - { - "epoch": 0.88, - "learning_rate": 3.696224989232239e-06, - "loss": 1.054, - "step": 2838 - }, - { - "epoch": 0.88, - "learning_rate": 3.6772873533275185e-06, - "loss": 0.9517, - "step": 2839 - }, - { - "epoch": 0.88, - "learning_rate": 3.658396503404343e-06, - "loss": 0.9973, - "step": 2840 - }, - { - "epoch": 0.88, - "learning_rate": 3.639552458542478e-06, - "loss": 1.0197, - "step": 2841 - }, - { - "epoch": 0.88, - "learning_rate": 3.6207552377744315e-06, - "loss": 0.9704, - "step": 2842 - }, - { - "epoch": 0.88, - "learning_rate": 3.602004860085406e-06, - "loss": 0.8913, - "step": 2843 - }, - { - "epoch": 0.88, - "learning_rate": 3.5833013444133147e-06, - "loss": 1.1073, - "step": 2844 - }, - { - "epoch": 0.88, - "learning_rate": 3.564644709648707e-06, - "loss": 1.0148, - "step": 2845 - }, - { - "epoch": 0.88, - "learning_rate": 3.5460349746348097e-06, - "loss": 0.8993, - "step": 2846 - }, - { - "epoch": 0.88, - "learning_rate": 3.527472158167466e-06, - "loss": 0.9581, - "step": 2847 - }, - { - "epoch": 0.88, - "learning_rate": 3.5089562789951458e-06, - "loss": 0.9602, - "step": 2848 - }, - { - "epoch": 0.88, - "learning_rate": 3.4904873558189054e-06, - "loss": 1.045, - "step": 2849 - }, - { - "epoch": 0.88, - "learning_rate": 3.472065407292369e-06, - "loss": 1.0317, - "step": 2850 - }, - { - "epoch": 0.88, - "learning_rate": 3.4536904520217185e-06, - "loss": 0.9756, - "step": 2851 - }, - { - "epoch": 0.88, - "learning_rate": 3.435362508565676e-06, - "loss": 1.0323, - "step": 2852 - }, - { - "epoch": 0.89, - "learning_rate": 3.417081595435484e-06, - "loss": 0.9568, - "step": 2853 - }, - { - "epoch": 0.89, - "learning_rate": 3.3988477310948787e-06, - "loss": 1.1493, - "step": 2854 - }, - { - "epoch": 0.89, - "learning_rate": 3.380660933960089e-06, - "loss": 0.878, - "step": 2855 - }, - { - "epoch": 0.89, - "learning_rate": 3.362521222399778e-06, - "loss": 0.9265, - "step": 2856 - }, - { - "epoch": 0.89, - "learning_rate": 3.344428614735096e-06, - "loss": 1.084, - "step": 2857 - }, - { - "epoch": 0.89, - "learning_rate": 3.326383129239563e-06, - "loss": 0.9942, - "step": 2858 - }, - { - "epoch": 0.89, - "learning_rate": 3.3083847841391512e-06, - "loss": 0.9254, - "step": 2859 - }, - { - "epoch": 0.89, - "learning_rate": 3.290433597612208e-06, - "loss": 0.9446, - "step": 2860 - }, - { - "epoch": 0.89, - "learning_rate": 3.272529587789447e-06, - "loss": 0.991, - "step": 2861 - }, - { - "epoch": 0.89, - "learning_rate": 3.2546727727539363e-06, - "loss": 0.9721, - "step": 2862 - }, - { - "epoch": 0.89, - "learning_rate": 3.2368631705410655e-06, - "loss": 0.9382, - "step": 2863 - }, - { - "epoch": 0.89, - "learning_rate": 3.2191007991385524e-06, - "loss": 1.0334, - "step": 2864 - }, - { - "epoch": 0.89, - "learning_rate": 3.2013856764864126e-06, - "loss": 0.9881, - "step": 2865 - }, - { - "epoch": 0.89, - "learning_rate": 3.183717820476928e-06, - "loss": 0.9862, - "step": 2866 - }, - { - "epoch": 0.89, - "learning_rate": 3.166097248954669e-06, - "loss": 0.9948, - "step": 2867 - }, - { - "epoch": 0.89, - "learning_rate": 3.1485239797164e-06, - "loss": 0.9601, - "step": 2868 - }, - { - "epoch": 0.89, - "learning_rate": 3.1309980305111675e-06, - "loss": 0.973, - "step": 2869 - }, - { - "epoch": 0.89, - "learning_rate": 3.113519419040173e-06, - "loss": 0.9957, - "step": 2870 - }, - { - "epoch": 0.89, - "learning_rate": 3.0960881629568338e-06, - "loss": 1.0073, - "step": 2871 - }, - { - "epoch": 0.89, - "learning_rate": 3.07870427986674e-06, - "loss": 0.9317, - "step": 2872 - }, - { - "epoch": 0.89, - "learning_rate": 3.061367787327629e-06, - "loss": 0.8533, - "step": 2873 - }, - { - "epoch": 0.89, - "learning_rate": 3.044078702849379e-06, - "loss": 1.0928, - "step": 2874 - }, - { - "epoch": 0.89, - "learning_rate": 3.0268370438939766e-06, - "loss": 0.9079, - "step": 2875 - }, - { - "epoch": 0.89, - "learning_rate": 3.009642827875503e-06, - "loss": 1.004, - "step": 2876 - }, - { - "epoch": 0.89, - "learning_rate": 2.992496072160139e-06, - "loss": 0.9912, - "step": 2877 - }, - { - "epoch": 0.89, - "learning_rate": 2.9753967940661265e-06, - "loss": 1.0892, - "step": 2878 - }, - { - "epoch": 0.89, - "learning_rate": 2.95834501086375e-06, - "loss": 0.9038, - "step": 2879 - }, - { - "epoch": 0.89, - "learning_rate": 2.9413407397753335e-06, - "loss": 1.0826, - "step": 2880 - }, - { - "epoch": 0.89, - "learning_rate": 2.9243839979751943e-06, - "loss": 1.0093, - "step": 2881 - }, - { - "epoch": 0.89, - "learning_rate": 2.9074748025896658e-06, - "loss": 0.9915, - "step": 2882 - }, - { - "epoch": 0.89, - "learning_rate": 2.8906131706970374e-06, - "loss": 0.9698, - "step": 2883 - }, - { - "epoch": 0.89, - "learning_rate": 2.8737991193275805e-06, - "loss": 1.0477, - "step": 2884 - }, - { - "epoch": 0.89, - "learning_rate": 2.8570326654634995e-06, - "loss": 0.9379, - "step": 2885 - }, - { - "epoch": 0.9, - "learning_rate": 2.840313826038932e-06, - "loss": 1.0357, - "step": 2886 - }, - { - "epoch": 0.9, - "learning_rate": 2.8236426179399256e-06, - "loss": 0.8952, - "step": 2887 - }, - { - "epoch": 0.9, - "learning_rate": 2.807019058004412e-06, - "loss": 1.0222, - "step": 2888 - }, - { - "epoch": 0.9, - "learning_rate": 2.790443163022194e-06, - "loss": 1.0137, - "step": 2889 - }, - { - "epoch": 0.9, - "learning_rate": 2.7739149497349406e-06, - "loss": 0.9587, - "step": 2890 - }, - { - "epoch": 0.9, - "learning_rate": 2.7574344348361704e-06, - "loss": 0.9873, - "step": 2891 - }, - { - "epoch": 0.9, - "learning_rate": 2.7410016349712244e-06, - "loss": 1.0414, - "step": 2892 - }, - { - "epoch": 0.9, - "learning_rate": 2.7246165667372315e-06, - "loss": 1.0908, - "step": 2893 - }, - { - "epoch": 0.9, - "learning_rate": 2.708279246683132e-06, - "loss": 0.9707, - "step": 2894 - }, - { - "epoch": 0.9, - "learning_rate": 2.6919896913096263e-06, - "loss": 0.8908, - "step": 2895 - }, - { - "epoch": 0.9, - "learning_rate": 2.675747917069188e-06, - "loss": 1.0085, - "step": 2896 - }, - { - "epoch": 0.9, - "learning_rate": 2.659553940366016e-06, - "loss": 1.0997, - "step": 2897 - }, - { - "epoch": 0.9, - "learning_rate": 2.6434077775560506e-06, - "loss": 0.9714, - "step": 2898 - }, - { - "epoch": 0.9, - "learning_rate": 2.6273094449469292e-06, - "loss": 0.9306, - "step": 2899 - }, - { - "epoch": 0.9, - "learning_rate": 2.61125895879798e-06, - "loss": 0.9903, - "step": 2900 - }, - { - "epoch": 0.9, - "learning_rate": 2.5952563353202065e-06, - "loss": 0.918, - "step": 2901 - }, - { - "epoch": 0.9, - "learning_rate": 2.579301590676275e-06, - "loss": 1.0024, - "step": 2902 - }, - { - "epoch": 0.9, - "learning_rate": 2.5633947409804905e-06, - "loss": 0.954, - "step": 2903 - }, - { - "epoch": 0.9, - "learning_rate": 2.5475358022987894e-06, - "loss": 1.0279, - "step": 2904 - }, - { - "epoch": 0.9, - "learning_rate": 2.531724790648715e-06, - "loss": 1.1077, - "step": 2905 - }, - { - "epoch": 0.9, - "learning_rate": 2.5159617219993935e-06, - "loss": 0.9668, - "step": 2906 - }, - { - "epoch": 0.9, - "learning_rate": 2.5002466122715506e-06, - "loss": 0.8727, - "step": 2907 - }, - { - "epoch": 0.9, - "learning_rate": 2.484579477337451e-06, - "loss": 0.8931, - "step": 2908 - }, - { - "epoch": 0.9, - "learning_rate": 2.46896033302092e-06, - "loss": 1.0894, - "step": 2909 - }, - { - "epoch": 0.9, - "learning_rate": 2.4533891950973054e-06, - "loss": 1.0718, - "step": 2910 - }, - { - "epoch": 0.9, - "learning_rate": 2.4378660792934714e-06, - "loss": 0.949, - "step": 2911 - }, - { - "epoch": 0.9, - "learning_rate": 2.422391001287777e-06, - "loss": 0.9193, - "step": 2912 - }, - { - "epoch": 0.9, - "learning_rate": 2.4069639767100806e-06, - "loss": 0.9935, - "step": 2913 - }, - { - "epoch": 0.9, - "learning_rate": 2.391585021141668e-06, - "loss": 0.951, - "step": 2914 - }, - { - "epoch": 0.9, - "learning_rate": 2.376254150115309e-06, - "loss": 1.0278, - "step": 2915 - }, - { - "epoch": 0.9, - "learning_rate": 2.3609713791151956e-06, - "loss": 0.9297, - "step": 2916 - }, - { - "epoch": 0.9, - "learning_rate": 2.345736723576947e-06, - "loss": 1.0993, - "step": 2917 - }, - { - "epoch": 0.91, - "learning_rate": 2.3305501988875713e-06, - "loss": 1.0178, - "step": 2918 - }, - { - "epoch": 0.91, - "learning_rate": 2.315411820385477e-06, - "loss": 0.9862, - "step": 2919 - }, - { - "epoch": 0.91, - "learning_rate": 2.300321603360428e-06, - "loss": 0.9783, - "step": 2920 - }, - { - "epoch": 0.91, - "learning_rate": 2.285279563053566e-06, - "loss": 0.9665, - "step": 2921 - }, - { - "epoch": 0.91, - "learning_rate": 2.2702857146573663e-06, - "loss": 1.0572, - "step": 2922 - }, - { - "epoch": 0.91, - "learning_rate": 2.2553400733156314e-06, - "loss": 0.8868, - "step": 2923 - }, - { - "epoch": 0.91, - "learning_rate": 2.2404426541234645e-06, - "loss": 0.9011, - "step": 2924 - }, - { - "epoch": 0.91, - "learning_rate": 2.225593472127291e-06, - "loss": 1.0339, - "step": 2925 - }, - { - "epoch": 0.91, - "learning_rate": 2.2107925423247744e-06, - "loss": 1.0871, - "step": 2926 - }, - { - "epoch": 0.91, - "learning_rate": 2.196039879664874e-06, - "loss": 0.9177, - "step": 2927 - }, - { - "epoch": 0.91, - "learning_rate": 2.1813354990477984e-06, - "loss": 1.0156, - "step": 2928 - }, - { - "epoch": 0.91, - "learning_rate": 2.1666794153249793e-06, - "loss": 0.9466, - "step": 2929 - }, - { - "epoch": 0.91, - "learning_rate": 2.1520716432990863e-06, - "loss": 0.9595, - "step": 2930 - }, - { - "epoch": 0.91, - "learning_rate": 2.1375121977239675e-06, - "loss": 1.1284, - "step": 2931 - }, - { - "epoch": 0.91, - "learning_rate": 2.1230010933046886e-06, - "loss": 0.8406, - "step": 2932 - }, - { - "epoch": 0.91, - "learning_rate": 2.108538344697464e-06, - "loss": 1.0524, - "step": 2933 - }, - { - "epoch": 0.91, - "learning_rate": 2.0941239665096922e-06, - "loss": 0.8434, - "step": 2934 - }, - { - "epoch": 0.91, - "learning_rate": 2.0797579732999063e-06, - "loss": 1.0, - "step": 2935 - }, - { - "epoch": 0.91, - "learning_rate": 2.0654403795777766e-06, - "loss": 1.0355, - "step": 2936 - }, - { - "epoch": 0.91, - "learning_rate": 2.051171199804075e-06, - "loss": 0.9577, - "step": 2937 - }, - { - "epoch": 0.91, - "learning_rate": 2.036950448390701e-06, - "loss": 0.9924, - "step": 2938 - }, - { - "epoch": 0.91, - "learning_rate": 2.022778139700615e-06, - "loss": 1.0836, - "step": 2939 - }, - { - "epoch": 0.91, - "learning_rate": 2.0086542880478566e-06, - "loss": 0.9554, - "step": 2940 - }, - { - "epoch": 0.91, - "learning_rate": 1.994578907697542e-06, - "loss": 0.8636, - "step": 2941 - }, - { - "epoch": 0.91, - "learning_rate": 1.9805520128658215e-06, - "loss": 0.9609, - "step": 2942 - }, - { - "epoch": 0.91, - "learning_rate": 1.9665736177198525e-06, - "loss": 0.9884, - "step": 2943 - }, - { - "epoch": 0.91, - "learning_rate": 1.9526437363778406e-06, - "loss": 0.9695, - "step": 2944 - }, - { - "epoch": 0.91, - "learning_rate": 1.938762382908976e-06, - "loss": 1.0064, - "step": 2945 - }, - { - "epoch": 0.91, - "learning_rate": 1.9249295713334335e-06, - "loss": 1.0773, - "step": 2946 - }, - { - "epoch": 0.91, - "learning_rate": 1.91114531562237e-06, - "loss": 1.0186, - "step": 2947 - }, - { - "epoch": 0.91, - "learning_rate": 1.8974096296978939e-06, - "loss": 0.8899, - "step": 2948 - }, - { - "epoch": 0.91, - "learning_rate": 1.8837225274330572e-06, - "loss": 1.0526, - "step": 2949 - }, - { - "epoch": 0.92, - "learning_rate": 1.8700840226518634e-06, - "loss": 0.9175, - "step": 2950 - }, - { - "epoch": 0.92, - "learning_rate": 1.8564941291291881e-06, - "loss": 1.0293, - "step": 2951 - }, - { - "epoch": 0.92, - "learning_rate": 1.842952860590852e-06, - "loss": 1.0353, - "step": 2952 - }, - { - "epoch": 0.92, - "learning_rate": 1.8294602307135477e-06, - "loss": 0.9785, - "step": 2953 - }, - { - "epoch": 0.92, - "learning_rate": 1.816016253124836e-06, - "loss": 1.0034, - "step": 2954 - }, - { - "epoch": 0.92, - "learning_rate": 1.8026209414031658e-06, - "loss": 0.978, - "step": 2955 - }, - { - "epoch": 0.92, - "learning_rate": 1.7892743090777875e-06, - "loss": 0.9022, - "step": 2956 - }, - { - "epoch": 0.92, - "learning_rate": 1.775976369628829e-06, - "loss": 1.0279, - "step": 2957 - }, - { - "epoch": 0.92, - "learning_rate": 1.7627271364872134e-06, - "loss": 0.9569, - "step": 2958 - }, - { - "epoch": 0.92, - "learning_rate": 1.7495266230346808e-06, - "loss": 1.1031, - "step": 2959 - }, - { - "epoch": 0.92, - "learning_rate": 1.7363748426037607e-06, - "loss": 0.9568, - "step": 2960 - }, - { - "epoch": 0.92, - "learning_rate": 1.7232718084777665e-06, - "loss": 0.9044, - "step": 2961 - }, - { - "epoch": 0.92, - "learning_rate": 1.7102175338907734e-06, - "loss": 1.0756, - "step": 2962 - }, - { - "epoch": 0.92, - "learning_rate": 1.6972120320276118e-06, - "loss": 0.8913, - "step": 2963 - }, - { - "epoch": 0.92, - "learning_rate": 1.6842553160238472e-06, - "loss": 1.0118, - "step": 2964 - }, - { - "epoch": 0.92, - "learning_rate": 1.6713473989657668e-06, - "loss": 1.028, - "step": 2965 - }, - { - "epoch": 0.92, - "learning_rate": 1.6584882938903922e-06, - "loss": 0.9501, - "step": 2966 - }, - { - "epoch": 0.92, - "learning_rate": 1.6456780137854343e-06, - "loss": 1.0292, - "step": 2967 - }, - { - "epoch": 0.92, - "learning_rate": 1.6329165715892714e-06, - "loss": 0.9966, - "step": 2968 - }, - { - "epoch": 0.92, - "learning_rate": 1.6202039801909763e-06, - "loss": 0.9922, - "step": 2969 - }, - { - "epoch": 0.92, - "learning_rate": 1.6075402524302895e-06, - "loss": 1.0183, - "step": 2970 - }, - { - "epoch": 0.92, - "learning_rate": 1.594925401097569e-06, - "loss": 0.9771, - "step": 2971 - }, - { - "epoch": 0.92, - "learning_rate": 1.5823594389338391e-06, - "loss": 1.068, - "step": 2972 - }, - { - "epoch": 0.92, - "learning_rate": 1.5698423786307314e-06, - "loss": 0.9267, - "step": 2973 - }, - { - "epoch": 0.92, - "learning_rate": 1.557374232830483e-06, - "loss": 0.8393, - "step": 2974 - }, - { - "epoch": 0.92, - "learning_rate": 1.5449550141259427e-06, - "loss": 1.0707, - "step": 2975 - }, - { - "epoch": 0.92, - "learning_rate": 1.5325847350605215e-06, - "loss": 0.9493, - "step": 2976 - }, - { - "epoch": 0.92, - "learning_rate": 1.520263408128214e-06, - "loss": 0.9459, - "step": 2977 - }, - { - "epoch": 0.92, - "learning_rate": 1.5079910457735713e-06, - "loss": 1.0328, - "step": 2978 - }, - { - "epoch": 0.92, - "learning_rate": 1.4957676603917004e-06, - "loss": 0.9188, - "step": 2979 - }, - { - "epoch": 0.92, - "learning_rate": 1.4835932643282257e-06, - "loss": 1.0928, - "step": 2980 - }, - { - "epoch": 0.92, - "learning_rate": 1.4714678698792895e-06, - "loss": 0.9463, - "step": 2981 - }, - { - "epoch": 0.93, - "learning_rate": 1.4593914892915673e-06, - "loss": 0.9837, - "step": 2982 - }, - { - "epoch": 0.93, - "learning_rate": 1.447364134762197e-06, - "loss": 0.91, - "step": 2983 - }, - { - "epoch": 0.93, - "learning_rate": 1.435385818438828e-06, - "loss": 0.953, - "step": 2984 - }, - { - "epoch": 0.93, - "learning_rate": 1.4234565524195665e-06, - "loss": 0.9487, - "step": 2985 - }, - { - "epoch": 0.93, - "learning_rate": 1.411576348752991e-06, - "loss": 1.0857, - "step": 2986 - }, - { - "epoch": 0.93, - "learning_rate": 1.3997452194381034e-06, - "loss": 0.9822, - "step": 2987 - }, - { - "epoch": 0.93, - "learning_rate": 1.3879631764243672e-06, - "loss": 1.0665, - "step": 2988 - }, - { - "epoch": 0.93, - "learning_rate": 1.3762302316116526e-06, - "loss": 0.8318, - "step": 2989 - }, - { - "epoch": 0.93, - "learning_rate": 1.3645463968502415e-06, - "loss": 1.0392, - "step": 2990 - }, - { - "epoch": 0.93, - "learning_rate": 1.3529116839408161e-06, - "loss": 1.0753, - "step": 2991 - }, - { - "epoch": 0.93, - "learning_rate": 1.3413261046344605e-06, - "loss": 1.0261, - "step": 2992 - }, - { - "epoch": 0.93, - "learning_rate": 1.329789670632603e-06, - "loss": 0.914, - "step": 2993 - }, - { - "epoch": 0.93, - "learning_rate": 1.3183023935870563e-06, - "loss": 1.0076, - "step": 2994 - }, - { - "epoch": 0.93, - "learning_rate": 1.3068642850999957e-06, - "loss": 0.947, - "step": 2995 - }, - { - "epoch": 0.93, - "learning_rate": 1.2954753567238965e-06, - "loss": 1.0626, - "step": 2996 - }, - { - "epoch": 0.93, - "learning_rate": 1.2841356199616073e-06, - "loss": 0.925, - "step": 2997 - }, - { - "epoch": 0.93, - "learning_rate": 1.2728450862662612e-06, - "loss": 1.1079, - "step": 2998 - }, - { - "epoch": 0.93, - "learning_rate": 1.2616037670413082e-06, - "loss": 1.031, - "step": 2999 - }, - { - "epoch": 0.93, - "learning_rate": 1.2504116736405003e-06, - "loss": 1.0456, - "step": 3000 - }, - { - "epoch": 0.93, - "learning_rate": 1.2392688173678501e-06, - "loss": 0.9415, - "step": 3001 - }, - { - "epoch": 0.93, - "learning_rate": 1.2281752094776556e-06, - "loss": 0.9803, - "step": 3002 - }, - { - "epoch": 0.93, - "learning_rate": 1.2171308611744759e-06, - "loss": 1.0234, - "step": 3003 - }, - { - "epoch": 0.93, - "learning_rate": 1.2061357836131105e-06, - "loss": 1.0318, - "step": 3004 - }, - { - "epoch": 0.93, - "learning_rate": 1.1951899878985984e-06, - "loss": 0.9229, - "step": 3005 - }, - { - "epoch": 0.93, - "learning_rate": 1.1842934850861964e-06, - "loss": 0.8715, - "step": 3006 - }, - { - "epoch": 0.93, - "learning_rate": 1.17344628618139e-06, - "loss": 0.9794, - "step": 3007 - }, - { - "epoch": 0.93, - "learning_rate": 1.1626484021398542e-06, - "loss": 1.0308, - "step": 3008 - }, - { - "epoch": 0.93, - "learning_rate": 1.1518998438674656e-06, - "loss": 0.9145, - "step": 3009 - }, - { - "epoch": 0.93, - "learning_rate": 1.1412006222202787e-06, - "loss": 0.9049, - "step": 3010 - }, - { - "epoch": 0.93, - "learning_rate": 1.1305507480045108e-06, - "loss": 1.0912, - "step": 3011 - }, - { - "epoch": 0.93, - "learning_rate": 1.1199502319765465e-06, - "loss": 1.0493, - "step": 3012 - }, - { - "epoch": 0.93, - "learning_rate": 1.109399084842927e-06, - "loss": 0.9468, - "step": 3013 - }, - { - "epoch": 0.94, - "learning_rate": 1.0988973172603056e-06, - "loss": 0.9743, - "step": 3014 - }, - { - "epoch": 0.94, - "learning_rate": 1.0884449398354867e-06, - "loss": 0.995, - "step": 3015 - }, - { - "epoch": 0.94, - "learning_rate": 1.0780419631253757e-06, - "loss": 1.0956, - "step": 3016 - }, - { - "epoch": 0.94, - "learning_rate": 1.0676883976369956e-06, - "loss": 0.9673, - "step": 3017 - }, - { - "epoch": 0.94, - "learning_rate": 1.0573842538274425e-06, - "loss": 1.0121, - "step": 3018 - }, - { - "epoch": 0.94, - "learning_rate": 1.0471295421039251e-06, - "loss": 1.0136, - "step": 3019 - }, - { - "epoch": 0.94, - "learning_rate": 1.0369242728237083e-06, - "loss": 0.9364, - "step": 3020 - }, - { - "epoch": 0.94, - "learning_rate": 1.0267684562941193e-06, - "loss": 1.0116, - "step": 3021 - }, - { - "epoch": 0.94, - "learning_rate": 1.016662102772542e-06, - "loss": 0.9254, - "step": 3022 - }, - { - "epoch": 0.94, - "learning_rate": 1.0066052224664057e-06, - "loss": 0.9988, - "step": 3023 - }, - { - "epoch": 0.94, - "learning_rate": 9.965978255331632e-07, - "loss": 0.9929, - "step": 3024 - }, - { - "epoch": 0.94, - "learning_rate": 9.86639922080307e-07, - "loss": 0.9514, - "step": 3025 - }, - { - "epoch": 0.94, - "learning_rate": 9.767315221653196e-07, - "loss": 0.9839, - "step": 3026 - }, - { - "epoch": 0.94, - "learning_rate": 9.668726357956904e-07, - "loss": 1.0002, - "step": 3027 - }, - { - "epoch": 0.94, - "learning_rate": 9.570632729289098e-07, - "loss": 0.9812, - "step": 3028 - }, - { - "epoch": 0.94, - "learning_rate": 9.473034434724359e-07, - "loss": 1.0107, - "step": 3029 - }, - { - "epoch": 0.94, - "learning_rate": 9.375931572837171e-07, - "loss": 0.9111, - "step": 3030 - }, - { - "epoch": 0.94, - "learning_rate": 9.279324241701415e-07, - "loss": 1.0882, - "step": 3031 - }, - { - "epoch": 0.94, - "learning_rate": 9.183212538890651e-07, - "loss": 0.9326, - "step": 3032 - }, - { - "epoch": 0.94, - "learning_rate": 9.087596561477729e-07, - "loss": 0.958, - "step": 3033 - }, - { - "epoch": 0.94, - "learning_rate": 8.992476406034844e-07, - "loss": 1.0231, - "step": 3034 - }, - { - "epoch": 0.94, - "learning_rate": 8.897852168633536e-07, - "loss": 1.0291, - "step": 3035 - }, - { - "epoch": 0.94, - "learning_rate": 8.8037239448443e-07, - "loss": 0.9297, - "step": 3036 - }, - { - "epoch": 0.94, - "learning_rate": 8.710091829736699e-07, - "loss": 1.0526, - "step": 3037 - }, - { - "epoch": 0.94, - "learning_rate": 8.616955917879422e-07, - "loss": 0.9776, - "step": 3038 - }, - { - "epoch": 0.94, - "learning_rate": 8.524316303339608e-07, - "loss": 1.0057, - "step": 3039 - }, - { - "epoch": 0.94, - "learning_rate": 8.432173079683469e-07, - "loss": 1.021, - "step": 3040 - }, - { - "epoch": 0.94, - "learning_rate": 8.340526339975674e-07, - "loss": 0.9662, - "step": 3041 - }, - { - "epoch": 0.94, - "learning_rate": 8.249376176779677e-07, - "loss": 0.8767, - "step": 3042 - }, - { - "epoch": 0.94, - "learning_rate": 8.158722682157005e-07, - "loss": 0.9735, - "step": 3043 - }, - { - "epoch": 0.94, - "learning_rate": 8.068565947667917e-07, - "loss": 1.0555, - "step": 3044 - }, - { - "epoch": 0.94, - "learning_rate": 7.978906064370739e-07, - "loss": 0.8712, - "step": 3045 - }, - { - "epoch": 0.94, - "learning_rate": 7.889743122822035e-07, - "loss": 0.9367, - "step": 3046 - }, - { - "epoch": 0.95, - "learning_rate": 7.80107721307649e-07, - "loss": 0.9936, - "step": 3047 - }, - { - "epoch": 0.95, - "learning_rate": 7.712908424686693e-07, - "loss": 1.0201, - "step": 3048 - }, - { - "epoch": 0.95, - "learning_rate": 7.625236846703243e-07, - "loss": 1.0631, - "step": 3049 - }, - { - "epoch": 0.95, - "learning_rate": 7.538062567674531e-07, - "loss": 1.0006, - "step": 3050 - }, - { - "epoch": 0.95, - "learning_rate": 7.451385675646628e-07, - "loss": 1.0501, - "step": 3051 - }, - { - "epoch": 0.95, - "learning_rate": 7.365206258163282e-07, - "loss": 0.9586, - "step": 3052 - }, - { - "epoch": 0.95, - "learning_rate": 7.279524402265758e-07, - "loss": 0.9469, - "step": 3053 - }, - { - "epoch": 0.95, - "learning_rate": 7.194340194492832e-07, - "loss": 0.9986, - "step": 3054 - }, - { - "epoch": 0.95, - "learning_rate": 7.109653720880683e-07, - "loss": 0.9032, - "step": 3055 - }, - { - "epoch": 0.95, - "learning_rate": 7.025465066962611e-07, - "loss": 1.0244, - "step": 3056 - }, - { - "epoch": 0.95, - "learning_rate": 6.941774317769267e-07, - "loss": 0.9912, - "step": 3057 - }, - { - "epoch": 0.95, - "learning_rate": 6.858581557828481e-07, - "loss": 0.9109, - "step": 3058 - }, - { - "epoch": 0.95, - "learning_rate": 6.775886871164872e-07, - "loss": 0.9844, - "step": 3059 - }, - { - "epoch": 0.95, - "learning_rate": 6.69369034130024e-07, - "loss": 0.9761, - "step": 3060 - }, - { - "epoch": 0.95, - "learning_rate": 6.61199205125318e-07, - "loss": 0.99, - "step": 3061 - }, - { - "epoch": 0.95, - "learning_rate": 6.530792083538906e-07, - "loss": 1.0011, - "step": 3062 - }, - { - "epoch": 0.95, - "learning_rate": 6.450090520169649e-07, - "loss": 0.9526, - "step": 3063 - }, - { - "epoch": 0.95, - "learning_rate": 6.369887442653876e-07, - "loss": 1.0403, - "step": 3064 - }, - { - "epoch": 0.95, - "learning_rate": 6.290182931996846e-07, - "loss": 0.9613, - "step": 3065 - }, - { - "epoch": 0.95, - "learning_rate": 6.210977068700219e-07, - "loss": 0.9944, - "step": 3066 - }, - { - "epoch": 0.95, - "learning_rate": 6.132269932761947e-07, - "loss": 0.9839, - "step": 3067 - }, - { - "epoch": 0.95, - "learning_rate": 6.054061603676331e-07, - "loss": 0.9181, - "step": 3068 - }, - { - "epoch": 0.95, - "learning_rate": 5.976352160433796e-07, - "loss": 1.0326, - "step": 3069 - }, - { - "epoch": 0.95, - "learning_rate": 5.899141681521058e-07, - "loss": 0.978, - "step": 3070 - }, - { - "epoch": 0.95, - "learning_rate": 5.822430244920685e-07, - "loss": 0.9843, - "step": 3071 - }, - { - "epoch": 0.95, - "learning_rate": 5.746217928111252e-07, - "loss": 1.0284, - "step": 3072 - }, - { - "epoch": 0.95, - "learning_rate": 5.670504808067412e-07, - "loss": 1.0222, - "step": 3073 - }, - { - "epoch": 0.95, - "learning_rate": 5.595290961259381e-07, - "loss": 0.9352, - "step": 3074 - }, - { - "epoch": 0.95, - "learning_rate": 5.520576463653226e-07, - "loss": 0.943, - "step": 3075 - }, - { - "epoch": 0.95, - "learning_rate": 5.446361390710697e-07, - "loss": 0.9752, - "step": 3076 - }, - { - "epoch": 0.95, - "learning_rate": 5.372645817389055e-07, - "loss": 1.0655, - "step": 3077 - }, - { - "epoch": 0.95, - "learning_rate": 5.299429818141078e-07, - "loss": 0.8504, - "step": 3078 - }, - { - "epoch": 0.96, - "learning_rate": 5.226713466915e-07, - "loss": 0.94, - "step": 3079 - }, - { - "epoch": 0.96, - "learning_rate": 5.154496837154466e-07, - "loss": 1.0717, - "step": 3080 - }, - { - "epoch": 0.96, - "learning_rate": 5.082780001798237e-07, - "loss": 1.0202, - "step": 3081 - }, - { - "epoch": 0.96, - "learning_rate": 5.011563033280431e-07, - "loss": 1.0385, - "step": 3082 - }, - { - "epoch": 0.96, - "learning_rate": 4.940846003530231e-07, - "loss": 0.9338, - "step": 3083 - }, - { - "epoch": 0.96, - "learning_rate": 4.870628983971948e-07, - "loss": 0.9551, - "step": 3084 - }, - { - "epoch": 0.96, - "learning_rate": 4.800912045524797e-07, - "loss": 1.073, - "step": 3085 - }, - { - "epoch": 0.96, - "learning_rate": 4.731695258602953e-07, - "loss": 0.9683, - "step": 3086 - }, - { - "epoch": 0.96, - "learning_rate": 4.6629786931154384e-07, - "loss": 0.968, - "step": 3087 - }, - { - "epoch": 0.96, - "learning_rate": 4.5947624184660144e-07, - "loss": 1.0187, - "step": 3088 - }, - { - "epoch": 0.96, - "learning_rate": 4.527046503553234e-07, - "loss": 0.9888, - "step": 3089 - }, - { - "epoch": 0.96, - "learning_rate": 4.4598310167702216e-07, - "loss": 1.12, - "step": 3090 - }, - { - "epoch": 0.96, - "learning_rate": 4.393116026004618e-07, - "loss": 1.0124, - "step": 3091 - }, - { - "epoch": 0.96, - "learning_rate": 4.326901598638744e-07, - "loss": 0.951, - "step": 3092 - }, - { - "epoch": 0.96, - "learning_rate": 4.2611878015491044e-07, - "loss": 1.0364, - "step": 3093 - }, - { - "epoch": 0.96, - "learning_rate": 4.195974701106775e-07, - "loss": 0.9784, - "step": 3094 - }, - { - "epoch": 0.96, - "learning_rate": 4.131262363177013e-07, - "loss": 0.9419, - "step": 3095 - }, - { - "epoch": 0.96, - "learning_rate": 4.067050853119314e-07, - "loss": 0.9818, - "step": 3096 - }, - { - "epoch": 0.96, - "learning_rate": 4.0033402357874116e-07, - "loss": 0.9442, - "step": 3097 - }, - { - "epoch": 0.96, - "learning_rate": 3.940130575529055e-07, - "loss": 1.0248, - "step": 3098 - }, - { - "epoch": 0.96, - "learning_rate": 3.8774219361860655e-07, - "loss": 0.9275, - "step": 3099 - }, - { - "epoch": 0.96, - "learning_rate": 3.8152143810942233e-07, - "loss": 1.0492, - "step": 3100 - }, - { - "epoch": 0.96, - "learning_rate": 3.75350797308327e-07, - "loss": 0.9306, - "step": 3101 - }, - { - "epoch": 0.96, - "learning_rate": 3.6923027744766303e-07, - "loss": 0.9661, - "step": 3102 - }, - { - "epoch": 0.96, - "learning_rate": 3.6315988470916884e-07, - "loss": 0.9852, - "step": 3103 - }, - { - "epoch": 0.96, - "learning_rate": 3.5713962522394005e-07, - "loss": 1.0109, - "step": 3104 - }, - { - "epoch": 0.96, - "learning_rate": 3.5116950507245725e-07, - "loss": 0.9922, - "step": 3105 - }, - { - "epoch": 0.96, - "learning_rate": 3.452495302845304e-07, - "loss": 1.063, - "step": 3106 - }, - { - "epoch": 0.96, - "learning_rate": 3.3937970683934893e-07, - "loss": 0.8755, - "step": 3107 - }, - { - "epoch": 0.96, - "learning_rate": 3.335600406654371e-07, - "loss": 1.0589, - "step": 3108 - }, - { - "epoch": 0.96, - "learning_rate": 3.277905376406654e-07, - "loss": 0.9647, - "step": 3109 - }, - { - "epoch": 0.96, - "learning_rate": 3.220712035922335e-07, - "loss": 0.9915, - "step": 3110 - }, - { - "epoch": 0.97, - "learning_rate": 3.164020442966764e-07, - "loss": 0.9427, - "step": 3111 - }, - { - "epoch": 0.97, - "learning_rate": 3.107830654798527e-07, - "loss": 0.8197, - "step": 3112 - }, - { - "epoch": 0.97, - "learning_rate": 3.0521427281693385e-07, - "loss": 1.1582, - "step": 3113 - }, - { - "epoch": 0.97, - "learning_rate": 2.9969567193239845e-07, - "loss": 0.9415, - "step": 3114 - }, - { - "epoch": 0.97, - "learning_rate": 2.942272684000491e-07, - "loss": 0.893, - "step": 3115 - }, - { - "epoch": 0.97, - "learning_rate": 2.888090677429733e-07, - "loss": 1.1188, - "step": 3116 - }, - { - "epoch": 0.97, - "learning_rate": 2.8344107543356036e-07, - "loss": 0.9169, - "step": 3117 - }, - { - "epoch": 0.97, - "learning_rate": 2.7812329689347883e-07, - "loss": 1.0138, - "step": 3118 - }, - { - "epoch": 0.97, - "learning_rate": 2.7285573749370466e-07, - "loss": 0.9612, - "step": 3119 - }, - { - "epoch": 0.97, - "learning_rate": 2.6763840255447095e-07, - "loss": 0.9427, - "step": 3120 - }, - { - "epoch": 0.97, - "learning_rate": 2.6247129734528476e-07, - "loss": 0.9933, - "step": 3121 - }, - { - "epoch": 0.97, - "learning_rate": 2.573544270849326e-07, - "loss": 1.0122, - "step": 3122 - }, - { - "epoch": 0.97, - "learning_rate": 2.522877969414583e-07, - "loss": 0.9465, - "step": 3123 - }, - { - "epoch": 0.97, - "learning_rate": 2.4727141203216285e-07, - "loss": 1.0208, - "step": 3124 - }, - { - "epoch": 0.97, - "learning_rate": 2.423052774235934e-07, - "loss": 0.9678, - "step": 3125 - }, - { - "epoch": 0.97, - "learning_rate": 2.3738939813156557e-07, - "loss": 1.0279, - "step": 3126 - }, - { - "epoch": 0.97, - "learning_rate": 2.3252377912110212e-07, - "loss": 0.9522, - "step": 3127 - }, - { - "epoch": 0.97, - "learning_rate": 2.2770842530649982e-07, - "loss": 0.9797, - "step": 3128 - }, - { - "epoch": 0.97, - "learning_rate": 2.2294334155125717e-07, - "loss": 1.0045, - "step": 3129 - }, - { - "epoch": 0.97, - "learning_rate": 2.1822853266812438e-07, - "loss": 0.9964, - "step": 3130 - }, - { - "epoch": 0.97, - "learning_rate": 2.1356400341905335e-07, - "loss": 0.8745, - "step": 3131 - }, - { - "epoch": 0.97, - "learning_rate": 2.0894975851523113e-07, - "loss": 1.0828, - "step": 3132 - }, - { - "epoch": 0.97, - "learning_rate": 2.0438580261704088e-07, - "loss": 1.0141, - "step": 3133 - }, - { - "epoch": 0.97, - "learning_rate": 1.9987214033408418e-07, - "loss": 1.0027, - "step": 3134 - }, - { - "epoch": 0.97, - "learning_rate": 1.9540877622516995e-07, - "loss": 1.0235, - "step": 3135 - }, - { - "epoch": 0.97, - "learning_rate": 1.9099571479829215e-07, - "loss": 0.8917, - "step": 3136 - }, - { - "epoch": 0.97, - "learning_rate": 1.866329605106576e-07, - "loss": 1.0804, - "step": 3137 - }, - { - "epoch": 0.97, - "learning_rate": 1.8232051776864156e-07, - "loss": 0.9245, - "step": 3138 - }, - { - "epoch": 0.97, - "learning_rate": 1.7805839092781552e-07, - "loss": 0.9267, - "step": 3139 - }, - { - "epoch": 0.97, - "learning_rate": 1.7384658429293598e-07, - "loss": 0.9629, - "step": 3140 - }, - { - "epoch": 0.97, - "learning_rate": 1.6968510211792798e-07, - "loss": 1.005, - "step": 3141 - }, - { - "epoch": 0.97, - "learning_rate": 1.655739486059016e-07, - "loss": 1.0034, - "step": 3142 - }, - { - "epoch": 0.98, - "learning_rate": 1.615131279091131e-07, - "loss": 0.8545, - "step": 3143 - }, - { - "epoch": 0.98, - "learning_rate": 1.575026441289984e-07, - "loss": 0.9477, - "step": 3144 - }, - { - "epoch": 0.98, - "learning_rate": 1.5354250131615622e-07, - "loss": 1.042, - "step": 3145 - }, - { - "epoch": 0.98, - "learning_rate": 1.4963270347032598e-07, - "loss": 1.0102, - "step": 3146 - }, - { - "epoch": 0.98, - "learning_rate": 1.4577325454041002e-07, - "loss": 0.9016, - "step": 3147 - }, - { - "epoch": 0.98, - "learning_rate": 1.4196415842445687e-07, - "loss": 1.0679, - "step": 3148 - }, - { - "epoch": 0.98, - "learning_rate": 1.3820541896965578e-07, - "loss": 0.9644, - "step": 3149 - }, - { - "epoch": 0.98, - "learning_rate": 1.3449703997233665e-07, - "loss": 1.1097, - "step": 3150 - }, - { - "epoch": 0.98, - "learning_rate": 1.3083902517797008e-07, - "loss": 0.9487, - "step": 3151 - }, - { - "epoch": 0.98, - "learning_rate": 1.2723137828115073e-07, - "loss": 0.9431, - "step": 3152 - }, - { - "epoch": 0.98, - "learning_rate": 1.2367410292560277e-07, - "loss": 1.1201, - "step": 3153 - }, - { - "epoch": 0.98, - "learning_rate": 1.2016720270417447e-07, - "loss": 0.9031, - "step": 3154 - }, - { - "epoch": 0.98, - "learning_rate": 1.167106811588492e-07, - "loss": 1.0205, - "step": 3155 - }, - { - "epoch": 0.98, - "learning_rate": 1.1330454178071214e-07, - "loss": 0.9059, - "step": 3156 - }, - { - "epoch": 0.98, - "learning_rate": 1.09948788009967e-07, - "loss": 0.9591, - "step": 3157 - }, - { - "epoch": 0.98, - "learning_rate": 1.066434232359248e-07, - "loss": 1.0326, - "step": 3158 - }, - { - "epoch": 0.98, - "learning_rate": 1.0338845079700954e-07, - "loss": 1.0455, - "step": 3159 - }, - { - "epoch": 0.98, - "learning_rate": 1.0018387398074147e-07, - "loss": 0.9579, - "step": 3160 - }, - { - "epoch": 0.98, - "learning_rate": 9.702969602375378e-08, - "loss": 1.0092, - "step": 3161 - }, - { - "epoch": 0.98, - "learning_rate": 9.392592011177037e-08, - "loss": 0.9027, - "step": 3162 - }, - { - "epoch": 0.98, - "learning_rate": 9.087254937960033e-08, - "loss": 0.955, - "step": 3163 - }, - { - "epoch": 0.98, - "learning_rate": 8.786958691115454e-08, - "loss": 0.9269, - "step": 3164 - }, - { - "epoch": 0.98, - "learning_rate": 8.491703573942355e-08, - "loss": 1.023, - "step": 3165 - }, - { - "epoch": 0.98, - "learning_rate": 8.201489884649417e-08, - "loss": 1.0375, - "step": 3166 - }, - { - "epoch": 0.98, - "learning_rate": 7.916317916352168e-08, - "loss": 0.9354, - "step": 3167 - }, - { - "epoch": 0.98, - "learning_rate": 7.636187957074659e-08, - "loss": 0.9937, - "step": 3168 - }, - { - "epoch": 0.98, - "learning_rate": 7.361100289748901e-08, - "loss": 0.9603, - "step": 3169 - }, - { - "epoch": 0.98, - "learning_rate": 7.091055192213203e-08, - "loss": 1.0482, - "step": 3170 - }, - { - "epoch": 0.98, - "learning_rate": 6.826052937212724e-08, - "loss": 0.9703, - "step": 3171 - }, - { - "epoch": 0.98, - "learning_rate": 6.56609379240114e-08, - "loss": 1.0429, - "step": 3172 - }, - { - "epoch": 0.98, - "learning_rate": 6.311178020336761e-08, - "loss": 0.9638, - "step": 3173 - }, - { - "epoch": 0.98, - "learning_rate": 6.061305878485301e-08, - "loss": 0.9991, - "step": 3174 - }, - { - "epoch": 0.98, - "learning_rate": 5.816477619217109e-08, - "loss": 0.8428, - "step": 3175 - }, - { - "epoch": 0.99, - "learning_rate": 5.576693489809381e-08, - "loss": 0.9243, - "step": 3176 - }, - { - "epoch": 0.99, - "learning_rate": 5.3419537324445044e-08, - "loss": 1.0116, - "step": 3177 - }, - { - "epoch": 0.99, - "learning_rate": 5.1122585842089396e-08, - "loss": 0.9647, - "step": 3178 - }, - { - "epoch": 0.99, - "learning_rate": 4.8876082770960005e-08, - "loss": 0.9901, - "step": 3179 - }, - { - "epoch": 0.99, - "learning_rate": 4.668003038002522e-08, - "loss": 0.931, - "step": 3180 - }, - { - "epoch": 0.99, - "learning_rate": 4.4534430887299694e-08, - "loss": 1.1485, - "step": 3181 - }, - { - "epoch": 0.99, - "learning_rate": 4.243928645983331e-08, - "loss": 1.036, - "step": 3182 - }, - { - "epoch": 0.99, - "learning_rate": 4.039459921373334e-08, - "loss": 0.969, - "step": 3183 - }, - { - "epoch": 0.99, - "learning_rate": 3.84003712141312e-08, - "loss": 1.051, - "step": 3184 - }, - { - "epoch": 0.99, - "learning_rate": 3.645660447519905e-08, - "loss": 0.9761, - "step": 3185 - }, - { - "epoch": 0.99, - "learning_rate": 3.4563300960144264e-08, - "loss": 1.0042, - "step": 3186 - }, - { - "epoch": 0.99, - "learning_rate": 3.2720462581209424e-08, - "loss": 1.0245, - "step": 3187 - }, - { - "epoch": 0.99, - "learning_rate": 3.092809119965012e-08, - "loss": 1.0989, - "step": 3188 - }, - { - "epoch": 0.99, - "learning_rate": 2.9186188625779376e-08, - "loss": 0.8807, - "step": 3189 - }, - { - "epoch": 0.99, - "learning_rate": 2.7494756618906547e-08, - "loss": 0.9602, - "step": 3190 - }, - { - "epoch": 0.99, - "learning_rate": 2.5853796887387316e-08, - "loss": 0.9731, - "step": 3191 - }, - { - "epoch": 0.99, - "learning_rate": 2.426331108859037e-08, - "loss": 1.0561, - "step": 3192 - }, - { - "epoch": 0.99, - "learning_rate": 2.27233008289085e-08, - "loss": 0.9427, - "step": 3193 - }, - { - "epoch": 0.99, - "learning_rate": 2.1233767663747516e-08, - "loss": 1.0726, - "step": 3194 - }, - { - "epoch": 0.99, - "learning_rate": 1.9794713097548434e-08, - "loss": 1.0562, - "step": 3195 - }, - { - "epoch": 0.99, - "learning_rate": 1.840613858374862e-08, - "loss": 0.9356, - "step": 3196 - }, - { - "epoch": 0.99, - "learning_rate": 1.706804552481511e-08, - "loss": 1.0371, - "step": 3197 - }, - { - "epoch": 0.99, - "learning_rate": 1.5780435272216844e-08, - "loss": 1.0193, - "step": 3198 - }, - { - "epoch": 0.99, - "learning_rate": 1.4543309126446858e-08, - "loss": 0.9809, - "step": 3199 - }, - { - "epoch": 0.99, - "learning_rate": 1.3356668337000112e-08, - "loss": 0.9593, - "step": 3200 - }, - { - "epoch": 0.99, - "learning_rate": 1.2220514102390113e-08, - "loss": 0.9234, - "step": 3201 - }, - { - "epoch": 0.99, - "learning_rate": 1.1134847570126727e-08, - "loss": 0.9825, - "step": 3202 - }, - { - "epoch": 0.99, - "learning_rate": 1.009966983674393e-08, - "loss": 1.0412, - "step": 3203 - }, - { - "epoch": 0.99, - "learning_rate": 9.114981947760947e-09, - "loss": 0.9702, - "step": 3204 - }, - { - "epoch": 0.99, - "learning_rate": 8.180784897715565e-09, - "loss": 0.9313, - "step": 3205 - }, - { - "epoch": 0.99, - "learning_rate": 7.297079630158576e-09, - "loss": 1.013, - "step": 3206 - }, - { - "epoch": 0.99, - "learning_rate": 6.463867037614923e-09, - "loss": 0.9655, - "step": 3207 - }, - { - "epoch": 1.0, - "learning_rate": 5.6811479616503126e-09, - "loss": 1.1266, - "step": 3208 - }, - { - "epoch": 1.0, - "learning_rate": 4.948923192793498e-09, - "loss": 0.9024, - "step": 3209 - }, - { - "epoch": 1.0, - "learning_rate": 4.267193470602893e-09, - "loss": 0.9659, - "step": 3210 - }, - { - "epoch": 1.0, - "learning_rate": 3.6359594836277156e-09, - "loss": 0.9528, - "step": 3211 - }, - { - "epoch": 1.0, - "learning_rate": 3.055221869402436e-09, - "loss": 0.9365, - "step": 3212 - }, - { - "epoch": 1.0, - "learning_rate": 2.5249812144856333e-09, - "loss": 0.9425, - "step": 3213 - }, - { - "epoch": 1.0, - "learning_rate": 2.045238054415588e-09, - "loss": 1.1141, - "step": 3214 - }, - { - "epoch": 1.0, - "learning_rate": 1.615992873732486e-09, - "loss": 0.959, - "step": 3215 - }, - { - "epoch": 1.0, - "learning_rate": 1.237246105978418e-09, - "loss": 0.9233, - "step": 3216 - }, - { - "epoch": 1.0, - "learning_rate": 9.089981336807274e-10, - "loss": 0.9565, - "step": 3217 - }, - { - "epoch": 1.0, - "learning_rate": 6.312492883797649e-10, - "loss": 1.0509, - "step": 3218 - }, - { - "epoch": 1.0, - "learning_rate": 4.039998505900311e-10, - "loss": 1.0656, - "step": 3219 - }, - { - "epoch": 1.0, - "learning_rate": 2.272500498445851e-10, - "loss": 0.8974, - "step": 3220 - }, - { - "epoch": 1.0, - "learning_rate": 1.0100006465618705e-10, - "loss": 1.0063, - "step": 3221 - }, - { - "epoch": 1.0, - "learning_rate": 2.5250022539502482e-11, - "loss": 0.9517, - "step": 3222 - }, - { - "epoch": 1.0, - "learning_rate": 0.0, - "loss": 1.01, - "step": 3223 - }, - { - "epoch": 1.0, - "step": 3223, - "total_flos": 0.0, - "train_loss": 0.08356392507482086, - "train_runtime": 7870.2219, - "train_samples_per_second": 841.022, - "train_steps_per_second": 0.41 - } - ], - "logging_steps": 1.0, - "max_steps": 3223, - "num_input_tokens_seen": 0, - "num_train_epochs": 1, - "save_steps": 25, - "total_flos": 0.0, - "train_batch_size": 8, - "trial_name": null, - "trial_params": null -}