diff --git "a/trainer_state.json" "b/trainer_state.json" deleted file mode 100644--- "a/trainer_state.json" +++ /dev/null @@ -1,8236 +0,0 @@ -{ - "best_metric": null, - "best_model_checkpoint": null, - "epoch": 6.998722860791826, - "global_step": 1370, - "is_hyper_param_search": false, - "is_local_process_zero": true, - "is_world_process_zero": true, - "log_history": [ - { - "epoch": 0.01, - "learning_rate": 0.0, - "loss": 1.0562, - "step": 1 - }, - { - "epoch": 0.01, - "learning_rate": 1e-05, - "loss": 0.4675, - "step": 2 - }, - { - "epoch": 0.02, - "learning_rate": 1e-05, - "loss": 0.3332, - "step": 3 - }, - { - "epoch": 0.02, - "learning_rate": 1e-05, - "loss": 0.4042, - "step": 4 - }, - { - "epoch": 0.03, - "learning_rate": 1e-05, - "loss": 0.4214, - "step": 5 - }, - { - "epoch": 0.03, - "learning_rate": 1e-05, - "loss": 0.3946, - "step": 6 - }, - { - "epoch": 0.04, - "learning_rate": 1e-05, - "loss": 0.3061, - "step": 7 - }, - { - "epoch": 0.04, - "learning_rate": 1e-05, - "loss": 0.2138, - "step": 8 - }, - { - "epoch": 0.05, - "learning_rate": 1e-05, - "loss": 0.3077, - "step": 9 - }, - { - "epoch": 0.05, - "learning_rate": 1e-05, - "loss": 0.3196, - "step": 10 - }, - { - "epoch": 0.06, - "learning_rate": 1e-05, - "loss": 0.235, - "step": 11 - }, - { - "epoch": 0.06, - "learning_rate": 1e-05, - "loss": 0.1768, - "step": 12 - }, - { - "epoch": 0.07, - "learning_rate": 1e-05, - "loss": 0.1698, - "step": 13 - }, - { - "epoch": 0.07, - "learning_rate": 1e-05, - "loss": 0.1664, - "step": 14 - }, - { - "epoch": 0.08, - "learning_rate": 1e-05, - "loss": 0.2661, - "step": 15 - }, - { - "epoch": 0.08, - "learning_rate": 1e-05, - "loss": 0.1765, - "step": 16 - }, - { - "epoch": 0.09, - "learning_rate": 1e-05, - "loss": 0.1538, - "step": 17 - }, - { - "epoch": 0.09, - "learning_rate": 1e-05, - "loss": 0.1342, - "step": 18 - }, - { - "epoch": 0.1, - "learning_rate": 1e-05, - "loss": 0.0749, - "step": 19 - }, - { - "epoch": 0.1, - "learning_rate": 1e-05, - "loss": 0.0845, - "step": 20 - }, - { - "epoch": 0.11, - "learning_rate": 1e-05, - "loss": 0.3328, - "step": 21 - }, - { - "epoch": 0.11, - "learning_rate": 1e-05, - "loss": 0.068, - "step": 22 - }, - { - "epoch": 0.12, - "learning_rate": 1e-05, - "loss": 0.1653, - "step": 23 - }, - { - "epoch": 0.12, - "learning_rate": 1e-05, - "loss": 0.1063, - "step": 24 - }, - { - "epoch": 0.13, - "learning_rate": 1e-05, - "loss": 0.1936, - "step": 25 - }, - { - "epoch": 0.13, - "learning_rate": 1e-05, - "loss": 0.1183, - "step": 26 - }, - { - "epoch": 0.14, - "learning_rate": 1e-05, - "loss": 0.2157, - "step": 27 - }, - { - "epoch": 0.14, - "learning_rate": 1e-05, - "loss": 0.1069, - "step": 28 - }, - { - "epoch": 0.15, - "learning_rate": 1e-05, - "loss": 0.3282, - "step": 29 - }, - { - "epoch": 0.15, - "learning_rate": 1e-05, - "loss": 0.1977, - "step": 30 - }, - { - "epoch": 0.16, - "learning_rate": 1e-05, - "loss": 0.1899, - "step": 31 - }, - { - "epoch": 0.16, - "learning_rate": 1e-05, - "loss": 0.1373, - "step": 32 - }, - { - "epoch": 0.17, - "learning_rate": 1e-05, - "loss": 0.0571, - "step": 33 - }, - { - "epoch": 0.17, - "learning_rate": 1e-05, - "loss": 0.1905, - "step": 34 - }, - { - "epoch": 0.18, - "learning_rate": 1e-05, - "loss": 0.1854, - "step": 35 - }, - { - "epoch": 0.18, - "learning_rate": 1e-05, - "loss": 0.1395, - "step": 36 - }, - { - "epoch": 0.19, - "learning_rate": 1e-05, - "loss": 0.1495, - "step": 37 - }, - { - "epoch": 0.19, - "learning_rate": 1e-05, - "loss": 0.2867, - "step": 38 - }, - { - "epoch": 0.2, - "learning_rate": 1e-05, - "loss": 0.1169, - "step": 39 - }, - { - "epoch": 0.2, - "learning_rate": 1e-05, - "loss": 0.074, - "step": 40 - }, - { - "epoch": 0.21, - "learning_rate": 1e-05, - "loss": 0.1304, - "step": 41 - }, - { - "epoch": 0.21, - "learning_rate": 1e-05, - "loss": 0.2853, - "step": 42 - }, - { - "epoch": 0.22, - "learning_rate": 1e-05, - "loss": 0.0653, - "step": 43 - }, - { - "epoch": 0.22, - "learning_rate": 1e-05, - "loss": 0.1973, - "step": 44 - }, - { - "epoch": 0.23, - "learning_rate": 1e-05, - "loss": 0.1049, - "step": 45 - }, - { - "epoch": 0.23, - "learning_rate": 1e-05, - "loss": 0.2182, - "step": 46 - }, - { - "epoch": 0.24, - "learning_rate": 1e-05, - "loss": 0.0839, - "step": 47 - }, - { - "epoch": 0.25, - "learning_rate": 1e-05, - "loss": 0.1563, - "step": 48 - }, - { - "epoch": 0.25, - "learning_rate": 1e-05, - "loss": 0.1184, - "step": 49 - }, - { - "epoch": 0.26, - "learning_rate": 1e-05, - "loss": 0.1367, - "step": 50 - }, - { - "epoch": 0.26, - "learning_rate": 1e-05, - "loss": 0.0049, - "step": 51 - }, - { - "epoch": 0.27, - "learning_rate": 1e-05, - "loss": 0.1879, - "step": 52 - }, - { - "epoch": 0.27, - "learning_rate": 1e-05, - "loss": 0.1286, - "step": 53 - }, - { - "epoch": 0.28, - "learning_rate": 1e-05, - "loss": 0.112, - "step": 54 - }, - { - "epoch": 0.28, - "learning_rate": 1e-05, - "loss": 0.0938, - "step": 55 - }, - { - "epoch": 0.29, - "learning_rate": 1e-05, - "loss": 0.0198, - "step": 56 - }, - { - "epoch": 0.29, - "learning_rate": 1e-05, - "loss": 0.1999, - "step": 57 - }, - { - "epoch": 0.3, - "learning_rate": 1e-05, - "loss": 0.0676, - "step": 58 - }, - { - "epoch": 0.3, - "learning_rate": 1e-05, - "loss": 0.1658, - "step": 59 - }, - { - "epoch": 0.31, - "learning_rate": 1e-05, - "loss": 0.0914, - "step": 60 - }, - { - "epoch": 0.31, - "learning_rate": 1e-05, - "loss": 0.1006, - "step": 61 - }, - { - "epoch": 0.32, - "learning_rate": 1e-05, - "loss": 0.0464, - "step": 62 - }, - { - "epoch": 0.32, - "learning_rate": 1e-05, - "loss": 0.1222, - "step": 63 - }, - { - "epoch": 0.33, - "learning_rate": 1e-05, - "loss": 0.0143, - "step": 64 - }, - { - "epoch": 0.33, - "learning_rate": 1e-05, - "loss": 0.2137, - "step": 65 - }, - { - "epoch": 0.34, - "learning_rate": 1e-05, - "loss": 0.2755, - "step": 66 - }, - { - "epoch": 0.34, - "learning_rate": 1e-05, - "loss": 0.1399, - "step": 67 - }, - { - "epoch": 0.35, - "learning_rate": 1e-05, - "loss": 0.2166, - "step": 68 - }, - { - "epoch": 0.35, - "learning_rate": 1e-05, - "loss": 0.1916, - "step": 69 - }, - { - "epoch": 0.36, - "learning_rate": 1e-05, - "loss": 0.1414, - "step": 70 - }, - { - "epoch": 0.36, - "learning_rate": 1e-05, - "loss": 0.0748, - "step": 71 - }, - { - "epoch": 0.37, - "learning_rate": 1e-05, - "loss": 0.0892, - "step": 72 - }, - { - "epoch": 0.37, - "learning_rate": 1e-05, - "loss": 0.0685, - "step": 73 - }, - { - "epoch": 0.38, - "learning_rate": 1e-05, - "loss": 0.1457, - "step": 74 - }, - { - "epoch": 0.38, - "learning_rate": 1e-05, - "loss": 0.1464, - "step": 75 - }, - { - "epoch": 0.39, - "learning_rate": 1e-05, - "loss": 0.101, - "step": 76 - }, - { - "epoch": 0.39, - "learning_rate": 1e-05, - "loss": 0.2092, - "step": 77 - }, - { - "epoch": 0.4, - "learning_rate": 1e-05, - "loss": 0.1977, - "step": 78 - }, - { - "epoch": 0.4, - "learning_rate": 1e-05, - "loss": 0.1607, - "step": 79 - }, - { - "epoch": 0.41, - "learning_rate": 1e-05, - "loss": 0.2879, - "step": 80 - }, - { - "epoch": 0.41, - "learning_rate": 1e-05, - "loss": 0.2386, - "step": 81 - }, - { - "epoch": 0.42, - "learning_rate": 1e-05, - "loss": 0.2433, - "step": 82 - }, - { - "epoch": 0.42, - "learning_rate": 1e-05, - "loss": 0.159, - "step": 83 - }, - { - "epoch": 0.43, - "learning_rate": 1e-05, - "loss": 0.095, - "step": 84 - }, - { - "epoch": 0.43, - "learning_rate": 1e-05, - "loss": 0.0821, - "step": 85 - }, - { - "epoch": 0.44, - "learning_rate": 1e-05, - "loss": 0.0784, - "step": 86 - }, - { - "epoch": 0.44, - "learning_rate": 1e-05, - "loss": 0.1487, - "step": 87 - }, - { - "epoch": 0.45, - "learning_rate": 1e-05, - "loss": 0.1947, - "step": 88 - }, - { - "epoch": 0.45, - "learning_rate": 1e-05, - "loss": 0.1335, - "step": 89 - }, - { - "epoch": 0.46, - "learning_rate": 1e-05, - "loss": 0.3123, - "step": 90 - }, - { - "epoch": 0.46, - "learning_rate": 1e-05, - "loss": 0.0989, - "step": 91 - }, - { - "epoch": 0.47, - "learning_rate": 1e-05, - "loss": 0.0772, - "step": 92 - }, - { - "epoch": 0.48, - "learning_rate": 1e-05, - "loss": 0.1531, - "step": 93 - }, - { - "epoch": 0.48, - "learning_rate": 1e-05, - "loss": 0.0714, - "step": 94 - }, - { - "epoch": 0.49, - "learning_rate": 1e-05, - "loss": 0.1081, - "step": 95 - }, - { - "epoch": 0.49, - "learning_rate": 1e-05, - "loss": 0.0669, - "step": 96 - }, - { - "epoch": 0.5, - "learning_rate": 1e-05, - "loss": 0.1384, - "step": 97 - }, - { - "epoch": 0.5, - "learning_rate": 1e-05, - "loss": 0.1662, - "step": 98 - }, - { - "epoch": 0.51, - "learning_rate": 1e-05, - "loss": 0.1573, - "step": 99 - }, - { - "epoch": 0.51, - "learning_rate": 1e-05, - "loss": 0.0713, - "step": 100 - }, - { - "epoch": 0.52, - "learning_rate": 1e-05, - "loss": 0.2945, - "step": 101 - }, - { - "epoch": 0.52, - "learning_rate": 1e-05, - "loss": 0.2392, - "step": 102 - }, - { - "epoch": 0.53, - "learning_rate": 1e-05, - "loss": 0.0472, - "step": 103 - }, - { - "epoch": 0.53, - "learning_rate": 1e-05, - "loss": 0.1296, - "step": 104 - }, - { - "epoch": 0.54, - "learning_rate": 1e-05, - "loss": 0.1508, - "step": 105 - }, - { - "epoch": 0.54, - "learning_rate": 1e-05, - "loss": 0.0673, - "step": 106 - }, - { - "epoch": 0.55, - "learning_rate": 1e-05, - "loss": 0.1289, - "step": 107 - }, - { - "epoch": 0.55, - "learning_rate": 1e-05, - "loss": 0.1599, - "step": 108 - }, - { - "epoch": 0.56, - "learning_rate": 1e-05, - "loss": 0.1136, - "step": 109 - }, - { - "epoch": 0.56, - "learning_rate": 1e-05, - "loss": 0.1192, - "step": 110 - }, - { - "epoch": 0.57, - "learning_rate": 1e-05, - "loss": 0.1378, - "step": 111 - }, - { - "epoch": 0.57, - "learning_rate": 1e-05, - "loss": 0.0723, - "step": 112 - }, - { - "epoch": 0.58, - "learning_rate": 1e-05, - "loss": 0.095, - "step": 113 - }, - { - "epoch": 0.58, - "learning_rate": 1e-05, - "loss": 0.1105, - "step": 114 - }, - { - "epoch": 0.59, - "learning_rate": 1e-05, - "loss": 0.1114, - "step": 115 - }, - { - "epoch": 0.59, - "learning_rate": 1e-05, - "loss": 0.3623, - "step": 116 - }, - { - "epoch": 0.6, - "learning_rate": 1e-05, - "loss": 0.2149, - "step": 117 - }, - { - "epoch": 0.6, - "learning_rate": 1e-05, - "loss": 0.1858, - "step": 118 - }, - { - "epoch": 0.61, - "learning_rate": 1e-05, - "loss": 0.1264, - "step": 119 - }, - { - "epoch": 0.61, - "learning_rate": 1e-05, - "loss": 0.1113, - "step": 120 - }, - { - "epoch": 0.62, - "learning_rate": 1e-05, - "loss": 0.113, - "step": 121 - }, - { - "epoch": 0.62, - "learning_rate": 1e-05, - "loss": 0.1364, - "step": 122 - }, - { - "epoch": 0.63, - "learning_rate": 1e-05, - "loss": 0.0383, - "step": 123 - }, - { - "epoch": 0.63, - "learning_rate": 1e-05, - "loss": 0.0848, - "step": 124 - }, - { - "epoch": 0.64, - "learning_rate": 1e-05, - "loss": 0.0921, - "step": 125 - }, - { - "epoch": 0.64, - "learning_rate": 1e-05, - "loss": 0.1009, - "step": 126 - }, - { - "epoch": 0.65, - "learning_rate": 1e-05, - "loss": 0.0949, - "step": 127 - }, - { - "epoch": 0.65, - "learning_rate": 1e-05, - "loss": 0.0877, - "step": 128 - }, - { - "epoch": 0.66, - "learning_rate": 1e-05, - "loss": 0.1961, - "step": 129 - }, - { - "epoch": 0.66, - "learning_rate": 1e-05, - "loss": 0.0304, - "step": 130 - }, - { - "epoch": 0.67, - "learning_rate": 1e-05, - "loss": 0.1637, - "step": 131 - }, - { - "epoch": 0.67, - "learning_rate": 1e-05, - "loss": 0.1214, - "step": 132 - }, - { - "epoch": 0.68, - "learning_rate": 1e-05, - "loss": 0.13, - "step": 133 - }, - { - "epoch": 0.68, - "learning_rate": 1e-05, - "loss": 0.0122, - "step": 134 - }, - { - "epoch": 0.69, - "learning_rate": 1e-05, - "loss": 0.1911, - "step": 135 - }, - { - "epoch": 0.69, - "learning_rate": 1e-05, - "loss": 0.0672, - "step": 136 - }, - { - "epoch": 0.7, - "learning_rate": 1e-05, - "loss": 0.2112, - "step": 137 - }, - { - "epoch": 0.7, - "learning_rate": 1e-05, - "loss": 0.2525, - "step": 138 - }, - { - "epoch": 0.71, - "learning_rate": 1e-05, - "loss": 0.1582, - "step": 139 - }, - { - "epoch": 0.72, - "learning_rate": 1e-05, - "loss": 0.0819, - "step": 140 - }, - { - "epoch": 0.72, - "learning_rate": 1e-05, - "loss": 0.0925, - "step": 141 - }, - { - "epoch": 0.73, - "learning_rate": 1e-05, - "loss": 0.0635, - "step": 142 - }, - { - "epoch": 0.73, - "learning_rate": 1e-05, - "loss": 0.337, - "step": 143 - }, - { - "epoch": 0.74, - "learning_rate": 1e-05, - "loss": 0.2296, - "step": 144 - }, - { - "epoch": 0.74, - "learning_rate": 1e-05, - "loss": 0.1795, - "step": 145 - }, - { - "epoch": 0.75, - "learning_rate": 1e-05, - "loss": 0.0803, - "step": 146 - }, - { - "epoch": 0.75, - "learning_rate": 1e-05, - "loss": 0.1189, - "step": 147 - }, - { - "epoch": 0.76, - "learning_rate": 1e-05, - "loss": 0.1089, - "step": 148 - }, - { - "epoch": 0.76, - "learning_rate": 1e-05, - "loss": 0.1216, - "step": 149 - }, - { - "epoch": 0.77, - "learning_rate": 1e-05, - "loss": 0.1148, - "step": 150 - }, - { - "epoch": 0.77, - "learning_rate": 1e-05, - "loss": 0.0407, - "step": 151 - }, - { - "epoch": 0.78, - "learning_rate": 1e-05, - "loss": 0.2256, - "step": 152 - }, - { - "epoch": 0.78, - "learning_rate": 1e-05, - "loss": 0.0644, - "step": 153 - }, - { - "epoch": 0.79, - "learning_rate": 1e-05, - "loss": 0.1588, - "step": 154 - }, - { - "epoch": 0.79, - "learning_rate": 1e-05, - "loss": 0.3197, - "step": 155 - }, - { - "epoch": 0.8, - "learning_rate": 1e-05, - "loss": 0.2215, - "step": 156 - }, - { - "epoch": 0.8, - "learning_rate": 1e-05, - "loss": 0.151, - "step": 157 - }, - { - "epoch": 0.81, - "learning_rate": 1e-05, - "loss": 0.0279, - "step": 158 - }, - { - "epoch": 0.81, - "learning_rate": 1e-05, - "loss": 0.0064, - "step": 159 - }, - { - "epoch": 0.82, - "learning_rate": 1e-05, - "loss": 0.199, - "step": 160 - }, - { - "epoch": 0.82, - "learning_rate": 1e-05, - "loss": 0.0465, - "step": 161 - }, - { - "epoch": 0.83, - "learning_rate": 1e-05, - "loss": 0.2614, - "step": 162 - }, - { - "epoch": 0.83, - "learning_rate": 1e-05, - "loss": 0.0382, - "step": 163 - }, - { - "epoch": 0.84, - "learning_rate": 1e-05, - "loss": 0.0665, - "step": 164 - }, - { - "epoch": 0.84, - "learning_rate": 1e-05, - "loss": 0.1416, - "step": 165 - }, - { - "epoch": 0.85, - "learning_rate": 1e-05, - "loss": 0.1066, - "step": 166 - }, - { - "epoch": 0.85, - "learning_rate": 1e-05, - "loss": 0.0894, - "step": 167 - }, - { - "epoch": 0.86, - "learning_rate": 1e-05, - "loss": 0.0602, - "step": 168 - }, - { - "epoch": 0.86, - "learning_rate": 1e-05, - "loss": 0.1677, - "step": 169 - }, - { - "epoch": 0.87, - "learning_rate": 1e-05, - "loss": 0.0181, - "step": 170 - }, - { - "epoch": 0.87, - "learning_rate": 1e-05, - "loss": 0.4301, - "step": 171 - }, - { - "epoch": 0.88, - "learning_rate": 1e-05, - "loss": 0.1563, - "step": 172 - }, - { - "epoch": 0.88, - "learning_rate": 1e-05, - "loss": 0.1198, - "step": 173 - }, - { - "epoch": 0.89, - "learning_rate": 1e-05, - "loss": 0.1015, - "step": 174 - }, - { - "epoch": 0.89, - "learning_rate": 1e-05, - "loss": 0.1107, - "step": 175 - }, - { - "epoch": 0.9, - "learning_rate": 1e-05, - "loss": 0.1831, - "step": 176 - }, - { - "epoch": 0.9, - "learning_rate": 1e-05, - "loss": 0.0354, - "step": 177 - }, - { - "epoch": 0.91, - "learning_rate": 1e-05, - "loss": 0.2364, - "step": 178 - }, - { - "epoch": 0.91, - "learning_rate": 1e-05, - "loss": 0.1992, - "step": 179 - }, - { - "epoch": 0.92, - "learning_rate": 1e-05, - "loss": 0.1165, - "step": 180 - }, - { - "epoch": 0.92, - "learning_rate": 1e-05, - "loss": 0.198, - "step": 181 - }, - { - "epoch": 0.93, - "learning_rate": 1e-05, - "loss": 0.2144, - "step": 182 - }, - { - "epoch": 0.93, - "learning_rate": 1e-05, - "loss": 0.1842, - "step": 183 - }, - { - "epoch": 0.94, - "learning_rate": 1e-05, - "loss": 0.0275, - "step": 184 - }, - { - "epoch": 0.95, - "learning_rate": 1e-05, - "loss": 0.0246, - "step": 185 - }, - { - "epoch": 0.95, - "learning_rate": 1e-05, - "loss": 0.1705, - "step": 186 - }, - { - "epoch": 0.96, - "learning_rate": 1e-05, - "loss": 0.0736, - "step": 187 - }, - { - "epoch": 0.96, - "learning_rate": 1e-05, - "loss": 0.0545, - "step": 188 - }, - { - "epoch": 0.97, - "learning_rate": 1e-05, - "loss": 0.2176, - "step": 189 - }, - { - "epoch": 0.97, - "learning_rate": 1e-05, - "loss": 0.0435, - "step": 190 - }, - { - "epoch": 0.98, - "learning_rate": 1e-05, - "loss": 0.0855, - "step": 191 - }, - { - "epoch": 0.98, - "learning_rate": 1e-05, - "loss": 0.1094, - "step": 192 - }, - { - "epoch": 0.99, - "learning_rate": 1e-05, - "loss": 0.0023, - "step": 193 - }, - { - "epoch": 0.99, - "learning_rate": 1e-05, - "loss": 0.1278, - "step": 194 - }, - { - "epoch": 1.0, - "learning_rate": 1e-05, - "loss": 0.1514, - "step": 195 - }, - { - "epoch": 1.0, - "learning_rate": 1e-05, - "loss": 0.0519, - "step": 196 - }, - { - "epoch": 1.01, - "learning_rate": 1e-05, - "loss": 0.0148, - "step": 197 - }, - { - "epoch": 1.01, - "learning_rate": 1e-05, - "loss": 0.0307, - "step": 198 - }, - { - "epoch": 1.02, - "learning_rate": 1e-05, - "loss": 0.1395, - "step": 199 - }, - { - "epoch": 1.02, - "learning_rate": 1e-05, - "loss": 0.0545, - "step": 200 - }, - { - "epoch": 1.03, - "learning_rate": 1e-05, - "loss": 0.0808, - "step": 201 - }, - { - "epoch": 1.03, - "learning_rate": 1e-05, - "loss": 0.0461, - "step": 202 - }, - { - "epoch": 1.04, - "learning_rate": 1e-05, - "loss": 0.0158, - "step": 203 - }, - { - "epoch": 1.04, - "learning_rate": 1e-05, - "loss": 0.0845, - "step": 204 - }, - { - "epoch": 1.05, - "learning_rate": 1e-05, - "loss": 0.1393, - "step": 205 - }, - { - "epoch": 1.05, - "learning_rate": 1e-05, - "loss": 0.0598, - "step": 206 - }, - { - "epoch": 1.06, - "learning_rate": 1e-05, - "loss": 0.0928, - "step": 207 - }, - { - "epoch": 1.06, - "learning_rate": 1e-05, - "loss": 0.0459, - "step": 208 - }, - { - "epoch": 1.07, - "learning_rate": 1e-05, - "loss": 0.046, - "step": 209 - }, - { - "epoch": 1.07, - "learning_rate": 1e-05, - "loss": 0.0007, - "step": 210 - }, - { - "epoch": 1.08, - "learning_rate": 1e-05, - "loss": 0.0415, - "step": 211 - }, - { - "epoch": 1.08, - "learning_rate": 1e-05, - "loss": 0.0789, - "step": 212 - }, - { - "epoch": 1.09, - "learning_rate": 1e-05, - "loss": 0.0958, - "step": 213 - }, - { - "epoch": 1.09, - "learning_rate": 1e-05, - "loss": 0.0824, - "step": 214 - }, - { - "epoch": 1.1, - "learning_rate": 1e-05, - "loss": 0.0241, - "step": 215 - }, - { - "epoch": 1.1, - "learning_rate": 1e-05, - "loss": 0.05, - "step": 216 - }, - { - "epoch": 1.11, - "learning_rate": 1e-05, - "loss": 0.0578, - "step": 217 - }, - { - "epoch": 1.11, - "learning_rate": 1e-05, - "loss": 0.0381, - "step": 218 - }, - { - "epoch": 1.12, - "learning_rate": 1e-05, - "loss": 0.0453, - "step": 219 - }, - { - "epoch": 1.12, - "learning_rate": 1e-05, - "loss": 0.024, - "step": 220 - }, - { - "epoch": 1.13, - "learning_rate": 1e-05, - "loss": 0.0314, - "step": 221 - }, - { - "epoch": 1.13, - "learning_rate": 1e-05, - "loss": 0.0575, - "step": 222 - }, - { - "epoch": 1.14, - "learning_rate": 1e-05, - "loss": 0.136, - "step": 223 - }, - { - "epoch": 1.14, - "learning_rate": 1e-05, - "loss": 0.0502, - "step": 224 - }, - { - "epoch": 1.15, - "learning_rate": 1e-05, - "loss": 0.2071, - "step": 225 - }, - { - "epoch": 1.15, - "learning_rate": 1e-05, - "loss": 0.0483, - "step": 226 - }, - { - "epoch": 1.16, - "learning_rate": 1e-05, - "loss": 0.1118, - "step": 227 - }, - { - "epoch": 1.16, - "learning_rate": 1e-05, - "loss": 0.0858, - "step": 228 - }, - { - "epoch": 1.17, - "learning_rate": 1e-05, - "loss": 0.0048, - "step": 229 - }, - { - "epoch": 1.17, - "learning_rate": 1e-05, - "loss": 0.0405, - "step": 230 - }, - { - "epoch": 1.18, - "learning_rate": 1e-05, - "loss": 0.0719, - "step": 231 - }, - { - "epoch": 1.19, - "learning_rate": 1e-05, - "loss": 0.0146, - "step": 232 - }, - { - "epoch": 1.19, - "learning_rate": 1e-05, - "loss": 0.083, - "step": 233 - }, - { - "epoch": 1.2, - "learning_rate": 1e-05, - "loss": 0.0878, - "step": 234 - }, - { - "epoch": 1.2, - "learning_rate": 1e-05, - "loss": 0.0432, - "step": 235 - }, - { - "epoch": 1.21, - "learning_rate": 1e-05, - "loss": 0.0508, - "step": 236 - }, - { - "epoch": 1.21, - "learning_rate": 1e-05, - "loss": 0.0574, - "step": 237 - }, - { - "epoch": 1.22, - "learning_rate": 1e-05, - "loss": 0.0288, - "step": 238 - }, - { - "epoch": 1.22, - "learning_rate": 1e-05, - "loss": 0.0995, - "step": 239 - }, - { - "epoch": 1.23, - "learning_rate": 1e-05, - "loss": 0.1162, - "step": 240 - }, - { - "epoch": 1.23, - "learning_rate": 1e-05, - "loss": 0.0281, - "step": 241 - }, - { - "epoch": 1.24, - "learning_rate": 1e-05, - "loss": 0.0097, - "step": 242 - }, - { - "epoch": 1.24, - "learning_rate": 1e-05, - "loss": 0.0669, - "step": 243 - }, - { - "epoch": 1.25, - "learning_rate": 1e-05, - "loss": 0.0366, - "step": 244 - }, - { - "epoch": 1.25, - "learning_rate": 1e-05, - "loss": 0.0755, - "step": 245 - }, - { - "epoch": 1.26, - "learning_rate": 1e-05, - "loss": 0.038, - "step": 246 - }, - { - "epoch": 1.26, - "learning_rate": 1e-05, - "loss": 0.0146, - "step": 247 - }, - { - "epoch": 1.27, - "learning_rate": 1e-05, - "loss": 0.0297, - "step": 248 - }, - { - "epoch": 1.27, - "learning_rate": 1e-05, - "loss": 0.0166, - "step": 249 - }, - { - "epoch": 1.28, - "learning_rate": 1e-05, - "loss": 0.0809, - "step": 250 - }, - { - "epoch": 1.28, - "learning_rate": 1e-05, - "loss": 0.0964, - "step": 251 - }, - { - "epoch": 1.29, - "learning_rate": 1e-05, - "loss": 0.0212, - "step": 252 - }, - { - "epoch": 1.29, - "learning_rate": 1e-05, - "loss": 0.0685, - "step": 253 - }, - { - "epoch": 1.3, - "learning_rate": 1e-05, - "loss": 0.0599, - "step": 254 - }, - { - "epoch": 1.3, - "learning_rate": 1e-05, - "loss": 0.0527, - "step": 255 - }, - { - "epoch": 1.31, - "learning_rate": 1e-05, - "loss": 0.0657, - "step": 256 - }, - { - "epoch": 1.31, - "learning_rate": 1e-05, - "loss": 0.0047, - "step": 257 - }, - { - "epoch": 1.32, - "learning_rate": 1e-05, - "loss": 0.1389, - "step": 258 - }, - { - "epoch": 1.32, - "learning_rate": 1e-05, - "loss": 0.041, - "step": 259 - }, - { - "epoch": 1.33, - "learning_rate": 1e-05, - "loss": 0.0826, - "step": 260 - }, - { - "epoch": 1.33, - "learning_rate": 1e-05, - "loss": 0.172, - "step": 261 - }, - { - "epoch": 1.34, - "learning_rate": 1e-05, - "loss": 0.0584, - "step": 262 - }, - { - "epoch": 1.34, - "learning_rate": 1e-05, - "loss": 0.0729, - "step": 263 - }, - { - "epoch": 1.35, - "learning_rate": 1e-05, - "loss": 0.0758, - "step": 264 - }, - { - "epoch": 1.35, - "learning_rate": 1e-05, - "loss": 0.0384, - "step": 265 - }, - { - "epoch": 1.36, - "learning_rate": 1e-05, - "loss": 0.0507, - "step": 266 - }, - { - "epoch": 1.36, - "learning_rate": 1e-05, - "loss": 0.0573, - "step": 267 - }, - { - "epoch": 1.37, - "learning_rate": 1e-05, - "loss": 0.0237, - "step": 268 - }, - { - "epoch": 1.37, - "learning_rate": 1e-05, - "loss": 0.0828, - "step": 269 - }, - { - "epoch": 1.38, - "learning_rate": 1e-05, - "loss": 0.0392, - "step": 270 - }, - { - "epoch": 1.38, - "learning_rate": 1e-05, - "loss": 0.032, - "step": 271 - }, - { - "epoch": 1.39, - "learning_rate": 1e-05, - "loss": 0.0119, - "step": 272 - }, - { - "epoch": 1.39, - "learning_rate": 1e-05, - "loss": 0.0365, - "step": 273 - }, - { - "epoch": 1.4, - "learning_rate": 1e-05, - "loss": 0.0832, - "step": 274 - }, - { - "epoch": 1.4, - "learning_rate": 1e-05, - "loss": 0.0032, - "step": 275 - }, - { - "epoch": 1.41, - "learning_rate": 1e-05, - "loss": 0.028, - "step": 276 - }, - { - "epoch": 1.42, - "learning_rate": 1e-05, - "loss": 0.1069, - "step": 277 - }, - { - "epoch": 1.42, - "learning_rate": 1e-05, - "loss": 0.1684, - "step": 278 - }, - { - "epoch": 1.43, - "learning_rate": 1e-05, - "loss": 0.0604, - "step": 279 - }, - { - "epoch": 1.43, - "learning_rate": 1e-05, - "loss": 0.0674, - "step": 280 - }, - { - "epoch": 1.44, - "learning_rate": 1e-05, - "loss": 0.1875, - "step": 281 - }, - { - "epoch": 1.44, - "learning_rate": 1e-05, - "loss": 0.0029, - "step": 282 - }, - { - "epoch": 1.45, - "learning_rate": 1e-05, - "loss": 0.0255, - "step": 283 - }, - { - "epoch": 1.45, - "learning_rate": 1e-05, - "loss": 0.0997, - "step": 284 - }, - { - "epoch": 1.46, - "learning_rate": 1e-05, - "loss": 0.0452, - "step": 285 - }, - { - "epoch": 1.46, - "learning_rate": 1e-05, - "loss": 0.0746, - "step": 286 - }, - { - "epoch": 1.47, - "learning_rate": 1e-05, - "loss": 0.0116, - "step": 287 - }, - { - "epoch": 1.47, - "learning_rate": 1e-05, - "loss": 0.0664, - "step": 288 - }, - { - "epoch": 1.48, - "learning_rate": 1e-05, - "loss": 0.1026, - "step": 289 - }, - { - "epoch": 1.48, - "learning_rate": 1e-05, - "loss": 0.0351, - "step": 290 - }, - { - "epoch": 1.49, - "learning_rate": 1e-05, - "loss": 0.0463, - "step": 291 - }, - { - "epoch": 1.49, - "learning_rate": 1e-05, - "loss": 0.2448, - "step": 292 - }, - { - "epoch": 1.5, - "learning_rate": 1e-05, - "loss": 0.1813, - "step": 293 - }, - { - "epoch": 1.5, - "learning_rate": 1e-05, - "loss": 0.0877, - "step": 294 - }, - { - "epoch": 1.51, - "learning_rate": 1e-05, - "loss": 0.074, - "step": 295 - }, - { - "epoch": 1.51, - "learning_rate": 1e-05, - "loss": 0.0288, - "step": 296 - }, - { - "epoch": 1.52, - "learning_rate": 1e-05, - "loss": 0.021, - "step": 297 - }, - { - "epoch": 1.52, - "learning_rate": 1e-05, - "loss": 0.0533, - "step": 298 - }, - { - "epoch": 1.53, - "learning_rate": 1e-05, - "loss": 0.042, - "step": 299 - }, - { - "epoch": 1.53, - "learning_rate": 1e-05, - "loss": 0.1755, - "step": 300 - }, - { - "epoch": 1.54, - "learning_rate": 1e-05, - "loss": 0.0079, - "step": 301 - }, - { - "epoch": 1.54, - "learning_rate": 1e-05, - "loss": 0.0291, - "step": 302 - }, - { - "epoch": 1.55, - "learning_rate": 1e-05, - "loss": 0.0373, - "step": 303 - }, - { - "epoch": 1.55, - "learning_rate": 1e-05, - "loss": 0.1668, - "step": 304 - }, - { - "epoch": 1.56, - "learning_rate": 1e-05, - "loss": 0.1088, - "step": 305 - }, - { - "epoch": 1.56, - "learning_rate": 1e-05, - "loss": 0.0273, - "step": 306 - }, - { - "epoch": 1.57, - "learning_rate": 1e-05, - "loss": 0.1134, - "step": 307 - }, - { - "epoch": 1.57, - "learning_rate": 1e-05, - "loss": 0.0703, - "step": 308 - }, - { - "epoch": 1.58, - "learning_rate": 1e-05, - "loss": 0.0572, - "step": 309 - }, - { - "epoch": 1.58, - "learning_rate": 1e-05, - "loss": 0.0153, - "step": 310 - }, - { - "epoch": 1.59, - "learning_rate": 1e-05, - "loss": 0.0403, - "step": 311 - }, - { - "epoch": 1.59, - "learning_rate": 1e-05, - "loss": 0.0547, - "step": 312 - }, - { - "epoch": 1.6, - "learning_rate": 1e-05, - "loss": 0.0689, - "step": 313 - }, - { - "epoch": 1.6, - "learning_rate": 1e-05, - "loss": 0.0714, - "step": 314 - }, - { - "epoch": 1.61, - "learning_rate": 1e-05, - "loss": 0.0256, - "step": 315 - }, - { - "epoch": 1.61, - "learning_rate": 1e-05, - "loss": 0.0505, - "step": 316 - }, - { - "epoch": 1.62, - "learning_rate": 1e-05, - "loss": 0.0283, - "step": 317 - }, - { - "epoch": 1.62, - "learning_rate": 1e-05, - "loss": 0.1025, - "step": 318 - }, - { - "epoch": 1.63, - "learning_rate": 1e-05, - "loss": 0.0101, - "step": 319 - }, - { - "epoch": 1.63, - "learning_rate": 1e-05, - "loss": 0.0805, - "step": 320 - }, - { - "epoch": 1.64, - "learning_rate": 1e-05, - "loss": 0.0834, - "step": 321 - }, - { - "epoch": 1.64, - "learning_rate": 1e-05, - "loss": 0.0599, - "step": 322 - }, - { - "epoch": 1.65, - "learning_rate": 1e-05, - "loss": 0.0204, - "step": 323 - }, - { - "epoch": 1.66, - "learning_rate": 1e-05, - "loss": 0.0145, - "step": 324 - }, - { - "epoch": 1.66, - "learning_rate": 1e-05, - "loss": 0.1251, - "step": 325 - }, - { - "epoch": 1.67, - "learning_rate": 1e-05, - "loss": 0.0421, - "step": 326 - }, - { - "epoch": 1.67, - "learning_rate": 1e-05, - "loss": 0.0871, - "step": 327 - }, - { - "epoch": 1.68, - "learning_rate": 1e-05, - "loss": 0.096, - "step": 328 - }, - { - "epoch": 1.68, - "learning_rate": 1e-05, - "loss": 0.0823, - "step": 329 - }, - { - "epoch": 1.69, - "learning_rate": 1e-05, - "loss": 0.1059, - "step": 330 - }, - { - "epoch": 1.69, - "learning_rate": 1e-05, - "loss": 0.039, - "step": 331 - }, - { - "epoch": 1.7, - "learning_rate": 1e-05, - "loss": 0.0183, - "step": 332 - }, - { - "epoch": 1.7, - "learning_rate": 1e-05, - "loss": 0.0215, - "step": 333 - }, - { - "epoch": 1.71, - "learning_rate": 1e-05, - "loss": 0.0335, - "step": 334 - }, - { - "epoch": 1.71, - "learning_rate": 1e-05, - "loss": 0.0089, - "step": 335 - }, - { - "epoch": 1.72, - "learning_rate": 1e-05, - "loss": 0.001, - "step": 336 - }, - { - "epoch": 1.72, - "learning_rate": 1e-05, - "loss": 0.1457, - "step": 337 - }, - { - "epoch": 1.73, - "learning_rate": 1e-05, - "loss": 0.0318, - "step": 338 - }, - { - "epoch": 1.73, - "learning_rate": 1e-05, - "loss": 0.1633, - "step": 339 - }, - { - "epoch": 1.74, - "learning_rate": 1e-05, - "loss": 0.1226, - "step": 340 - }, - { - "epoch": 1.74, - "learning_rate": 1e-05, - "loss": 0.0449, - "step": 341 - }, - { - "epoch": 1.75, - "learning_rate": 1e-05, - "loss": 0.0969, - "step": 342 - }, - { - "epoch": 1.75, - "learning_rate": 1e-05, - "loss": 0.0949, - "step": 343 - }, - { - "epoch": 1.76, - "learning_rate": 1e-05, - "loss": 0.0887, - "step": 344 - }, - { - "epoch": 1.76, - "learning_rate": 1e-05, - "loss": 0.0147, - "step": 345 - }, - { - "epoch": 1.77, - "learning_rate": 1e-05, - "loss": 0.021, - "step": 346 - }, - { - "epoch": 1.77, - "learning_rate": 1e-05, - "loss": 0.0226, - "step": 347 - }, - { - "epoch": 1.78, - "learning_rate": 1e-05, - "loss": 0.0279, - "step": 348 - }, - { - "epoch": 1.78, - "learning_rate": 1e-05, - "loss": 0.0476, - "step": 349 - }, - { - "epoch": 1.79, - "learning_rate": 1e-05, - "loss": 0.0382, - "step": 350 - }, - { - "epoch": 1.79, - "learning_rate": 1e-05, - "loss": 0.0442, - "step": 351 - }, - { - "epoch": 1.8, - "learning_rate": 1e-05, - "loss": 0.0845, - "step": 352 - }, - { - "epoch": 1.8, - "learning_rate": 1e-05, - "loss": 0.0842, - "step": 353 - }, - { - "epoch": 1.81, - "learning_rate": 1e-05, - "loss": 0.0967, - "step": 354 - }, - { - "epoch": 1.81, - "learning_rate": 1e-05, - "loss": 0.0387, - "step": 355 - }, - { - "epoch": 1.82, - "learning_rate": 1e-05, - "loss": 0.0396, - "step": 356 - }, - { - "epoch": 1.82, - "learning_rate": 1e-05, - "loss": 0.0364, - "step": 357 - }, - { - "epoch": 1.83, - "learning_rate": 1e-05, - "loss": 0.0366, - "step": 358 - }, - { - "epoch": 1.83, - "learning_rate": 1e-05, - "loss": 0.0676, - "step": 359 - }, - { - "epoch": 1.84, - "learning_rate": 1e-05, - "loss": 0.054, - "step": 360 - }, - { - "epoch": 1.84, - "learning_rate": 1e-05, - "loss": 0.025, - "step": 361 - }, - { - "epoch": 1.85, - "learning_rate": 1e-05, - "loss": 0.1253, - "step": 362 - }, - { - "epoch": 1.85, - "learning_rate": 1e-05, - "loss": 0.0427, - "step": 363 - }, - { - "epoch": 1.86, - "learning_rate": 1e-05, - "loss": 0.0504, - "step": 364 - }, - { - "epoch": 1.86, - "learning_rate": 1e-05, - "loss": 0.1029, - "step": 365 - }, - { - "epoch": 1.87, - "learning_rate": 1e-05, - "loss": 0.0917, - "step": 366 - }, - { - "epoch": 1.87, - "learning_rate": 1e-05, - "loss": 0.1112, - "step": 367 - }, - { - "epoch": 1.88, - "learning_rate": 1e-05, - "loss": 0.068, - "step": 368 - }, - { - "epoch": 1.89, - "learning_rate": 1e-05, - "loss": 0.0719, - "step": 369 - }, - { - "epoch": 1.89, - "learning_rate": 1e-05, - "loss": 0.0385, - "step": 370 - }, - { - "epoch": 1.9, - "learning_rate": 1e-05, - "loss": 0.1012, - "step": 371 - }, - { - "epoch": 1.9, - "learning_rate": 1e-05, - "loss": 0.0363, - "step": 372 - }, - { - "epoch": 1.91, - "learning_rate": 1e-05, - "loss": 0.0378, - "step": 373 - }, - { - "epoch": 1.91, - "learning_rate": 1e-05, - "loss": 0.1173, - "step": 374 - }, - { - "epoch": 1.92, - "learning_rate": 1e-05, - "loss": 0.1131, - "step": 375 - }, - { - "epoch": 1.92, - "learning_rate": 1e-05, - "loss": 0.0358, - "step": 376 - }, - { - "epoch": 1.93, - "learning_rate": 1e-05, - "loss": 0.1565, - "step": 377 - }, - { - "epoch": 1.93, - "learning_rate": 1e-05, - "loss": 0.0629, - "step": 378 - }, - { - "epoch": 1.94, - "learning_rate": 1e-05, - "loss": 0.0342, - "step": 379 - }, - { - "epoch": 1.94, - "learning_rate": 1e-05, - "loss": 0.1085, - "step": 380 - }, - { - "epoch": 1.95, - "learning_rate": 1e-05, - "loss": 0.0749, - "step": 381 - }, - { - "epoch": 1.95, - "learning_rate": 1e-05, - "loss": 0.1015, - "step": 382 - }, - { - "epoch": 1.96, - "learning_rate": 1e-05, - "loss": 0.0954, - "step": 383 - }, - { - "epoch": 1.96, - "learning_rate": 1e-05, - "loss": 0.0926, - "step": 384 - }, - { - "epoch": 1.97, - "learning_rate": 1e-05, - "loss": 0.135, - "step": 385 - }, - { - "epoch": 1.97, - "learning_rate": 1e-05, - "loss": 0.0751, - "step": 386 - }, - { - "epoch": 1.98, - "learning_rate": 1e-05, - "loss": 0.0873, - "step": 387 - }, - { - "epoch": 1.98, - "learning_rate": 1e-05, - "loss": 0.0328, - "step": 388 - }, - { - "epoch": 1.99, - "learning_rate": 1e-05, - "loss": 0.0938, - "step": 389 - }, - { - "epoch": 1.99, - "learning_rate": 1e-05, - "loss": 0.0299, - "step": 390 - }, - { - "epoch": 2.0, - "learning_rate": 1e-05, - "loss": 0.25, - "step": 391 - }, - { - "epoch": 2.0, - "learning_rate": 1e-05, - "loss": 0.0366, - "step": 392 - }, - { - "epoch": 2.01, - "learning_rate": 1e-05, - "loss": 0.0182, - "step": 393 - }, - { - "epoch": 2.01, - "learning_rate": 1e-05, - "loss": 0.0399, - "step": 394 - }, - { - "epoch": 2.02, - "learning_rate": 1e-05, - "loss": 0.0875, - "step": 395 - }, - { - "epoch": 2.02, - "learning_rate": 1e-05, - "loss": 0.0208, - "step": 396 - }, - { - "epoch": 2.03, - "learning_rate": 1e-05, - "loss": 0.0359, - "step": 397 - }, - { - "epoch": 2.03, - "learning_rate": 1e-05, - "loss": 0.0998, - "step": 398 - }, - { - "epoch": 2.04, - "learning_rate": 1e-05, - "loss": 0.0102, - "step": 399 - }, - { - "epoch": 2.04, - "learning_rate": 1e-05, - "loss": 0.0126, - "step": 400 - }, - { - "epoch": 2.05, - "learning_rate": 1e-05, - "loss": 0.0814, - "step": 401 - }, - { - "epoch": 2.05, - "learning_rate": 1e-05, - "loss": 0.0727, - "step": 402 - }, - { - "epoch": 2.06, - "learning_rate": 1e-05, - "loss": 0.022, - "step": 403 - }, - { - "epoch": 2.06, - "learning_rate": 1e-05, - "loss": 0.056, - "step": 404 - }, - { - "epoch": 2.07, - "learning_rate": 1e-05, - "loss": 0.0691, - "step": 405 - }, - { - "epoch": 2.07, - "learning_rate": 1e-05, - "loss": 0.0049, - "step": 406 - }, - { - "epoch": 2.08, - "learning_rate": 1e-05, - "loss": 0.0156, - "step": 407 - }, - { - "epoch": 2.08, - "learning_rate": 1e-05, - "loss": 0.0529, - "step": 408 - }, - { - "epoch": 2.09, - "learning_rate": 1e-05, - "loss": 0.031, - "step": 409 - }, - { - "epoch": 2.09, - "learning_rate": 1e-05, - "loss": 0.0159, - "step": 410 - }, - { - "epoch": 2.1, - "learning_rate": 1e-05, - "loss": 0.0461, - "step": 411 - }, - { - "epoch": 2.1, - "learning_rate": 1e-05, - "loss": 0.0773, - "step": 412 - }, - { - "epoch": 2.11, - "learning_rate": 1e-05, - "loss": 0.0845, - "step": 413 - }, - { - "epoch": 2.11, - "learning_rate": 1e-05, - "loss": 0.0074, - "step": 414 - }, - { - "epoch": 2.12, - "learning_rate": 1e-05, - "loss": 0.0143, - "step": 415 - }, - { - "epoch": 2.13, - "learning_rate": 1e-05, - "loss": 0.0104, - "step": 416 - }, - { - "epoch": 2.13, - "learning_rate": 1e-05, - "loss": 0.0436, - "step": 417 - }, - { - "epoch": 2.14, - "learning_rate": 1e-05, - "loss": 0.0151, - "step": 418 - }, - { - "epoch": 2.14, - "learning_rate": 1e-05, - "loss": 0.0272, - "step": 419 - }, - { - "epoch": 2.15, - "learning_rate": 1e-05, - "loss": 0.0761, - "step": 420 - }, - { - "epoch": 2.15, - "learning_rate": 1e-05, - "loss": 0.0123, - "step": 421 - }, - { - "epoch": 2.16, - "learning_rate": 1e-05, - "loss": 0.0681, - "step": 422 - }, - { - "epoch": 2.16, - "learning_rate": 1e-05, - "loss": 0.0303, - "step": 423 - }, - { - "epoch": 2.17, - "learning_rate": 1e-05, - "loss": 0.0555, - "step": 424 - }, - { - "epoch": 2.17, - "learning_rate": 1e-05, - "loss": 0.0878, - "step": 425 - }, - { - "epoch": 2.18, - "learning_rate": 1e-05, - "loss": 0.0801, - "step": 426 - }, - { - "epoch": 2.18, - "learning_rate": 1e-05, - "loss": 0.0284, - "step": 427 - }, - { - "epoch": 2.19, - "learning_rate": 1e-05, - "loss": 0.041, - "step": 428 - }, - { - "epoch": 2.19, - "learning_rate": 1e-05, - "loss": 0.1293, - "step": 429 - }, - { - "epoch": 2.2, - "learning_rate": 1e-05, - "loss": 0.0437, - "step": 430 - }, - { - "epoch": 2.2, - "learning_rate": 1e-05, - "loss": 0.07, - "step": 431 - }, - { - "epoch": 2.21, - "learning_rate": 1e-05, - "loss": 0.057, - "step": 432 - }, - { - "epoch": 2.21, - "learning_rate": 1e-05, - "loss": 0.0213, - "step": 433 - }, - { - "epoch": 2.22, - "learning_rate": 1e-05, - "loss": 0.0186, - "step": 434 - }, - { - "epoch": 2.22, - "learning_rate": 1e-05, - "loss": 0.0132, - "step": 435 - }, - { - "epoch": 2.23, - "learning_rate": 1e-05, - "loss": 0.0487, - "step": 436 - }, - { - "epoch": 2.23, - "learning_rate": 1e-05, - "loss": 0.0066, - "step": 437 - }, - { - "epoch": 2.24, - "learning_rate": 1e-05, - "loss": 0.0793, - "step": 438 - }, - { - "epoch": 2.24, - "learning_rate": 1e-05, - "loss": 0.0406, - "step": 439 - }, - { - "epoch": 2.25, - "learning_rate": 1e-05, - "loss": 0.0216, - "step": 440 - }, - { - "epoch": 2.25, - "learning_rate": 1e-05, - "loss": 0.0269, - "step": 441 - }, - { - "epoch": 2.26, - "learning_rate": 1e-05, - "loss": 0.0583, - "step": 442 - }, - { - "epoch": 2.26, - "learning_rate": 1e-05, - "loss": 0.0421, - "step": 443 - }, - { - "epoch": 2.27, - "learning_rate": 1e-05, - "loss": 0.0305, - "step": 444 - }, - { - "epoch": 2.27, - "learning_rate": 1e-05, - "loss": 0.0476, - "step": 445 - }, - { - "epoch": 2.28, - "learning_rate": 1e-05, - "loss": 0.0188, - "step": 446 - }, - { - "epoch": 2.28, - "learning_rate": 1e-05, - "loss": 0.0147, - "step": 447 - }, - { - "epoch": 2.29, - "learning_rate": 1e-05, - "loss": 0.0272, - "step": 448 - }, - { - "epoch": 2.29, - "learning_rate": 1e-05, - "loss": 0.0036, - "step": 449 - }, - { - "epoch": 2.3, - "learning_rate": 1e-05, - "loss": 0.0176, - "step": 450 - }, - { - "epoch": 2.3, - "learning_rate": 1e-05, - "loss": 0.0116, - "step": 451 - }, - { - "epoch": 2.31, - "learning_rate": 1e-05, - "loss": 0.0749, - "step": 452 - }, - { - "epoch": 2.31, - "learning_rate": 1e-05, - "loss": 0.0338, - "step": 453 - }, - { - "epoch": 2.32, - "learning_rate": 1e-05, - "loss": 0.051, - "step": 454 - }, - { - "epoch": 2.32, - "learning_rate": 1e-05, - "loss": 0.0291, - "step": 455 - }, - { - "epoch": 2.33, - "learning_rate": 1e-05, - "loss": 0.0544, - "step": 456 - }, - { - "epoch": 2.33, - "learning_rate": 1e-05, - "loss": 0.0319, - "step": 457 - }, - { - "epoch": 2.34, - "learning_rate": 1e-05, - "loss": 0.1306, - "step": 458 - }, - { - "epoch": 2.34, - "learning_rate": 1e-05, - "loss": 0.0187, - "step": 459 - }, - { - "epoch": 2.35, - "learning_rate": 1e-05, - "loss": 0.0082, - "step": 460 - }, - { - "epoch": 2.36, - "learning_rate": 1e-05, - "loss": 0.0135, - "step": 461 - }, - { - "epoch": 2.36, - "learning_rate": 1e-05, - "loss": 0.0405, - "step": 462 - }, - { - "epoch": 2.37, - "learning_rate": 1e-05, - "loss": 0.0217, - "step": 463 - }, - { - "epoch": 2.37, - "learning_rate": 1e-05, - "loss": 0.0575, - "step": 464 - }, - { - "epoch": 2.38, - "learning_rate": 1e-05, - "loss": 0.0798, - "step": 465 - }, - { - "epoch": 2.38, - "learning_rate": 1e-05, - "loss": 0.0174, - "step": 466 - }, - { - "epoch": 2.39, - "learning_rate": 1e-05, - "loss": 0.0632, - "step": 467 - }, - { - "epoch": 2.39, - "learning_rate": 1e-05, - "loss": 0.0061, - "step": 468 - }, - { - "epoch": 2.4, - "learning_rate": 1e-05, - "loss": 0.0144, - "step": 469 - }, - { - "epoch": 2.4, - "learning_rate": 1e-05, - "loss": 0.0324, - "step": 470 - }, - { - "epoch": 2.41, - "learning_rate": 1e-05, - "loss": 0.0593, - "step": 471 - }, - { - "epoch": 2.41, - "learning_rate": 1e-05, - "loss": 0.0274, - "step": 472 - }, - { - "epoch": 2.42, - "learning_rate": 1e-05, - "loss": 0.0252, - "step": 473 - }, - { - "epoch": 2.42, - "learning_rate": 1e-05, - "loss": 0.0239, - "step": 474 - }, - { - "epoch": 2.43, - "learning_rate": 1e-05, - "loss": 0.0207, - "step": 475 - }, - { - "epoch": 2.43, - "learning_rate": 1e-05, - "loss": 0.046, - "step": 476 - }, - { - "epoch": 2.44, - "learning_rate": 1e-05, - "loss": 0.0387, - "step": 477 - }, - { - "epoch": 2.44, - "learning_rate": 1e-05, - "loss": 0.0267, - "step": 478 - }, - { - "epoch": 2.45, - "learning_rate": 1e-05, - "loss": 0.0253, - "step": 479 - }, - { - "epoch": 2.45, - "learning_rate": 1e-05, - "loss": 0.0022, - "step": 480 - }, - { - "epoch": 2.46, - "learning_rate": 1e-05, - "loss": 0.081, - "step": 481 - }, - { - "epoch": 2.46, - "learning_rate": 1e-05, - "loss": 0.0126, - "step": 482 - }, - { - "epoch": 2.47, - "learning_rate": 1e-05, - "loss": 0.0183, - "step": 483 - }, - { - "epoch": 2.47, - "learning_rate": 1e-05, - "loss": 0.0195, - "step": 484 - }, - { - "epoch": 2.48, - "learning_rate": 1e-05, - "loss": 0.0609, - "step": 485 - }, - { - "epoch": 2.48, - "learning_rate": 1e-05, - "loss": 0.0382, - "step": 486 - }, - { - "epoch": 2.49, - "learning_rate": 1e-05, - "loss": 0.0207, - "step": 487 - }, - { - "epoch": 2.49, - "learning_rate": 1e-05, - "loss": 0.022, - "step": 488 - }, - { - "epoch": 2.5, - "learning_rate": 1e-05, - "loss": 0.1224, - "step": 489 - }, - { - "epoch": 2.5, - "learning_rate": 1e-05, - "loss": 0.0063, - "step": 490 - }, - { - "epoch": 2.51, - "learning_rate": 1e-05, - "loss": 0.0294, - "step": 491 - }, - { - "epoch": 2.51, - "learning_rate": 1e-05, - "loss": 0.061, - "step": 492 - }, - { - "epoch": 2.52, - "learning_rate": 1e-05, - "loss": 0.0948, - "step": 493 - }, - { - "epoch": 2.52, - "learning_rate": 1e-05, - "loss": 0.0544, - "step": 494 - }, - { - "epoch": 2.53, - "learning_rate": 1e-05, - "loss": 0.063, - "step": 495 - }, - { - "epoch": 2.53, - "learning_rate": 1e-05, - "loss": 0.0178, - "step": 496 - }, - { - "epoch": 2.54, - "learning_rate": 1e-05, - "loss": 0.0957, - "step": 497 - }, - { - "epoch": 2.54, - "learning_rate": 1e-05, - "loss": 0.0038, - "step": 498 - }, - { - "epoch": 2.55, - "learning_rate": 1e-05, - "loss": 0.0079, - "step": 499 - }, - { - "epoch": 2.55, - "learning_rate": 1e-05, - "loss": 0.0004, - "step": 500 - }, - { - "epoch": 2.56, - "learning_rate": 1e-05, - "loss": 0.0074, - "step": 501 - }, - { - "epoch": 2.56, - "learning_rate": 1e-05, - "loss": 0.0368, - "step": 502 - }, - { - "epoch": 2.57, - "learning_rate": 1e-05, - "loss": 0.0316, - "step": 503 - }, - { - "epoch": 2.57, - "learning_rate": 1e-05, - "loss": 0.0738, - "step": 504 - }, - { - "epoch": 2.58, - "learning_rate": 1e-05, - "loss": 0.0463, - "step": 505 - }, - { - "epoch": 2.58, - "learning_rate": 1e-05, - "loss": 0.0237, - "step": 506 - }, - { - "epoch": 2.59, - "learning_rate": 1e-05, - "loss": 0.0076, - "step": 507 - }, - { - "epoch": 2.6, - "learning_rate": 1e-05, - "loss": 0.0393, - "step": 508 - }, - { - "epoch": 2.6, - "learning_rate": 1e-05, - "loss": 0.1051, - "step": 509 - }, - { - "epoch": 2.61, - "learning_rate": 1e-05, - "loss": 0.1106, - "step": 510 - }, - { - "epoch": 2.61, - "learning_rate": 1e-05, - "loss": 0.0295, - "step": 511 - }, - { - "epoch": 2.62, - "learning_rate": 1e-05, - "loss": 0.0935, - "step": 512 - }, - { - "epoch": 2.62, - "learning_rate": 1e-05, - "loss": 0.0326, - "step": 513 - }, - { - "epoch": 2.63, - "learning_rate": 1e-05, - "loss": 0.0084, - "step": 514 - }, - { - "epoch": 2.63, - "learning_rate": 1e-05, - "loss": 0.015, - "step": 515 - }, - { - "epoch": 2.64, - "learning_rate": 1e-05, - "loss": 0.0312, - "step": 516 - }, - { - "epoch": 2.64, - "learning_rate": 1e-05, - "loss": 0.0144, - "step": 517 - }, - { - "epoch": 2.65, - "learning_rate": 1e-05, - "loss": 0.0474, - "step": 518 - }, - { - "epoch": 2.65, - "learning_rate": 1e-05, - "loss": 0.0682, - "step": 519 - }, - { - "epoch": 2.66, - "learning_rate": 1e-05, - "loss": 0.0238, - "step": 520 - }, - { - "epoch": 2.66, - "learning_rate": 1e-05, - "loss": 0.0927, - "step": 521 - }, - { - "epoch": 2.67, - "learning_rate": 1e-05, - "loss": 0.0074, - "step": 522 - }, - { - "epoch": 2.67, - "learning_rate": 1e-05, - "loss": 0.0222, - "step": 523 - }, - { - "epoch": 2.68, - "learning_rate": 1e-05, - "loss": 0.0841, - "step": 524 - }, - { - "epoch": 2.68, - "learning_rate": 1e-05, - "loss": 0.1028, - "step": 525 - }, - { - "epoch": 2.69, - "learning_rate": 1e-05, - "loss": 0.1052, - "step": 526 - }, - { - "epoch": 2.69, - "learning_rate": 1e-05, - "loss": 0.0465, - "step": 527 - }, - { - "epoch": 2.7, - "learning_rate": 1e-05, - "loss": 0.0116, - "step": 528 - }, - { - "epoch": 2.7, - "learning_rate": 1e-05, - "loss": 0.0257, - "step": 529 - }, - { - "epoch": 2.71, - "learning_rate": 1e-05, - "loss": 0.0691, - "step": 530 - }, - { - "epoch": 2.71, - "learning_rate": 1e-05, - "loss": 0.0721, - "step": 531 - }, - { - "epoch": 2.72, - "learning_rate": 1e-05, - "loss": 0.072, - "step": 532 - }, - { - "epoch": 2.72, - "learning_rate": 1e-05, - "loss": 0.0938, - "step": 533 - }, - { - "epoch": 2.73, - "learning_rate": 1e-05, - "loss": 0.0141, - "step": 534 - }, - { - "epoch": 2.73, - "learning_rate": 1e-05, - "loss": 0.064, - "step": 535 - }, - { - "epoch": 2.74, - "learning_rate": 1e-05, - "loss": 0.0194, - "step": 536 - }, - { - "epoch": 2.74, - "learning_rate": 1e-05, - "loss": 0.0304, - "step": 537 - }, - { - "epoch": 2.75, - "learning_rate": 1e-05, - "loss": 0.0347, - "step": 538 - }, - { - "epoch": 2.75, - "learning_rate": 1e-05, - "loss": 0.0282, - "step": 539 - }, - { - "epoch": 2.76, - "learning_rate": 1e-05, - "loss": 0.0635, - "step": 540 - }, - { - "epoch": 2.76, - "learning_rate": 1e-05, - "loss": 0.0376, - "step": 541 - }, - { - "epoch": 2.77, - "learning_rate": 1e-05, - "loss": 0.0686, - "step": 542 - }, - { - "epoch": 2.77, - "learning_rate": 1e-05, - "loss": 0.0183, - "step": 543 - }, - { - "epoch": 2.78, - "learning_rate": 1e-05, - "loss": 0.1212, - "step": 544 - }, - { - "epoch": 2.78, - "learning_rate": 1e-05, - "loss": 0.018, - "step": 545 - }, - { - "epoch": 2.79, - "learning_rate": 1e-05, - "loss": 0.0446, - "step": 546 - }, - { - "epoch": 2.79, - "learning_rate": 1e-05, - "loss": 0.0031, - "step": 547 - }, - { - "epoch": 2.8, - "learning_rate": 1e-05, - "loss": 0.0174, - "step": 548 - }, - { - "epoch": 2.8, - "learning_rate": 1e-05, - "loss": 0.0256, - "step": 549 - }, - { - "epoch": 2.81, - "learning_rate": 1e-05, - "loss": 0.1656, - "step": 550 - }, - { - "epoch": 2.81, - "learning_rate": 1e-05, - "loss": 0.0738, - "step": 551 - }, - { - "epoch": 2.82, - "learning_rate": 1e-05, - "loss": 0.0873, - "step": 552 - }, - { - "epoch": 2.83, - "learning_rate": 1e-05, - "loss": 0.0372, - "step": 553 - }, - { - "epoch": 2.83, - "learning_rate": 1e-05, - "loss": 0.0425, - "step": 554 - }, - { - "epoch": 2.84, - "learning_rate": 1e-05, - "loss": 0.0291, - "step": 555 - }, - { - "epoch": 2.84, - "learning_rate": 1e-05, - "loss": 0.0214, - "step": 556 - }, - { - "epoch": 2.85, - "learning_rate": 1e-05, - "loss": 0.0401, - "step": 557 - }, - { - "epoch": 2.85, - "learning_rate": 1e-05, - "loss": 0.0679, - "step": 558 - }, - { - "epoch": 2.86, - "learning_rate": 1e-05, - "loss": 0.0271, - "step": 559 - }, - { - "epoch": 2.86, - "learning_rate": 1e-05, - "loss": 0.0211, - "step": 560 - }, - { - "epoch": 2.87, - "learning_rate": 1e-05, - "loss": 0.0704, - "step": 561 - }, - { - "epoch": 2.87, - "learning_rate": 1e-05, - "loss": 0.1727, - "step": 562 - }, - { - "epoch": 2.88, - "learning_rate": 1e-05, - "loss": 0.0594, - "step": 563 - }, - { - "epoch": 2.88, - "learning_rate": 1e-05, - "loss": 0.0852, - "step": 564 - }, - { - "epoch": 2.89, - "learning_rate": 1e-05, - "loss": 0.0152, - "step": 565 - }, - { - "epoch": 2.89, - "learning_rate": 1e-05, - "loss": 0.02, - "step": 566 - }, - { - "epoch": 2.9, - "learning_rate": 1e-05, - "loss": 0.0298, - "step": 567 - }, - { - "epoch": 2.9, - "learning_rate": 1e-05, - "loss": 0.0079, - "step": 568 - }, - { - "epoch": 2.91, - "learning_rate": 1e-05, - "loss": 0.0354, - "step": 569 - }, - { - "epoch": 2.91, - "learning_rate": 1e-05, - "loss": 0.0093, - "step": 570 - }, - { - "epoch": 2.92, - "learning_rate": 1e-05, - "loss": 0.0266, - "step": 571 - }, - { - "epoch": 2.92, - "learning_rate": 1e-05, - "loss": 0.0313, - "step": 572 - }, - { - "epoch": 2.93, - "learning_rate": 1e-05, - "loss": 0.1279, - "step": 573 - }, - { - "epoch": 2.93, - "learning_rate": 1e-05, - "loss": 0.1133, - "step": 574 - }, - { - "epoch": 2.94, - "learning_rate": 1e-05, - "loss": 0.0026, - "step": 575 - }, - { - "epoch": 2.94, - "learning_rate": 1e-05, - "loss": 0.0782, - "step": 576 - }, - { - "epoch": 2.95, - "learning_rate": 1e-05, - "loss": 0.0416, - "step": 577 - }, - { - "epoch": 2.95, - "learning_rate": 1e-05, - "loss": 0.0081, - "step": 578 - }, - { - "epoch": 2.96, - "learning_rate": 1e-05, - "loss": 0.0213, - "step": 579 - }, - { - "epoch": 2.96, - "learning_rate": 1e-05, - "loss": 0.0061, - "step": 580 - }, - { - "epoch": 2.97, - "learning_rate": 1e-05, - "loss": 0.0564, - "step": 581 - }, - { - "epoch": 2.97, - "learning_rate": 1e-05, - "loss": 0.0517, - "step": 582 - }, - { - "epoch": 2.98, - "learning_rate": 1e-05, - "loss": 0.0276, - "step": 583 - }, - { - "epoch": 2.98, - "learning_rate": 1e-05, - "loss": 0.0617, - "step": 584 - }, - { - "epoch": 2.99, - "learning_rate": 1e-05, - "loss": 0.0656, - "step": 585 - }, - { - "epoch": 2.99, - "learning_rate": 1e-05, - "loss": 0.035, - "step": 586 - }, - { - "epoch": 3.0, - "learning_rate": 1e-05, - "loss": 0.0153, - "step": 587 - }, - { - "epoch": 3.0, - "learning_rate": 1e-05, - "loss": 0.0274, - "step": 588 - }, - { - "epoch": 3.01, - "learning_rate": 1e-05, - "loss": 0.0145, - "step": 589 - }, - { - "epoch": 3.01, - "learning_rate": 1e-05, - "loss": 0.0075, - "step": 590 - }, - { - "epoch": 3.02, - "learning_rate": 1e-05, - "loss": 0.0045, - "step": 591 - }, - { - "epoch": 3.02, - "learning_rate": 1e-05, - "loss": 0.0537, - "step": 592 - }, - { - "epoch": 3.03, - "learning_rate": 1e-05, - "loss": 0.0137, - "step": 593 - }, - { - "epoch": 3.03, - "learning_rate": 1e-05, - "loss": 0.0063, - "step": 594 - }, - { - "epoch": 3.04, - "learning_rate": 1e-05, - "loss": 0.0129, - "step": 595 - }, - { - "epoch": 3.04, - "learning_rate": 1e-05, - "loss": 0.0151, - "step": 596 - }, - { - "epoch": 3.05, - "learning_rate": 1e-05, - "loss": 0.0347, - "step": 597 - }, - { - "epoch": 3.05, - "learning_rate": 1e-05, - "loss": 0.1182, - "step": 598 - }, - { - "epoch": 3.06, - "learning_rate": 1e-05, - "loss": 0.0445, - "step": 599 - }, - { - "epoch": 3.07, - "learning_rate": 1e-05, - "loss": 0.1153, - "step": 600 - }, - { - "epoch": 3.07, - "learning_rate": 1e-05, - "loss": 0.0183, - "step": 601 - }, - { - "epoch": 3.08, - "learning_rate": 1e-05, - "loss": 0.1174, - "step": 602 - }, - { - "epoch": 3.08, - "learning_rate": 1e-05, - "loss": 0.0447, - "step": 603 - }, - { - "epoch": 3.09, - "learning_rate": 1e-05, - "loss": 0.0211, - "step": 604 - }, - { - "epoch": 3.09, - "learning_rate": 1e-05, - "loss": 0.0001, - "step": 605 - }, - { - "epoch": 3.1, - "learning_rate": 1e-05, - "loss": 0.0681, - "step": 606 - }, - { - "epoch": 3.1, - "learning_rate": 1e-05, - "loss": 0.0063, - "step": 607 - }, - { - "epoch": 3.11, - "learning_rate": 1e-05, - "loss": 0.0516, - "step": 608 - }, - { - "epoch": 3.11, - "learning_rate": 1e-05, - "loss": 0.0177, - "step": 609 - }, - { - "epoch": 3.12, - "learning_rate": 1e-05, - "loss": 0.0015, - "step": 610 - }, - { - "epoch": 3.12, - "learning_rate": 1e-05, - "loss": 0.0008, - "step": 611 - }, - { - "epoch": 3.13, - "learning_rate": 1e-05, - "loss": 0.0019, - "step": 612 - }, - { - "epoch": 3.13, - "learning_rate": 1e-05, - "loss": 0.0158, - "step": 613 - }, - { - "epoch": 3.14, - "learning_rate": 1e-05, - "loss": 0.0087, - "step": 614 - }, - { - "epoch": 3.14, - "learning_rate": 1e-05, - "loss": 0.0066, - "step": 615 - }, - { - "epoch": 3.15, - "learning_rate": 1e-05, - "loss": 0.0015, - "step": 616 - }, - { - "epoch": 3.15, - "learning_rate": 1e-05, - "loss": 0.0435, - "step": 617 - }, - { - "epoch": 3.16, - "learning_rate": 1e-05, - "loss": 0.0325, - "step": 618 - }, - { - "epoch": 3.16, - "learning_rate": 1e-05, - "loss": 0.0163, - "step": 619 - }, - { - "epoch": 3.17, - "learning_rate": 1e-05, - "loss": 0.0404, - "step": 620 - }, - { - "epoch": 3.17, - "learning_rate": 1e-05, - "loss": 0.0107, - "step": 621 - }, - { - "epoch": 3.18, - "learning_rate": 1e-05, - "loss": 0.0217, - "step": 622 - }, - { - "epoch": 3.18, - "learning_rate": 1e-05, - "loss": 0.1039, - "step": 623 - }, - { - "epoch": 3.19, - "learning_rate": 1e-05, - "loss": 0.0063, - "step": 624 - }, - { - "epoch": 3.19, - "learning_rate": 1e-05, - "loss": 0.0186, - "step": 625 - }, - { - "epoch": 3.2, - "learning_rate": 1e-05, - "loss": 0.017, - "step": 626 - }, - { - "epoch": 3.2, - "learning_rate": 1e-05, - "loss": 0.0025, - "step": 627 - }, - { - "epoch": 3.21, - "learning_rate": 1e-05, - "loss": 0.0041, - "step": 628 - }, - { - "epoch": 3.21, - "learning_rate": 1e-05, - "loss": 0.016, - "step": 629 - }, - { - "epoch": 3.22, - "learning_rate": 1e-05, - "loss": 0.0354, - "step": 630 - }, - { - "epoch": 3.22, - "learning_rate": 1e-05, - "loss": 0.0111, - "step": 631 - }, - { - "epoch": 3.23, - "learning_rate": 1e-05, - "loss": 0.084, - "step": 632 - }, - { - "epoch": 3.23, - "learning_rate": 1e-05, - "loss": 0.028, - "step": 633 - }, - { - "epoch": 3.24, - "learning_rate": 1e-05, - "loss": 0.0048, - "step": 634 - }, - { - "epoch": 3.24, - "learning_rate": 1e-05, - "loss": 0.0096, - "step": 635 - }, - { - "epoch": 3.25, - "learning_rate": 1e-05, - "loss": 0.0045, - "step": 636 - }, - { - "epoch": 3.25, - "learning_rate": 1e-05, - "loss": 0.1489, - "step": 637 - }, - { - "epoch": 3.26, - "learning_rate": 1e-05, - "loss": 0.0037, - "step": 638 - }, - { - "epoch": 3.26, - "learning_rate": 1e-05, - "loss": 0.0878, - "step": 639 - }, - { - "epoch": 3.27, - "learning_rate": 1e-05, - "loss": 0.0076, - "step": 640 - }, - { - "epoch": 3.27, - "learning_rate": 1e-05, - "loss": 0.0123, - "step": 641 - }, - { - "epoch": 3.28, - "learning_rate": 1e-05, - "loss": 0.0499, - "step": 642 - }, - { - "epoch": 3.28, - "learning_rate": 1e-05, - "loss": 0.0102, - "step": 643 - }, - { - "epoch": 3.29, - "learning_rate": 1e-05, - "loss": 0.0265, - "step": 644 - }, - { - "epoch": 3.3, - "learning_rate": 1e-05, - "loss": 0.0144, - "step": 645 - }, - { - "epoch": 3.3, - "learning_rate": 1e-05, - "loss": 0.0245, - "step": 646 - }, - { - "epoch": 3.31, - "learning_rate": 1e-05, - "loss": 0.0482, - "step": 647 - }, - { - "epoch": 3.31, - "learning_rate": 1e-05, - "loss": 0.0146, - "step": 648 - }, - { - "epoch": 3.32, - "learning_rate": 1e-05, - "loss": 0.0268, - "step": 649 - }, - { - "epoch": 3.32, - "learning_rate": 1e-05, - "loss": 0.0232, - "step": 650 - }, - { - "epoch": 3.33, - "learning_rate": 1e-05, - "loss": 0.0168, - "step": 651 - }, - { - "epoch": 3.33, - "learning_rate": 1e-05, - "loss": 0.0155, - "step": 652 - }, - { - "epoch": 3.34, - "learning_rate": 1e-05, - "loss": 0.0496, - "step": 653 - }, - { - "epoch": 3.34, - "learning_rate": 1e-05, - "loss": 0.0169, - "step": 654 - }, - { - "epoch": 3.35, - "learning_rate": 1e-05, - "loss": 0.0084, - "step": 655 - }, - { - "epoch": 3.35, - "learning_rate": 1e-05, - "loss": 0.0529, - "step": 656 - }, - { - "epoch": 3.36, - "learning_rate": 1e-05, - "loss": 0.0111, - "step": 657 - }, - { - "epoch": 3.36, - "learning_rate": 1e-05, - "loss": 0.0954, - "step": 658 - }, - { - "epoch": 3.37, - "learning_rate": 1e-05, - "loss": 0.0222, - "step": 659 - }, - { - "epoch": 3.37, - "learning_rate": 1e-05, - "loss": 0.019, - "step": 660 - }, - { - "epoch": 3.38, - "learning_rate": 1e-05, - "loss": 0.0166, - "step": 661 - }, - { - "epoch": 3.38, - "learning_rate": 1e-05, - "loss": 0.024, - "step": 662 - }, - { - "epoch": 3.39, - "learning_rate": 1e-05, - "loss": 0.0682, - "step": 663 - }, - { - "epoch": 3.39, - "learning_rate": 1e-05, - "loss": 0.0676, - "step": 664 - }, - { - "epoch": 3.4, - "learning_rate": 1e-05, - "loss": 0.0017, - "step": 665 - }, - { - "epoch": 3.4, - "learning_rate": 1e-05, - "loss": 0.0849, - "step": 666 - }, - { - "epoch": 3.41, - "learning_rate": 1e-05, - "loss": 0.0053, - "step": 667 - }, - { - "epoch": 3.41, - "learning_rate": 1e-05, - "loss": 0.1424, - "step": 668 - }, - { - "epoch": 3.42, - "learning_rate": 1e-05, - "loss": 0.1053, - "step": 669 - }, - { - "epoch": 3.42, - "learning_rate": 1e-05, - "loss": 0.0417, - "step": 670 - }, - { - "epoch": 3.43, - "learning_rate": 1e-05, - "loss": 0.0578, - "step": 671 - }, - { - "epoch": 3.43, - "learning_rate": 1e-05, - "loss": 0.0364, - "step": 672 - }, - { - "epoch": 3.44, - "learning_rate": 1e-05, - "loss": 0.0583, - "step": 673 - }, - { - "epoch": 3.44, - "learning_rate": 1e-05, - "loss": 0.0337, - "step": 674 - }, - { - "epoch": 3.45, - "learning_rate": 1e-05, - "loss": 0.0048, - "step": 675 - }, - { - "epoch": 3.45, - "learning_rate": 1e-05, - "loss": 0.0901, - "step": 676 - }, - { - "epoch": 3.46, - "learning_rate": 1e-05, - "loss": 0.0958, - "step": 677 - }, - { - "epoch": 3.46, - "learning_rate": 1e-05, - "loss": 0.0605, - "step": 678 - }, - { - "epoch": 3.47, - "learning_rate": 1e-05, - "loss": 0.0221, - "step": 679 - }, - { - "epoch": 3.47, - "learning_rate": 1e-05, - "loss": 0.009, - "step": 680 - }, - { - "epoch": 3.48, - "learning_rate": 1e-05, - "loss": 0.0236, - "step": 681 - }, - { - "epoch": 3.48, - "learning_rate": 1e-05, - "loss": 0.0008, - "step": 682 - }, - { - "epoch": 3.49, - "learning_rate": 1e-05, - "loss": 0.0099, - "step": 683 - }, - { - "epoch": 3.49, - "learning_rate": 1e-05, - "loss": 0.0159, - "step": 684 - }, - { - "epoch": 3.5, - "learning_rate": 1e-05, - "loss": 0.0645, - "step": 685 - }, - { - "epoch": 3.5, - "learning_rate": 1e-05, - "loss": 0.0404, - "step": 686 - }, - { - "epoch": 3.51, - "learning_rate": 1e-05, - "loss": 0.0136, - "step": 687 - }, - { - "epoch": 3.51, - "learning_rate": 1e-05, - "loss": 0.0299, - "step": 688 - }, - { - "epoch": 3.52, - "learning_rate": 1e-05, - "loss": 0.0017, - "step": 689 - }, - { - "epoch": 3.52, - "learning_rate": 1e-05, - "loss": 0.1964, - "step": 690 - }, - { - "epoch": 3.53, - "learning_rate": 1e-05, - "loss": 0.0084, - "step": 691 - }, - { - "epoch": 3.54, - "learning_rate": 1e-05, - "loss": 0.0227, - "step": 692 - }, - { - "epoch": 3.54, - "learning_rate": 1e-05, - "loss": 0.0574, - "step": 693 - }, - { - "epoch": 3.55, - "learning_rate": 1e-05, - "loss": 0.1469, - "step": 694 - }, - { - "epoch": 3.55, - "learning_rate": 1e-05, - "loss": 0.0734, - "step": 695 - }, - { - "epoch": 3.56, - "learning_rate": 1e-05, - "loss": 0.1699, - "step": 696 - }, - { - "epoch": 3.56, - "learning_rate": 1e-05, - "loss": 0.0328, - "step": 697 - }, - { - "epoch": 3.57, - "learning_rate": 1e-05, - "loss": 0.0255, - "step": 698 - }, - { - "epoch": 3.57, - "learning_rate": 1e-05, - "loss": 0.0131, - "step": 699 - }, - { - "epoch": 3.58, - "learning_rate": 1e-05, - "loss": 0.0203, - "step": 700 - }, - { - "epoch": 3.58, - "learning_rate": 1e-05, - "loss": 0.0103, - "step": 701 - }, - { - "epoch": 3.59, - "learning_rate": 1e-05, - "loss": 0.0289, - "step": 702 - }, - { - "epoch": 3.59, - "learning_rate": 1e-05, - "loss": 0.0152, - "step": 703 - }, - { - "epoch": 3.6, - "learning_rate": 1e-05, - "loss": 0.04, - "step": 704 - }, - { - "epoch": 3.6, - "learning_rate": 1e-05, - "loss": 0.0675, - "step": 705 - }, - { - "epoch": 3.61, - "learning_rate": 1e-05, - "loss": 0.1065, - "step": 706 - }, - { - "epoch": 3.61, - "learning_rate": 1e-05, - "loss": 0.0459, - "step": 707 - }, - { - "epoch": 3.62, - "learning_rate": 1e-05, - "loss": 0.0168, - "step": 708 - }, - { - "epoch": 3.62, - "learning_rate": 1e-05, - "loss": 0.1417, - "step": 709 - }, - { - "epoch": 3.63, - "learning_rate": 1e-05, - "loss": 0.0311, - "step": 710 - }, - { - "epoch": 3.63, - "learning_rate": 1e-05, - "loss": 0.0499, - "step": 711 - }, - { - "epoch": 3.64, - "learning_rate": 1e-05, - "loss": 0.0463, - "step": 712 - }, - { - "epoch": 3.64, - "learning_rate": 1e-05, - "loss": 0.0196, - "step": 713 - }, - { - "epoch": 3.65, - "learning_rate": 1e-05, - "loss": 0.0913, - "step": 714 - }, - { - "epoch": 3.65, - "learning_rate": 1e-05, - "loss": 0.0069, - "step": 715 - }, - { - "epoch": 3.66, - "learning_rate": 1e-05, - "loss": 0.0614, - "step": 716 - }, - { - "epoch": 3.66, - "learning_rate": 1e-05, - "loss": 0.0156, - "step": 717 - }, - { - "epoch": 3.67, - "learning_rate": 1e-05, - "loss": 0.0663, - "step": 718 - }, - { - "epoch": 3.67, - "learning_rate": 1e-05, - "loss": 0.0006, - "step": 719 - }, - { - "epoch": 3.68, - "learning_rate": 1e-05, - "loss": 0.0366, - "step": 720 - }, - { - "epoch": 3.68, - "learning_rate": 1e-05, - "loss": 0.0066, - "step": 721 - }, - { - "epoch": 3.69, - "learning_rate": 1e-05, - "loss": 0.0245, - "step": 722 - }, - { - "epoch": 3.69, - "learning_rate": 1e-05, - "loss": 0.0113, - "step": 723 - }, - { - "epoch": 3.7, - "learning_rate": 1e-05, - "loss": 0.0165, - "step": 724 - }, - { - "epoch": 3.7, - "learning_rate": 1e-05, - "loss": 0.0322, - "step": 725 - }, - { - "epoch": 3.71, - "learning_rate": 1e-05, - "loss": 0.0445, - "step": 726 - }, - { - "epoch": 3.71, - "learning_rate": 1e-05, - "loss": 0.0187, - "step": 727 - }, - { - "epoch": 3.72, - "learning_rate": 1e-05, - "loss": 0.0121, - "step": 728 - }, - { - "epoch": 3.72, - "learning_rate": 1e-05, - "loss": 0.0906, - "step": 729 - }, - { - "epoch": 3.73, - "learning_rate": 1e-05, - "loss": 1.1047, - "step": 730 - }, - { - "epoch": 3.73, - "learning_rate": 1e-05, - "loss": 0.0414, - "step": 731 - }, - { - "epoch": 3.74, - "learning_rate": 1e-05, - "loss": 0.0593, - "step": 732 - }, - { - "epoch": 3.74, - "learning_rate": 1e-05, - "loss": 0.0972, - "step": 733 - }, - { - "epoch": 3.75, - "learning_rate": 1e-05, - "loss": 0.0155, - "step": 734 - }, - { - "epoch": 3.75, - "learning_rate": 1e-05, - "loss": 0.0008, - "step": 735 - }, - { - "epoch": 3.76, - "learning_rate": 1e-05, - "loss": 0.0085, - "step": 736 - }, - { - "epoch": 3.77, - "learning_rate": 1e-05, - "loss": 0.0704, - "step": 737 - }, - { - "epoch": 3.77, - "learning_rate": 1e-05, - "loss": 0.0245, - "step": 738 - }, - { - "epoch": 3.78, - "learning_rate": 1e-05, - "loss": 0.0404, - "step": 739 - }, - { - "epoch": 3.78, - "learning_rate": 1e-05, - "loss": 0.0119, - "step": 740 - }, - { - "epoch": 3.79, - "learning_rate": 1e-05, - "loss": 0.015, - "step": 741 - }, - { - "epoch": 3.79, - "learning_rate": 1e-05, - "loss": 0.023, - "step": 742 - }, - { - "epoch": 3.8, - "learning_rate": 1e-05, - "loss": 0.0068, - "step": 743 - }, - { - "epoch": 3.8, - "learning_rate": 1e-05, - "loss": 0.0196, - "step": 744 - }, - { - "epoch": 3.81, - "learning_rate": 1e-05, - "loss": 0.0361, - "step": 745 - }, - { - "epoch": 3.81, - "learning_rate": 1e-05, - "loss": 0.0726, - "step": 746 - }, - { - "epoch": 3.82, - "learning_rate": 1e-05, - "loss": 0.0749, - "step": 747 - }, - { - "epoch": 3.82, - "learning_rate": 1e-05, - "loss": 0.076, - "step": 748 - }, - { - "epoch": 3.83, - "learning_rate": 1e-05, - "loss": 0.0021, - "step": 749 - }, - { - "epoch": 3.83, - "learning_rate": 1e-05, - "loss": 0.0346, - "step": 750 - }, - { - "epoch": 3.84, - "learning_rate": 1e-05, - "loss": 0.0294, - "step": 751 - }, - { - "epoch": 3.84, - "learning_rate": 1e-05, - "loss": 0.0949, - "step": 752 - }, - { - "epoch": 3.85, - "learning_rate": 1e-05, - "loss": 0.0242, - "step": 753 - }, - { - "epoch": 3.85, - "learning_rate": 1e-05, - "loss": 0.0437, - "step": 754 - }, - { - "epoch": 3.86, - "learning_rate": 1e-05, - "loss": 0.0063, - "step": 755 - }, - { - "epoch": 3.86, - "learning_rate": 1e-05, - "loss": 0.0189, - "step": 756 - }, - { - "epoch": 3.87, - "learning_rate": 1e-05, - "loss": 0.03, - "step": 757 - }, - { - "epoch": 3.87, - "learning_rate": 1e-05, - "loss": 0.0082, - "step": 758 - }, - { - "epoch": 3.88, - "learning_rate": 1e-05, - "loss": 0.0612, - "step": 759 - }, - { - "epoch": 3.88, - "learning_rate": 1e-05, - "loss": 0.0174, - "step": 760 - }, - { - "epoch": 3.89, - "learning_rate": 1e-05, - "loss": 0.0124, - "step": 761 - }, - { - "epoch": 3.89, - "learning_rate": 1e-05, - "loss": 0.0276, - "step": 762 - }, - { - "epoch": 3.9, - "learning_rate": 1e-05, - "loss": 0.0406, - "step": 763 - }, - { - "epoch": 3.9, - "learning_rate": 1e-05, - "loss": 0.0645, - "step": 764 - }, - { - "epoch": 3.91, - "learning_rate": 1e-05, - "loss": 0.003, - "step": 765 - }, - { - "epoch": 3.91, - "learning_rate": 1e-05, - "loss": 0.0307, - "step": 766 - }, - { - "epoch": 3.92, - "learning_rate": 1e-05, - "loss": 0.0154, - "step": 767 - }, - { - "epoch": 3.92, - "learning_rate": 1e-05, - "loss": 0.0919, - "step": 768 - }, - { - "epoch": 3.93, - "learning_rate": 1e-05, - "loss": 0.0569, - "step": 769 - }, - { - "epoch": 3.93, - "learning_rate": 1e-05, - "loss": 0.0263, - "step": 770 - }, - { - "epoch": 3.94, - "learning_rate": 1e-05, - "loss": 0.0175, - "step": 771 - }, - { - "epoch": 3.94, - "learning_rate": 1e-05, - "loss": 0.0174, - "step": 772 - }, - { - "epoch": 3.95, - "learning_rate": 1e-05, - "loss": 0.0307, - "step": 773 - }, - { - "epoch": 3.95, - "learning_rate": 1e-05, - "loss": 0.3598, - "step": 774 - }, - { - "epoch": 3.96, - "learning_rate": 1e-05, - "loss": 0.0602, - "step": 775 - }, - { - "epoch": 3.96, - "learning_rate": 1e-05, - "loss": 0.0278, - "step": 776 - }, - { - "epoch": 3.97, - "learning_rate": 1e-05, - "loss": 0.0101, - "step": 777 - }, - { - "epoch": 3.97, - "learning_rate": 1e-05, - "loss": 0.0334, - "step": 778 - }, - { - "epoch": 3.98, - "learning_rate": 1e-05, - "loss": 0.0468, - "step": 779 - }, - { - "epoch": 3.98, - "learning_rate": 1e-05, - "loss": 0.0199, - "step": 780 - }, - { - "epoch": 3.99, - "learning_rate": 1e-05, - "loss": 0.0427, - "step": 781 - }, - { - "epoch": 3.99, - "learning_rate": 1e-05, - "loss": 0.0092, - "step": 782 - }, - { - "epoch": 4.0, - "learning_rate": 1e-05, - "loss": 0.0084, - "step": 783 - }, - { - "epoch": 4.01, - "learning_rate": 1e-05, - "loss": 0.0011, - "step": 784 - }, - { - "epoch": 4.01, - "learning_rate": 1e-05, - "loss": 0.0364, - "step": 785 - }, - { - "epoch": 4.02, - "learning_rate": 1e-05, - "loss": 0.0053, - "step": 786 - }, - { - "epoch": 4.02, - "learning_rate": 1e-05, - "loss": 0.0388, - "step": 787 - }, - { - "epoch": 4.03, - "learning_rate": 1e-05, - "loss": 0.0147, - "step": 788 - }, - { - "epoch": 4.03, - "learning_rate": 1e-05, - "loss": 0.0057, - "step": 789 - }, - { - "epoch": 4.04, - "learning_rate": 1e-05, - "loss": 0.0173, - "step": 790 - }, - { - "epoch": 4.04, - "learning_rate": 1e-05, - "loss": 0.0499, - "step": 791 - }, - { - "epoch": 4.05, - "learning_rate": 1e-05, - "loss": 0.0119, - "step": 792 - }, - { - "epoch": 4.05, - "learning_rate": 1e-05, - "loss": 0.0192, - "step": 793 - }, - { - "epoch": 4.06, - "learning_rate": 1e-05, - "loss": 0.0068, - "step": 794 - }, - { - "epoch": 4.06, - "learning_rate": 1e-05, - "loss": 0.0248, - "step": 795 - }, - { - "epoch": 4.07, - "learning_rate": 1e-05, - "loss": 0.0222, - "step": 796 - }, - { - "epoch": 4.07, - "learning_rate": 1e-05, - "loss": 0.0024, - "step": 797 - }, - { - "epoch": 4.08, - "learning_rate": 1e-05, - "loss": 0.0084, - "step": 798 - }, - { - "epoch": 4.08, - "learning_rate": 1e-05, - "loss": 0.0023, - "step": 799 - }, - { - "epoch": 4.09, - "learning_rate": 1e-05, - "loss": 0.0657, - "step": 800 - }, - { - "epoch": 4.09, - "learning_rate": 1e-05, - "loss": 0.0138, - "step": 801 - }, - { - "epoch": 4.1, - "learning_rate": 1e-05, - "loss": 0.0732, - "step": 802 - }, - { - "epoch": 4.1, - "learning_rate": 1e-05, - "loss": 0.0032, - "step": 803 - }, - { - "epoch": 4.11, - "learning_rate": 1e-05, - "loss": 0.008, - "step": 804 - }, - { - "epoch": 4.11, - "learning_rate": 1e-05, - "loss": 0.0206, - "step": 805 - }, - { - "epoch": 4.12, - "learning_rate": 1e-05, - "loss": 0.013, - "step": 806 - }, - { - "epoch": 4.12, - "learning_rate": 1e-05, - "loss": 0.0155, - "step": 807 - }, - { - "epoch": 4.13, - "learning_rate": 1e-05, - "loss": 0.0063, - "step": 808 - }, - { - "epoch": 4.13, - "learning_rate": 1e-05, - "loss": 0.0331, - "step": 809 - }, - { - "epoch": 4.14, - "learning_rate": 1e-05, - "loss": 0.0148, - "step": 810 - }, - { - "epoch": 4.14, - "learning_rate": 1e-05, - "loss": 0.0173, - "step": 811 - }, - { - "epoch": 4.15, - "learning_rate": 1e-05, - "loss": 0.0033, - "step": 812 - }, - { - "epoch": 4.15, - "learning_rate": 1e-05, - "loss": 0.0217, - "step": 813 - }, - { - "epoch": 4.16, - "learning_rate": 1e-05, - "loss": 0.0332, - "step": 814 - }, - { - "epoch": 4.16, - "learning_rate": 1e-05, - "loss": 0.0121, - "step": 815 - }, - { - "epoch": 4.17, - "learning_rate": 1e-05, - "loss": 0.0106, - "step": 816 - }, - { - "epoch": 4.17, - "learning_rate": 1e-05, - "loss": 0.0241, - "step": 817 - }, - { - "epoch": 4.18, - "learning_rate": 1e-05, - "loss": 0.0271, - "step": 818 - }, - { - "epoch": 4.18, - "learning_rate": 1e-05, - "loss": 0.0003, - "step": 819 - }, - { - "epoch": 4.19, - "learning_rate": 1e-05, - "loss": 0.0009, - "step": 820 - }, - { - "epoch": 4.19, - "learning_rate": 1e-05, - "loss": 0.0003, - "step": 821 - }, - { - "epoch": 4.2, - "learning_rate": 1e-05, - "loss": 0.008, - "step": 822 - }, - { - "epoch": 4.2, - "learning_rate": 1e-05, - "loss": 0.0002, - "step": 823 - }, - { - "epoch": 4.21, - "learning_rate": 1e-05, - "loss": 0.0108, - "step": 824 - }, - { - "epoch": 4.21, - "learning_rate": 1e-05, - "loss": 0.0131, - "step": 825 - }, - { - "epoch": 4.22, - "learning_rate": 1e-05, - "loss": 0.0346, - "step": 826 - }, - { - "epoch": 4.22, - "learning_rate": 1e-05, - "loss": 0.0625, - "step": 827 - }, - { - "epoch": 4.23, - "learning_rate": 1e-05, - "loss": 0.0916, - "step": 828 - }, - { - "epoch": 4.23, - "learning_rate": 1e-05, - "loss": 0.0556, - "step": 829 - }, - { - "epoch": 4.24, - "learning_rate": 1e-05, - "loss": 0.053, - "step": 830 - }, - { - "epoch": 4.25, - "learning_rate": 1e-05, - "loss": 0.0054, - "step": 831 - }, - { - "epoch": 4.25, - "learning_rate": 1e-05, - "loss": 0.0294, - "step": 832 - }, - { - "epoch": 4.26, - "learning_rate": 1e-05, - "loss": 0.0031, - "step": 833 - }, - { - "epoch": 4.26, - "learning_rate": 1e-05, - "loss": 0.0077, - "step": 834 - }, - { - "epoch": 4.27, - "learning_rate": 1e-05, - "loss": 0.0109, - "step": 835 - }, - { - "epoch": 4.27, - "learning_rate": 1e-05, - "loss": 0.0169, - "step": 836 - }, - { - "epoch": 4.28, - "learning_rate": 1e-05, - "loss": 0.0113, - "step": 837 - }, - { - "epoch": 4.28, - "learning_rate": 1e-05, - "loss": 0.0093, - "step": 838 - }, - { - "epoch": 4.29, - "learning_rate": 1e-05, - "loss": 0.0102, - "step": 839 - }, - { - "epoch": 4.29, - "learning_rate": 1e-05, - "loss": 0.0387, - "step": 840 - }, - { - "epoch": 4.3, - "learning_rate": 1e-05, - "loss": 0.0135, - "step": 841 - }, - { - "epoch": 4.3, - "learning_rate": 1e-05, - "loss": 0.0272, - "step": 842 - }, - { - "epoch": 4.31, - "learning_rate": 1e-05, - "loss": 0.1128, - "step": 843 - }, - { - "epoch": 4.31, - "learning_rate": 1e-05, - "loss": 0.022, - "step": 844 - }, - { - "epoch": 4.32, - "learning_rate": 1e-05, - "loss": 0.0061, - "step": 845 - }, - { - "epoch": 4.32, - "learning_rate": 1e-05, - "loss": 0.0134, - "step": 846 - }, - { - "epoch": 4.33, - "learning_rate": 1e-05, - "loss": 0.0036, - "step": 847 - }, - { - "epoch": 4.33, - "learning_rate": 1e-05, - "loss": 0.0145, - "step": 848 - }, - { - "epoch": 4.34, - "learning_rate": 1e-05, - "loss": 0.0054, - "step": 849 - }, - { - "epoch": 4.34, - "learning_rate": 1e-05, - "loss": 0.0025, - "step": 850 - }, - { - "epoch": 4.35, - "learning_rate": 1e-05, - "loss": 0.0036, - "step": 851 - }, - { - "epoch": 4.35, - "learning_rate": 1e-05, - "loss": 0.0317, - "step": 852 - }, - { - "epoch": 4.36, - "learning_rate": 1e-05, - "loss": 0.0179, - "step": 853 - }, - { - "epoch": 4.36, - "learning_rate": 1e-05, - "loss": 0.0224, - "step": 854 - }, - { - "epoch": 4.37, - "learning_rate": 1e-05, - "loss": 0.0264, - "step": 855 - }, - { - "epoch": 4.37, - "learning_rate": 1e-05, - "loss": 0.1031, - "step": 856 - }, - { - "epoch": 4.38, - "learning_rate": 1e-05, - "loss": 0.0061, - "step": 857 - }, - { - "epoch": 4.38, - "learning_rate": 1e-05, - "loss": 0.0178, - "step": 858 - }, - { - "epoch": 4.39, - "learning_rate": 1e-05, - "loss": 0.1004, - "step": 859 - }, - { - "epoch": 4.39, - "learning_rate": 1e-05, - "loss": 0.0144, - "step": 860 - }, - { - "epoch": 4.4, - "learning_rate": 1e-05, - "loss": 0.0335, - "step": 861 - }, - { - "epoch": 4.4, - "learning_rate": 1e-05, - "loss": 0.0311, - "step": 862 - }, - { - "epoch": 4.41, - "learning_rate": 1e-05, - "loss": 0.0023, - "step": 863 - }, - { - "epoch": 4.41, - "learning_rate": 1e-05, - "loss": 0.027, - "step": 864 - }, - { - "epoch": 4.42, - "learning_rate": 1e-05, - "loss": 0.0005, - "step": 865 - }, - { - "epoch": 4.42, - "learning_rate": 1e-05, - "loss": 0.0173, - "step": 866 - }, - { - "epoch": 4.43, - "learning_rate": 1e-05, - "loss": 0.0117, - "step": 867 - }, - { - "epoch": 4.43, - "learning_rate": 1e-05, - "loss": 0.0224, - "step": 868 - }, - { - "epoch": 4.44, - "learning_rate": 1e-05, - "loss": 0.0062, - "step": 869 - }, - { - "epoch": 4.44, - "learning_rate": 1e-05, - "loss": 0.0258, - "step": 870 - }, - { - "epoch": 4.45, - "learning_rate": 1e-05, - "loss": 0.0675, - "step": 871 - }, - { - "epoch": 4.45, - "learning_rate": 1e-05, - "loss": 0.0137, - "step": 872 - }, - { - "epoch": 4.46, - "learning_rate": 1e-05, - "loss": 0.0789, - "step": 873 - }, - { - "epoch": 4.46, - "learning_rate": 1e-05, - "loss": 0.0033, - "step": 874 - }, - { - "epoch": 4.47, - "learning_rate": 1e-05, - "loss": 0.0181, - "step": 875 - }, - { - "epoch": 4.48, - "learning_rate": 1e-05, - "loss": 0.0307, - "step": 876 - }, - { - "epoch": 4.48, - "learning_rate": 1e-05, - "loss": 0.0051, - "step": 877 - }, - { - "epoch": 4.49, - "learning_rate": 1e-05, - "loss": 0.0058, - "step": 878 - }, - { - "epoch": 4.49, - "learning_rate": 1e-05, - "loss": 0.0089, - "step": 879 - }, - { - "epoch": 4.5, - "learning_rate": 1e-05, - "loss": 0.0346, - "step": 880 - }, - { - "epoch": 4.5, - "learning_rate": 1e-05, - "loss": 0.009, - "step": 881 - }, - { - "epoch": 4.51, - "learning_rate": 1e-05, - "loss": 0.0081, - "step": 882 - }, - { - "epoch": 4.51, - "learning_rate": 1e-05, - "loss": 0.0059, - "step": 883 - }, - { - "epoch": 4.52, - "learning_rate": 1e-05, - "loss": 0.0404, - "step": 884 - }, - { - "epoch": 4.52, - "learning_rate": 1e-05, - "loss": 0.0088, - "step": 885 - }, - { - "epoch": 4.53, - "learning_rate": 1e-05, - "loss": 0.0362, - "step": 886 - }, - { - "epoch": 4.53, - "learning_rate": 1e-05, - "loss": 0.0671, - "step": 887 - }, - { - "epoch": 4.54, - "learning_rate": 1e-05, - "loss": 0.0053, - "step": 888 - }, - { - "epoch": 4.54, - "learning_rate": 1e-05, - "loss": 0.0077, - "step": 889 - }, - { - "epoch": 4.55, - "learning_rate": 1e-05, - "loss": 0.0563, - "step": 890 - }, - { - "epoch": 4.55, - "learning_rate": 1e-05, - "loss": 0.0199, - "step": 891 - }, - { - "epoch": 4.56, - "learning_rate": 1e-05, - "loss": 0.0098, - "step": 892 - }, - { - "epoch": 4.56, - "learning_rate": 1e-05, - "loss": 0.0044, - "step": 893 - }, - { - "epoch": 4.57, - "learning_rate": 1e-05, - "loss": 0.0029, - "step": 894 - }, - { - "epoch": 4.57, - "learning_rate": 1e-05, - "loss": 0.0105, - "step": 895 - }, - { - "epoch": 4.58, - "learning_rate": 1e-05, - "loss": 0.0621, - "step": 896 - }, - { - "epoch": 4.58, - "learning_rate": 1e-05, - "loss": 0.0103, - "step": 897 - }, - { - "epoch": 4.59, - "learning_rate": 1e-05, - "loss": 0.0059, - "step": 898 - }, - { - "epoch": 4.59, - "learning_rate": 1e-05, - "loss": 0.011, - "step": 899 - }, - { - "epoch": 4.6, - "learning_rate": 1e-05, - "loss": 0.009, - "step": 900 - }, - { - "epoch": 4.6, - "learning_rate": 1e-05, - "loss": 0.0507, - "step": 901 - }, - { - "epoch": 4.61, - "learning_rate": 1e-05, - "loss": 0.0045, - "step": 902 - }, - { - "epoch": 4.61, - "learning_rate": 1e-05, - "loss": 0.0893, - "step": 903 - }, - { - "epoch": 4.62, - "learning_rate": 1e-05, - "loss": 0.019, - "step": 904 - }, - { - "epoch": 4.62, - "learning_rate": 1e-05, - "loss": 0.0131, - "step": 905 - }, - { - "epoch": 4.63, - "learning_rate": 1e-05, - "loss": 0.0075, - "step": 906 - }, - { - "epoch": 4.63, - "learning_rate": 1e-05, - "loss": 0.0055, - "step": 907 - }, - { - "epoch": 4.64, - "learning_rate": 1e-05, - "loss": 0.0294, - "step": 908 - }, - { - "epoch": 4.64, - "learning_rate": 1e-05, - "loss": 0.0904, - "step": 909 - }, - { - "epoch": 4.65, - "learning_rate": 1e-05, - "loss": 0.001, - "step": 910 - }, - { - "epoch": 4.65, - "learning_rate": 1e-05, - "loss": 0.0133, - "step": 911 - }, - { - "epoch": 4.66, - "learning_rate": 1e-05, - "loss": 0.0646, - "step": 912 - }, - { - "epoch": 4.66, - "learning_rate": 1e-05, - "loss": 0.0442, - "step": 913 - }, - { - "epoch": 4.67, - "learning_rate": 1e-05, - "loss": 0.0433, - "step": 914 - }, - { - "epoch": 4.67, - "learning_rate": 1e-05, - "loss": 0.0256, - "step": 915 - }, - { - "epoch": 4.68, - "learning_rate": 1e-05, - "loss": 0.0372, - "step": 916 - }, - { - "epoch": 4.68, - "learning_rate": 1e-05, - "loss": 0.0172, - "step": 917 - }, - { - "epoch": 4.69, - "learning_rate": 1e-05, - "loss": 0.0009, - "step": 918 - }, - { - "epoch": 4.69, - "learning_rate": 1e-05, - "loss": 0.0487, - "step": 919 - }, - { - "epoch": 4.7, - "learning_rate": 1e-05, - "loss": 0.0081, - "step": 920 - }, - { - "epoch": 4.7, - "learning_rate": 1e-05, - "loss": 0.0362, - "step": 921 - }, - { - "epoch": 4.71, - "learning_rate": 1e-05, - "loss": 0.0231, - "step": 922 - }, - { - "epoch": 4.72, - "learning_rate": 1e-05, - "loss": 0.0048, - "step": 923 - }, - { - "epoch": 4.72, - "learning_rate": 1e-05, - "loss": 0.0103, - "step": 924 - }, - { - "epoch": 4.73, - "learning_rate": 1e-05, - "loss": 0.0153, - "step": 925 - }, - { - "epoch": 4.73, - "learning_rate": 1e-05, - "loss": 0.0018, - "step": 926 - }, - { - "epoch": 4.74, - "learning_rate": 1e-05, - "loss": 0.0264, - "step": 927 - }, - { - "epoch": 4.74, - "learning_rate": 1e-05, - "loss": 0.0128, - "step": 928 - }, - { - "epoch": 4.75, - "learning_rate": 1e-05, - "loss": 0.0013, - "step": 929 - }, - { - "epoch": 4.75, - "learning_rate": 1e-05, - "loss": 0.0016, - "step": 930 - }, - { - "epoch": 4.76, - "learning_rate": 1e-05, - "loss": 0.0252, - "step": 931 - }, - { - "epoch": 4.76, - "learning_rate": 1e-05, - "loss": 0.0117, - "step": 932 - }, - { - "epoch": 4.77, - "learning_rate": 1e-05, - "loss": 0.0239, - "step": 933 - }, - { - "epoch": 4.77, - "learning_rate": 1e-05, - "loss": 0.0073, - "step": 934 - }, - { - "epoch": 4.78, - "learning_rate": 1e-05, - "loss": 0.0411, - "step": 935 - }, - { - "epoch": 4.78, - "learning_rate": 1e-05, - "loss": 0.0631, - "step": 936 - }, - { - "epoch": 4.79, - "learning_rate": 1e-05, - "loss": 0.0257, - "step": 937 - }, - { - "epoch": 4.79, - "learning_rate": 1e-05, - "loss": 0.0039, - "step": 938 - }, - { - "epoch": 4.8, - "learning_rate": 1e-05, - "loss": 0.0455, - "step": 939 - }, - { - "epoch": 4.8, - "learning_rate": 1e-05, - "loss": 0.0219, - "step": 940 - }, - { - "epoch": 4.81, - "learning_rate": 1e-05, - "loss": 0.0954, - "step": 941 - }, - { - "epoch": 4.81, - "learning_rate": 1e-05, - "loss": 0.0019, - "step": 942 - }, - { - "epoch": 4.82, - "learning_rate": 1e-05, - "loss": 0.0508, - "step": 943 - }, - { - "epoch": 4.82, - "learning_rate": 1e-05, - "loss": 0.0181, - "step": 944 - }, - { - "epoch": 4.83, - "learning_rate": 1e-05, - "loss": 0.0049, - "step": 945 - }, - { - "epoch": 4.83, - "learning_rate": 1e-05, - "loss": 0.0039, - "step": 946 - }, - { - "epoch": 4.84, - "learning_rate": 1e-05, - "loss": 0.0929, - "step": 947 - }, - { - "epoch": 4.84, - "learning_rate": 1e-05, - "loss": 0.0421, - "step": 948 - }, - { - "epoch": 4.85, - "learning_rate": 1e-05, - "loss": 0.0419, - "step": 949 - }, - { - "epoch": 4.85, - "learning_rate": 1e-05, - "loss": 0.0062, - "step": 950 - }, - { - "epoch": 4.86, - "learning_rate": 1e-05, - "loss": 0.0104, - "step": 951 - }, - { - "epoch": 4.86, - "learning_rate": 1e-05, - "loss": 0.0133, - "step": 952 - }, - { - "epoch": 4.87, - "learning_rate": 1e-05, - "loss": 0.019, - "step": 953 - }, - { - "epoch": 4.87, - "learning_rate": 1e-05, - "loss": 0.0215, - "step": 954 - }, - { - "epoch": 4.88, - "learning_rate": 1e-05, - "loss": 0.0113, - "step": 955 - }, - { - "epoch": 4.88, - "learning_rate": 1e-05, - "loss": 0.0045, - "step": 956 - }, - { - "epoch": 4.89, - "learning_rate": 1e-05, - "loss": 0.0223, - "step": 957 - }, - { - "epoch": 4.89, - "learning_rate": 1e-05, - "loss": 0.0169, - "step": 958 - }, - { - "epoch": 4.9, - "learning_rate": 1e-05, - "loss": 0.0088, - "step": 959 - }, - { - "epoch": 4.9, - "learning_rate": 1e-05, - "loss": 0.0126, - "step": 960 - }, - { - "epoch": 4.91, - "learning_rate": 1e-05, - "loss": 0.033, - "step": 961 - }, - { - "epoch": 4.91, - "learning_rate": 1e-05, - "loss": 0.0149, - "step": 962 - }, - { - "epoch": 4.92, - "learning_rate": 1e-05, - "loss": 0.0033, - "step": 963 - }, - { - "epoch": 4.92, - "learning_rate": 1e-05, - "loss": 0.0407, - "step": 964 - }, - { - "epoch": 4.93, - "learning_rate": 1e-05, - "loss": 0.0089, - "step": 965 - }, - { - "epoch": 4.93, - "learning_rate": 1e-05, - "loss": 0.0839, - "step": 966 - }, - { - "epoch": 4.94, - "learning_rate": 1e-05, - "loss": 0.0378, - "step": 967 - }, - { - "epoch": 4.95, - "learning_rate": 1e-05, - "loss": 0.0448, - "step": 968 - }, - { - "epoch": 4.95, - "learning_rate": 1e-05, - "loss": 0.0478, - "step": 969 - }, - { - "epoch": 4.96, - "learning_rate": 1e-05, - "loss": 0.0079, - "step": 970 - }, - { - "epoch": 4.96, - "learning_rate": 1e-05, - "loss": 0.0385, - "step": 971 - }, - { - "epoch": 4.97, - "learning_rate": 1e-05, - "loss": 0.0097, - "step": 972 - }, - { - "epoch": 4.97, - "learning_rate": 1e-05, - "loss": 0.0004, - "step": 973 - }, - { - "epoch": 4.98, - "learning_rate": 1e-05, - "loss": 0.0188, - "step": 974 - }, - { - "epoch": 4.98, - "learning_rate": 1e-05, - "loss": 0.0204, - "step": 975 - }, - { - "epoch": 4.99, - "learning_rate": 1e-05, - "loss": 0.0363, - "step": 976 - }, - { - "epoch": 4.99, - "learning_rate": 1e-05, - "loss": 0.0145, - "step": 977 - }, - { - "epoch": 5.0, - "learning_rate": 1e-05, - "loss": 0.0038, - "step": 978 - }, - { - "epoch": 5.0, - "learning_rate": 1e-05, - "loss": 0.0545, - "step": 979 - }, - { - "epoch": 5.01, - "learning_rate": 1e-05, - "loss": 0.0344, - "step": 980 - }, - { - "epoch": 5.01, - "learning_rate": 1e-05, - "loss": 0.0097, - "step": 981 - }, - { - "epoch": 5.02, - "learning_rate": 1e-05, - "loss": 0.0035, - "step": 982 - }, - { - "epoch": 5.02, - "learning_rate": 1e-05, - "loss": 0.0301, - "step": 983 - }, - { - "epoch": 5.03, - "learning_rate": 1e-05, - "loss": 0.0112, - "step": 984 - }, - { - "epoch": 5.03, - "learning_rate": 1e-05, - "loss": 0.1391, - "step": 985 - }, - { - "epoch": 5.04, - "learning_rate": 1e-05, - "loss": 0.0044, - "step": 986 - }, - { - "epoch": 5.04, - "learning_rate": 1e-05, - "loss": 0.0006, - "step": 987 - }, - { - "epoch": 5.05, - "learning_rate": 1e-05, - "loss": 0.008, - "step": 988 - }, - { - "epoch": 5.05, - "learning_rate": 1e-05, - "loss": 0.0013, - "step": 989 - }, - { - "epoch": 5.06, - "learning_rate": 1e-05, - "loss": 0.0014, - "step": 990 - }, - { - "epoch": 5.06, - "learning_rate": 1e-05, - "loss": 0.0073, - "step": 991 - }, - { - "epoch": 5.07, - "learning_rate": 1e-05, - "loss": 0.0062, - "step": 992 - }, - { - "epoch": 5.07, - "learning_rate": 1e-05, - "loss": 0.0147, - "step": 993 - }, - { - "epoch": 5.08, - "learning_rate": 1e-05, - "loss": 0.0074, - "step": 994 - }, - { - "epoch": 5.08, - "learning_rate": 1e-05, - "loss": 0.0036, - "step": 995 - }, - { - "epoch": 5.09, - "learning_rate": 1e-05, - "loss": 0.0099, - "step": 996 - }, - { - "epoch": 5.09, - "learning_rate": 1e-05, - "loss": 0.0099, - "step": 997 - }, - { - "epoch": 5.1, - "learning_rate": 1e-05, - "loss": 0.0022, - "step": 998 - }, - { - "epoch": 5.1, - "learning_rate": 1e-05, - "loss": 0.007, - "step": 999 - }, - { - "epoch": 5.11, - "learning_rate": 1e-05, - "loss": 0.0021, - "step": 1000 - }, - { - "epoch": 5.11, - "learning_rate": 1e-05, - "loss": 0.0064, - "step": 1001 - }, - { - "epoch": 5.12, - "learning_rate": 1e-05, - "loss": 0.002, - "step": 1002 - }, - { - "epoch": 5.12, - "learning_rate": 1e-05, - "loss": 0.0969, - "step": 1003 - }, - { - "epoch": 5.13, - "learning_rate": 1e-05, - "loss": 0.0139, - "step": 1004 - }, - { - "epoch": 5.13, - "learning_rate": 1e-05, - "loss": 0.0659, - "step": 1005 - }, - { - "epoch": 5.14, - "learning_rate": 1e-05, - "loss": 0.008, - "step": 1006 - }, - { - "epoch": 5.14, - "learning_rate": 1e-05, - "loss": 0.0148, - "step": 1007 - }, - { - "epoch": 5.15, - "learning_rate": 1e-05, - "loss": 0.0103, - "step": 1008 - }, - { - "epoch": 5.15, - "learning_rate": 1e-05, - "loss": 0.0015, - "step": 1009 - }, - { - "epoch": 5.16, - "learning_rate": 1e-05, - "loss": 0.0047, - "step": 1010 - }, - { - "epoch": 5.16, - "learning_rate": 1e-05, - "loss": 0.0012, - "step": 1011 - }, - { - "epoch": 5.17, - "learning_rate": 1e-05, - "loss": 0.023, - "step": 1012 - }, - { - "epoch": 5.17, - "learning_rate": 1e-05, - "loss": 0.0078, - "step": 1013 - }, - { - "epoch": 5.18, - "learning_rate": 1e-05, - "loss": 0.0006, - "step": 1014 - }, - { - "epoch": 5.19, - "learning_rate": 1e-05, - "loss": 0.0021, - "step": 1015 - }, - { - "epoch": 5.19, - "learning_rate": 1e-05, - "loss": 0.0141, - "step": 1016 - }, - { - "epoch": 5.2, - "learning_rate": 1e-05, - "loss": 0.0071, - "step": 1017 - }, - { - "epoch": 5.2, - "learning_rate": 1e-05, - "loss": 0.0023, - "step": 1018 - }, - { - "epoch": 5.21, - "learning_rate": 1e-05, - "loss": 0.0018, - "step": 1019 - }, - { - "epoch": 5.21, - "learning_rate": 1e-05, - "loss": 0.0133, - "step": 1020 - }, - { - "epoch": 5.22, - "learning_rate": 1e-05, - "loss": 0.0007, - "step": 1021 - }, - { - "epoch": 5.22, - "learning_rate": 1e-05, - "loss": 0.013, - "step": 1022 - }, - { - "epoch": 5.23, - "learning_rate": 1e-05, - "loss": 0.0041, - "step": 1023 - }, - { - "epoch": 5.23, - "learning_rate": 1e-05, - "loss": 0.0131, - "step": 1024 - }, - { - "epoch": 5.24, - "learning_rate": 1e-05, - "loss": 0.0051, - "step": 1025 - }, - { - "epoch": 5.24, - "learning_rate": 1e-05, - "loss": 0.0051, - "step": 1026 - }, - { - "epoch": 5.25, - "learning_rate": 1e-05, - "loss": 0.0055, - "step": 1027 - }, - { - "epoch": 5.25, - "learning_rate": 1e-05, - "loss": 0.0704, - "step": 1028 - }, - { - "epoch": 5.26, - "learning_rate": 1e-05, - "loss": 0.0007, - "step": 1029 - }, - { - "epoch": 5.26, - "learning_rate": 1e-05, - "loss": 0.0325, - "step": 1030 - }, - { - "epoch": 5.27, - "learning_rate": 1e-05, - "loss": 0.0106, - "step": 1031 - }, - { - "epoch": 5.27, - "learning_rate": 1e-05, - "loss": 0.0135, - "step": 1032 - }, - { - "epoch": 5.28, - "learning_rate": 1e-05, - "loss": 0.0225, - "step": 1033 - }, - { - "epoch": 5.28, - "learning_rate": 1e-05, - "loss": 0.0205, - "step": 1034 - }, - { - "epoch": 5.29, - "learning_rate": 1e-05, - "loss": 0.019, - "step": 1035 - }, - { - "epoch": 5.29, - "learning_rate": 1e-05, - "loss": 0.0324, - "step": 1036 - }, - { - "epoch": 5.3, - "learning_rate": 1e-05, - "loss": 0.0061, - "step": 1037 - }, - { - "epoch": 5.3, - "learning_rate": 1e-05, - "loss": 0.0002, - "step": 1038 - }, - { - "epoch": 5.31, - "learning_rate": 1e-05, - "loss": 0.0042, - "step": 1039 - }, - { - "epoch": 5.31, - "learning_rate": 1e-05, - "loss": 0.0108, - "step": 1040 - }, - { - "epoch": 5.32, - "learning_rate": 1e-05, - "loss": 0.1134, - "step": 1041 - }, - { - "epoch": 5.32, - "learning_rate": 1e-05, - "loss": 0.0139, - "step": 1042 - }, - { - "epoch": 5.33, - "learning_rate": 1e-05, - "loss": 0.0156, - "step": 1043 - }, - { - "epoch": 5.33, - "learning_rate": 1e-05, - "loss": 0.012, - "step": 1044 - }, - { - "epoch": 5.34, - "learning_rate": 1e-05, - "loss": 0.0542, - "step": 1045 - }, - { - "epoch": 5.34, - "learning_rate": 1e-05, - "loss": 0.0213, - "step": 1046 - }, - { - "epoch": 5.35, - "learning_rate": 1e-05, - "loss": 0.0164, - "step": 1047 - }, - { - "epoch": 5.35, - "learning_rate": 1e-05, - "loss": 0.0002, - "step": 1048 - }, - { - "epoch": 5.36, - "learning_rate": 1e-05, - "loss": 0.065, - "step": 1049 - }, - { - "epoch": 5.36, - "learning_rate": 1e-05, - "loss": 0.0022, - "step": 1050 - }, - { - "epoch": 5.37, - "learning_rate": 1e-05, - "loss": 0.001, - "step": 1051 - }, - { - "epoch": 5.37, - "learning_rate": 1e-05, - "loss": 0.0257, - "step": 1052 - }, - { - "epoch": 5.38, - "learning_rate": 1e-05, - "loss": 0.0008, - "step": 1053 - }, - { - "epoch": 5.38, - "learning_rate": 1e-05, - "loss": 0.0053, - "step": 1054 - }, - { - "epoch": 5.39, - "learning_rate": 1e-05, - "loss": 0.0038, - "step": 1055 - }, - { - "epoch": 5.39, - "learning_rate": 1e-05, - "loss": 0.0274, - "step": 1056 - }, - { - "epoch": 5.4, - "learning_rate": 1e-05, - "loss": 0.0333, - "step": 1057 - }, - { - "epoch": 5.4, - "learning_rate": 1e-05, - "loss": 0.0216, - "step": 1058 - }, - { - "epoch": 5.41, - "learning_rate": 1e-05, - "loss": 0.018, - "step": 1059 - }, - { - "epoch": 5.42, - "learning_rate": 1e-05, - "loss": 0.0134, - "step": 1060 - }, - { - "epoch": 5.42, - "learning_rate": 1e-05, - "loss": 0.0326, - "step": 1061 - }, - { - "epoch": 5.43, - "learning_rate": 1e-05, - "loss": 0.0068, - "step": 1062 - }, - { - "epoch": 5.43, - "learning_rate": 1e-05, - "loss": 0.0107, - "step": 1063 - }, - { - "epoch": 5.44, - "learning_rate": 1e-05, - "loss": 0.0157, - "step": 1064 - }, - { - "epoch": 5.44, - "learning_rate": 1e-05, - "loss": 0.0397, - "step": 1065 - }, - { - "epoch": 5.45, - "learning_rate": 1e-05, - "loss": 0.0205, - "step": 1066 - }, - { - "epoch": 5.45, - "learning_rate": 1e-05, - "loss": 0.0161, - "step": 1067 - }, - { - "epoch": 5.46, - "learning_rate": 1e-05, - "loss": 0.003, - "step": 1068 - }, - { - "epoch": 5.46, - "learning_rate": 1e-05, - "loss": 0.0025, - "step": 1069 - }, - { - "epoch": 5.47, - "learning_rate": 1e-05, - "loss": 0.0079, - "step": 1070 - }, - { - "epoch": 5.47, - "learning_rate": 1e-05, - "loss": 0.0065, - "step": 1071 - }, - { - "epoch": 5.48, - "learning_rate": 1e-05, - "loss": 0.0068, - "step": 1072 - }, - { - "epoch": 5.48, - "learning_rate": 1e-05, - "loss": 0.0526, - "step": 1073 - }, - { - "epoch": 5.49, - "learning_rate": 1e-05, - "loss": 0.009, - "step": 1074 - }, - { - "epoch": 5.49, - "learning_rate": 1e-05, - "loss": 0.0707, - "step": 1075 - }, - { - "epoch": 5.5, - "learning_rate": 1e-05, - "loss": 0.0105, - "step": 1076 - }, - { - "epoch": 5.5, - "learning_rate": 1e-05, - "loss": 0.0069, - "step": 1077 - }, - { - "epoch": 5.51, - "learning_rate": 1e-05, - "loss": 0.0378, - "step": 1078 - }, - { - "epoch": 5.51, - "learning_rate": 1e-05, - "loss": 0.0095, - "step": 1079 - }, - { - "epoch": 5.52, - "learning_rate": 1e-05, - "loss": 0.0309, - "step": 1080 - }, - { - "epoch": 5.52, - "learning_rate": 1e-05, - "loss": 0.0301, - "step": 1081 - }, - { - "epoch": 5.53, - "learning_rate": 1e-05, - "loss": 0.0011, - "step": 1082 - }, - { - "epoch": 5.53, - "learning_rate": 1e-05, - "loss": 0.0093, - "step": 1083 - }, - { - "epoch": 5.54, - "learning_rate": 1e-05, - "loss": 0.005, - "step": 1084 - }, - { - "epoch": 5.54, - "learning_rate": 1e-05, - "loss": 0.0052, - "step": 1085 - }, - { - "epoch": 5.55, - "learning_rate": 1e-05, - "loss": 0.0114, - "step": 1086 - }, - { - "epoch": 5.55, - "learning_rate": 1e-05, - "loss": 0.0049, - "step": 1087 - }, - { - "epoch": 5.56, - "learning_rate": 1e-05, - "loss": 0.0114, - "step": 1088 - }, - { - "epoch": 5.56, - "learning_rate": 1e-05, - "loss": 0.0189, - "step": 1089 - }, - { - "epoch": 5.57, - "learning_rate": 1e-05, - "loss": 0.0809, - "step": 1090 - }, - { - "epoch": 5.57, - "learning_rate": 1e-05, - "loss": 0.0023, - "step": 1091 - }, - { - "epoch": 5.58, - "learning_rate": 1e-05, - "loss": 0.0054, - "step": 1092 - }, - { - "epoch": 5.58, - "learning_rate": 1e-05, - "loss": 0.0166, - "step": 1093 - }, - { - "epoch": 5.59, - "learning_rate": 1e-05, - "loss": 0.0096, - "step": 1094 - }, - { - "epoch": 5.59, - "learning_rate": 1e-05, - "loss": 0.0078, - "step": 1095 - }, - { - "epoch": 5.6, - "learning_rate": 1e-05, - "loss": 0.0093, - "step": 1096 - }, - { - "epoch": 5.6, - "learning_rate": 1e-05, - "loss": 0.0687, - "step": 1097 - }, - { - "epoch": 5.61, - "learning_rate": 1e-05, - "loss": 0.053, - "step": 1098 - }, - { - "epoch": 5.61, - "learning_rate": 1e-05, - "loss": 0.0003, - "step": 1099 - }, - { - "epoch": 5.62, - "learning_rate": 1e-05, - "loss": 0.0097, - "step": 1100 - }, - { - "epoch": 5.62, - "learning_rate": 1e-05, - "loss": 0.0002, - "step": 1101 - }, - { - "epoch": 5.63, - "learning_rate": 1e-05, - "loss": 0.0076, - "step": 1102 - }, - { - "epoch": 5.63, - "learning_rate": 1e-05, - "loss": 0.0293, - "step": 1103 - }, - { - "epoch": 5.64, - "learning_rate": 1e-05, - "loss": 0.0009, - "step": 1104 - }, - { - "epoch": 5.64, - "learning_rate": 1e-05, - "loss": 0.0183, - "step": 1105 - }, - { - "epoch": 5.65, - "learning_rate": 1e-05, - "loss": 0.026, - "step": 1106 - }, - { - "epoch": 5.66, - "learning_rate": 1e-05, - "loss": 0.0249, - "step": 1107 - }, - { - "epoch": 5.66, - "learning_rate": 1e-05, - "loss": 0.0359, - "step": 1108 - }, - { - "epoch": 5.67, - "learning_rate": 1e-05, - "loss": 0.0166, - "step": 1109 - }, - { - "epoch": 5.67, - "learning_rate": 1e-05, - "loss": 0.0103, - "step": 1110 - }, - { - "epoch": 5.68, - "learning_rate": 1e-05, - "loss": 0.0257, - "step": 1111 - }, - { - "epoch": 5.68, - "learning_rate": 1e-05, - "loss": 0.0125, - "step": 1112 - }, - { - "epoch": 5.69, - "learning_rate": 1e-05, - "loss": 0.0027, - "step": 1113 - }, - { - "epoch": 5.69, - "learning_rate": 1e-05, - "loss": 0.0081, - "step": 1114 - }, - { - "epoch": 5.7, - "learning_rate": 1e-05, - "loss": 0.0115, - "step": 1115 - }, - { - "epoch": 5.7, - "learning_rate": 1e-05, - "loss": 0.0042, - "step": 1116 - }, - { - "epoch": 5.71, - "learning_rate": 1e-05, - "loss": 0.0182, - "step": 1117 - }, - { - "epoch": 5.71, - "learning_rate": 1e-05, - "loss": 0.0012, - "step": 1118 - }, - { - "epoch": 5.72, - "learning_rate": 1e-05, - "loss": 0.0016, - "step": 1119 - }, - { - "epoch": 5.72, - "learning_rate": 1e-05, - "loss": 0.0187, - "step": 1120 - }, - { - "epoch": 5.73, - "learning_rate": 1e-05, - "loss": 0.0342, - "step": 1121 - }, - { - "epoch": 5.73, - "learning_rate": 1e-05, - "loss": 0.0063, - "step": 1122 - }, - { - "epoch": 5.74, - "learning_rate": 1e-05, - "loss": 0.0022, - "step": 1123 - }, - { - "epoch": 5.74, - "learning_rate": 1e-05, - "loss": 0.057, - "step": 1124 - }, - { - "epoch": 5.75, - "learning_rate": 1e-05, - "loss": 0.0028, - "step": 1125 - }, - { - "epoch": 5.75, - "learning_rate": 1e-05, - "loss": 0.0089, - "step": 1126 - }, - { - "epoch": 5.76, - "learning_rate": 1e-05, - "loss": 0.0816, - "step": 1127 - }, - { - "epoch": 5.76, - "learning_rate": 1e-05, - "loss": 0.0709, - "step": 1128 - }, - { - "epoch": 5.77, - "learning_rate": 1e-05, - "loss": 0.0879, - "step": 1129 - }, - { - "epoch": 5.77, - "learning_rate": 1e-05, - "loss": 0.0135, - "step": 1130 - }, - { - "epoch": 5.78, - "learning_rate": 1e-05, - "loss": 0.0312, - "step": 1131 - }, - { - "epoch": 5.78, - "learning_rate": 1e-05, - "loss": 0.0257, - "step": 1132 - }, - { - "epoch": 5.79, - "learning_rate": 1e-05, - "loss": 0.0102, - "step": 1133 - }, - { - "epoch": 5.79, - "learning_rate": 1e-05, - "loss": 0.0192, - "step": 1134 - }, - { - "epoch": 5.8, - "learning_rate": 1e-05, - "loss": 0.0267, - "step": 1135 - }, - { - "epoch": 5.8, - "learning_rate": 1e-05, - "loss": 0.0117, - "step": 1136 - }, - { - "epoch": 5.81, - "learning_rate": 1e-05, - "loss": 0.0206, - "step": 1137 - }, - { - "epoch": 5.81, - "learning_rate": 1e-05, - "loss": 0.0007, - "step": 1138 - }, - { - "epoch": 5.82, - "learning_rate": 1e-05, - "loss": 0.0033, - "step": 1139 - }, - { - "epoch": 5.82, - "learning_rate": 1e-05, - "loss": 0.0248, - "step": 1140 - }, - { - "epoch": 5.83, - "learning_rate": 1e-05, - "loss": 0.049, - "step": 1141 - }, - { - "epoch": 5.83, - "learning_rate": 1e-05, - "loss": 0.0078, - "step": 1142 - }, - { - "epoch": 5.84, - "learning_rate": 1e-05, - "loss": 0.0063, - "step": 1143 - }, - { - "epoch": 5.84, - "learning_rate": 1e-05, - "loss": 0.0454, - "step": 1144 - }, - { - "epoch": 5.85, - "learning_rate": 1e-05, - "loss": 0.0113, - "step": 1145 - }, - { - "epoch": 5.85, - "learning_rate": 1e-05, - "loss": 0.0806, - "step": 1146 - }, - { - "epoch": 5.86, - "learning_rate": 1e-05, - "loss": 0.0245, - "step": 1147 - }, - { - "epoch": 5.86, - "learning_rate": 1e-05, - "loss": 0.0014, - "step": 1148 - }, - { - "epoch": 5.87, - "learning_rate": 1e-05, - "loss": 0.0049, - "step": 1149 - }, - { - "epoch": 5.87, - "learning_rate": 1e-05, - "loss": 0.0726, - "step": 1150 - }, - { - "epoch": 5.88, - "learning_rate": 1e-05, - "loss": 0.0083, - "step": 1151 - }, - { - "epoch": 5.89, - "learning_rate": 1e-05, - "loss": 0.0581, - "step": 1152 - }, - { - "epoch": 5.89, - "learning_rate": 1e-05, - "loss": 0.0611, - "step": 1153 - }, - { - "epoch": 5.9, - "learning_rate": 1e-05, - "loss": 0.0195, - "step": 1154 - }, - { - "epoch": 5.9, - "learning_rate": 1e-05, - "loss": 0.0024, - "step": 1155 - }, - { - "epoch": 5.91, - "learning_rate": 1e-05, - "loss": 0.0045, - "step": 1156 - }, - { - "epoch": 5.91, - "learning_rate": 1e-05, - "loss": 0.0598, - "step": 1157 - }, - { - "epoch": 5.92, - "learning_rate": 1e-05, - "loss": 0.0032, - "step": 1158 - }, - { - "epoch": 5.92, - "learning_rate": 1e-05, - "loss": 0.0341, - "step": 1159 - }, - { - "epoch": 5.93, - "learning_rate": 1e-05, - "loss": 0.0008, - "step": 1160 - }, - { - "epoch": 5.93, - "learning_rate": 1e-05, - "loss": 0.014, - "step": 1161 - }, - { - "epoch": 5.94, - "learning_rate": 1e-05, - "loss": 0.0188, - "step": 1162 - }, - { - "epoch": 5.94, - "learning_rate": 1e-05, - "loss": 0.006, - "step": 1163 - }, - { - "epoch": 5.95, - "learning_rate": 1e-05, - "loss": 0.0111, - "step": 1164 - }, - { - "epoch": 5.95, - "learning_rate": 1e-05, - "loss": 0.0007, - "step": 1165 - }, - { - "epoch": 5.96, - "learning_rate": 1e-05, - "loss": 0.0481, - "step": 1166 - }, - { - "epoch": 5.96, - "learning_rate": 1e-05, - "loss": 0.0137, - "step": 1167 - }, - { - "epoch": 5.97, - "learning_rate": 1e-05, - "loss": 0.0401, - "step": 1168 - }, - { - "epoch": 5.97, - "learning_rate": 1e-05, - "loss": 0.0785, - "step": 1169 - }, - { - "epoch": 5.98, - "learning_rate": 1e-05, - "loss": 0.0291, - "step": 1170 - }, - { - "epoch": 5.98, - "learning_rate": 1e-05, - "loss": 0.0118, - "step": 1171 - }, - { - "epoch": 5.99, - "learning_rate": 1e-05, - "loss": 0.0136, - "step": 1172 - }, - { - "epoch": 5.99, - "learning_rate": 1e-05, - "loss": 0.0004, - "step": 1173 - }, - { - "epoch": 6.0, - "learning_rate": 1e-05, - "loss": 0.0064, - "step": 1174 - }, - { - "epoch": 6.0, - "learning_rate": 1e-05, - "loss": 0.0364, - "step": 1175 - }, - { - "epoch": 6.01, - "learning_rate": 1e-05, - "loss": 0.0007, - "step": 1176 - }, - { - "epoch": 6.01, - "learning_rate": 1e-05, - "loss": 0.0348, - "step": 1177 - }, - { - "epoch": 6.02, - "learning_rate": 1e-05, - "loss": 0.019, - "step": 1178 - }, - { - "epoch": 6.02, - "learning_rate": 1e-05, - "loss": 0.0054, - "step": 1179 - }, - { - "epoch": 6.03, - "learning_rate": 1e-05, - "loss": 0.0202, - "step": 1180 - }, - { - "epoch": 6.03, - "learning_rate": 1e-05, - "loss": 0.022, - "step": 1181 - }, - { - "epoch": 6.04, - "learning_rate": 1e-05, - "loss": 0.0071, - "step": 1182 - }, - { - "epoch": 6.04, - "learning_rate": 1e-05, - "loss": 0.0112, - "step": 1183 - }, - { - "epoch": 6.05, - "learning_rate": 1e-05, - "loss": 0.0038, - "step": 1184 - }, - { - "epoch": 6.05, - "learning_rate": 1e-05, - "loss": 0.0022, - "step": 1185 - }, - { - "epoch": 6.06, - "learning_rate": 1e-05, - "loss": 0.0076, - "step": 1186 - }, - { - "epoch": 6.06, - "learning_rate": 1e-05, - "loss": 0.0039, - "step": 1187 - }, - { - "epoch": 6.07, - "learning_rate": 1e-05, - "loss": 0.0122, - "step": 1188 - }, - { - "epoch": 6.07, - "learning_rate": 1e-05, - "loss": 0.0058, - "step": 1189 - }, - { - "epoch": 6.08, - "learning_rate": 1e-05, - "loss": 0.0003, - "step": 1190 - }, - { - "epoch": 6.08, - "learning_rate": 1e-05, - "loss": 0.0107, - "step": 1191 - }, - { - "epoch": 6.09, - "learning_rate": 1e-05, - "loss": 0.0152, - "step": 1192 - }, - { - "epoch": 6.09, - "learning_rate": 1e-05, - "loss": 0.0011, - "step": 1193 - }, - { - "epoch": 6.1, - "learning_rate": 1e-05, - "loss": 0.0073, - "step": 1194 - }, - { - "epoch": 6.1, - "learning_rate": 1e-05, - "loss": 0.0155, - "step": 1195 - }, - { - "epoch": 6.11, - "learning_rate": 1e-05, - "loss": 0.0165, - "step": 1196 - }, - { - "epoch": 6.11, - "learning_rate": 1e-05, - "loss": 0.0009, - "step": 1197 - }, - { - "epoch": 6.12, - "learning_rate": 1e-05, - "loss": 0.0039, - "step": 1198 - }, - { - "epoch": 6.13, - "learning_rate": 1e-05, - "loss": 0.0152, - "step": 1199 - }, - { - "epoch": 6.13, - "learning_rate": 1e-05, - "loss": 0.0002, - "step": 1200 - }, - { - "epoch": 6.14, - "learning_rate": 1e-05, - "loss": 0.0251, - "step": 1201 - }, - { - "epoch": 6.14, - "learning_rate": 1e-05, - "loss": 0.0054, - "step": 1202 - }, - { - "epoch": 6.15, - "learning_rate": 1e-05, - "loss": 0.0028, - "step": 1203 - }, - { - "epoch": 6.15, - "learning_rate": 1e-05, - "loss": 0.0093, - "step": 1204 - }, - { - "epoch": 6.16, - "learning_rate": 1e-05, - "loss": 0.0586, - "step": 1205 - }, - { - "epoch": 6.16, - "learning_rate": 1e-05, - "loss": 0.0299, - "step": 1206 - }, - { - "epoch": 6.17, - "learning_rate": 1e-05, - "loss": 0.0046, - "step": 1207 - }, - { - "epoch": 6.17, - "learning_rate": 1e-05, - "loss": 0.0342, - "step": 1208 - }, - { - "epoch": 6.18, - "learning_rate": 1e-05, - "loss": 0.0006, - "step": 1209 - }, - { - "epoch": 6.18, - "learning_rate": 1e-05, - "loss": 0.0197, - "step": 1210 - }, - { - "epoch": 6.19, - "learning_rate": 1e-05, - "loss": 0.021, - "step": 1211 - }, - { - "epoch": 6.19, - "learning_rate": 1e-05, - "loss": 0.02, - "step": 1212 - }, - { - "epoch": 6.2, - "learning_rate": 1e-05, - "loss": 0.0004, - "step": 1213 - }, - { - "epoch": 6.2, - "learning_rate": 1e-05, - "loss": 0.0009, - "step": 1214 - }, - { - "epoch": 6.21, - "learning_rate": 1e-05, - "loss": 0.0209, - "step": 1215 - }, - { - "epoch": 6.21, - "learning_rate": 1e-05, - "loss": 0.0056, - "step": 1216 - }, - { - "epoch": 6.22, - "learning_rate": 1e-05, - "loss": 0.0058, - "step": 1217 - }, - { - "epoch": 6.22, - "learning_rate": 1e-05, - "loss": 0.0082, - "step": 1218 - }, - { - "epoch": 6.23, - "learning_rate": 1e-05, - "loss": 0.003, - "step": 1219 - }, - { - "epoch": 6.23, - "learning_rate": 1e-05, - "loss": 0.0, - "step": 1220 - }, - { - "epoch": 6.24, - "learning_rate": 1e-05, - "loss": 0.0135, - "step": 1221 - }, - { - "epoch": 6.24, - "learning_rate": 1e-05, - "loss": 0.0469, - "step": 1222 - }, - { - "epoch": 6.25, - "learning_rate": 1e-05, - "loss": 0.0295, - "step": 1223 - }, - { - "epoch": 6.25, - "learning_rate": 1e-05, - "loss": 0.0011, - "step": 1224 - }, - { - "epoch": 6.26, - "learning_rate": 1e-05, - "loss": 0.0114, - "step": 1225 - }, - { - "epoch": 6.26, - "learning_rate": 1e-05, - "loss": 0.0084, - "step": 1226 - }, - { - "epoch": 6.27, - "learning_rate": 1e-05, - "loss": 0.0075, - "step": 1227 - }, - { - "epoch": 6.27, - "learning_rate": 1e-05, - "loss": 0.0104, - "step": 1228 - }, - { - "epoch": 6.28, - "learning_rate": 1e-05, - "loss": 0.1115, - "step": 1229 - }, - { - "epoch": 6.28, - "learning_rate": 1e-05, - "loss": 0.0087, - "step": 1230 - }, - { - "epoch": 6.29, - "learning_rate": 1e-05, - "loss": 0.015, - "step": 1231 - }, - { - "epoch": 6.29, - "learning_rate": 1e-05, - "loss": 0.0154, - "step": 1232 - }, - { - "epoch": 6.3, - "learning_rate": 1e-05, - "loss": 0.0923, - "step": 1233 - }, - { - "epoch": 6.3, - "learning_rate": 1e-05, - "loss": 0.0127, - "step": 1234 - }, - { - "epoch": 6.31, - "learning_rate": 1e-05, - "loss": 0.0448, - "step": 1235 - }, - { - "epoch": 6.31, - "learning_rate": 1e-05, - "loss": 0.0694, - "step": 1236 - }, - { - "epoch": 6.32, - "learning_rate": 1e-05, - "loss": 0.0096, - "step": 1237 - }, - { - "epoch": 6.32, - "learning_rate": 1e-05, - "loss": 0.0178, - "step": 1238 - }, - { - "epoch": 6.33, - "learning_rate": 1e-05, - "loss": 0.0054, - "step": 1239 - }, - { - "epoch": 6.33, - "learning_rate": 1e-05, - "loss": 0.0009, - "step": 1240 - }, - { - "epoch": 6.34, - "learning_rate": 1e-05, - "loss": 0.0699, - "step": 1241 - }, - { - "epoch": 6.34, - "learning_rate": 1e-05, - "loss": 0.005, - "step": 1242 - }, - { - "epoch": 6.35, - "learning_rate": 1e-05, - "loss": 0.0028, - "step": 1243 - }, - { - "epoch": 6.36, - "learning_rate": 1e-05, - "loss": 0.0003, - "step": 1244 - }, - { - "epoch": 6.36, - "learning_rate": 1e-05, - "loss": 0.0103, - "step": 1245 - }, - { - "epoch": 6.37, - "learning_rate": 1e-05, - "loss": 0.0163, - "step": 1246 - }, - { - "epoch": 6.37, - "learning_rate": 1e-05, - "loss": 0.0098, - "step": 1247 - }, - { - "epoch": 6.38, - "learning_rate": 1e-05, - "loss": 0.0124, - "step": 1248 - }, - { - "epoch": 6.38, - "learning_rate": 1e-05, - "loss": 0.0053, - "step": 1249 - }, - { - "epoch": 6.39, - "learning_rate": 1e-05, - "loss": 0.0127, - "step": 1250 - }, - { - "epoch": 6.39, - "learning_rate": 1e-05, - "loss": 0.0122, - "step": 1251 - }, - { - "epoch": 6.4, - "learning_rate": 1e-05, - "loss": 0.0092, - "step": 1252 - }, - { - "epoch": 6.4, - "learning_rate": 1e-05, - "loss": 0.0006, - "step": 1253 - }, - { - "epoch": 6.41, - "learning_rate": 1e-05, - "loss": 0.028, - "step": 1254 - }, - { - "epoch": 6.41, - "learning_rate": 1e-05, - "loss": 0.1754, - "step": 1255 - }, - { - "epoch": 6.42, - "learning_rate": 1e-05, - "loss": 0.0275, - "step": 1256 - }, - { - "epoch": 6.42, - "learning_rate": 1e-05, - "loss": 0.0006, - "step": 1257 - }, - { - "epoch": 6.43, - "learning_rate": 1e-05, - "loss": 0.0048, - "step": 1258 - }, - { - "epoch": 6.43, - "learning_rate": 1e-05, - "loss": 0.0122, - "step": 1259 - }, - { - "epoch": 6.44, - "learning_rate": 1e-05, - "loss": 0.0232, - "step": 1260 - }, - { - "epoch": 6.44, - "learning_rate": 1e-05, - "loss": 0.0046, - "step": 1261 - }, - { - "epoch": 6.45, - "learning_rate": 1e-05, - "loss": 0.0045, - "step": 1262 - }, - { - "epoch": 6.45, - "learning_rate": 1e-05, - "loss": 0.01, - "step": 1263 - }, - { - "epoch": 6.46, - "learning_rate": 1e-05, - "loss": 0.0002, - "step": 1264 - }, - { - "epoch": 6.46, - "learning_rate": 1e-05, - "loss": 0.0062, - "step": 1265 - }, - { - "epoch": 6.47, - "learning_rate": 1e-05, - "loss": 0.01, - "step": 1266 - }, - { - "epoch": 6.47, - "learning_rate": 1e-05, - "loss": 0.0104, - "step": 1267 - }, - { - "epoch": 6.48, - "learning_rate": 1e-05, - "loss": 0.014, - "step": 1268 - }, - { - "epoch": 6.48, - "learning_rate": 1e-05, - "loss": 0.0052, - "step": 1269 - }, - { - "epoch": 6.49, - "learning_rate": 1e-05, - "loss": 0.0028, - "step": 1270 - }, - { - "epoch": 6.49, - "learning_rate": 1e-05, - "loss": 0.0053, - "step": 1271 - }, - { - "epoch": 6.5, - "learning_rate": 1e-05, - "loss": 0.0185, - "step": 1272 - }, - { - "epoch": 6.5, - "learning_rate": 1e-05, - "loss": 0.0128, - "step": 1273 - }, - { - "epoch": 6.51, - "learning_rate": 1e-05, - "loss": 0.0225, - "step": 1274 - }, - { - "epoch": 6.51, - "learning_rate": 1e-05, - "loss": 0.0051, - "step": 1275 - }, - { - "epoch": 6.52, - "learning_rate": 1e-05, - "loss": 0.008, - "step": 1276 - }, - { - "epoch": 6.52, - "learning_rate": 1e-05, - "loss": 0.001, - "step": 1277 - }, - { - "epoch": 6.53, - "learning_rate": 1e-05, - "loss": 0.0, - "step": 1278 - }, - { - "epoch": 6.53, - "learning_rate": 1e-05, - "loss": 0.0052, - "step": 1279 - }, - { - "epoch": 6.54, - "learning_rate": 1e-05, - "loss": 0.0021, - "step": 1280 - }, - { - "epoch": 6.54, - "learning_rate": 1e-05, - "loss": 0.0157, - "step": 1281 - }, - { - "epoch": 6.55, - "learning_rate": 1e-05, - "loss": 0.027, - "step": 1282 - }, - { - "epoch": 6.55, - "learning_rate": 1e-05, - "loss": 0.0199, - "step": 1283 - }, - { - "epoch": 6.56, - "learning_rate": 1e-05, - "loss": 0.0232, - "step": 1284 - }, - { - "epoch": 6.56, - "learning_rate": 1e-05, - "loss": 0.0078, - "step": 1285 - }, - { - "epoch": 6.57, - "learning_rate": 1e-05, - "loss": 0.0051, - "step": 1286 - }, - { - "epoch": 6.57, - "learning_rate": 1e-05, - "loss": 0.005, - "step": 1287 - }, - { - "epoch": 6.58, - "learning_rate": 1e-05, - "loss": 0.0005, - "step": 1288 - }, - { - "epoch": 6.58, - "learning_rate": 1e-05, - "loss": 0.0018, - "step": 1289 - }, - { - "epoch": 6.59, - "learning_rate": 1e-05, - "loss": 0.0222, - "step": 1290 - }, - { - "epoch": 6.6, - "learning_rate": 1e-05, - "loss": 0.0001, - "step": 1291 - }, - { - "epoch": 6.6, - "learning_rate": 1e-05, - "loss": 0.0161, - "step": 1292 - }, - { - "epoch": 6.61, - "learning_rate": 1e-05, - "loss": 0.0082, - "step": 1293 - }, - { - "epoch": 6.61, - "learning_rate": 1e-05, - "loss": 0.0554, - "step": 1294 - }, - { - "epoch": 6.62, - "learning_rate": 1e-05, - "loss": 0.0011, - "step": 1295 - }, - { - "epoch": 6.62, - "learning_rate": 1e-05, - "loss": 0.0125, - "step": 1296 - }, - { - "epoch": 6.63, - "learning_rate": 1e-05, - "loss": 0.0047, - "step": 1297 - }, - { - "epoch": 6.63, - "learning_rate": 1e-05, - "loss": 0.0014, - "step": 1298 - }, - { - "epoch": 6.64, - "learning_rate": 1e-05, - "loss": 0.0075, - "step": 1299 - }, - { - "epoch": 6.64, - "learning_rate": 1e-05, - "loss": 0.0247, - "step": 1300 - }, - { - "epoch": 6.65, - "learning_rate": 1e-05, - "loss": 0.0288, - "step": 1301 - }, - { - "epoch": 6.65, - "learning_rate": 1e-05, - "loss": 0.0585, - "step": 1302 - }, - { - "epoch": 6.66, - "learning_rate": 1e-05, - "loss": 0.0001, - "step": 1303 - }, - { - "epoch": 6.66, - "learning_rate": 1e-05, - "loss": 0.0151, - "step": 1304 - }, - { - "epoch": 6.67, - "learning_rate": 1e-05, - "loss": 0.0008, - "step": 1305 - }, - { - "epoch": 6.67, - "learning_rate": 1e-05, - "loss": 0.0378, - "step": 1306 - }, - { - "epoch": 6.68, - "learning_rate": 1e-05, - "loss": 0.0074, - "step": 1307 - }, - { - "epoch": 6.68, - "learning_rate": 1e-05, - "loss": 0.0054, - "step": 1308 - }, - { - "epoch": 6.69, - "learning_rate": 1e-05, - "loss": 0.024, - "step": 1309 - }, - { - "epoch": 6.69, - "learning_rate": 1e-05, - "loss": 0.0049, - "step": 1310 - }, - { - "epoch": 6.7, - "learning_rate": 1e-05, - "loss": 0.0139, - "step": 1311 - }, - { - "epoch": 6.7, - "learning_rate": 1e-05, - "loss": 0.0012, - "step": 1312 - }, - { - "epoch": 6.71, - "learning_rate": 1e-05, - "loss": 0.0228, - "step": 1313 - }, - { - "epoch": 6.71, - "learning_rate": 1e-05, - "loss": 0.0001, - "step": 1314 - }, - { - "epoch": 6.72, - "learning_rate": 1e-05, - "loss": 0.0079, - "step": 1315 - }, - { - "epoch": 6.72, - "learning_rate": 1e-05, - "loss": 0.0036, - "step": 1316 - }, - { - "epoch": 6.73, - "learning_rate": 1e-05, - "loss": 0.0169, - "step": 1317 - }, - { - "epoch": 6.73, - "learning_rate": 1e-05, - "loss": 0.0005, - "step": 1318 - }, - { - "epoch": 6.74, - "learning_rate": 1e-05, - "loss": 0.0075, - "step": 1319 - }, - { - "epoch": 6.74, - "learning_rate": 1e-05, - "loss": 0.0068, - "step": 1320 - }, - { - "epoch": 6.75, - "learning_rate": 1e-05, - "loss": 0.005, - "step": 1321 - }, - { - "epoch": 6.75, - "learning_rate": 1e-05, - "loss": 0.0069, - "step": 1322 - }, - { - "epoch": 6.76, - "learning_rate": 1e-05, - "loss": 0.0179, - "step": 1323 - }, - { - "epoch": 6.76, - "learning_rate": 1e-05, - "loss": 0.05, - "step": 1324 - }, - { - "epoch": 6.77, - "learning_rate": 1e-05, - "loss": 0.0004, - "step": 1325 - }, - { - "epoch": 6.77, - "learning_rate": 1e-05, - "loss": 0.0022, - "step": 1326 - }, - { - "epoch": 6.78, - "learning_rate": 1e-05, - "loss": 0.0018, - "step": 1327 - }, - { - "epoch": 6.78, - "learning_rate": 1e-05, - "loss": 0.0063, - "step": 1328 - }, - { - "epoch": 6.79, - "learning_rate": 1e-05, - "loss": 0.005, - "step": 1329 - }, - { - "epoch": 6.79, - "learning_rate": 1e-05, - "loss": 0.0161, - "step": 1330 - }, - { - "epoch": 6.8, - "learning_rate": 1e-05, - "loss": 0.0095, - "step": 1331 - }, - { - "epoch": 6.8, - "learning_rate": 1e-05, - "loss": 0.009, - "step": 1332 - }, - { - "epoch": 6.81, - "learning_rate": 1e-05, - "loss": 0.1165, - "step": 1333 - }, - { - "epoch": 6.81, - "learning_rate": 1e-05, - "loss": 0.015, - "step": 1334 - }, - { - "epoch": 6.82, - "learning_rate": 1e-05, - "loss": 0.0038, - "step": 1335 - }, - { - "epoch": 6.83, - "learning_rate": 1e-05, - "loss": 0.0077, - "step": 1336 - }, - { - "epoch": 6.83, - "learning_rate": 1e-05, - "loss": 0.014, - "step": 1337 - }, - { - "epoch": 6.84, - "learning_rate": 1e-05, - "loss": 0.0068, - "step": 1338 - }, - { - "epoch": 6.84, - "learning_rate": 1e-05, - "loss": 0.0044, - "step": 1339 - }, - { - "epoch": 6.85, - "learning_rate": 1e-05, - "loss": 0.0039, - "step": 1340 - }, - { - "epoch": 6.85, - "learning_rate": 1e-05, - "loss": 0.0214, - "step": 1341 - }, - { - "epoch": 6.86, - "learning_rate": 1e-05, - "loss": 0.0045, - "step": 1342 - }, - { - "epoch": 6.86, - "learning_rate": 1e-05, - "loss": 0.0249, - "step": 1343 - }, - { - "epoch": 6.87, - "learning_rate": 1e-05, - "loss": 0.0231, - "step": 1344 - }, - { - "epoch": 6.87, - "learning_rate": 1e-05, - "loss": 0.0014, - "step": 1345 - }, - { - "epoch": 6.88, - "learning_rate": 1e-05, - "loss": 0.003, - "step": 1346 - }, - { - "epoch": 6.88, - "learning_rate": 1e-05, - "loss": 0.0067, - "step": 1347 - }, - { - "epoch": 6.89, - "learning_rate": 1e-05, - "loss": 0.0006, - "step": 1348 - }, - { - "epoch": 6.89, - "learning_rate": 1e-05, - "loss": 0.0092, - "step": 1349 - }, - { - "epoch": 6.9, - "learning_rate": 1e-05, - "loss": 0.0342, - "step": 1350 - }, - { - "epoch": 6.9, - "learning_rate": 1e-05, - "loss": 0.005, - "step": 1351 - }, - { - "epoch": 6.91, - "learning_rate": 1e-05, - "loss": 0.0126, - "step": 1352 - }, - { - "epoch": 6.91, - "learning_rate": 1e-05, - "loss": 0.0105, - "step": 1353 - }, - { - "epoch": 6.92, - "learning_rate": 1e-05, - "loss": 0.044, - "step": 1354 - }, - { - "epoch": 6.92, - "learning_rate": 1e-05, - "loss": 0.002, - "step": 1355 - }, - { - "epoch": 6.93, - "learning_rate": 1e-05, - "loss": 0.0113, - "step": 1356 - }, - { - "epoch": 6.93, - "learning_rate": 1e-05, - "loss": 0.0087, - "step": 1357 - }, - { - "epoch": 6.94, - "learning_rate": 1e-05, - "loss": 0.0043, - "step": 1358 - }, - { - "epoch": 6.94, - "learning_rate": 1e-05, - "loss": 0.0155, - "step": 1359 - }, - { - "epoch": 6.95, - "learning_rate": 1e-05, - "loss": 0.0086, - "step": 1360 - }, - { - "epoch": 6.95, - "learning_rate": 1e-05, - "loss": 0.0002, - "step": 1361 - }, - { - "epoch": 6.96, - "learning_rate": 1e-05, - "loss": 0.0281, - "step": 1362 - }, - { - "epoch": 6.96, - "learning_rate": 1e-05, - "loss": 0.0123, - "step": 1363 - }, - { - "epoch": 6.97, - "learning_rate": 1e-05, - "loss": 0.0271, - "step": 1364 - }, - { - "epoch": 6.97, - "learning_rate": 1e-05, - "loss": 0.0395, - "step": 1365 - }, - { - "epoch": 6.98, - "learning_rate": 1e-05, - "loss": 0.0648, - "step": 1366 - }, - { - "epoch": 6.98, - "learning_rate": 1e-05, - "loss": 0.0213, - "step": 1367 - }, - { - "epoch": 6.99, - "learning_rate": 1e-05, - "loss": 0.0336, - "step": 1368 - }, - { - "epoch": 6.99, - "learning_rate": 1e-05, - "loss": 0.0156, - "step": 1369 - }, - { - "epoch": 7.0, - "learning_rate": 1e-05, - "loss": 0.0012, - "step": 1370 - } - ], - "max_steps": 1560, - "num_train_epochs": 8, - "total_flos": 734226001428480.0, - "trial_name": null, - "trial_params": null -}