Training in progress, step 4000
Browse files- model.safetensors +1 -1
- training_log.txt +202 -502
model.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 16060556616
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:5b97b930bed72713201df23e0c478a65f7432d1a963a1e8f39536b8fe9c1d94f
|
| 3 |
size 16060556616
|
training_log.txt
CHANGED
|
@@ -1,506 +1,206 @@
|
|
| 1 |
|
| 2 |
==================================================
|
| 3 |
-
Training started at: 2026-
|
| 4 |
==================================================
|
| 5 |
|
| 6 |
-
[2026-
|
| 7 |
-
[2026-
|
| 8 |
-
[2026-
|
| 9 |
-
[2026-
|
| 10 |
-
[2026-
|
| 11 |
-
[2026-
|
| 12 |
-
[2026-
|
| 13 |
-
[2026-
|
| 14 |
-
[2026-
|
| 15 |
-
[2026-
|
| 16 |
-
[2026-
|
| 17 |
-
[2026-
|
| 18 |
-
[2026-
|
| 19 |
-
[2026-
|
| 20 |
-
[2026-
|
| 21 |
-
[2026-
|
| 22 |
-
[2026-
|
| 23 |
-
[2026-
|
| 24 |
-
[2026-
|
| 25 |
-
[2026-
|
| 26 |
-
[2026-
|
| 27 |
-
[2026-
|
| 28 |
-
[2026-
|
| 29 |
-
[2026-
|
| 30 |
-
[2026-
|
| 31 |
-
[2026-
|
| 32 |
-
[2026-
|
| 33 |
-
[2026-
|
| 34 |
-
[2026-
|
| 35 |
-
[2026-
|
| 36 |
-
[2026-
|
| 37 |
-
[2026-
|
| 38 |
-
[2026-
|
| 39 |
-
[2026-
|
| 40 |
-
[2026-
|
| 41 |
-
[2026-
|
| 42 |
-
[2026-
|
| 43 |
-
[2026-
|
| 44 |
-
[2026-
|
| 45 |
-
[2026-
|
| 46 |
-
[2026-
|
| 47 |
-
[2026-
|
| 48 |
-
[2026-
|
| 49 |
-
[2026-
|
| 50 |
-
[2026-
|
| 51 |
-
[2026-
|
| 52 |
-
[2026-
|
| 53 |
-
[2026-
|
| 54 |
-
[2026-
|
| 55 |
-
[2026-
|
| 56 |
-
[2026-
|
| 57 |
-
[2026-
|
| 58 |
-
[2026-
|
| 59 |
-
[2026-
|
| 60 |
-
[2026-
|
| 61 |
-
[2026-
|
| 62 |
-
[2026-
|
| 63 |
-
[2026-
|
| 64 |
-
[2026-
|
| 65 |
-
[2026-
|
| 66 |
-
[2026-
|
| 67 |
-
[2026-
|
| 68 |
-
[2026-
|
| 69 |
-
[2026-
|
| 70 |
-
[2026-
|
| 71 |
-
[2026-
|
| 72 |
-
[2026-
|
| 73 |
-
[2026-
|
| 74 |
-
[2026-
|
| 75 |
-
[2026-
|
| 76 |
-
[2026-
|
| 77 |
-
[2026-
|
| 78 |
-
[2026-
|
| 79 |
-
[2026-
|
| 80 |
-
[2026-
|
| 81 |
-
[2026-
|
| 82 |
-
[2026-
|
| 83 |
-
[2026-
|
| 84 |
-
[2026-
|
| 85 |
-
[2026-
|
| 86 |
-
[2026-
|
| 87 |
-
[2026-
|
| 88 |
-
[2026-
|
| 89 |
-
[2026-
|
| 90 |
-
[2026-
|
| 91 |
-
[2026-
|
| 92 |
-
[2026-
|
| 93 |
-
[2026-
|
| 94 |
-
[2026-
|
| 95 |
-
[2026-
|
| 96 |
-
[2026-
|
| 97 |
-
[2026-
|
| 98 |
-
[2026-
|
| 99 |
-
[2026-
|
| 100 |
-
[2026-
|
| 101 |
-
[2026-
|
| 102 |
-
[2026-
|
| 103 |
-
[2026-
|
| 104 |
-
[2026-
|
| 105 |
-
[2026-
|
| 106 |
-
[2026-
|
| 107 |
-
[2026-
|
| 108 |
-
[2026-
|
| 109 |
-
[2026-
|
| 110 |
-
[2026-
|
| 111 |
-
[2026-
|
| 112 |
-
[2026-
|
| 113 |
-
[2026-
|
| 114 |
-
[2026-
|
| 115 |
-
[2026-
|
| 116 |
-
[2026-
|
| 117 |
-
[2026-
|
| 118 |
-
[2026-
|
| 119 |
-
[2026-
|
| 120 |
-
[2026-
|
| 121 |
-
[2026-
|
| 122 |
-
[2026-
|
| 123 |
-
[2026-
|
| 124 |
-
[2026-
|
| 125 |
-
[2026-
|
| 126 |
-
[2026-
|
| 127 |
-
[2026-
|
| 128 |
-
[2026-
|
| 129 |
-
[2026-
|
| 130 |
-
[2026-
|
| 131 |
-
[2026-
|
| 132 |
-
[2026-
|
| 133 |
-
[2026-
|
| 134 |
-
[2026-
|
| 135 |
-
[2026-
|
| 136 |
-
[2026-
|
| 137 |
-
[2026-
|
| 138 |
-
[2026-
|
| 139 |
-
[2026-
|
| 140 |
-
[2026-
|
| 141 |
-
[2026-
|
| 142 |
-
[2026-
|
| 143 |
-
[2026-
|
| 144 |
-
[2026-
|
| 145 |
-
[2026-
|
| 146 |
-
[2026-
|
| 147 |
-
[2026-
|
| 148 |
-
[2026-
|
| 149 |
-
[2026-
|
| 150 |
-
[2026-
|
| 151 |
-
[2026-
|
| 152 |
-
[2026-
|
| 153 |
-
[2026-
|
| 154 |
-
[2026-
|
| 155 |
-
[2026-
|
| 156 |
-
[2026-
|
| 157 |
-
[2026-
|
| 158 |
-
[2026-
|
| 159 |
-
[2026-
|
| 160 |
-
[2026-
|
| 161 |
-
[2026-
|
| 162 |
-
[2026-
|
| 163 |
-
[2026-
|
| 164 |
-
[2026-
|
| 165 |
-
[2026-
|
| 166 |
-
[2026-
|
| 167 |
-
[2026-
|
| 168 |
-
[2026-
|
| 169 |
-
[2026-
|
| 170 |
-
[2026-
|
| 171 |
-
[2026-
|
| 172 |
-
[2026-
|
| 173 |
-
[2026-
|
| 174 |
-
[2026-
|
| 175 |
-
[2026-
|
| 176 |
-
[2026-
|
| 177 |
-
[2026-
|
| 178 |
-
[2026-
|
| 179 |
-
[2026-
|
| 180 |
-
[2026-
|
| 181 |
-
[2026-
|
| 182 |
-
[2026-
|
| 183 |
-
[2026-
|
| 184 |
-
[2026-
|
| 185 |
-
[2026-
|
| 186 |
-
[2026-
|
| 187 |
-
[2026-
|
| 188 |
-
[2026-
|
| 189 |
-
[2026-
|
| 190 |
-
[2026-
|
| 191 |
-
[2026-
|
| 192 |
-
[2026-
|
| 193 |
-
[2026-
|
| 194 |
-
[2026-
|
| 195 |
-
[2026-
|
| 196 |
-
[2026-
|
| 197 |
-
[2026-
|
| 198 |
-
[2026-
|
| 199 |
-
[2026-
|
| 200 |
-
[2026-
|
| 201 |
-
[2026-
|
| 202 |
-
[2026-
|
| 203 |
-
[2026-
|
| 204 |
-
[2026-
|
| 205 |
-
[2026-
|
| 206 |
-
[2026-
|
| 207 |
-
[2026-03-31 13:01:03] Step 2010: loss: 1.2657, grad_norm: 0.4238, learning_rate: 0.0000, epoch: 0.0100
|
| 208 |
-
[2026-03-31 13:02:28] Step 2020: loss: 1.0027, grad_norm: 0.5195, learning_rate: 0.0000, epoch: 0.0101
|
| 209 |
-
[2026-03-31 13:03:51] Step 2030: loss: 1.1940, grad_norm: 0.3379, learning_rate: 0.0000, epoch: 0.0101
|
| 210 |
-
[2026-03-31 13:05:15] Step 2040: loss: 1.4490, grad_norm: 0.3066, learning_rate: 0.0000, epoch: 0.0102
|
| 211 |
-
[2026-03-31 13:06:40] Step 2050: loss: 1.1737, grad_norm: 0.4531, learning_rate: 0.0000, epoch: 0.0103
|
| 212 |
-
[2026-03-31 13:08:05] Step 2060: loss: 1.1079, grad_norm: 0.5000, learning_rate: 0.0000, epoch: 0.0103
|
| 213 |
-
[2026-03-31 13:09:29] Step 2070: loss: 1.2216, grad_norm: 0.3008, learning_rate: 0.0000, epoch: 0.0103
|
| 214 |
-
[2026-03-31 13:10:53] Step 2080: loss: 1.1185, grad_norm: 0.2559, learning_rate: 0.0000, epoch: 0.0104
|
| 215 |
-
[2026-03-31 13:12:17] Step 2090: loss: 1.3267, grad_norm: 0.5820, learning_rate: 0.0000, epoch: 0.0104
|
| 216 |
-
[2026-03-31 13:13:42] Step 2100: loss: 1.5635, grad_norm: 0.7656, learning_rate: 0.0000, epoch: 0.0105
|
| 217 |
-
[2026-03-31 13:15:05] Step 2110: loss: 1.0361, grad_norm: 0.2969, learning_rate: 0.0000, epoch: 0.0106
|
| 218 |
-
[2026-03-31 13:16:29] Step 2120: loss: 1.0532, grad_norm: 1.3906, learning_rate: 0.0000, epoch: 0.0106
|
| 219 |
-
[2026-03-31 13:17:54] Step 2130: loss: 1.0644, grad_norm: 0.2324, learning_rate: 0.0000, epoch: 0.0106
|
| 220 |
-
[2026-03-31 13:19:18] Step 2140: loss: 1.0322, grad_norm: 0.2520, learning_rate: 0.0000, epoch: 0.0107
|
| 221 |
-
[2026-03-31 13:20:42] Step 2150: loss: 1.4070, grad_norm: 0.4805, learning_rate: 0.0000, epoch: 0.0107
|
| 222 |
-
[2026-03-31 13:22:07] Step 2160: loss: 1.3160, grad_norm: 1.1016, learning_rate: 0.0000, epoch: 0.0108
|
| 223 |
-
[2026-03-31 13:23:31] Step 2170: loss: 1.1469, grad_norm: 0.3711, learning_rate: 0.0000, epoch: 0.0109
|
| 224 |
-
[2026-03-31 13:24:55] Step 2180: loss: 1.0909, grad_norm: 0.2793, learning_rate: 0.0000, epoch: 0.0109
|
| 225 |
-
[2026-03-31 13:26:20] Step 2190: loss: 0.9834, grad_norm: 0.2969, learning_rate: 0.0000, epoch: 0.0109
|
| 226 |
-
[2026-03-31 13:27:44] Step 2200: loss: 1.1768, grad_norm: 0.3359, learning_rate: 0.0000, epoch: 0.0110
|
| 227 |
-
[2026-03-31 13:29:08] Step 2210: loss: 1.0558, grad_norm: 0.4531, learning_rate: 0.0000, epoch: 0.0111
|
| 228 |
-
[2026-03-31 13:30:33] Step 2220: loss: 0.9522, grad_norm: 0.2500, learning_rate: 0.0000, epoch: 0.0111
|
| 229 |
-
[2026-03-31 13:31:57] Step 2230: loss: 0.8136, grad_norm: 0.3008, learning_rate: 0.0000, epoch: 0.0112
|
| 230 |
-
[2026-03-31 13:33:21] Step 2240: loss: 1.0795, grad_norm: 0.4082, learning_rate: 0.0000, epoch: 0.0112
|
| 231 |
-
[2026-03-31 13:34:45] Step 2250: loss: 0.9262, grad_norm: 1.2500, learning_rate: 0.0000, epoch: 0.0112
|
| 232 |
-
[2026-03-31 13:36:10] Step 2260: loss: 0.9367, grad_norm: 0.2480, learning_rate: 0.0000, epoch: 0.0113
|
| 233 |
-
[2026-03-31 13:37:34] Step 2270: loss: 0.9101, grad_norm: 0.2178, learning_rate: 0.0000, epoch: 0.0114
|
| 234 |
-
[2026-03-31 13:38:58] Step 2280: loss: 0.9367, grad_norm: 0.2305, learning_rate: 0.0000, epoch: 0.0114
|
| 235 |
-
[2026-03-31 13:40:22] Step 2290: loss: 0.9764, grad_norm: 0.2383, learning_rate: 0.0000, epoch: 0.0115
|
| 236 |
-
[2026-03-31 13:41:46] Step 2300: loss: 0.9531, grad_norm: 0.2256, learning_rate: 0.0000, epoch: 0.0115
|
| 237 |
-
[2026-03-31 13:43:11] Step 2310: loss: 0.9336, grad_norm: 0.2266, learning_rate: 0.0000, epoch: 0.0115
|
| 238 |
-
[2026-03-31 13:44:35] Step 2320: loss: 1.0024, grad_norm: 0.2451, learning_rate: 0.0000, epoch: 0.0116
|
| 239 |
-
[2026-03-31 13:45:59] Step 2330: loss: 1.1485, grad_norm: 3.6719, learning_rate: 0.0000, epoch: 0.0117
|
| 240 |
-
[2026-03-31 13:47:23] Step 2340: loss: 0.9640, grad_norm: 0.3320, learning_rate: 0.0000, epoch: 0.0117
|
| 241 |
-
[2026-03-31 13:48:47] Step 2350: loss: 1.0453, grad_norm: 0.7852, learning_rate: 0.0000, epoch: 0.0118
|
| 242 |
-
[2026-03-31 13:50:11] Step 2360: loss: 0.9072, grad_norm: 0.2910, learning_rate: 0.0000, epoch: 0.0118
|
| 243 |
-
[2026-03-31 13:51:36] Step 2370: loss: 0.8404, grad_norm: 0.1699, learning_rate: 0.0000, epoch: 0.0118
|
| 244 |
-
[2026-03-31 13:53:00] Step 2380: loss: 0.9135, grad_norm: 0.2695, learning_rate: 0.0000, epoch: 0.0119
|
| 245 |
-
[2026-03-31 13:54:24] Step 2390: loss: 0.9864, grad_norm: 0.2578, learning_rate: 0.0000, epoch: 0.0120
|
| 246 |
-
[2026-03-31 13:55:48] Step 2400: loss: 0.8803, grad_norm: 0.2578, learning_rate: 0.0000, epoch: 0.0120
|
| 247 |
-
[2026-03-31 13:57:12] Step 2410: loss: 0.8425, grad_norm: 0.2734, learning_rate: 0.0000, epoch: 0.0120
|
| 248 |
-
[2026-03-31 13:58:37] Step 2420: loss: 0.9706, grad_norm: 0.3008, learning_rate: 0.0000, epoch: 0.0121
|
| 249 |
-
[2026-03-31 14:00:01] Step 2430: loss: 0.8751, grad_norm: 0.2461, learning_rate: 0.0000, epoch: 0.0121
|
| 250 |
-
[2026-03-31 14:01:26] Step 2440: loss: 0.9805, grad_norm: 0.9375, learning_rate: 0.0000, epoch: 0.0122
|
| 251 |
-
[2026-03-31 14:02:50] Step 2450: loss: 0.8812, grad_norm: 0.3164, learning_rate: 0.0000, epoch: 0.0123
|
| 252 |
-
[2026-03-31 14:04:14] Step 2460: loss: 0.8169, grad_norm: 0.2637, learning_rate: 0.0000, epoch: 0.0123
|
| 253 |
-
[2026-03-31 14:05:38] Step 2470: loss: 0.9798, grad_norm: 0.2266, learning_rate: 0.0000, epoch: 0.0123
|
| 254 |
-
[2026-03-31 14:07:03] Step 2480: loss: 0.8950, grad_norm: 0.2676, learning_rate: 0.0000, epoch: 0.0124
|
| 255 |
-
[2026-03-31 14:08:27] Step 2490: loss: 0.8163, grad_norm: 0.2432, learning_rate: 0.0000, epoch: 0.0124
|
| 256 |
-
[2026-03-31 14:09:51] Step 2500: loss: 0.7961, grad_norm: 0.1855, learning_rate: 0.0000, epoch: 0.0125
|
| 257 |
-
[2026-03-31 14:11:16] Step 2510: loss: 1.0739, grad_norm: 0.3184, learning_rate: 0.0000, epoch: 0.0126
|
| 258 |
-
[2026-03-31 14:12:40] Step 2520: loss: 0.8903, grad_norm: 0.2139, learning_rate: 0.0000, epoch: 0.0126
|
| 259 |
-
[2026-03-31 14:14:05] Step 2530: loss: 0.9089, grad_norm: 0.2148, learning_rate: 0.0000, epoch: 0.0126
|
| 260 |
-
[2026-03-31 14:15:28] Step 2540: loss: 0.5330, grad_norm: 0.2852, learning_rate: 0.0000, epoch: 0.0127
|
| 261 |
-
[2026-03-31 14:16:52] Step 2550: loss: 1.0004, grad_norm: 0.2324, learning_rate: 0.0000, epoch: 0.0127
|
| 262 |
-
[2026-03-31 14:18:16] Step 2560: loss: 0.9046, grad_norm: 0.2734, learning_rate: 0.0000, epoch: 0.0128
|
| 263 |
-
[2026-03-31 14:19:40] Step 2570: loss: 0.9581, grad_norm: 0.5195, learning_rate: 0.0000, epoch: 0.0129
|
| 264 |
-
[2026-03-31 14:21:05] Step 2580: loss: 0.9202, grad_norm: 0.2598, learning_rate: 0.0000, epoch: 0.0129
|
| 265 |
-
[2026-03-31 14:22:29] Step 2590: loss: 0.9269, grad_norm: 0.3203, learning_rate: 0.0000, epoch: 0.0129
|
| 266 |
-
[2026-03-31 14:23:53] Step 2600: loss: 0.9212, grad_norm: 0.2598, learning_rate: 0.0000, epoch: 0.0130
|
| 267 |
-
[2026-03-31 14:25:17] Step 2610: loss: 0.8384, grad_norm: 0.3203, learning_rate: 0.0000, epoch: 0.0131
|
| 268 |
-
[2026-03-31 14:26:41] Step 2620: loss: 0.7916, grad_norm: 0.1846, learning_rate: 0.0000, epoch: 0.0131
|
| 269 |
-
[2026-03-31 14:28:06] Step 2630: loss: 0.8990, grad_norm: 0.2275, learning_rate: 0.0000, epoch: 0.0132
|
| 270 |
-
[2026-03-31 14:29:30] Step 2640: loss: 0.8468, grad_norm: 0.2334, learning_rate: 0.0000, epoch: 0.0132
|
| 271 |
-
[2026-03-31 14:30:54] Step 2650: loss: 0.9808, grad_norm: 0.2402, learning_rate: 0.0000, epoch: 0.0132
|
| 272 |
-
[2026-03-31 14:32:18] Step 2660: loss: 0.8761, grad_norm: 0.2012, learning_rate: 0.0000, epoch: 0.0133
|
| 273 |
-
[2026-03-31 14:33:42] Step 2670: loss: 0.9009, grad_norm: 1.6328, learning_rate: 0.0000, epoch: 0.0134
|
| 274 |
-
[2026-03-31 14:35:07] Step 2680: loss: 0.8481, grad_norm: 0.2715, learning_rate: 0.0000, epoch: 0.0134
|
| 275 |
-
[2026-03-31 14:36:31] Step 2690: loss: 0.8816, grad_norm: 0.2461, learning_rate: 0.0000, epoch: 0.0135
|
| 276 |
-
[2026-03-31 14:37:55] Step 2700: loss: 1.1318, grad_norm: 0.2090, learning_rate: 0.0000, epoch: 0.0135
|
| 277 |
-
[2026-03-31 14:39:20] Step 2710: loss: 0.9612, grad_norm: 0.2676, learning_rate: 0.0000, epoch: 0.0135
|
| 278 |
-
[2026-03-31 14:40:44] Step 2720: loss: 0.8793, grad_norm: 0.4551, learning_rate: 0.0000, epoch: 0.0136
|
| 279 |
-
[2026-03-31 14:42:09] Step 2730: loss: 0.8833, grad_norm: 0.2324, learning_rate: 0.0000, epoch: 0.0137
|
| 280 |
-
[2026-03-31 14:43:33] Step 2740: loss: 0.9257, grad_norm: 0.3516, learning_rate: 0.0000, epoch: 0.0137
|
| 281 |
-
[2026-03-31 14:44:57] Step 2750: loss: 0.8887, grad_norm: 0.6445, learning_rate: 0.0000, epoch: 0.0138
|
| 282 |
-
[2026-03-31 14:46:22] Step 2760: loss: 0.8589, grad_norm: 2.2812, learning_rate: 0.0000, epoch: 0.0138
|
| 283 |
-
[2026-03-31 14:47:46] Step 2770: loss: 0.8785, grad_norm: 1.1016, learning_rate: 0.0000, epoch: 0.0138
|
| 284 |
-
[2026-03-31 14:49:09] Step 2780: loss: 0.7189, grad_norm: 0.2061, learning_rate: 0.0000, epoch: 0.0139
|
| 285 |
-
[2026-03-31 14:50:34] Step 2790: loss: 0.8549, grad_norm: 0.7891, learning_rate: 0.0000, epoch: 0.0140
|
| 286 |
-
[2026-03-31 14:51:58] Step 2800: loss: 0.9150, grad_norm: 0.2734, learning_rate: 0.0000, epoch: 0.0140
|
| 287 |
-
[2026-03-31 14:53:22] Step 2810: loss: 0.9147, grad_norm: 0.2158, learning_rate: 0.0000, epoch: 0.0140
|
| 288 |
-
[2026-03-31 14:54:46] Step 2820: loss: 0.8828, grad_norm: 0.2012, learning_rate: 0.0000, epoch: 0.0141
|
| 289 |
-
[2026-03-31 14:56:10] Step 2830: loss: 0.9843, grad_norm: 2.1094, learning_rate: 0.0000, epoch: 0.0141
|
| 290 |
-
[2026-03-31 14:57:34] Step 2840: loss: 0.8710, grad_norm: 0.1416, learning_rate: 0.0000, epoch: 0.0142
|
| 291 |
-
[2026-03-31 14:58:59] Step 2850: loss: 1.0107, grad_norm: 0.2480, learning_rate: 0.0000, epoch: 0.0143
|
| 292 |
-
[2026-03-31 15:00:23] Step 2860: loss: 0.8816, grad_norm: 0.2363, learning_rate: 0.0000, epoch: 0.0143
|
| 293 |
-
[2026-03-31 15:01:47] Step 2870: loss: 0.8672, grad_norm: 0.2930, learning_rate: 0.0000, epoch: 0.0143
|
| 294 |
-
[2026-03-31 15:03:10] Step 2880: loss: 0.8050, grad_norm: 0.2002, learning_rate: 0.0000, epoch: 0.0144
|
| 295 |
-
[2026-03-31 15:04:34] Step 2890: loss: 1.1084, grad_norm: 0.3613, learning_rate: 0.0000, epoch: 0.0144
|
| 296 |
-
[2026-03-31 15:05:59] Step 2900: loss: 0.9335, grad_norm: 0.4688, learning_rate: 0.0000, epoch: 0.0145
|
| 297 |
-
[2026-03-31 15:07:23] Step 2910: loss: 0.9287, grad_norm: 0.2891, learning_rate: 0.0000, epoch: 0.0146
|
| 298 |
-
[2026-03-31 15:08:47] Step 2920: loss: 0.8159, grad_norm: 1.1719, learning_rate: 0.0000, epoch: 0.0146
|
| 299 |
-
[2026-03-31 15:10:11] Step 2930: loss: 0.9082, grad_norm: 0.1699, learning_rate: 0.0000, epoch: 0.0146
|
| 300 |
-
[2026-03-31 15:11:35] Step 2940: loss: 0.9660, grad_norm: 0.2041, learning_rate: 0.0000, epoch: 0.0147
|
| 301 |
-
[2026-03-31 15:13:00] Step 2950: loss: 0.7883, grad_norm: 0.2676, learning_rate: 0.0000, epoch: 0.0147
|
| 302 |
-
[2026-03-31 15:14:24] Step 2960: loss: 0.8419, grad_norm: 0.1992, learning_rate: 0.0000, epoch: 0.0148
|
| 303 |
-
[2026-03-31 15:15:48] Step 2970: loss: 0.9337, grad_norm: 0.2285, learning_rate: 0.0000, epoch: 0.0149
|
| 304 |
-
[2026-03-31 15:17:13] Step 2980: loss: 0.8750, grad_norm: 0.1953, learning_rate: 0.0000, epoch: 0.0149
|
| 305 |
-
[2026-03-31 15:18:37] Step 2990: loss: 0.8789, grad_norm: 0.2168, learning_rate: 0.0000, epoch: 0.0149
|
| 306 |
-
[2026-03-31 15:20:01] Step 3000: loss: 0.9561, grad_norm: 0.1797, learning_rate: 0.0000, epoch: 0.0150
|
| 307 |
-
[2026-03-31 15:21:25] Step 3010: loss: 0.9642, grad_norm: 0.3496, learning_rate: 0.0000, epoch: 0.0150
|
| 308 |
-
[2026-03-31 15:22:49] Step 3020: loss: 1.0347, grad_norm: 0.2656, learning_rate: 0.0000, epoch: 0.0151
|
| 309 |
-
[2026-03-31 15:24:13] Step 3030: loss: 0.9259, grad_norm: 0.1924, learning_rate: 0.0000, epoch: 0.0152
|
| 310 |
-
[2026-03-31 15:25:37] Step 3040: loss: 0.9136, grad_norm: 0.2285, learning_rate: 0.0000, epoch: 0.0152
|
| 311 |
-
[2026-03-31 15:27:02] Step 3050: loss: 0.9276, grad_norm: 0.2598, learning_rate: 0.0000, epoch: 0.0152
|
| 312 |
-
[2026-03-31 15:28:26] Step 3060: loss: 0.8269, grad_norm: 0.1494, learning_rate: 0.0000, epoch: 0.0153
|
| 313 |
-
[2026-03-31 15:29:50] Step 3070: loss: 0.9138, grad_norm: 0.2109, learning_rate: 0.0000, epoch: 0.0154
|
| 314 |
-
[2026-03-31 15:31:14] Step 3080: loss: 1.0753, grad_norm: 0.2275, learning_rate: 0.0000, epoch: 0.0154
|
| 315 |
-
[2026-03-31 15:32:38] Step 3090: loss: 0.9903, grad_norm: 0.2227, learning_rate: 0.0000, epoch: 0.0155
|
| 316 |
-
[2026-03-31 15:34:03] Step 3100: loss: 0.9186, grad_norm: 0.2617, learning_rate: 0.0000, epoch: 0.0155
|
| 317 |
-
[2026-03-31 15:35:27] Step 3110: loss: 0.9422, grad_norm: 0.1680, learning_rate: 0.0000, epoch: 0.0155
|
| 318 |
-
[2026-03-31 15:36:52] Step 3120: loss: 0.9539, grad_norm: 0.4414, learning_rate: 0.0000, epoch: 0.0156
|
| 319 |
-
[2026-03-31 15:38:16] Step 3130: loss: 0.8632, grad_norm: 0.1807, learning_rate: 0.0000, epoch: 0.0157
|
| 320 |
-
[2026-03-31 15:39:40] Step 3140: loss: 1.0032, grad_norm: 0.2012, learning_rate: 0.0000, epoch: 0.0157
|
| 321 |
-
[2026-03-31 15:41:05] Step 3150: loss: 1.0542, grad_norm: 0.1982, learning_rate: 0.0000, epoch: 0.0158
|
| 322 |
-
[2026-03-31 15:42:30] Step 3160: loss: 1.0788, grad_norm: 0.2715, learning_rate: 0.0000, epoch: 0.0158
|
| 323 |
-
[2026-03-31 15:43:55] Step 3170: loss: 0.9620, grad_norm: 0.1738, learning_rate: 0.0000, epoch: 0.0158
|
| 324 |
-
[2026-03-31 15:45:19] Step 3180: loss: 0.9879, grad_norm: 0.1670, learning_rate: 0.0000, epoch: 0.0159
|
| 325 |
-
[2026-03-31 15:46:44] Step 3190: loss: 0.8974, grad_norm: 0.2119, learning_rate: 0.0000, epoch: 0.0159
|
| 326 |
-
[2026-03-31 15:48:09] Step 3200: loss: 0.8947, grad_norm: 0.5977, learning_rate: 0.0000, epoch: 0.0160
|
| 327 |
-
[2026-03-31 15:49:33] Step 3210: loss: 0.7498, grad_norm: 0.2793, learning_rate: 0.0000, epoch: 0.0160
|
| 328 |
-
[2026-03-31 15:50:58] Step 3220: loss: 0.8955, grad_norm: 0.1680, learning_rate: 0.0000, epoch: 0.0161
|
| 329 |
-
[2026-03-31 15:52:23] Step 3230: loss: 0.9516, grad_norm: 0.2471, learning_rate: 0.0000, epoch: 0.0162
|
| 330 |
-
[2026-03-31 15:53:47] Step 3240: loss: 0.8910, grad_norm: 0.2002, learning_rate: 0.0000, epoch: 0.0162
|
| 331 |
-
[2026-03-31 15:55:12] Step 3250: loss: 0.8762, grad_norm: 0.2207, learning_rate: 0.0000, epoch: 0.0163
|
| 332 |
-
[2026-03-31 15:56:35] Step 3260: loss: 0.9899, grad_norm: 0.4258, learning_rate: 0.0000, epoch: 0.0163
|
| 333 |
-
[2026-03-31 15:58:00] Step 3270: loss: 0.8093, grad_norm: 0.2598, learning_rate: 0.0000, epoch: 0.0163
|
| 334 |
-
[2026-03-31 15:59:25] Step 3280: loss: 0.8794, grad_norm: 0.3125, learning_rate: 0.0000, epoch: 0.0164
|
| 335 |
-
[2026-03-31 16:00:49] Step 3290: loss: 0.8551, grad_norm: 0.1768, learning_rate: 0.0000, epoch: 0.0164
|
| 336 |
-
[2026-03-31 16:02:13] Step 3300: loss: 1.1321, grad_norm: 0.1748, learning_rate: 0.0000, epoch: 0.0165
|
| 337 |
-
[2026-03-31 16:03:38] Step 3310: loss: 0.9119, grad_norm: 0.3984, learning_rate: 0.0000, epoch: 0.0165
|
| 338 |
-
[2026-03-31 16:05:03] Step 3320: loss: 0.8985, grad_norm: 0.1787, learning_rate: 0.0000, epoch: 0.0166
|
| 339 |
-
[2026-03-31 16:06:27] Step 3330: loss: 0.8375, grad_norm: 0.1631, learning_rate: 0.0000, epoch: 0.0167
|
| 340 |
-
[2026-03-31 16:07:52] Step 3340: loss: 0.9068, grad_norm: 0.2432, learning_rate: 0.0000, epoch: 0.0167
|
| 341 |
-
[2026-03-31 16:09:17] Step 3350: loss: 0.9024, grad_norm: 0.1885, learning_rate: 0.0000, epoch: 0.0168
|
| 342 |
-
[2026-03-31 16:10:41] Step 3360: loss: 0.8532, grad_norm: 0.1748, learning_rate: 0.0000, epoch: 0.0168
|
| 343 |
-
[2026-03-31 16:12:06] Step 3370: loss: 0.8744, grad_norm: 0.1875, learning_rate: 0.0000, epoch: 0.0169
|
| 344 |
-
[2026-03-31 16:13:31] Step 3380: loss: 0.7658, grad_norm: 0.1494, learning_rate: 0.0000, epoch: 0.0169
|
| 345 |
-
[2026-03-31 16:14:55] Step 3390: loss: 0.8695, grad_norm: 0.1475, learning_rate: 0.0000, epoch: 0.0169
|
| 346 |
-
[2026-03-31 16:16:20] Step 3400: loss: 0.8754, grad_norm: 0.1816, learning_rate: 0.0000, epoch: 0.0170
|
| 347 |
-
[2026-03-31 16:17:44] Step 3410: loss: 0.9383, grad_norm: 0.2256, learning_rate: 0.0000, epoch: 0.0170
|
| 348 |
-
[2026-03-31 16:19:09] Step 3420: loss: 0.9612, grad_norm: 0.2207, learning_rate: 0.0000, epoch: 0.0171
|
| 349 |
-
[2026-03-31 16:20:34] Step 3430: loss: 0.8932, grad_norm: 0.1768, learning_rate: 0.0000, epoch: 0.0171
|
| 350 |
-
[2026-03-31 16:21:58] Step 3440: loss: 0.9117, grad_norm: 0.1924, learning_rate: 0.0000, epoch: 0.0172
|
| 351 |
-
[2026-03-31 16:23:22] Step 3450: loss: 0.9146, grad_norm: 0.1777, learning_rate: 0.0000, epoch: 0.0173
|
| 352 |
-
[2026-03-31 16:24:47] Step 3460: loss: 0.8657, grad_norm: 0.5508, learning_rate: 0.0000, epoch: 0.0173
|
| 353 |
-
[2026-03-31 16:26:12] Step 3470: loss: 0.9556, grad_norm: 0.3516, learning_rate: 0.0000, epoch: 0.0174
|
| 354 |
-
[2026-03-31 16:27:36] Step 3480: loss: 0.9741, grad_norm: 0.3984, learning_rate: 0.0000, epoch: 0.0174
|
| 355 |
-
[2026-03-31 16:29:01] Step 3490: loss: 0.9079, grad_norm: 0.1885, learning_rate: 0.0000, epoch: 0.0175
|
| 356 |
-
[2026-03-31 16:30:25] Step 3500: loss: 0.8970, grad_norm: 0.1787, learning_rate: 0.0000, epoch: 0.0175
|
| 357 |
-
[2026-03-31 16:31:50] Step 3510: loss: 0.8500, grad_norm: 0.5078, learning_rate: 0.0000, epoch: 0.0175
|
| 358 |
-
[2026-03-31 16:33:14] Step 3520: loss: 0.8806, grad_norm: 0.1582, learning_rate: 0.0000, epoch: 0.0176
|
| 359 |
-
[2026-03-31 16:34:38] Step 3530: loss: 0.9168, grad_norm: 0.2461, learning_rate: 0.0000, epoch: 0.0176
|
| 360 |
-
[2026-03-31 16:36:03] Step 3540: loss: 0.9057, grad_norm: 0.1992, learning_rate: 0.0000, epoch: 0.0177
|
| 361 |
-
[2026-03-31 16:37:27] Step 3550: loss: 1.0234, grad_norm: 0.1865, learning_rate: 0.0000, epoch: 0.0177
|
| 362 |
-
[2026-03-31 16:38:51] Step 3560: loss: 0.8210, grad_norm: 0.1729, learning_rate: 0.0000, epoch: 0.0178
|
| 363 |
-
[2026-03-31 16:40:16] Step 3570: loss: 0.8790, grad_norm: 0.1875, learning_rate: 0.0000, epoch: 0.0179
|
| 364 |
-
[2026-03-31 16:41:40] Step 3580: loss: 0.8225, grad_norm: 0.1416, learning_rate: 0.0000, epoch: 0.0179
|
| 365 |
-
[2026-03-31 16:43:05] Step 3590: loss: 0.9980, grad_norm: 0.1670, learning_rate: 0.0000, epoch: 0.0180
|
| 366 |
-
[2026-03-31 16:44:30] Step 3600: loss: 0.8671, grad_norm: 0.2539, learning_rate: 0.0000, epoch: 0.0180
|
| 367 |
-
[2026-03-31 16:45:54] Step 3610: loss: 0.8107, grad_norm: 0.3477, learning_rate: 0.0000, epoch: 0.0181
|
| 368 |
-
[2026-03-31 16:47:18] Step 3620: loss: 0.7576, grad_norm: 0.2559, learning_rate: 0.0000, epoch: 0.0181
|
| 369 |
-
[2026-03-31 16:48:42] Step 3630: loss: 0.9383, grad_norm: 2.0156, learning_rate: 0.0000, epoch: 0.0181
|
| 370 |
-
[2026-03-31 16:50:06] Step 3640: loss: 0.8580, grad_norm: 0.2432, learning_rate: 0.0000, epoch: 0.0182
|
| 371 |
-
[2026-03-31 16:51:30] Step 3650: loss: 0.9007, grad_norm: 0.2988, learning_rate: 0.0000, epoch: 0.0182
|
| 372 |
-
[2026-03-31 16:52:55] Step 3660: loss: 0.8176, grad_norm: 0.2129, learning_rate: 0.0000, epoch: 0.0183
|
| 373 |
-
[2026-03-31 16:54:19] Step 3670: loss: 0.9369, grad_norm: 0.1982, learning_rate: 0.0000, epoch: 0.0184
|
| 374 |
-
[2026-03-31 16:55:43] Step 3680: loss: 1.1911, grad_norm: 0.7070, learning_rate: 0.0000, epoch: 0.0184
|
| 375 |
-
[2026-03-31 16:57:08] Step 3690: loss: 1.0193, grad_norm: 0.2012, learning_rate: 0.0000, epoch: 0.0185
|
| 376 |
-
[2026-03-31 16:58:32] Step 3700: loss: 1.0052, grad_norm: 0.1924, learning_rate: 0.0000, epoch: 0.0185
|
| 377 |
-
[2026-03-31 16:59:57] Step 3710: loss: 1.0500, grad_norm: 0.1768, learning_rate: 0.0000, epoch: 0.0186
|
| 378 |
-
[2026-03-31 17:01:21] Step 3720: loss: 0.9210, grad_norm: 0.2090, learning_rate: 0.0000, epoch: 0.0186
|
| 379 |
-
[2026-03-31 17:02:45] Step 3730: loss: 0.9981, grad_norm: 0.2578, learning_rate: 0.0000, epoch: 0.0186
|
| 380 |
-
[2026-03-31 17:04:10] Step 3740: loss: 0.8967, grad_norm: 0.2188, learning_rate: 0.0000, epoch: 0.0187
|
| 381 |
-
[2026-03-31 17:05:34] Step 3750: loss: 0.8784, grad_norm: 0.1826, learning_rate: 0.0000, epoch: 0.0187
|
| 382 |
-
[2026-03-31 17:06:59] Step 3760: loss: 0.8947, grad_norm: 0.1953, learning_rate: 0.0000, epoch: 0.0188
|
| 383 |
-
[2026-03-31 17:08:23] Step 3770: loss: 0.9104, grad_norm: 0.2559, learning_rate: 0.0000, epoch: 0.0188
|
| 384 |
-
[2026-03-31 17:09:48] Step 3780: loss: 1.0458, grad_norm: 0.2471, learning_rate: 0.0000, epoch: 0.0189
|
| 385 |
-
[2026-03-31 17:11:11] Step 3790: loss: 0.7790, grad_norm: 0.2373, learning_rate: 0.0000, epoch: 0.0190
|
| 386 |
-
[2026-03-31 17:12:36] Step 3800: loss: 0.7284, grad_norm: 0.1992, learning_rate: 0.0000, epoch: 0.0190
|
| 387 |
-
[2026-03-31 17:14:00] Step 3810: loss: 0.9255, grad_norm: 0.1924, learning_rate: 0.0000, epoch: 0.0191
|
| 388 |
-
[2026-03-31 17:15:24] Step 3820: loss: 0.8977, grad_norm: 0.1680, learning_rate: 0.0000, epoch: 0.0191
|
| 389 |
-
[2026-03-31 17:16:49] Step 3830: loss: 0.9002, grad_norm: 0.2490, learning_rate: 0.0000, epoch: 0.0192
|
| 390 |
-
[2026-03-31 17:18:13] Step 3840: loss: 1.0609, grad_norm: 0.2061, learning_rate: 0.0000, epoch: 0.0192
|
| 391 |
-
[2026-03-31 17:19:38] Step 3850: loss: 1.0644, grad_norm: 0.1992, learning_rate: 0.0000, epoch: 0.0192
|
| 392 |
-
[2026-03-31 17:21:02] Step 3860: loss: 0.8270, grad_norm: 0.2197, learning_rate: 0.0000, epoch: 0.0193
|
| 393 |
-
[2026-03-31 17:22:27] Step 3870: loss: 0.9838, grad_norm: 0.1729, learning_rate: 0.0000, epoch: 0.0193
|
| 394 |
-
[2026-03-31 17:23:51] Step 3880: loss: 0.8780, grad_norm: 0.1689, learning_rate: 0.0000, epoch: 0.0194
|
| 395 |
-
[2026-03-31 17:25:15] Step 3890: loss: 0.9309, grad_norm: 0.1611, learning_rate: 0.0000, epoch: 0.0194
|
| 396 |
-
[2026-03-31 17:26:40] Step 3900: loss: 0.8935, grad_norm: 0.2236, learning_rate: 0.0000, epoch: 0.0195
|
| 397 |
-
[2026-03-31 17:28:04] Step 3910: loss: 1.0017, grad_norm: 0.2012, learning_rate: 0.0000, epoch: 0.0196
|
| 398 |
-
[2026-03-31 17:29:29] Step 3920: loss: 0.9719, grad_norm: 0.2158, learning_rate: 0.0000, epoch: 0.0196
|
| 399 |
-
[2026-03-31 17:30:54] Step 3930: loss: 0.8408, grad_norm: 0.1504, learning_rate: 0.0000, epoch: 0.0197
|
| 400 |
-
[2026-03-31 17:32:18] Step 3940: loss: 0.9504, grad_norm: 0.1973, learning_rate: 0.0000, epoch: 0.0197
|
| 401 |
-
[2026-03-31 17:33:42] Step 3950: loss: 1.0454, grad_norm: 0.2129, learning_rate: 0.0000, epoch: 0.0198
|
| 402 |
-
[2026-03-31 17:35:06] Step 3960: loss: 0.9728, grad_norm: 0.2109, learning_rate: 0.0000, epoch: 0.0198
|
| 403 |
-
[2026-03-31 17:36:30] Step 3970: loss: 0.8852, grad_norm: 0.2168, learning_rate: 0.0000, epoch: 0.0198
|
| 404 |
-
[2026-03-31 17:37:55] Step 3980: loss: 0.9254, grad_norm: 0.1914, learning_rate: 0.0000, epoch: 0.0199
|
| 405 |
-
[2026-03-31 17:39:19] Step 3990: loss: 0.8751, grad_norm: 0.2559, learning_rate: 0.0000, epoch: 0.0199
|
| 406 |
-
[2026-03-31 17:40:44] Step 4000: loss: 0.9173, grad_norm: 0.3281, learning_rate: 0.0000, epoch: 0.0200
|
| 407 |
-
[2026-03-31 17:42:08] Step 4010: loss: 0.9351, grad_norm: 0.2520, learning_rate: 0.0000, epoch: 0.0200
|
| 408 |
-
[2026-03-31 17:43:32] Step 4020: loss: 0.8773, grad_norm: 0.1943, learning_rate: 0.0000, epoch: 0.0201
|
| 409 |
-
[2026-03-31 17:44:57] Step 4030: loss: 0.8618, grad_norm: 0.1582, learning_rate: 0.0000, epoch: 0.0202
|
| 410 |
-
[2026-03-31 17:46:21] Step 4040: loss: 0.8500, grad_norm: 0.1719, learning_rate: 0.0000, epoch: 0.0202
|
| 411 |
-
[2026-03-31 17:47:45] Step 4050: loss: 0.8936, grad_norm: 0.2578, learning_rate: 0.0000, epoch: 0.0203
|
| 412 |
-
[2026-03-31 17:49:09] Step 4060: loss: 0.8683, grad_norm: 0.1953, learning_rate: 0.0000, epoch: 0.0203
|
| 413 |
-
[2026-03-31 17:50:34] Step 4070: loss: 0.7832, grad_norm: 0.1328, learning_rate: 0.0000, epoch: 0.0204
|
| 414 |
-
[2026-03-31 17:51:58] Step 4080: loss: 0.6775, grad_norm: 0.2061, learning_rate: 0.0000, epoch: 0.0204
|
| 415 |
-
[2026-03-31 17:53:23] Step 4090: loss: 0.9338, grad_norm: 0.2480, learning_rate: 0.0000, epoch: 0.0204
|
| 416 |
-
[2026-03-31 17:54:47] Step 4100: loss: 0.7835, grad_norm: 0.2520, learning_rate: 0.0000, epoch: 0.0205
|
| 417 |
-
[2026-03-31 17:56:12] Step 4110: loss: 0.8946, grad_norm: 0.1533, learning_rate: 0.0000, epoch: 0.0205
|
| 418 |
-
[2026-03-31 17:57:36] Step 4120: loss: 0.8517, grad_norm: 0.2266, learning_rate: 0.0000, epoch: 0.0206
|
| 419 |
-
[2026-03-31 17:59:01] Step 4130: loss: 0.8826, grad_norm: 0.1631, learning_rate: 0.0000, epoch: 0.0207
|
| 420 |
-
[2026-03-31 18:00:25] Step 4140: loss: 0.8173, grad_norm: 0.2754, learning_rate: 0.0000, epoch: 0.0207
|
| 421 |
-
[2026-03-31 18:01:49] Step 4150: loss: 0.8839, grad_norm: 0.2100, learning_rate: 0.0000, epoch: 0.0208
|
| 422 |
-
[2026-03-31 18:03:14] Step 4160: loss: 0.8888, grad_norm: 0.1475, learning_rate: 0.0000, epoch: 0.0208
|
| 423 |
-
[2026-03-31 18:04:38] Step 4170: loss: 0.9064, grad_norm: 0.1807, learning_rate: 0.0000, epoch: 0.0209
|
| 424 |
-
[2026-03-31 18:06:02] Step 4180: loss: 0.8271, grad_norm: 0.2578, learning_rate: 0.0000, epoch: 0.0209
|
| 425 |
-
[2026-03-31 18:07:27] Step 4190: loss: 0.9239, grad_norm: 0.2021, learning_rate: 0.0000, epoch: 0.0209
|
| 426 |
-
[2026-03-31 18:08:51] Step 4200: loss: 0.9820, grad_norm: 0.2305, learning_rate: 0.0000, epoch: 0.0210
|
| 427 |
-
[2026-03-31 18:10:15] Step 4210: loss: 0.8543, grad_norm: 0.1523, learning_rate: 0.0000, epoch: 0.0210
|
| 428 |
-
[2026-03-31 18:11:39] Step 4220: loss: 0.9517, grad_norm: 0.2793, learning_rate: 0.0000, epoch: 0.0211
|
| 429 |
-
[2026-03-31 18:13:04] Step 4230: loss: 0.8595, grad_norm: 0.2031, learning_rate: 0.0000, epoch: 0.0211
|
| 430 |
-
[2026-03-31 18:14:28] Step 4240: loss: 0.8872, grad_norm: 0.2051, learning_rate: 0.0000, epoch: 0.0212
|
| 431 |
-
[2026-03-31 18:15:53] Step 4250: loss: 0.8650, grad_norm: 0.1289, learning_rate: 0.0000, epoch: 0.0213
|
| 432 |
-
[2026-03-31 18:17:17] Step 4260: loss: 0.9305, grad_norm: 0.2432, learning_rate: 0.0000, epoch: 0.0213
|
| 433 |
-
[2026-03-31 18:18:41] Step 4270: loss: 0.9319, grad_norm: 0.1855, learning_rate: 0.0000, epoch: 0.0214
|
| 434 |
-
[2026-03-31 18:20:05] Step 4280: loss: 0.8637, grad_norm: 0.1885, learning_rate: 0.0000, epoch: 0.0214
|
| 435 |
-
[2026-03-31 18:21:29] Step 4290: loss: 0.8701, grad_norm: 0.2305, learning_rate: 0.0000, epoch: 0.0215
|
| 436 |
-
[2026-03-31 18:22:53] Step 4300: loss: 0.9087, grad_norm: 0.1895, learning_rate: 0.0000, epoch: 0.0215
|
| 437 |
-
[2026-03-31 18:24:17] Step 4310: loss: 0.9215, grad_norm: 0.1738, learning_rate: 0.0000, epoch: 0.0215
|
| 438 |
-
[2026-03-31 18:25:42] Step 4320: loss: 0.9207, grad_norm: 0.2012, learning_rate: 0.0000, epoch: 0.0216
|
| 439 |
-
[2026-03-31 18:27:06] Step 4330: loss: 0.9114, grad_norm: 0.6992, learning_rate: 0.0000, epoch: 0.0216
|
| 440 |
-
[2026-03-31 18:28:30] Step 4340: loss: 0.9899, grad_norm: 0.2041, learning_rate: 0.0000, epoch: 0.0217
|
| 441 |
-
[2026-03-31 18:29:54] Step 4350: loss: 1.0729, grad_norm: 2.7656, learning_rate: 0.0000, epoch: 0.0217
|
| 442 |
-
[2026-03-31 18:31:19] Step 4360: loss: 0.8625, grad_norm: 0.1738, learning_rate: 0.0000, epoch: 0.0218
|
| 443 |
-
[2026-03-31 18:32:43] Step 4370: loss: 0.9157, grad_norm: 0.3574, learning_rate: 0.0000, epoch: 0.0219
|
| 444 |
-
[2026-03-31 18:34:07] Step 4380: loss: 0.8719, grad_norm: 0.1973, learning_rate: 0.0000, epoch: 0.0219
|
| 445 |
-
[2026-03-31 18:35:31] Step 4390: loss: 0.8874, grad_norm: 0.2109, learning_rate: 0.0000, epoch: 0.0220
|
| 446 |
-
[2026-03-31 18:36:55] Step 4400: loss: 0.9309, grad_norm: 0.1826, learning_rate: 0.0000, epoch: 0.0220
|
| 447 |
-
[2026-03-31 18:38:20] Step 4410: loss: 0.8833, grad_norm: 0.2090, learning_rate: 0.0000, epoch: 0.0221
|
| 448 |
-
[2026-03-31 18:39:44] Step 4420: loss: 0.7998, grad_norm: 0.1895, learning_rate: 0.0000, epoch: 0.0221
|
| 449 |
-
[2026-03-31 18:41:08] Step 4430: loss: 0.8777, grad_norm: 0.2158, learning_rate: 0.0000, epoch: 0.0221
|
| 450 |
-
[2026-03-31 18:42:32] Step 4440: loss: 0.8901, grad_norm: 0.4219, learning_rate: 0.0000, epoch: 0.0222
|
| 451 |
-
[2026-03-31 18:43:56] Step 4450: loss: 0.8968, grad_norm: 0.2314, learning_rate: 0.0000, epoch: 0.0222
|
| 452 |
-
[2026-03-31 18:45:20] Step 4460: loss: 0.8684, grad_norm: 0.1738, learning_rate: 0.0000, epoch: 0.0223
|
| 453 |
-
[2026-03-31 18:46:45] Step 4470: loss: 0.8574, grad_norm: 0.1973, learning_rate: 0.0000, epoch: 0.0223
|
| 454 |
-
[2026-03-31 18:48:09] Step 4480: loss: 0.9964, grad_norm: 0.1973, learning_rate: 0.0000, epoch: 0.0224
|
| 455 |
-
[2026-03-31 18:49:33] Step 4490: loss: 0.8622, grad_norm: 0.2188, learning_rate: 0.0000, epoch: 0.0225
|
| 456 |
-
[2026-03-31 18:50:57] Step 4500: loss: 0.7814, grad_norm: 0.2451, learning_rate: 0.0000, epoch: 0.0225
|
| 457 |
-
[2026-03-31 18:52:21] Step 4510: loss: 0.9241, grad_norm: 0.1953, learning_rate: 0.0000, epoch: 0.0226
|
| 458 |
-
[2026-03-31 18:53:46] Step 4520: loss: 0.8458, grad_norm: 0.2051, learning_rate: 0.0000, epoch: 0.0226
|
| 459 |
-
[2026-03-31 18:55:10] Step 4530: loss: 1.0007, grad_norm: 0.2021, learning_rate: 0.0000, epoch: 0.0226
|
| 460 |
-
[2026-03-31 18:56:34] Step 4540: loss: 0.9046, grad_norm: 0.2100, learning_rate: 0.0000, epoch: 0.0227
|
| 461 |
-
[2026-03-31 18:57:58] Step 4550: loss: 0.9815, grad_norm: 0.2236, learning_rate: 0.0000, epoch: 0.0227
|
| 462 |
-
[2026-03-31 18:59:23] Step 4560: loss: 0.9537, grad_norm: 0.2617, learning_rate: 0.0000, epoch: 0.0228
|
| 463 |
-
[2026-03-31 19:00:47] Step 4570: loss: 0.8766, grad_norm: 0.1895, learning_rate: 0.0000, epoch: 0.0228
|
| 464 |
-
[2026-03-31 19:02:11] Step 4580: loss: 1.0590, grad_norm: 0.5586, learning_rate: 0.0000, epoch: 0.0229
|
| 465 |
-
[2026-03-31 19:03:35] Step 4590: loss: 0.8494, grad_norm: 0.2207, learning_rate: 0.0000, epoch: 0.0230
|
| 466 |
-
[2026-03-31 19:04:59] Step 4600: loss: 1.0682, grad_norm: 0.2266, learning_rate: 0.0000, epoch: 0.0230
|
| 467 |
-
[2026-03-31 19:06:23] Step 4610: loss: 1.0588, grad_norm: 0.3145, learning_rate: 0.0000, epoch: 0.0231
|
| 468 |
-
[2026-03-31 19:07:48] Step 4620: loss: 0.8522, grad_norm: 0.2422, learning_rate: 0.0000, epoch: 0.0231
|
| 469 |
-
[2026-03-31 19:09:12] Step 4630: loss: 1.0611, grad_norm: 0.5234, learning_rate: 0.0000, epoch: 0.0232
|
| 470 |
-
[2026-03-31 19:10:36] Step 4640: loss: 0.9557, grad_norm: 0.2402, learning_rate: 0.0000, epoch: 0.0232
|
| 471 |
-
[2026-03-31 19:12:00] Step 4650: loss: 1.0054, grad_norm: 36.2500, learning_rate: 0.0000, epoch: 0.0232
|
| 472 |
-
[2026-03-31 19:13:24] Step 4660: loss: 0.9374, grad_norm: 0.3652, learning_rate: 0.0000, epoch: 0.0233
|
| 473 |
-
[2026-03-31 19:14:49] Step 4670: loss: 1.0201, grad_norm: 0.2441, learning_rate: 0.0000, epoch: 0.0233
|
| 474 |
-
[2026-03-31 19:16:13] Step 4680: loss: 0.9874, grad_norm: 0.2383, learning_rate: 0.0000, epoch: 0.0234
|
| 475 |
-
[2026-03-31 19:17:37] Step 4690: loss: 1.4854, grad_norm: 0.2559, learning_rate: 0.0000, epoch: 0.0234
|
| 476 |
-
[2026-03-31 19:19:01] Step 4700: loss: 0.8645, grad_norm: 0.1934, learning_rate: 0.0000, epoch: 0.0235
|
| 477 |
-
[2026-03-31 19:20:26] Step 4710: loss: 1.0148, grad_norm: 0.2334, learning_rate: 0.0000, epoch: 0.0236
|
| 478 |
-
[2026-03-31 19:21:50] Step 4720: loss: 0.8638, grad_norm: 0.5508, learning_rate: 0.0000, epoch: 0.0236
|
| 479 |
-
[2026-03-31 19:23:14] Step 4730: loss: 0.9692, grad_norm: 0.2539, learning_rate: 0.0000, epoch: 0.0237
|
| 480 |
-
[2026-03-31 19:24:38] Step 4740: loss: 0.9147, grad_norm: 0.7852, learning_rate: 0.0000, epoch: 0.0237
|
| 481 |
-
[2026-03-31 19:26:01] Step 4750: loss: 0.9041, grad_norm: 0.2266, learning_rate: 0.0000, epoch: 0.0238
|
| 482 |
-
[2026-03-31 19:27:25] Step 4760: loss: 0.9570, grad_norm: 0.2441, learning_rate: 0.0000, epoch: 0.0238
|
| 483 |
-
[2026-03-31 19:28:50] Step 4770: loss: 0.9288, grad_norm: 0.2266, learning_rate: 0.0000, epoch: 0.0238
|
| 484 |
-
[2026-03-31 19:30:14] Step 4780: loss: 0.9323, grad_norm: 0.1787, learning_rate: 0.0000, epoch: 0.0239
|
| 485 |
-
[2026-03-31 19:31:39] Step 4790: loss: 0.8816, grad_norm: 1.1172, learning_rate: 0.0000, epoch: 0.0239
|
| 486 |
-
[2026-03-31 19:33:03] Step 4800: loss: 0.9510, grad_norm: 0.2354, learning_rate: 0.0000, epoch: 0.0240
|
| 487 |
-
[2026-03-31 19:34:27] Step 4810: loss: 0.9225, grad_norm: 0.1768, learning_rate: 0.0000, epoch: 0.0240
|
| 488 |
-
[2026-03-31 19:35:51] Step 4820: loss: 0.9849, grad_norm: 0.1523, learning_rate: 0.0000, epoch: 0.0241
|
| 489 |
-
[2026-03-31 19:37:16] Step 4830: loss: 0.8417, grad_norm: 0.1709, learning_rate: 0.0000, epoch: 0.0242
|
| 490 |
-
[2026-03-31 19:38:40] Step 4840: loss: 0.8466, grad_norm: 0.1748, learning_rate: 0.0000, epoch: 0.0242
|
| 491 |
-
[2026-03-31 19:40:04] Step 4850: loss: 0.8813, grad_norm: 0.2949, learning_rate: 0.0000, epoch: 0.0243
|
| 492 |
-
[2026-03-31 19:41:28] Step 4860: loss: 0.9765, grad_norm: 0.1982, learning_rate: 0.0000, epoch: 0.0243
|
| 493 |
-
[2026-03-31 19:42:52] Step 4870: loss: 0.9710, grad_norm: 0.1729, learning_rate: 0.0000, epoch: 0.0244
|
| 494 |
-
[2026-03-31 19:44:16] Step 4880: loss: 0.9264, grad_norm: 0.2129, learning_rate: 0.0000, epoch: 0.0244
|
| 495 |
-
[2026-03-31 19:45:40] Step 4890: loss: 0.7979, grad_norm: 0.1602, learning_rate: 0.0000, epoch: 0.0244
|
| 496 |
-
[2026-03-31 19:47:04] Step 4900: loss: 0.9023, grad_norm: 0.1729, learning_rate: 0.0000, epoch: 0.0245
|
| 497 |
-
[2026-03-31 19:48:28] Step 4910: loss: 0.8968, grad_norm: 1.2891, learning_rate: 0.0000, epoch: 0.0245
|
| 498 |
-
[2026-03-31 19:49:52] Step 4920: loss: 0.7637, grad_norm: 0.2109, learning_rate: 0.0000, epoch: 0.0246
|
| 499 |
-
[2026-03-31 19:51:17] Step 4930: loss: 0.9097, grad_norm: 0.1699, learning_rate: 0.0000, epoch: 0.0246
|
| 500 |
-
[2026-03-31 19:52:41] Step 4940: loss: 0.8691, grad_norm: 0.1914, learning_rate: 0.0000, epoch: 0.0247
|
| 501 |
-
[2026-03-31 19:54:05] Step 4950: loss: 0.7844, grad_norm: 0.2080, learning_rate: 0.0000, epoch: 0.0248
|
| 502 |
-
[2026-03-31 19:55:29] Step 4960: loss: 0.7980, grad_norm: 0.6211, learning_rate: 0.0000, epoch: 0.0248
|
| 503 |
-
[2026-03-31 19:56:53] Step 4970: loss: 0.9153, grad_norm: 0.1699, learning_rate: 0.0000, epoch: 0.0249
|
| 504 |
-
[2026-03-31 19:58:17] Step 4980: loss: 0.9106, grad_norm: 0.2100, learning_rate: 0.0000, epoch: 0.0249
|
| 505 |
-
[2026-03-31 19:59:42] Step 4990: loss: 0.9035, grad_norm: 0.1943, learning_rate: 0.0000, epoch: 0.0249
|
| 506 |
-
[2026-03-31 20:01:06] Step 5000: loss: 0.8836, grad_norm: 0.2734, learning_rate: 0.0000, epoch: 0.0250
|
|
|
|
| 1 |
|
| 2 |
==================================================
|
| 3 |
+
Training started at: 2026-04-04 09:18:44
|
| 4 |
==================================================
|
| 5 |
|
| 6 |
+
[2026-04-04 09:24:56] Step 2010: loss: 2.8399, grad_norm: 1.7500, learning_rate: 0.0000, epoch: 0.0100
|
| 7 |
+
[2026-04-04 09:26:20] Step 2020: loss: 2.5400, grad_norm: 3.1562, learning_rate: 0.0000, epoch: 0.0101
|
| 8 |
+
[2026-04-04 09:27:44] Step 2030: loss: 2.2588, grad_norm: 0.6328, learning_rate: 0.0000, epoch: 0.0101
|
| 9 |
+
[2026-04-04 09:29:08] Step 2040: loss: 2.5586, grad_norm: 2.8906, learning_rate: 0.0000, epoch: 0.0102
|
| 10 |
+
[2026-04-04 09:30:32] Step 2050: loss: 1.3992, grad_norm: 0.3848, learning_rate: 0.0000, epoch: 0.0103
|
| 11 |
+
[2026-04-04 09:31:57] Step 2060: loss: 0.9564, grad_norm: 0.6211, learning_rate: 0.0000, epoch: 0.0103
|
| 12 |
+
[2026-04-04 09:33:21] Step 2070: loss: 1.0082, grad_norm: 0.3125, learning_rate: 0.0000, epoch: 0.0103
|
| 13 |
+
[2026-04-04 09:34:45] Step 2080: loss: 1.0009, grad_norm: 0.3047, learning_rate: 0.0000, epoch: 0.0104
|
| 14 |
+
[2026-04-04 09:36:10] Step 2090: loss: 1.0767, grad_norm: 4.9375, learning_rate: 0.0000, epoch: 0.0104
|
| 15 |
+
[2026-04-04 09:37:34] Step 2100: loss: 1.2326, grad_norm: 7.1562, learning_rate: 0.0000, epoch: 0.0105
|
| 16 |
+
[2026-04-04 09:38:58] Step 2110: loss: 1.0480, grad_norm: 0.3496, learning_rate: 0.0000, epoch: 0.0106
|
| 17 |
+
[2026-04-04 09:40:22] Step 2120: loss: 0.9334, grad_norm: 1.3828, learning_rate: 0.0000, epoch: 0.0106
|
| 18 |
+
[2026-04-04 09:41:46] Step 2130: loss: 0.9686, grad_norm: 0.3398, learning_rate: 0.0000, epoch: 0.0106
|
| 19 |
+
[2026-04-04 09:43:10] Step 2140: loss: 0.9875, grad_norm: 0.2695, learning_rate: 0.0000, epoch: 0.0107
|
| 20 |
+
[2026-04-04 09:44:34] Step 2150: loss: 1.0286, grad_norm: 0.4629, learning_rate: 0.0000, epoch: 0.0107
|
| 21 |
+
[2026-04-04 09:45:59] Step 2160: loss: 0.9684, grad_norm: 0.4922, learning_rate: 0.0000, epoch: 0.0108
|
| 22 |
+
[2026-04-04 09:47:23] Step 2170: loss: 1.0229, grad_norm: 0.3691, learning_rate: 0.0000, epoch: 0.0109
|
| 23 |
+
[2026-04-04 09:48:48] Step 2180: loss: 0.9697, grad_norm: 0.2598, learning_rate: 0.0000, epoch: 0.0109
|
| 24 |
+
[2026-04-04 09:50:12] Step 2190: loss: 0.9916, grad_norm: 0.2852, learning_rate: 0.0000, epoch: 0.0109
|
| 25 |
+
[2026-04-04 09:51:36] Step 2200: loss: 0.7603, grad_norm: 0.3242, learning_rate: 0.0000, epoch: 0.0110
|
| 26 |
+
[2026-04-04 09:53:00] Step 2210: loss: 0.8609, grad_norm: 0.2617, learning_rate: 0.0000, epoch: 0.0111
|
| 27 |
+
[2026-04-04 09:54:24] Step 2220: loss: 0.9794, grad_norm: 0.2363, learning_rate: 0.0000, epoch: 0.0111
|
| 28 |
+
[2026-04-04 09:55:48] Step 2230: loss: 0.8372, grad_norm: 0.2451, learning_rate: 0.0000, epoch: 0.0112
|
| 29 |
+
[2026-04-04 09:57:13] Step 2240: loss: 1.0379, grad_norm: 0.2090, learning_rate: 0.0000, epoch: 0.0112
|
| 30 |
+
[2026-04-04 09:58:37] Step 2250: loss: 0.9140, grad_norm: 0.5703, learning_rate: 0.0000, epoch: 0.0112
|
| 31 |
+
[2026-04-04 10:00:02] Step 2260: loss: 0.9224, grad_norm: 0.2041, learning_rate: 0.0000, epoch: 0.0113
|
| 32 |
+
[2026-04-04 10:01:26] Step 2270: loss: 0.9180, grad_norm: 0.2061, learning_rate: 0.0000, epoch: 0.0114
|
| 33 |
+
[2026-04-04 10:02:50] Step 2280: loss: 0.9332, grad_norm: 0.2324, learning_rate: 0.0000, epoch: 0.0114
|
| 34 |
+
[2026-04-04 10:04:15] Step 2290: loss: 1.0547, grad_norm: 0.2490, learning_rate: 0.0000, epoch: 0.0115
|
| 35 |
+
[2026-04-04 10:05:38] Step 2300: loss: 0.9023, grad_norm: 0.2402, learning_rate: 0.0000, epoch: 0.0115
|
| 36 |
+
[2026-04-04 10:07:03] Step 2310: loss: 0.9282, grad_norm: 0.2471, learning_rate: 0.0000, epoch: 0.0115
|
| 37 |
+
[2026-04-04 10:08:27] Step 2320: loss: 1.0646, grad_norm: 0.3613, learning_rate: 0.0000, epoch: 0.0116
|
| 38 |
+
[2026-04-04 10:09:51] Step 2330: loss: 1.0843, grad_norm: 0.2295, learning_rate: 0.0000, epoch: 0.0117
|
| 39 |
+
[2026-04-04 10:11:15] Step 2340: loss: 0.8962, grad_norm: 0.4238, learning_rate: 0.0000, epoch: 0.0117
|
| 40 |
+
[2026-04-04 10:12:39] Step 2350: loss: 1.1290, grad_norm: 0.2246, learning_rate: 0.0000, epoch: 0.0118
|
| 41 |
+
[2026-04-04 10:14:03] Step 2360: loss: 0.8712, grad_norm: 0.3027, learning_rate: 0.0000, epoch: 0.0118
|
| 42 |
+
[2026-04-04 10:15:28] Step 2370: loss: 0.8248, grad_norm: 0.1875, learning_rate: 0.0000, epoch: 0.0118
|
| 43 |
+
[2026-04-04 10:16:52] Step 2380: loss: 0.8929, grad_norm: 0.2314, learning_rate: 0.0000, epoch: 0.0119
|
| 44 |
+
[2026-04-04 10:18:16] Step 2390: loss: 0.9730, grad_norm: 0.2129, learning_rate: 0.0000, epoch: 0.0120
|
| 45 |
+
[2026-04-04 10:19:40] Step 2400: loss: 0.8794, grad_norm: 0.3340, learning_rate: 0.0000, epoch: 0.0120
|
| 46 |
+
[2026-04-04 10:21:04] Step 2410: loss: 0.8079, grad_norm: 0.2637, learning_rate: 0.0000, epoch: 0.0120
|
| 47 |
+
[2026-04-04 10:22:29] Step 2420: loss: 0.9668, grad_norm: 0.2490, learning_rate: 0.0000, epoch: 0.0121
|
| 48 |
+
[2026-04-04 10:23:53] Step 2430: loss: 0.8787, grad_norm: 0.2969, learning_rate: 0.0000, epoch: 0.0121
|
| 49 |
+
[2026-04-04 10:25:17] Step 2440: loss: 0.8943, grad_norm: 0.2852, learning_rate: 0.0000, epoch: 0.0122
|
| 50 |
+
[2026-04-04 10:26:41] Step 2450: loss: 0.9024, grad_norm: 0.2637, learning_rate: 0.0000, epoch: 0.0123
|
| 51 |
+
[2026-04-04 10:28:05] Step 2460: loss: 0.7944, grad_norm: 0.3672, learning_rate: 0.0000, epoch: 0.0123
|
| 52 |
+
[2026-04-04 10:29:30] Step 2470: loss: 0.9620, grad_norm: 0.2109, learning_rate: 0.0000, epoch: 0.0123
|
| 53 |
+
[2026-04-04 10:30:54] Step 2480: loss: 0.8795, grad_norm: 0.2637, learning_rate: 0.0000, epoch: 0.0124
|
| 54 |
+
[2026-04-04 10:32:19] Step 2490: loss: 0.8035, grad_norm: 0.2295, learning_rate: 0.0000, epoch: 0.0124
|
| 55 |
+
[2026-04-04 10:33:43] Step 2500: loss: 0.7863, grad_norm: 0.3164, learning_rate: 0.0000, epoch: 0.0125
|
| 56 |
+
[2026-04-04 10:35:07] Step 2510: loss: 1.0078, grad_norm: 0.5156, learning_rate: 0.0000, epoch: 0.0126
|
| 57 |
+
[2026-04-04 10:36:32] Step 2520: loss: 0.8982, grad_norm: 0.2559, learning_rate: 0.0000, epoch: 0.0126
|
| 58 |
+
[2026-04-04 10:37:56] Step 2530: loss: 0.9230, grad_norm: 0.2070, learning_rate: 0.0000, epoch: 0.0126
|
| 59 |
+
[2026-04-04 10:39:19] Step 2540: loss: 0.4966, grad_norm: 0.2969, learning_rate: 0.0000, epoch: 0.0127
|
| 60 |
+
[2026-04-04 10:40:43] Step 2550: loss: 0.9865, grad_norm: 0.2305, learning_rate: 0.0000, epoch: 0.0127
|
| 61 |
+
[2026-04-04 10:42:07] Step 2560: loss: 0.8903, grad_norm: 0.2402, learning_rate: 0.0000, epoch: 0.0128
|
| 62 |
+
[2026-04-04 10:43:31] Step 2570: loss: 0.9468, grad_norm: 0.4883, learning_rate: 0.0000, epoch: 0.0129
|
| 63 |
+
[2026-04-04 10:44:56] Step 2580: loss: 0.9083, grad_norm: 0.2490, learning_rate: 0.0000, epoch: 0.0129
|
| 64 |
+
[2026-04-04 10:46:20] Step 2590: loss: 0.9190, grad_norm: 0.2041, learning_rate: 0.0000, epoch: 0.0129
|
| 65 |
+
[2026-04-04 10:47:44] Step 2600: loss: 0.9126, grad_norm: 0.2373, learning_rate: 0.0000, epoch: 0.0130
|
| 66 |
+
[2026-04-04 10:49:09] Step 2610: loss: 0.8419, grad_norm: 0.3125, learning_rate: 0.0000, epoch: 0.0131
|
| 67 |
+
[2026-04-04 10:50:33] Step 2620: loss: 0.7891, grad_norm: 0.1816, learning_rate: 0.0000, epoch: 0.0131
|
| 68 |
+
[2026-04-04 10:51:57] Step 2630: loss: 0.8950, grad_norm: 0.2314, learning_rate: 0.0000, epoch: 0.0132
|
| 69 |
+
[2026-04-04 10:53:21] Step 2640: loss: 0.8428, grad_norm: 0.2539, learning_rate: 0.0000, epoch: 0.0132
|
| 70 |
+
[2026-04-04 10:54:45] Step 2650: loss: 0.9133, grad_norm: 0.2734, learning_rate: 0.0000, epoch: 0.0132
|
| 71 |
+
[2026-04-04 10:56:09] Step 2660: loss: 1.2072, grad_norm: 0.2793, learning_rate: 0.0000, epoch: 0.0133
|
| 72 |
+
[2026-04-04 10:57:34] Step 2670: loss: 0.9179, grad_norm: 1.3750, learning_rate: 0.0000, epoch: 0.0134
|
| 73 |
+
[2026-04-04 10:58:58] Step 2680: loss: 0.8475, grad_norm: 0.2266, learning_rate: 0.0000, epoch: 0.0134
|
| 74 |
+
[2026-04-04 11:00:22] Step 2690: loss: 0.8865, grad_norm: 0.3066, learning_rate: 0.0000, epoch: 0.0135
|
| 75 |
+
[2026-04-04 11:01:47] Step 2700: loss: 1.1335, grad_norm: 0.2578, learning_rate: 0.0000, epoch: 0.0135
|
| 76 |
+
[2026-04-04 11:03:11] Step 2710: loss: 0.8892, grad_norm: 0.3223, learning_rate: 0.0000, epoch: 0.0135
|
| 77 |
+
[2026-04-04 11:04:35] Step 2720: loss: 0.8915, grad_norm: 0.3145, learning_rate: 0.0000, epoch: 0.0136
|
| 78 |
+
[2026-04-04 11:06:00] Step 2730: loss: 0.8971, grad_norm: 0.2383, learning_rate: 0.0000, epoch: 0.0137
|
| 79 |
+
[2026-04-04 11:07:24] Step 2740: loss: 0.9294, grad_norm: 0.2695, learning_rate: 0.0000, epoch: 0.0137
|
| 80 |
+
[2026-04-04 11:08:48] Step 2750: loss: 0.8750, grad_norm: 0.1738, learning_rate: 0.0000, epoch: 0.0138
|
| 81 |
+
[2026-04-04 11:10:12] Step 2760: loss: 0.8538, grad_norm: 0.9219, learning_rate: 0.0000, epoch: 0.0138
|
| 82 |
+
[2026-04-04 11:11:36] Step 2770: loss: 0.8476, grad_norm: 0.2754, learning_rate: 0.0000, epoch: 0.0138
|
| 83 |
+
[2026-04-04 11:13:00] Step 2780: loss: 0.7105, grad_norm: 0.2656, learning_rate: 0.0000, epoch: 0.0139
|
| 84 |
+
[2026-04-04 11:14:24] Step 2790: loss: 0.8698, grad_norm: 0.5781, learning_rate: 0.0000, epoch: 0.0140
|
| 85 |
+
[2026-04-04 11:15:48] Step 2800: loss: 0.9280, grad_norm: 0.2305, learning_rate: 0.0000, epoch: 0.0140
|
| 86 |
+
[2026-04-04 11:17:12] Step 2810: loss: 0.9404, grad_norm: 0.2236, learning_rate: 0.0000, epoch: 0.0140
|
| 87 |
+
[2026-04-04 11:18:36] Step 2820: loss: 0.8950, grad_norm: 0.2188, learning_rate: 0.0000, epoch: 0.0141
|
| 88 |
+
[2026-04-04 11:20:00] Step 2830: loss: 0.9473, grad_norm: 1.7734, learning_rate: 0.0000, epoch: 0.0141
|
| 89 |
+
[2026-04-04 11:21:24] Step 2840: loss: 0.8666, grad_norm: 0.1504, learning_rate: 0.0000, epoch: 0.0142
|
| 90 |
+
[2026-04-04 11:22:48] Step 2850: loss: 0.9285, grad_norm: 0.2275, learning_rate: 0.0000, epoch: 0.0143
|
| 91 |
+
[2026-04-04 11:24:12] Step 2860: loss: 0.8818, grad_norm: 0.2598, learning_rate: 0.0000, epoch: 0.0143
|
| 92 |
+
[2026-04-04 11:25:36] Step 2870: loss: 0.8695, grad_norm: 0.3086, learning_rate: 0.0000, epoch: 0.0143
|
| 93 |
+
[2026-04-04 11:27:00] Step 2880: loss: 0.8156, grad_norm: 0.2432, learning_rate: 0.0000, epoch: 0.0144
|
| 94 |
+
[2026-04-04 11:28:24] Step 2890: loss: 1.0572, grad_norm: 0.2012, learning_rate: 0.0000, epoch: 0.0144
|
| 95 |
+
[2026-04-04 11:29:48] Step 2900: loss: 0.9074, grad_norm: 0.3926, learning_rate: 0.0000, epoch: 0.0145
|
| 96 |
+
[2026-04-04 11:31:12] Step 2910: loss: 0.9228, grad_norm: 0.3008, learning_rate: 0.0000, epoch: 0.0146
|
| 97 |
+
[2026-04-04 11:32:36] Step 2920: loss: 0.7711, grad_norm: 0.2148, learning_rate: 0.0000, epoch: 0.0146
|
| 98 |
+
[2026-04-04 11:33:59] Step 2930: loss: 0.8896, grad_norm: 0.1621, learning_rate: 0.0000, epoch: 0.0146
|
| 99 |
+
[2026-04-04 11:35:24] Step 2940: loss: 0.8782, grad_norm: 0.2031, learning_rate: 0.0000, epoch: 0.0147
|
| 100 |
+
[2026-04-04 11:36:47] Step 2950: loss: 0.8114, grad_norm: 0.3398, learning_rate: 0.0000, epoch: 0.0147
|
| 101 |
+
[2026-04-04 11:38:12] Step 2960: loss: 0.8362, grad_norm: 0.1992, learning_rate: 0.0000, epoch: 0.0148
|
| 102 |
+
[2026-04-04 11:39:36] Step 2970: loss: 0.9280, grad_norm: 0.2402, learning_rate: 0.0000, epoch: 0.0149
|
| 103 |
+
[2026-04-04 11:41:00] Step 2980: loss: 0.8748, grad_norm: 0.1982, learning_rate: 0.0000, epoch: 0.0149
|
| 104 |
+
[2026-04-04 11:42:24] Step 2990: loss: 0.8827, grad_norm: 0.2451, learning_rate: 0.0000, epoch: 0.0149
|
| 105 |
+
[2026-04-04 11:43:48] Step 3000: loss: 0.8786, grad_norm: 0.6719, learning_rate: 0.0000, epoch: 0.0150
|
| 106 |
+
[2026-04-04 11:45:12] Step 3010: loss: 0.9199, grad_norm: 10.9375, learning_rate: 0.0000, epoch: 0.0150
|
| 107 |
+
[2026-04-04 11:46:36] Step 3020: loss: 0.9592, grad_norm: 0.3965, learning_rate: 0.0000, epoch: 0.0151
|
| 108 |
+
[2026-04-04 11:48:00] Step 3030: loss: 0.9399, grad_norm: 0.2031, learning_rate: 0.0000, epoch: 0.0152
|
| 109 |
+
[2026-04-04 11:49:24] Step 3040: loss: 0.9260, grad_norm: 0.2617, learning_rate: 0.0000, epoch: 0.0152
|
| 110 |
+
[2026-04-04 11:50:48] Step 3050: loss: 0.8369, grad_norm: 0.2695, learning_rate: 0.0000, epoch: 0.0152
|
| 111 |
+
[2026-04-04 11:52:13] Step 3060: loss: 0.8429, grad_norm: 0.2285, learning_rate: 0.0000, epoch: 0.0153
|
| 112 |
+
[2026-04-04 11:53:37] Step 3070: loss: 0.9259, grad_norm: 0.2637, learning_rate: 0.0000, epoch: 0.0154
|
| 113 |
+
[2026-04-04 11:55:01] Step 3080: loss: 0.9881, grad_norm: 0.2412, learning_rate: 0.0000, epoch: 0.0154
|
| 114 |
+
[2026-04-04 11:56:25] Step 3090: loss: 0.9997, grad_norm: 0.2207, learning_rate: 0.0000, epoch: 0.0155
|
| 115 |
+
[2026-04-04 11:57:49] Step 3100: loss: 0.8810, grad_norm: 0.4238, learning_rate: 0.0000, epoch: 0.0155
|
| 116 |
+
[2026-04-04 11:59:14] Step 3110: loss: 0.8769, grad_norm: 0.2500, learning_rate: 0.0000, epoch: 0.0155
|
| 117 |
+
[2026-04-04 12:00:39] Step 3120: loss: 0.8588, grad_norm: 0.2598, learning_rate: 0.0000, epoch: 0.0156
|
| 118 |
+
[2026-04-04 12:02:03] Step 3130: loss: 0.8142, grad_norm: 0.1787, learning_rate: 0.0000, epoch: 0.0157
|
| 119 |
+
[2026-04-04 12:03:28] Step 3140: loss: 0.8229, grad_norm: 0.1855, learning_rate: 0.0000, epoch: 0.0157
|
| 120 |
+
[2026-04-04 12:04:52] Step 3150: loss: 0.8981, grad_norm: 0.1797, learning_rate: 0.0000, epoch: 0.0158
|
| 121 |
+
[2026-04-04 12:06:17] Step 3160: loss: 0.9945, grad_norm: 0.2109, learning_rate: 0.0000, epoch: 0.0158
|
| 122 |
+
[2026-04-04 12:07:42] Step 3170: loss: 0.8905, grad_norm: 0.1758, learning_rate: 0.0000, epoch: 0.0158
|
| 123 |
+
[2026-04-04 12:09:07] Step 3180: loss: 0.8941, grad_norm: 0.1865, learning_rate: 0.0000, epoch: 0.0159
|
| 124 |
+
[2026-04-04 12:10:32] Step 3190: loss: 0.8214, grad_norm: 0.1826, learning_rate: 0.0000, epoch: 0.0159
|
| 125 |
+
[2026-04-04 12:11:56] Step 3200: loss: 0.8866, grad_norm: 0.1865, learning_rate: 0.0000, epoch: 0.0160
|
| 126 |
+
[2026-04-04 12:13:20] Step 3210: loss: 0.7030, grad_norm: 0.2178, learning_rate: 0.0000, epoch: 0.0160
|
| 127 |
+
[2026-04-04 12:14:45] Step 3220: loss: 0.8260, grad_norm: 0.1631, learning_rate: 0.0000, epoch: 0.0161
|
| 128 |
+
[2026-04-04 12:16:09] Step 3230: loss: 0.8487, grad_norm: 0.1768, learning_rate: 0.0000, epoch: 0.0162
|
| 129 |
+
[2026-04-04 12:17:34] Step 3240: loss: 0.8839, grad_norm: 1.5859, learning_rate: 0.0000, epoch: 0.0162
|
| 130 |
+
[2026-04-04 12:18:59] Step 3250: loss: 0.8126, grad_norm: 0.1729, learning_rate: 0.0000, epoch: 0.0163
|
| 131 |
+
[2026-04-04 12:20:22] Step 3260: loss: 0.8689, grad_norm: 3.5781, learning_rate: 0.0000, epoch: 0.0163
|
| 132 |
+
[2026-04-04 12:21:46] Step 3270: loss: 0.8413, grad_norm: 0.7891, learning_rate: 0.0000, epoch: 0.0163
|
| 133 |
+
[2026-04-04 12:23:11] Step 3280: loss: 0.9708, grad_norm: 0.4590, learning_rate: 0.0000, epoch: 0.0164
|
| 134 |
+
[2026-04-04 12:24:35] Step 3290: loss: 0.8723, grad_norm: 0.2754, learning_rate: 0.0000, epoch: 0.0164
|
| 135 |
+
[2026-04-04 12:26:00] Step 3300: loss: 0.9940, grad_norm: 0.2246, learning_rate: 0.0000, epoch: 0.0165
|
| 136 |
+
[2026-04-04 12:27:24] Step 3310: loss: 0.8340, grad_norm: 1.4609, learning_rate: 0.0000, epoch: 0.0165
|
| 137 |
+
[2026-04-04 12:28:49] Step 3320: loss: 0.8388, grad_norm: 0.1953, learning_rate: 0.0000, epoch: 0.0166
|
| 138 |
+
[2026-04-04 12:30:14] Step 3330: loss: 0.8837, grad_norm: 0.2148, learning_rate: 0.0000, epoch: 0.0167
|
| 139 |
+
[2026-04-04 12:31:38] Step 3340: loss: 0.8853, grad_norm: 0.2354, learning_rate: 0.0000, epoch: 0.0167
|
| 140 |
+
[2026-04-04 12:33:02] Step 3350: loss: 0.9380, grad_norm: 0.2168, learning_rate: 0.0000, epoch: 0.0168
|
| 141 |
+
[2026-04-04 12:34:27] Step 3360: loss: 0.8515, grad_norm: 0.2021, learning_rate: 0.0000, epoch: 0.0168
|
| 142 |
+
[2026-04-04 12:35:51] Step 3370: loss: 0.9047, grad_norm: 0.1885, learning_rate: 0.0000, epoch: 0.0169
|
| 143 |
+
[2026-04-04 12:37:16] Step 3380: loss: 0.7929, grad_norm: 1.0000, learning_rate: 0.0000, epoch: 0.0169
|
| 144 |
+
[2026-04-04 12:38:41] Step 3390: loss: 0.8338, grad_norm: 0.2012, learning_rate: 0.0000, epoch: 0.0169
|
| 145 |
+
[2026-04-04 12:40:05] Step 3400: loss: 0.8262, grad_norm: 0.2559, learning_rate: 0.0000, epoch: 0.0170
|
| 146 |
+
[2026-04-04 12:41:30] Step 3410: loss: 0.7306, grad_norm: 0.2295, learning_rate: 0.0000, epoch: 0.0170
|
| 147 |
+
[2026-04-04 12:42:55] Step 3420: loss: 0.9324, grad_norm: 0.2520, learning_rate: 0.0000, epoch: 0.0171
|
| 148 |
+
[2026-04-04 12:44:19] Step 3430: loss: 0.7892, grad_norm: 0.1533, learning_rate: 0.0000, epoch: 0.0171
|
| 149 |
+
[2026-04-04 12:45:43] Step 3440: loss: 0.9207, grad_norm: 0.2139, learning_rate: 0.0000, epoch: 0.0172
|
| 150 |
+
[2026-04-04 12:47:07] Step 3450: loss: 0.8582, grad_norm: 0.2539, learning_rate: 0.0000, epoch: 0.0173
|
| 151 |
+
[2026-04-04 12:48:32] Step 3460: loss: 0.8868, grad_norm: 0.3867, learning_rate: 0.0000, epoch: 0.0173
|
| 152 |
+
[2026-04-04 12:49:57] Step 3470: loss: 0.8362, grad_norm: 0.2148, learning_rate: 0.0000, epoch: 0.0174
|
| 153 |
+
[2026-04-04 12:51:21] Step 3480: loss: 0.9078, grad_norm: 0.1855, learning_rate: 0.0000, epoch: 0.0174
|
| 154 |
+
[2026-04-04 12:52:45] Step 3490: loss: 0.8814, grad_norm: 0.1660, learning_rate: 0.0000, epoch: 0.0175
|
| 155 |
+
[2026-04-04 12:54:10] Step 3500: loss: 0.8289, grad_norm: 0.1992, learning_rate: 0.0000, epoch: 0.0175
|
| 156 |
+
[2026-04-04 12:55:35] Step 3510: loss: 0.7498, grad_norm: 0.3477, learning_rate: 0.0000, epoch: 0.0175
|
| 157 |
+
[2026-04-04 12:56:59] Step 3520: loss: 0.8706, grad_norm: 0.1504, learning_rate: 0.0000, epoch: 0.0176
|
| 158 |
+
[2026-04-04 12:58:24] Step 3530: loss: 0.8755, grad_norm: 0.2480, learning_rate: 0.0000, epoch: 0.0176
|
| 159 |
+
[2026-04-04 12:59:48] Step 3540: loss: 0.8941, grad_norm: 0.2041, learning_rate: 0.0000, epoch: 0.0177
|
| 160 |
+
[2026-04-04 13:01:12] Step 3550: loss: 0.9034, grad_norm: 0.1914, learning_rate: 0.0000, epoch: 0.0177
|
| 161 |
+
[2026-04-04 13:02:37] Step 3560: loss: 0.8194, grad_norm: 0.1680, learning_rate: 0.0000, epoch: 0.0178
|
| 162 |
+
[2026-04-04 13:04:01] Step 3570: loss: 0.8267, grad_norm: 0.1807, learning_rate: 0.0000, epoch: 0.0179
|
| 163 |
+
[2026-04-04 13:05:26] Step 3580: loss: 0.8188, grad_norm: 0.1445, learning_rate: 0.0000, epoch: 0.0179
|
| 164 |
+
[2026-04-04 13:06:51] Step 3590: loss: 0.8666, grad_norm: 0.1797, learning_rate: 0.0000, epoch: 0.0180
|
| 165 |
+
[2026-04-04 13:08:15] Step 3600: loss: 0.8775, grad_norm: 0.3203, learning_rate: 0.0000, epoch: 0.0180
|
| 166 |
+
[2026-04-04 13:09:40] Step 3610: loss: 0.7504, grad_norm: 0.2734, learning_rate: 0.0000, epoch: 0.0181
|
| 167 |
+
[2026-04-04 13:11:04] Step 3620: loss: 0.7219, grad_norm: 0.2393, learning_rate: 0.0000, epoch: 0.0181
|
| 168 |
+
[2026-04-04 13:12:28] Step 3630: loss: 0.8402, grad_norm: 0.3105, learning_rate: 0.0000, epoch: 0.0181
|
| 169 |
+
[2026-04-04 13:13:52] Step 3640: loss: 0.7577, grad_norm: 0.2559, learning_rate: 0.0000, epoch: 0.0182
|
| 170 |
+
[2026-04-04 13:15:16] Step 3650: loss: 0.8412, grad_norm: 0.3867, learning_rate: 0.0000, epoch: 0.0182
|
| 171 |
+
[2026-04-04 13:16:41] Step 3660: loss: 0.7960, grad_norm: 0.1533, learning_rate: 0.0000, epoch: 0.0183
|
| 172 |
+
[2026-04-04 13:18:05] Step 3670: loss: 0.9164, grad_norm: 0.1787, learning_rate: 0.0000, epoch: 0.0184
|
| 173 |
+
[2026-04-04 13:19:30] Step 3680: loss: 0.9855, grad_norm: 0.1270, learning_rate: 0.0000, epoch: 0.0184
|
| 174 |
+
[2026-04-04 13:20:54] Step 3690: loss: 0.8332, grad_norm: 0.1455, learning_rate: 0.0000, epoch: 0.0185
|
| 175 |
+
[2026-04-04 13:22:19] Step 3700: loss: 0.8512, grad_norm: 0.1924, learning_rate: 0.0000, epoch: 0.0185
|
| 176 |
+
[2026-04-04 13:23:43] Step 3710: loss: 0.8994, grad_norm: 0.4375, learning_rate: 0.0000, epoch: 0.0186
|
| 177 |
+
[2026-04-04 13:25:08] Step 3720: loss: 0.9205, grad_norm: 0.2314, learning_rate: 0.0000, epoch: 0.0186
|
| 178 |
+
[2026-04-04 13:26:32] Step 3730: loss: 0.8814, grad_norm: 0.2227, learning_rate: 0.0000, epoch: 0.0186
|
| 179 |
+
[2026-04-04 13:27:56] Step 3740: loss: 0.8907, grad_norm: 0.2227, learning_rate: 0.0000, epoch: 0.0187
|
| 180 |
+
[2026-04-04 13:29:20] Step 3750: loss: 0.8737, grad_norm: 0.1475, learning_rate: 0.0000, epoch: 0.0187
|
| 181 |
+
[2026-04-04 13:30:45] Step 3760: loss: 0.8853, grad_norm: 0.1943, learning_rate: 0.0000, epoch: 0.0188
|
| 182 |
+
[2026-04-04 13:32:09] Step 3770: loss: 0.8969, grad_norm: 0.1943, learning_rate: 0.0000, epoch: 0.0188
|
| 183 |
+
[2026-04-04 13:33:34] Step 3780: loss: 0.8080, grad_norm: 0.1992, learning_rate: 0.0000, epoch: 0.0189
|
| 184 |
+
[2026-04-04 13:34:57] Step 3790: loss: 0.7392, grad_norm: 0.1592, learning_rate: 0.0000, epoch: 0.0190
|
| 185 |
+
[2026-04-04 13:36:21] Step 3800: loss: 0.6776, grad_norm: 0.1875, learning_rate: 0.0000, epoch: 0.0190
|
| 186 |
+
[2026-04-04 13:37:46] Step 3810: loss: 0.8414, grad_norm: 0.1836, learning_rate: 0.0000, epoch: 0.0191
|
| 187 |
+
[2026-04-04 13:39:10] Step 3820: loss: 0.8751, grad_norm: 0.1699, learning_rate: 0.0000, epoch: 0.0191
|
| 188 |
+
[2026-04-04 13:40:34] Step 3830: loss: 0.8723, grad_norm: 0.1738, learning_rate: 0.0000, epoch: 0.0192
|
| 189 |
+
[2026-04-04 13:41:59] Step 3840: loss: 0.7960, grad_norm: 0.1758, learning_rate: 0.0000, epoch: 0.0192
|
| 190 |
+
[2026-04-04 13:43:23] Step 3850: loss: 0.9287, grad_norm: 0.1865, learning_rate: 0.0000, epoch: 0.0192
|
| 191 |
+
[2026-04-04 13:44:48] Step 3860: loss: 0.8020, grad_norm: 0.1934, learning_rate: 0.0000, epoch: 0.0193
|
| 192 |
+
[2026-04-04 13:46:12] Step 3870: loss: 0.9291, grad_norm: 0.1758, learning_rate: 0.0000, epoch: 0.0193
|
| 193 |
+
[2026-04-04 13:47:36] Step 3880: loss: 0.8263, grad_norm: 0.1816, learning_rate: 0.0000, epoch: 0.0194
|
| 194 |
+
[2026-04-04 13:49:01] Step 3890: loss: 0.8684, grad_norm: 0.1641, learning_rate: 0.0000, epoch: 0.0194
|
| 195 |
+
[2026-04-04 13:50:26] Step 3900: loss: 0.8842, grad_norm: 0.2363, learning_rate: 0.0000, epoch: 0.0195
|
| 196 |
+
[2026-04-04 13:51:50] Step 3910: loss: 0.9238, grad_norm: 0.1885, learning_rate: 0.0000, epoch: 0.0196
|
| 197 |
+
[2026-04-04 13:53:15] Step 3920: loss: 0.8393, grad_norm: 0.1748, learning_rate: 0.0000, epoch: 0.0196
|
| 198 |
+
[2026-04-04 13:54:40] Step 3930: loss: 0.8357, grad_norm: 0.1436, learning_rate: 0.0000, epoch: 0.0197
|
| 199 |
+
[2026-04-04 13:56:04] Step 3940: loss: 0.8333, grad_norm: 0.2021, learning_rate: 0.0000, epoch: 0.0197
|
| 200 |
+
[2026-04-04 13:57:28] Step 3950: loss: 0.9847, grad_norm: 0.2031, learning_rate: 0.0000, epoch: 0.0198
|
| 201 |
+
[2026-04-04 13:58:52] Step 3960: loss: 0.8540, grad_norm: 0.1670, learning_rate: 0.0000, epoch: 0.0198
|
| 202 |
+
[2026-04-04 14:00:17] Step 3970: loss: 0.7641, grad_norm: 0.1436, learning_rate: 0.0000, epoch: 0.0198
|
| 203 |
+
[2026-04-04 14:01:41] Step 3980: loss: 0.7934, grad_norm: 0.1621, learning_rate: 0.0000, epoch: 0.0199
|
| 204 |
+
[2026-04-04 14:03:05] Step 3990: loss: 0.8577, grad_norm: 0.2070, learning_rate: 0.0000, epoch: 0.0199
|
| 205 |
+
[2026-04-04 14:04:30] Step 4000: loss: 0.8141, grad_norm: 0.1709, learning_rate: 0.0000, epoch: 0.0200
|
| 206 |
+
[2026-04-04 14:07:12] Step 4010: loss: 0.8507, grad_norm: 0.1602, learning_rate: 0.0000, epoch: 0.0200
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|