Training in progress, step 2400, checkpoint
Browse files
last-checkpoint/adapter_model.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 527048968
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:4b7314d8b5f35930722e1c3d90a6061192742967a1457d59859792a0878fce57
|
| 3 |
size 527048968
|
last-checkpoint/optimizer.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1054135994
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:72e4902bbfa6e4e56f6e17ab398c622f809713c649baa3c6fd399c0f05448a5f
|
| 3 |
size 1054135994
|
last-checkpoint/rng_state.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14244
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:684fee310519fed9cc8ee66dc9698ba16f4e9489a577756f84e7b21aa51e01d4
|
| 3 |
size 14244
|
last-checkpoint/scheduler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1064
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:ba3530fffdeb6293174ae0b25b4bed0ccc682e606b6e29d3d50fec77e3192eef
|
| 3 |
size 1064
|
last-checkpoint/trainer_state.json
CHANGED
|
@@ -1,9 +1,9 @@
|
|
| 1 |
{
|
| 2 |
"best_metric": 0.7177689671516418,
|
| 3 |
"best_model_checkpoint": "./output/checkpoint-450",
|
| 4 |
-
"epoch":
|
| 5 |
"eval_steps": 150,
|
| 6 |
-
"global_step":
|
| 7 |
"is_hyper_param_search": false,
|
| 8 |
"is_local_process_zero": true,
|
| 9 |
"is_world_process_zero": true,
|
|
@@ -2182,6 +2182,151 @@
|
|
| 2182 |
"EMA_steps_per_second": 25.124,
|
| 2183 |
"epoch": 97.82608695652173,
|
| 2184 |
"step": 2250
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2185 |
}
|
| 2186 |
],
|
| 2187 |
"logging_steps": 10,
|
|
@@ -2201,7 +2346,7 @@
|
|
| 2201 |
"attributes": {}
|
| 2202 |
}
|
| 2203 |
},
|
| 2204 |
-
"total_flos":
|
| 2205 |
"train_batch_size": 4,
|
| 2206 |
"trial_name": null,
|
| 2207 |
"trial_params": null
|
|
|
|
| 1 |
{
|
| 2 |
"best_metric": 0.7177689671516418,
|
| 3 |
"best_model_checkpoint": "./output/checkpoint-450",
|
| 4 |
+
"epoch": 104.34782608695652,
|
| 5 |
"eval_steps": 150,
|
| 6 |
+
"global_step": 2400,
|
| 7 |
"is_hyper_param_search": false,
|
| 8 |
"is_local_process_zero": true,
|
| 9 |
"is_world_process_zero": true,
|
|
|
|
| 2182 |
"EMA_steps_per_second": 25.124,
|
| 2183 |
"epoch": 97.82608695652173,
|
| 2184 |
"step": 2250
|
| 2185 |
+
},
|
| 2186 |
+
{
|
| 2187 |
+
"epoch": 98.26086956521739,
|
| 2188 |
+
"grad_norm": 1.9422506093978882,
|
| 2189 |
+
"learning_rate": 7.487548003627922e-06,
|
| 2190 |
+
"loss": 0.2414,
|
| 2191 |
+
"step": 2260
|
| 2192 |
+
},
|
| 2193 |
+
{
|
| 2194 |
+
"epoch": 98.69565217391305,
|
| 2195 |
+
"grad_norm": 1.745564341545105,
|
| 2196 |
+
"learning_rate": 7.487516099090849e-06,
|
| 2197 |
+
"loss": 0.278,
|
| 2198 |
+
"step": 2270
|
| 2199 |
+
},
|
| 2200 |
+
{
|
| 2201 |
+
"epoch": 99.1304347826087,
|
| 2202 |
+
"grad_norm": 2.0466256141662598,
|
| 2203 |
+
"learning_rate": 7.48748271071887e-06,
|
| 2204 |
+
"loss": 0.2487,
|
| 2205 |
+
"step": 2280
|
| 2206 |
+
},
|
| 2207 |
+
{
|
| 2208 |
+
"epoch": 99.56521739130434,
|
| 2209 |
+
"grad_norm": 2.3589112758636475,
|
| 2210 |
+
"learning_rate": 7.48744783852522e-06,
|
| 2211 |
+
"loss": 0.2882,
|
| 2212 |
+
"step": 2290
|
| 2213 |
+
},
|
| 2214 |
+
{
|
| 2215 |
+
"epoch": 100.0,
|
| 2216 |
+
"grad_norm": 2.6583240032196045,
|
| 2217 |
+
"learning_rate": 7.487411482523721e-06,
|
| 2218 |
+
"loss": 0.2324,
|
| 2219 |
+
"step": 2300
|
| 2220 |
+
},
|
| 2221 |
+
{
|
| 2222 |
+
"epoch": 100.43478260869566,
|
| 2223 |
+
"grad_norm": 2.685478448867798,
|
| 2224 |
+
"learning_rate": 7.4873736427287825e-06,
|
| 2225 |
+
"loss": 0.2368,
|
| 2226 |
+
"step": 2310
|
| 2227 |
+
},
|
| 2228 |
+
{
|
| 2229 |
+
"epoch": 100.8695652173913,
|
| 2230 |
+
"grad_norm": 1.7692900896072388,
|
| 2231 |
+
"learning_rate": 7.487334319155404e-06,
|
| 2232 |
+
"loss": 0.2694,
|
| 2233 |
+
"step": 2320
|
| 2234 |
+
},
|
| 2235 |
+
{
|
| 2236 |
+
"epoch": 101.30434782608695,
|
| 2237 |
+
"grad_norm": 2.5517287254333496,
|
| 2238 |
+
"learning_rate": 7.487293511819172e-06,
|
| 2239 |
+
"loss": 0.2417,
|
| 2240 |
+
"step": 2330
|
| 2241 |
+
},
|
| 2242 |
+
{
|
| 2243 |
+
"epoch": 101.73913043478261,
|
| 2244 |
+
"grad_norm": 1.7970623970031738,
|
| 2245 |
+
"learning_rate": 7.4872512207362605e-06,
|
| 2246 |
+
"loss": 0.2446,
|
| 2247 |
+
"step": 2340
|
| 2248 |
+
},
|
| 2249 |
+
{
|
| 2250 |
+
"epoch": 102.17391304347827,
|
| 2251 |
+
"grad_norm": 1.792651653289795,
|
| 2252 |
+
"learning_rate": 7.487207445923432e-06,
|
| 2253 |
+
"loss": 0.2934,
|
| 2254 |
+
"step": 2350
|
| 2255 |
+
},
|
| 2256 |
+
{
|
| 2257 |
+
"epoch": 102.6086956521739,
|
| 2258 |
+
"grad_norm": 2.1051220893859863,
|
| 2259 |
+
"learning_rate": 7.487162187398039e-06,
|
| 2260 |
+
"loss": 0.2844,
|
| 2261 |
+
"step": 2360
|
| 2262 |
+
},
|
| 2263 |
+
{
|
| 2264 |
+
"epoch": 103.04347826086956,
|
| 2265 |
+
"grad_norm": 1.9311975240707397,
|
| 2266 |
+
"learning_rate": 7.487115445178019e-06,
|
| 2267 |
+
"loss": 0.2162,
|
| 2268 |
+
"step": 2370
|
| 2269 |
+
},
|
| 2270 |
+
{
|
| 2271 |
+
"epoch": 103.47826086956522,
|
| 2272 |
+
"grad_norm": 2.12684965133667,
|
| 2273 |
+
"learning_rate": 7.487067219281901e-06,
|
| 2274 |
+
"loss": 0.2911,
|
| 2275 |
+
"step": 2380
|
| 2276 |
+
},
|
| 2277 |
+
{
|
| 2278 |
+
"epoch": 103.91304347826087,
|
| 2279 |
+
"grad_norm": 2.0107476711273193,
|
| 2280 |
+
"learning_rate": 7.4870175097287985e-06,
|
| 2281 |
+
"loss": 0.2413,
|
| 2282 |
+
"step": 2390
|
| 2283 |
+
},
|
| 2284 |
+
{
|
| 2285 |
+
"epoch": 104.34782608695652,
|
| 2286 |
+
"grad_norm": 1.9675108194351196,
|
| 2287 |
+
"learning_rate": 7.486966316538416e-06,
|
| 2288 |
+
"loss": 0.2557,
|
| 2289 |
+
"step": 2400
|
| 2290 |
+
},
|
| 2291 |
+
{
|
| 2292 |
+
"epoch": 104.34782608695652,
|
| 2293 |
+
"eval_loss": 0.9136893153190613,
|
| 2294 |
+
"eval_runtime": 0.4193,
|
| 2295 |
+
"eval_samples_per_second": 23.849,
|
| 2296 |
+
"eval_steps_per_second": 23.849,
|
| 2297 |
+
"step": 2400
|
| 2298 |
+
},
|
| 2299 |
+
{
|
| 2300 |
+
"Start_State_loss": 0.861186683177948,
|
| 2301 |
+
"Start_State_runtime": 0.4057,
|
| 2302 |
+
"Start_State_samples_per_second": 24.646,
|
| 2303 |
+
"Start_State_steps_per_second": 24.646,
|
| 2304 |
+
"epoch": 104.34782608695652,
|
| 2305 |
+
"step": 2400
|
| 2306 |
+
},
|
| 2307 |
+
{
|
| 2308 |
+
"Raw_Model_loss": 0.9136893153190613,
|
| 2309 |
+
"Raw_Model_runtime": 0.4082,
|
| 2310 |
+
"Raw_Model_samples_per_second": 24.497,
|
| 2311 |
+
"Raw_Model_steps_per_second": 24.497,
|
| 2312 |
+
"epoch": 104.34782608695652,
|
| 2313 |
+
"step": 2400
|
| 2314 |
+
},
|
| 2315 |
+
{
|
| 2316 |
+
"SWA_loss": 0.7567933797836304,
|
| 2317 |
+
"SWA_runtime": 0.4029,
|
| 2318 |
+
"SWA_samples_per_second": 24.818,
|
| 2319 |
+
"SWA_steps_per_second": 24.818,
|
| 2320 |
+
"epoch": 104.34782608695652,
|
| 2321 |
+
"step": 2400
|
| 2322 |
+
},
|
| 2323 |
+
{
|
| 2324 |
+
"EMA_loss": 0.8605263829231262,
|
| 2325 |
+
"EMA_runtime": 0.4051,
|
| 2326 |
+
"EMA_samples_per_second": 24.683,
|
| 2327 |
+
"EMA_steps_per_second": 24.683,
|
| 2328 |
+
"epoch": 104.34782608695652,
|
| 2329 |
+
"step": 2400
|
| 2330 |
}
|
| 2331 |
],
|
| 2332 |
"logging_steps": 10,
|
|
|
|
| 2346 |
"attributes": {}
|
| 2347 |
}
|
| 2348 |
},
|
| 2349 |
+
"total_flos": 6.183025621814477e+16,
|
| 2350 |
"train_batch_size": 4,
|
| 2351 |
"trial_name": null,
|
| 2352 |
"trial_params": null
|