Training in progress, step 15000, checkpoint
Browse files
last-checkpoint/model.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 891558696
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:c8b87db48356f50aba644c7f62751e63e8a04ec2268f84e4eba37dd50c1a0411
|
| 3 |
size 891558696
|
last-checkpoint/optimizer.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1783272762
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:85a117e1ecba729ee34a78722b6baa410d7ae44754ab32c14568e8bd85298764
|
| 3 |
size 1783272762
|
last-checkpoint/rng_state.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14244
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:5cb981e2bdbf592329578c66e6b81722ec222bc355b0ddd3bdc9f7689a385e7a
|
| 3 |
size 14244
|
last-checkpoint/scheduler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1064
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:0054105d8fb5cdfddd8876e0968830f9a7aa658759cebb2ad97ea276facb582b
|
| 3 |
size 1064
|
last-checkpoint/trainer_state.json
CHANGED
|
@@ -1,9 +1,9 @@
|
|
| 1 |
{
|
| 2 |
-
"best_metric": 0.
|
| 3 |
-
"best_model_checkpoint": "./fine-tuned/checkpoint-
|
| 4 |
-
"epoch": 1.
|
| 5 |
"eval_steps": 500,
|
| 6 |
-
"global_step":
|
| 7 |
"is_hyper_param_search": false,
|
| 8 |
"is_local_process_zero": true,
|
| 9 |
"is_world_process_zero": true,
|
|
@@ -2269,6 +2269,84 @@
|
|
| 2269 |
"eval_samples_per_second": 22.714,
|
| 2270 |
"eval_steps_per_second": 5.678,
|
| 2271 |
"step": 14500
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2272 |
}
|
| 2273 |
],
|
| 2274 |
"logging_steps": 50,
|
|
@@ -2288,7 +2366,7 @@
|
|
| 2288 |
"attributes": {}
|
| 2289 |
}
|
| 2290 |
},
|
| 2291 |
-
"total_flos": 3.
|
| 2292 |
"train_batch_size": 4,
|
| 2293 |
"trial_name": null,
|
| 2294 |
"trial_params": null
|
|
|
|
| 1 |
{
|
| 2 |
+
"best_metric": 0.08243728429079056,
|
| 3 |
+
"best_model_checkpoint": "./fine-tuned/checkpoint-15000",
|
| 4 |
+
"epoch": 1.2,
|
| 5 |
"eval_steps": 500,
|
| 6 |
+
"global_step": 15000,
|
| 7 |
"is_hyper_param_search": false,
|
| 8 |
"is_local_process_zero": true,
|
| 9 |
"is_world_process_zero": true,
|
|
|
|
| 2269 |
"eval_samples_per_second": 22.714,
|
| 2270 |
"eval_steps_per_second": 5.678,
|
| 2271 |
"step": 14500
|
| 2272 |
+
},
|
| 2273 |
+
{
|
| 2274 |
+
"epoch": 1.164,
|
| 2275 |
+
"grad_norm": 0.06833196431398392,
|
| 2276 |
+
"learning_rate": 1.25436e-05,
|
| 2277 |
+
"loss": 0.0586,
|
| 2278 |
+
"step": 14550
|
| 2279 |
+
},
|
| 2280 |
+
{
|
| 2281 |
+
"epoch": 1.168,
|
| 2282 |
+
"grad_norm": 0.16051365435123444,
|
| 2283 |
+
"learning_rate": 1.24836e-05,
|
| 2284 |
+
"loss": 0.056,
|
| 2285 |
+
"step": 14600
|
| 2286 |
+
},
|
| 2287 |
+
{
|
| 2288 |
+
"epoch": 1.172,
|
| 2289 |
+
"grad_norm": 0.18909616768360138,
|
| 2290 |
+
"learning_rate": 1.24236e-05,
|
| 2291 |
+
"loss": 0.057,
|
| 2292 |
+
"step": 14650
|
| 2293 |
+
},
|
| 2294 |
+
{
|
| 2295 |
+
"epoch": 1.176,
|
| 2296 |
+
"grad_norm": 0.17333486676216125,
|
| 2297 |
+
"learning_rate": 1.23636e-05,
|
| 2298 |
+
"loss": 0.0562,
|
| 2299 |
+
"step": 14700
|
| 2300 |
+
},
|
| 2301 |
+
{
|
| 2302 |
+
"epoch": 1.18,
|
| 2303 |
+
"grad_norm": 0.0701974630355835,
|
| 2304 |
+
"learning_rate": 1.23036e-05,
|
| 2305 |
+
"loss": 0.0545,
|
| 2306 |
+
"step": 14750
|
| 2307 |
+
},
|
| 2308 |
+
{
|
| 2309 |
+
"epoch": 1.184,
|
| 2310 |
+
"grad_norm": 0.1582074612379074,
|
| 2311 |
+
"learning_rate": 1.2243599999999999e-05,
|
| 2312 |
+
"loss": 0.06,
|
| 2313 |
+
"step": 14800
|
| 2314 |
+
},
|
| 2315 |
+
{
|
| 2316 |
+
"epoch": 1.188,
|
| 2317 |
+
"grad_norm": 0.13948781788349152,
|
| 2318 |
+
"learning_rate": 1.21836e-05,
|
| 2319 |
+
"loss": 0.0529,
|
| 2320 |
+
"step": 14850
|
| 2321 |
+
},
|
| 2322 |
+
{
|
| 2323 |
+
"epoch": 1.192,
|
| 2324 |
+
"grad_norm": 0.0946699008345604,
|
| 2325 |
+
"learning_rate": 1.21236e-05,
|
| 2326 |
+
"loss": 0.0668,
|
| 2327 |
+
"step": 14900
|
| 2328 |
+
},
|
| 2329 |
+
{
|
| 2330 |
+
"epoch": 1.196,
|
| 2331 |
+
"grad_norm": 0.1089014783501625,
|
| 2332 |
+
"learning_rate": 1.20636e-05,
|
| 2333 |
+
"loss": 0.0562,
|
| 2334 |
+
"step": 14950
|
| 2335 |
+
},
|
| 2336 |
+
{
|
| 2337 |
+
"epoch": 1.2,
|
| 2338 |
+
"grad_norm": 0.09682592004537582,
|
| 2339 |
+
"learning_rate": 1.20036e-05,
|
| 2340 |
+
"loss": 0.0548,
|
| 2341 |
+
"step": 15000
|
| 2342 |
+
},
|
| 2343 |
+
{
|
| 2344 |
+
"epoch": 1.2,
|
| 2345 |
+
"eval_loss": 0.08243728429079056,
|
| 2346 |
+
"eval_runtime": 88.0302,
|
| 2347 |
+
"eval_samples_per_second": 22.719,
|
| 2348 |
+
"eval_steps_per_second": 5.68,
|
| 2349 |
+
"step": 15000
|
| 2350 |
}
|
| 2351 |
],
|
| 2352 |
"logging_steps": 50,
|
|
|
|
| 2366 |
"attributes": {}
|
| 2367 |
}
|
| 2368 |
},
|
| 2369 |
+
"total_flos": 3.65374734336e+16,
|
| 2370 |
"train_batch_size": 4,
|
| 2371 |
"trial_name": null,
|
| 2372 |
"trial_params": null
|