Training in progress, step 610000
Browse files- last-checkpoint/optimizer.pt +1 -1
- last-checkpoint/pytorch_model.bin +1 -1
- last-checkpoint/rng_state_0.pth +2 -2
- last-checkpoint/rng_state_1.pth +2 -2
- last-checkpoint/rng_state_2.pth +2 -2
- last-checkpoint/rng_state_3.pth +1 -1
- last-checkpoint/scheduler.pt +1 -1
- last-checkpoint/trainer_state.json +77 -3
- pytorch_model.bin +1 -1
last-checkpoint/optimizer.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 893439185
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:58bbdb63dd5f35ddeda47e9dc139795456694d0aba5e0383355b4522b67e77b3
|
| 3 |
size 893439185
|
last-checkpoint/pytorch_model.bin
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 449471589
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:b37572e353c74d53a74256f6a8b831fe7ecaed9eb997d03f96697e3f38be10fb
|
| 3 |
size 449471589
|
last-checkpoint/rng_state_0.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:e969b0a1f38bb709ff184bddc11e3e1b7a366f1f525d658e15c5e0e9638178b2
|
| 3 |
+
size 14503
|
last-checkpoint/rng_state_1.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:e472d706bbb24da1ae339dbf5b5e166d6a3aff07e50beb6830b013698716675c
|
| 3 |
+
size 14503
|
last-checkpoint/rng_state_2.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:bb9d01b196a085088b251b99e2302b0af5c57d846827fd2f433da2dd23ebd86a
|
| 3 |
+
size 14567
|
last-checkpoint/rng_state_3.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14503
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:0f9539e61c7b8fd54fd055839d1a6ea3b2cd4ea6f97a1aef7d0a7c91c2429be6
|
| 3 |
size 14503
|
last-checkpoint/scheduler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 623
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:71de57969ec7a3201bd7bf6de3d6e1584c8439f398aacc61865ba691cc2653d6
|
| 3 |
size 623
|
last-checkpoint/trainer_state.json
CHANGED
|
@@ -1,8 +1,8 @@
|
|
| 1 |
{
|
| 2 |
"best_metric": null,
|
| 3 |
"best_model_checkpoint": null,
|
| 4 |
-
"epoch": 9.
|
| 5 |
-
"global_step":
|
| 6 |
"is_hyper_param_search": false,
|
| 7 |
"is_local_process_zero": true,
|
| 8 |
"is_world_process_zero": true,
|
|
@@ -4446,11 +4446,85 @@
|
|
| 4446 |
"eval_samples_per_second": 928.316,
|
| 4447 |
"eval_steps_per_second": 14.853,
|
| 4448 |
"step": 600000
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 4449 |
}
|
| 4450 |
],
|
| 4451 |
"max_steps": 1000000,
|
| 4452 |
"num_train_epochs": 16,
|
| 4453 |
-
"total_flos": 4.
|
| 4454 |
"trial_name": null,
|
| 4455 |
"trial_params": null
|
| 4456 |
}
|
|
|
|
| 1 |
{
|
| 2 |
"best_metric": null,
|
| 3 |
"best_model_checkpoint": null,
|
| 4 |
+
"epoch": 9.314825843297143,
|
| 5 |
+
"global_step": 610000,
|
| 6 |
"is_hyper_param_search": false,
|
| 7 |
"is_local_process_zero": true,
|
| 8 |
"is_world_process_zero": true,
|
|
|
|
| 4446 |
"eval_samples_per_second": 928.316,
|
| 4447 |
"eval_steps_per_second": 14.853,
|
| 4448 |
"step": 600000
|
| 4449 |
+
},
|
| 4450 |
+
{
|
| 4451 |
+
"epoch": 9.18,
|
| 4452 |
+
"learning_rate": 6.259170789846017e-05,
|
| 4453 |
+
"loss": 0.2546,
|
| 4454 |
+
"step": 601000
|
| 4455 |
+
},
|
| 4456 |
+
{
|
| 4457 |
+
"epoch": 9.19,
|
| 4458 |
+
"learning_rate": 6.236759027106965e-05,
|
| 4459 |
+
"loss": 0.2542,
|
| 4460 |
+
"step": 602000
|
| 4461 |
+
},
|
| 4462 |
+
{
|
| 4463 |
+
"epoch": 9.21,
|
| 4464 |
+
"learning_rate": 6.214366546888694e-05,
|
| 4465 |
+
"loss": 0.2541,
|
| 4466 |
+
"step": 603000
|
| 4467 |
+
},
|
| 4468 |
+
{
|
| 4469 |
+
"epoch": 9.22,
|
| 4470 |
+
"learning_rate": 6.191993594071785e-05,
|
| 4471 |
+
"loss": 0.2541,
|
| 4472 |
+
"step": 604000
|
| 4473 |
+
},
|
| 4474 |
+
{
|
| 4475 |
+
"epoch": 9.24,
|
| 4476 |
+
"learning_rate": 6.169640413323262e-05,
|
| 4477 |
+
"loss": 0.254,
|
| 4478 |
+
"step": 605000
|
| 4479 |
+
},
|
| 4480 |
+
{
|
| 4481 |
+
"epoch": 9.24,
|
| 4482 |
+
"eval_runtime": 1.0913,
|
| 4483 |
+
"eval_samples_per_second": 916.334,
|
| 4484 |
+
"eval_steps_per_second": 14.661,
|
| 4485 |
+
"step": 605000
|
| 4486 |
+
},
|
| 4487 |
+
{
|
| 4488 |
+
"epoch": 9.25,
|
| 4489 |
+
"learning_rate": 6.147307249093929e-05,
|
| 4490 |
+
"loss": 0.2537,
|
| 4491 |
+
"step": 606000
|
| 4492 |
+
},
|
| 4493 |
+
{
|
| 4494 |
+
"epoch": 9.27,
|
| 4495 |
+
"learning_rate": 6.124994345615693e-05,
|
| 4496 |
+
"loss": 0.2532,
|
| 4497 |
+
"step": 607000
|
| 4498 |
+
},
|
| 4499 |
+
{
|
| 4500 |
+
"epoch": 9.28,
|
| 4501 |
+
"learning_rate": 6.102701946898891e-05,
|
| 4502 |
+
"loss": 0.2536,
|
| 4503 |
+
"step": 608000
|
| 4504 |
+
},
|
| 4505 |
+
{
|
| 4506 |
+
"epoch": 9.3,
|
| 4507 |
+
"learning_rate": 6.0804302967296225e-05,
|
| 4508 |
+
"loss": 0.2545,
|
| 4509 |
+
"step": 609000
|
| 4510 |
+
},
|
| 4511 |
+
{
|
| 4512 |
+
"epoch": 9.31,
|
| 4513 |
+
"learning_rate": 6.058179638667089e-05,
|
| 4514 |
+
"loss": 0.2536,
|
| 4515 |
+
"step": 610000
|
| 4516 |
+
},
|
| 4517 |
+
{
|
| 4518 |
+
"epoch": 9.31,
|
| 4519 |
+
"eval_runtime": 1.0284,
|
| 4520 |
+
"eval_samples_per_second": 972.365,
|
| 4521 |
+
"eval_steps_per_second": 15.558,
|
| 4522 |
+
"step": 610000
|
| 4523 |
}
|
| 4524 |
],
|
| 4525 |
"max_steps": 1000000,
|
| 4526 |
"num_train_epochs": 16,
|
| 4527 |
+
"total_flos": 4.276110009237837e+22,
|
| 4528 |
"trial_name": null,
|
| 4529 |
"trial_params": null
|
| 4530 |
}
|
pytorch_model.bin
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 449471589
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:b37572e353c74d53a74256f6a8b831fe7ecaed9eb997d03f96697e3f38be10fb
|
| 3 |
size 449471589
|