Training in progress, step 770000
Browse files
- last-checkpoint/optimizer.pt +1 -1
- last-checkpoint/pytorch_model.bin +1 -1
- last-checkpoint/rng_state_0.pth +2 -2
- last-checkpoint/rng_state_1.pth +1 -1
- last-checkpoint/rng_state_2.pth +2 -2
- last-checkpoint/rng_state_3.pth +1 -1
- last-checkpoint/scheduler.pt +1 -1
- last-checkpoint/trainer_state.json +77 -3
- pytorch_model.bin +1 -1
last-checkpoint/optimizer.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 893439185
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:9cd115a94124bab2069eaa28cc8b9048cd539d53dac292919b3e0c103db4c72e
|
| 3 |
size 893439185
|
last-checkpoint/pytorch_model.bin
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 449471589
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:ca7fa82eacf1e8434be7ded7b8c684db48c7eb8a95a9938e56bd64eca327a5df
|
| 3 |
size 449471589
|
last-checkpoint/rng_state_0.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:c7d3b820a136ad2a4ba7c86024ef116cb8408b724ca033611384f2431449421e
|
| 3 |
+
size 14439
|
last-checkpoint/rng_state_1.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14503
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:9edcea991831f16f231bcb3a9b5fa5ff10be4f8fa58a6379f44531e869bdb54a
|
| 3 |
size 14503
|
last-checkpoint/rng_state_2.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:8d7420c489d698358bef68e17228256f66ae1823918a8b52fcc13ba74d224c70
|
| 3 |
+
size 14503
|
last-checkpoint/rng_state_3.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14503
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:682c61168677972229ed1e1a9e1518def3be9c6cebcff3c8dc65ade2edc13840
|
| 3 |
size 14503
|
last-checkpoint/scheduler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 623
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:c87245ad3a3a643f81f3f2450a9b83d6c09d17eda4b9f8dcf29bf1d0ea618777
|
| 3 |
size 623
|
last-checkpoint/trainer_state.json
CHANGED
|
@@ -1,8 +1,8 @@
|
|
| 1 |
{
|
| 2 |
"best_metric": null,
|
| 3 |
"best_model_checkpoint": null,
|
| 4 |
-
"epoch": 11.
|
| 5 |
-
"global_step":
|
| 6 |
"is_hyper_param_search": false,
|
| 7 |
"is_local_process_zero": true,
|
| 8 |
"is_world_process_zero": true,
|
|
@@ -5630,11 +5630,85 @@
|
|
| 5630 |
"eval_samples_per_second": 1390.499,
|
| 5631 |
"eval_steps_per_second": 22.248,
|
| 5632 |
"step": 760000
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 5633 |
}
|
| 5634 |
],
|
| 5635 |
"max_steps": 1000000,
|
| 5636 |
"num_train_epochs": 16,
|
| 5637 |
-
"total_flos": 5.
|
| 5638 |
"trial_name": null,
|
| 5639 |
"trial_params": null
|
| 5640 |
}
|
|
|
|
| 1 |
{
|
| 2 |
"best_metric": null,
|
| 3 |
"best_model_checkpoint": null,
|
| 4 |
+
"epoch": 11.758058851375083,
|
| 5 |
+
"global_step": 770000,
|
| 6 |
"is_hyper_param_search": false,
|
| 7 |
"is_local_process_zero": true,
|
| 8 |
"is_world_process_zero": true,
|
|
|
|
| 5630 |
"eval_samples_per_second": 1390.499,
|
| 5631 |
"eval_steps_per_second": 22.248,
|
| 5632 |
"step": 760000
|
| 5633 |
+
},
|
| 5634 |
+
{
|
| 5635 |
+
"epoch": 11.62,
|
| 5636 |
+
"learning_rate": 3.0748661009182616e-05,
|
| 5637 |
+
"loss": 0.2396,
|
| 5638 |
+
"step": 761000
|
| 5639 |
+
},
|
| 5640 |
+
{
|
| 5641 |
+
"epoch": 11.64,
|
| 5642 |
+
"learning_rate": 3.0584435522191896e-05,
|
| 5643 |
+
"loss": 0.2395,
|
| 5644 |
+
"step": 762000
|
| 5645 |
+
},
|
| 5646 |
+
{
|
| 5647 |
+
"epoch": 11.65,
|
| 5648 |
+
"learning_rate": 3.0420750435897183e-05,
|
| 5649 |
+
"loss": 0.2393,
|
| 5650 |
+
"step": 763000
|
| 5651 |
+
},
|
| 5652 |
+
{
|
| 5653 |
+
"epoch": 11.67,
|
| 5654 |
+
"learning_rate": 3.025760754033246e-05,
|
| 5655 |
+
"loss": 0.239,
|
| 5656 |
+
"step": 764000
|
| 5657 |
+
},
|
| 5658 |
+
{
|
| 5659 |
+
"epoch": 11.68,
|
| 5660 |
+
"learning_rate": 3.0095008619602206e-05,
|
| 5661 |
+
"loss": 0.2392,
|
| 5662 |
+
"step": 765000
|
| 5663 |
+
},
|
| 5664 |
+
{
|
| 5665 |
+
"epoch": 11.68,
|
| 5666 |
+
"eval_runtime": 0.7905,
|
| 5667 |
+
"eval_samples_per_second": 1264.968,
|
| 5668 |
+
"eval_steps_per_second": 20.239,
|
| 5669 |
+
"step": 765000
|
| 5670 |
+
},
|
| 5671 |
+
{
|
| 5672 |
+
"epoch": 11.7,
|
| 5673 |
+
"learning_rate": 2.993295545186223e-05,
|
| 5674 |
+
"loss": 0.2393,
|
| 5675 |
+
"step": 766000
|
| 5676 |
+
},
|
| 5677 |
+
{
|
| 5678 |
+
"epoch": 11.71,
|
| 5679 |
+
"learning_rate": 2.977144980929996e-05,
|
| 5680 |
+
"loss": 0.2392,
|
| 5681 |
+
"step": 767000
|
| 5682 |
+
},
|
| 5683 |
+
{
|
| 5684 |
+
"epoch": 11.73,
|
| 5685 |
+
"learning_rate": 2.961049345811523e-05,
|
| 5686 |
+
"loss": 0.2388,
|
| 5687 |
+
"step": 768000
|
| 5688 |
+
},
|
| 5689 |
+
{
|
| 5690 |
+
"epoch": 11.74,
|
| 5691 |
+
"learning_rate": 2.945008815850097e-05,
|
| 5692 |
+
"loss": 0.2392,
|
| 5693 |
+
"step": 769000
|
| 5694 |
+
},
|
| 5695 |
+
{
|
| 5696 |
+
"epoch": 11.76,
|
| 5697 |
+
"learning_rate": 2.929023566462377e-05,
|
| 5698 |
+
"loss": 0.2391,
|
| 5699 |
+
"step": 770000
|
| 5700 |
+
},
|
| 5701 |
+
{
|
| 5702 |
+
"epoch": 11.76,
|
| 5703 |
+
"eval_runtime": 0.8418,
|
| 5704 |
+
"eval_samples_per_second": 1187.898,
|
| 5705 |
+
"eval_steps_per_second": 19.006,
|
| 5706 |
+
"step": 770000
|
| 5707 |
}
|
| 5708 |
],
|
| 5709 |
"max_steps": 1000000,
|
| 5710 |
"num_train_epochs": 16,
|
| 5711 |
+
"total_flos": 5.397712871587094e+22,
|
| 5712 |
"trial_name": null,
|
| 5713 |
"trial_params": null
|
| 5714 |
}
|
pytorch_model.bin
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 449471589
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:ca7fa82eacf1e8434be7ded7b8c684db48c7eb8a95a9938e56bd64eca327a5df
|
| 3 |
size 449471589
|