Training in progress, step 790000
Browse files- last-checkpoint/optimizer.pt +1 -1
- last-checkpoint/pytorch_model.bin +1 -1
- last-checkpoint/rng_state_0.pth +2 -2
- last-checkpoint/rng_state_1.pth +1 -1
- last-checkpoint/rng_state_2.pth +1 -1
- last-checkpoint/rng_state_3.pth +2 -2
- last-checkpoint/scheduler.pt +1 -1
- last-checkpoint/trainer_state.json +77 -3
- pytorch_model.bin +1 -1
last-checkpoint/optimizer.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 893439185
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:7bbcfc7841d8e3c7279dc9562dd48dabc2017ffc2b9998700cfd92549b10b258
|
| 3 |
size 893439185
|
last-checkpoint/pytorch_model.bin
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 449471589
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:a6d2e303b79c843dd2e255dcb66b42d622f3c11aae0591a4594fe30435ca030b
|
| 3 |
size 449471589
|
last-checkpoint/rng_state_0.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:b0b06454874bab75beb606d73861ef28c53072edff450f0d67541ae83ce33a54
|
| 3 |
+
size 14439
|
last-checkpoint/rng_state_1.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14503
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:74183b2ff4f54786fe97746a804e5ce75e50372685a561e44836c051c4c6a5de
|
| 3 |
size 14503
|
last-checkpoint/rng_state_2.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14503
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:a238c961e97eac6dc08d049a0a3111fbea4302b13b1d99d77dbbe38ac524b535
|
| 3 |
size 14503
|
last-checkpoint/rng_state_3.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:70dfde4719f025927689d32aa4b2f68fdcbdc6cc2b55cd4fb96def6b3d827e65
|
| 3 |
+
size 14503
|
last-checkpoint/scheduler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 623
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:83d1297302d20060e31d476195b98906c23904815e65152eb2d3ffb7dd074183
|
| 3 |
size 623
|
last-checkpoint/trainer_state.json
CHANGED
|
@@ -1,8 +1,8 @@
|
|
| 1 |
{
|
| 2 |
"best_metric": null,
|
| 3 |
"best_model_checkpoint": null,
|
| 4 |
-
"epoch":
|
| 5 |
-
"global_step":
|
| 6 |
"is_hyper_param_search": false,
|
| 7 |
"is_local_process_zero": true,
|
| 8 |
"is_world_process_zero": true,
|
|
@@ -5778,11 +5778,85 @@
|
|
| 5778 |
"eval_samples_per_second": 1276.363,
|
| 5779 |
"eval_steps_per_second": 20.422,
|
| 5780 |
"step": 780000
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 5781 |
}
|
| 5782 |
],
|
| 5783 |
"max_steps": 1000000,
|
| 5784 |
"num_train_epochs": 16,
|
| 5785 |
-
"total_flos": 5.
|
| 5786 |
"trial_name": null,
|
| 5787 |
"trial_params": null
|
| 5788 |
}
|
|
|
|
| 1 |
{
|
| 2 |
"best_metric": null,
|
| 3 |
"best_model_checkpoint": null,
|
| 4 |
+
"epoch": 12.063462977384825,
|
| 5 |
+
"global_step": 790000,
|
| 6 |
"is_hyper_param_search": false,
|
| 7 |
"is_local_process_zero": true,
|
| 8 |
"is_world_process_zero": true,
|
|
|
|
| 5778 |
"eval_samples_per_second": 1276.363,
|
| 5779 |
"eval_steps_per_second": 20.422,
|
| 5780 |
"step": 780000
|
| 5781 |
+
},
|
| 5782 |
+
{
|
| 5783 |
+
"epoch": 11.93,
|
| 5784 |
+
"learning_rate": 2.7568839012773365e-05,
|
| 5785 |
+
"loss": 0.238,
|
| 5786 |
+
"step": 781000
|
| 5787 |
+
},
|
| 5788 |
+
{
|
| 5789 |
+
"epoch": 11.94,
|
| 5790 |
+
"learning_rate": 2.7415754793213826e-05,
|
| 5791 |
+
"loss": 0.2375,
|
| 5792 |
+
"step": 782000
|
| 5793 |
+
},
|
| 5794 |
+
{
|
| 5795 |
+
"epoch": 11.96,
|
| 5796 |
+
"learning_rate": 2.7263245626535116e-05,
|
| 5797 |
+
"loss": 0.2377,
|
| 5798 |
+
"step": 783000
|
| 5799 |
+
},
|
| 5800 |
+
{
|
| 5801 |
+
"epoch": 11.97,
|
| 5802 |
+
"learning_rate": 2.7111313180553077e-05,
|
| 5803 |
+
"loss": 0.2378,
|
| 5804 |
+
"step": 784000
|
| 5805 |
+
},
|
| 5806 |
+
{
|
| 5807 |
+
"epoch": 11.99,
|
| 5808 |
+
"learning_rate": 2.6959959116776587e-05,
|
| 5809 |
+
"loss": 0.2376,
|
| 5810 |
+
"step": 785000
|
| 5811 |
+
},
|
| 5812 |
+
{
|
| 5813 |
+
"epoch": 11.99,
|
| 5814 |
+
"eval_runtime": 0.7664,
|
| 5815 |
+
"eval_samples_per_second": 1304.853,
|
| 5816 |
+
"eval_steps_per_second": 20.878,
|
| 5817 |
+
"step": 785000
|
| 5818 |
+
},
|
| 5819 |
+
{
|
| 5820 |
+
"epoch": 12.0,
|
| 5821 |
+
"learning_rate": 2.6809185090389406e-05,
|
| 5822 |
+
"loss": 0.2371,
|
| 5823 |
+
"step": 786000
|
| 5824 |
+
},
|
| 5825 |
+
{
|
| 5826 |
+
"epoch": 12.02,
|
| 5827 |
+
"learning_rate": 2.6658992750232167e-05,
|
| 5828 |
+
"loss": 0.2373,
|
| 5829 |
+
"step": 787000
|
| 5830 |
+
},
|
| 5831 |
+
{
|
| 5832 |
+
"epoch": 12.03,
|
| 5833 |
+
"learning_rate": 2.6509383738784218e-05,
|
| 5834 |
+
"loss": 0.2374,
|
| 5835 |
+
"step": 788000
|
| 5836 |
+
},
|
| 5837 |
+
{
|
| 5838 |
+
"epoch": 12.05,
|
| 5839 |
+
"learning_rate": 2.6360359692145757e-05,
|
| 5840 |
+
"loss": 0.237,
|
| 5841 |
+
"step": 789000
|
| 5842 |
+
},
|
| 5843 |
+
{
|
| 5844 |
+
"epoch": 12.06,
|
| 5845 |
+
"learning_rate": 2.6211922240019883e-05,
|
| 5846 |
+
"loss": 0.2368,
|
| 5847 |
+
"step": 790000
|
| 5848 |
+
},
|
| 5849 |
+
{
|
| 5850 |
+
"epoch": 12.06,
|
| 5851 |
+
"eval_runtime": 0.7543,
|
| 5852 |
+
"eval_samples_per_second": 1325.719,
|
| 5853 |
+
"eval_steps_per_second": 21.212,
|
| 5854 |
+
"step": 790000
|
| 5855 |
}
|
| 5856 |
],
|
| 5857 |
"max_steps": 1000000,
|
| 5858 |
"num_train_epochs": 16,
|
| 5859 |
+
"total_flos": 5.537912736579639e+22,
|
| 5860 |
"trial_name": null,
|
| 5861 |
"trial_params": null
|
| 5862 |
}
|
pytorch_model.bin
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 449471589
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:a6d2e303b79c843dd2e255dcb66b42d622f3c11aae0591a4594fe30435ca030b
|
| 3 |
size 449471589
|