Training in progress, step 800000
Browse files- last-checkpoint/optimizer.pt +1 -1
- last-checkpoint/pytorch_model.bin +1 -1
- last-checkpoint/rng_state_0.pth +1 -1
- last-checkpoint/rng_state_1.pth +1 -1
- last-checkpoint/rng_state_2.pth +1 -1
- last-checkpoint/rng_state_3.pth +1 -1
- last-checkpoint/scheduler.pt +1 -1
- last-checkpoint/trainer_state.json +77 -3
- pytorch_model.bin +1 -1
last-checkpoint/optimizer.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 893439185
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:d6ec057c0ea307d9f51e3009e95cfba4d493f3e514026b596f1c721e347110a2
|
| 3 |
size 893439185
|
last-checkpoint/pytorch_model.bin
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 449471589
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:c985c5c271524f27a0a6ac2cedd7bd56467790c039bcfe7ae085c019b80866ff
|
| 3 |
size 449471589
|
last-checkpoint/rng_state_0.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14439
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:0493bee6992b67d925775f2df83491a04dff10cd3cfaddd00934784185d10d2e
|
| 3 |
size 14439
|
last-checkpoint/rng_state_1.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14503
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:04d30c889b2c1188805f0e0743f415f3e88c779ab0ac6888677c659a1fcc6f0f
|
| 3 |
size 14503
|
last-checkpoint/rng_state_2.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14503
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:5feee3a51b2120b65724867d176679fa33e4ec4b388d10b3340281bee5153e6a
|
| 3 |
size 14503
|
last-checkpoint/rng_state_3.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14503
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:34638a46c5b98d6fa60d632175553537f127fe65e252717a514cd2f205e50dee
|
| 3 |
size 14503
|
last-checkpoint/scheduler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 623
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:57cc3662a36c24e948440a9c5383a944373362c15a98a0fb5317ec1e024dd4c4
|
| 3 |
size 623
|
last-checkpoint/trainer_state.json
CHANGED
|
@@ -1,8 +1,8 @@
|
|
| 1 |
{
|
| 2 |
"best_metric": null,
|
| 3 |
"best_model_checkpoint": null,
|
| 4 |
-
"epoch": 12.
|
| 5 |
-
"global_step":
|
| 6 |
"is_hyper_param_search": false,
|
| 7 |
"is_local_process_zero": true,
|
| 8 |
"is_world_process_zero": true,
|
|
@@ -5852,11 +5852,85 @@
|
|
| 5852 |
"eval_samples_per_second": 1325.719,
|
| 5853 |
"eval_steps_per_second": 21.212,
|
| 5854 |
"step": 790000
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 5855 |
}
|
| 5856 |
],
|
| 5857 |
"max_steps": 1000000,
|
| 5858 |
"num_train_epochs": 16,
|
| 5859 |
-
"total_flos": 5.
|
| 5860 |
"trial_name": null,
|
| 5861 |
"trial_params": null
|
| 5862 |
}
|
|
|
|
| 1 |
{
|
| 2 |
"best_metric": null,
|
| 3 |
"best_model_checkpoint": null,
|
| 4 |
+
"epoch": 12.216165040389695,
|
| 5 |
+
"global_step": 800000,
|
| 6 |
"is_hyper_param_search": false,
|
| 7 |
"is_local_process_zero": true,
|
| 8 |
"is_world_process_zero": true,
|
|
|
|
| 5852 |
"eval_samples_per_second": 1325.719,
|
| 5853 |
"eval_steps_per_second": 21.212,
|
| 5854 |
"step": 790000
|
| 5855 |
+
},
|
| 5856 |
+
{
|
| 5857 |
+
"epoch": 12.08,
|
| 5858 |
+
"learning_rate": 2.6064073005694758e-05,
|
| 5859 |
+
"loss": 0.2381,
|
| 5860 |
+
"step": 791000
|
| 5861 |
+
},
|
| 5862 |
+
{
|
| 5863 |
+
"epoch": 12.09,
|
| 5864 |
+
"learning_rate": 2.591681360602595e-05,
|
| 5865 |
+
"loss": 0.2373,
|
| 5866 |
+
"step": 792000
|
| 5867 |
+
},
|
| 5868 |
+
{
|
| 5869 |
+
"epoch": 12.11,
|
| 5870 |
+
"learning_rate": 2.577014565141866e-05,
|
| 5871 |
+
"loss": 0.2377,
|
| 5872 |
+
"step": 793000
|
| 5873 |
+
},
|
| 5874 |
+
{
|
| 5875 |
+
"epoch": 12.12,
|
| 5876 |
+
"learning_rate": 2.562407074581014e-05,
|
| 5877 |
+
"loss": 0.2382,
|
| 5878 |
+
"step": 794000
|
| 5879 |
+
},
|
| 5880 |
+
{
|
| 5881 |
+
"epoch": 12.14,
|
| 5882 |
+
"learning_rate": 2.5478590486652137e-05,
|
| 5883 |
+
"loss": 0.2374,
|
| 5884 |
+
"step": 795000
|
| 5885 |
+
},
|
| 5886 |
+
{
|
| 5887 |
+
"epoch": 12.14,
|
| 5888 |
+
"eval_runtime": 0.8227,
|
| 5889 |
+
"eval_samples_per_second": 1215.581,
|
| 5890 |
+
"eval_steps_per_second": 19.449,
|
| 5891 |
+
"step": 795000
|
| 5892 |
+
},
|
| 5893 |
+
{
|
| 5894 |
+
"epoch": 12.16,
|
| 5895 |
+
"learning_rate": 2.533370646489347e-05,
|
| 5896 |
+
"loss": 0.237,
|
| 5897 |
+
"step": 796000
|
| 5898 |
+
},
|
| 5899 |
+
{
|
| 5900 |
+
"epoch": 12.17,
|
| 5901 |
+
"learning_rate": 2.5189420264962586e-05,
|
| 5902 |
+
"loss": 0.2367,
|
| 5903 |
+
"step": 797000
|
| 5904 |
+
},
|
| 5905 |
+
{
|
| 5906 |
+
"epoch": 12.19,
|
| 5907 |
+
"learning_rate": 2.504573346475026e-05,
|
| 5908 |
+
"loss": 0.2371,
|
| 5909 |
+
"step": 798000
|
| 5910 |
+
},
|
| 5911 |
+
{
|
| 5912 |
+
"epoch": 12.2,
|
| 5913 |
+
"learning_rate": 2.4902647635592324e-05,
|
| 5914 |
+
"loss": 0.2372,
|
| 5915 |
+
"step": 799000
|
| 5916 |
+
},
|
| 5917 |
+
{
|
| 5918 |
+
"epoch": 12.22,
|
| 5919 |
+
"learning_rate": 2.476016434225246e-05,
|
| 5920 |
+
"loss": 0.2372,
|
| 5921 |
+
"step": 800000
|
| 5922 |
+
},
|
| 5923 |
+
{
|
| 5924 |
+
"epoch": 12.22,
|
| 5925 |
+
"eval_runtime": 0.741,
|
| 5926 |
+
"eval_samples_per_second": 1349.61,
|
| 5927 |
+
"eval_steps_per_second": 21.594,
|
| 5928 |
+
"step": 800000
|
| 5929 |
}
|
| 5930 |
],
|
| 5931 |
"max_steps": 1000000,
|
| 5932 |
"num_train_epochs": 16,
|
| 5933 |
+
"total_flos": 5.6080129976099865e+22,
|
| 5934 |
"trial_name": null,
|
| 5935 |
"trial_params": null
|
| 5936 |
}
|
pytorch_model.bin
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 449471589
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:c985c5c271524f27a0a6ac2cedd7bd56467790c039bcfe7ae085c019b80866ff
|
| 3 |
size 449471589
|