Training in progress, step 970000
Browse files- last-checkpoint/optimizer.pt +1 -1
- last-checkpoint/pytorch_model.bin +1 -1
- last-checkpoint/rng_state_0.pth +1 -1
- last-checkpoint/rng_state_1.pth +2 -2
- last-checkpoint/rng_state_2.pth +2 -2
- last-checkpoint/rng_state_3.pth +2 -2
- last-checkpoint/scheduler.pt +1 -1
- last-checkpoint/trainer_state.json +77 -3
- pytorch_model.bin +1 -1
last-checkpoint/optimizer.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 893439185
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:f6337e42221c0412df86c7992210272b9971c9d1fc461208785aaf4d8ac59d2f
|
| 3 |
size 893439185
|
last-checkpoint/pytorch_model.bin
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 449471589
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:02e43adcfc82aa259193d845850177a25c81c1a4194053ab16836d335061826a
|
| 3 |
size 449471589
|
last-checkpoint/rng_state_0.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14503
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:896791a7c2af828c5892ebdd0c2594828d16ab816d74f045b54f109ae6d9494f
|
| 3 |
size 14503
|
last-checkpoint/rng_state_1.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:d2b1d003a5601ab66979661e59677be128945b2fa99167367a5f20eee9647c21
|
| 3 |
+
size 14503
|
last-checkpoint/rng_state_2.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:881d6829cb0acc89bbda293c5817890086c3d5c60b0bada5736d21401b19e6e3
|
| 3 |
+
size 14567
|
last-checkpoint/rng_state_3.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:df3df06c4c66eb76fd186c61e084b6f3a769bb5d036f690f217d70cc8d7d4fc4
|
| 3 |
+
size 14503
|
last-checkpoint/scheduler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 623
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:9af3eb0d3db8162f6de4427ee5f19b1787f4bdb865e0ebda13f4fed6034a8890
|
| 3 |
size 623
|
last-checkpoint/trainer_state.json
CHANGED
|
@@ -1,8 +1,8 @@
|
|
| 1 |
{
|
| 2 |
"best_metric": null,
|
| 3 |
"best_model_checkpoint": null,
|
| 4 |
-
"epoch": 14.
|
| 5 |
-
"global_step":
|
| 6 |
"is_hyper_param_search": false,
|
| 7 |
"is_local_process_zero": true,
|
| 8 |
"is_world_process_zero": true,
|
|
@@ -7110,11 +7110,85 @@
|
|
| 7110 |
"eval_samples_per_second": 1227.238,
|
| 7111 |
"eval_steps_per_second": 19.636,
|
| 7112 |
"step": 960000
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 7113 |
}
|
| 7114 |
],
|
| 7115 |
"max_steps": 1000000,
|
| 7116 |
"num_train_epochs": 16,
|
| 7117 |
-
"total_flos": 6.
|
| 7118 |
"trial_name": null,
|
| 7119 |
"trial_params": null
|
| 7120 |
}
|
|
|
|
| 1 |
{
|
| 2 |
"best_metric": null,
|
| 3 |
"best_model_checkpoint": null,
|
| 4 |
+
"epoch": 14.812100111472507,
|
| 5 |
+
"global_step": 970000,
|
| 6 |
"is_hyper_param_search": false,
|
| 7 |
"is_local_process_zero": true,
|
| 8 |
"is_world_process_zero": true,
|
|
|
|
| 7110 |
"eval_samples_per_second": 1227.238,
|
| 7111 |
"eval_steps_per_second": 19.636,
|
| 7112 |
"step": 960000
|
| 7113 |
+
},
|
| 7114 |
+
{
|
| 7115 |
+
"epoch": 14.67,
|
| 7116 |
+
"learning_rate": 1.0581363452005424e-05,
|
| 7117 |
+
"loss": 0.2279,
|
| 7118 |
+
"step": 961000
|
| 7119 |
+
},
|
| 7120 |
+
{
|
| 7121 |
+
"epoch": 14.69,
|
| 7122 |
+
"learning_rate": 1.0551970907986557e-05,
|
| 7123 |
+
"loss": 0.2277,
|
| 7124 |
+
"step": 962000
|
| 7125 |
+
},
|
| 7126 |
+
{
|
| 7127 |
+
"epoch": 14.71,
|
| 7128 |
+
"learning_rate": 1.0523337836487271e-05,
|
| 7129 |
+
"loss": 0.2276,
|
| 7130 |
+
"step": 963000
|
| 7131 |
+
},
|
| 7132 |
+
{
|
| 7133 |
+
"epoch": 14.72,
|
| 7134 |
+
"learning_rate": 1.0495464550634267e-05,
|
| 7135 |
+
"loss": 0.2278,
|
| 7136 |
+
"step": 964000
|
| 7137 |
+
},
|
| 7138 |
+
{
|
| 7139 |
+
"epoch": 14.74,
|
| 7140 |
+
"learning_rate": 1.046835135524533e-05,
|
| 7141 |
+
"loss": 0.2277,
|
| 7142 |
+
"step": 965000
|
| 7143 |
+
},
|
| 7144 |
+
{
|
| 7145 |
+
"epoch": 14.74,
|
| 7146 |
+
"eval_runtime": 0.7884,
|
| 7147 |
+
"eval_samples_per_second": 1268.404,
|
| 7148 |
+
"eval_steps_per_second": 20.294,
|
| 7149 |
+
"step": 965000
|
| 7150 |
+
},
|
| 7151 |
+
{
|
| 7152 |
+
"epoch": 14.75,
|
| 7153 |
+
"learning_rate": 1.044199854682601e-05,
|
| 7154 |
+
"loss": 0.2278,
|
| 7155 |
+
"step": 966000
|
| 7156 |
+
},
|
| 7157 |
+
{
|
| 7158 |
+
"epoch": 14.77,
|
| 7159 |
+
"learning_rate": 1.0416406413566414e-05,
|
| 7160 |
+
"loss": 0.2279,
|
| 7161 |
+
"step": 967000
|
| 7162 |
+
},
|
| 7163 |
+
{
|
| 7164 |
+
"epoch": 14.78,
|
| 7165 |
+
"learning_rate": 1.0391575235337991e-05,
|
| 7166 |
+
"loss": 0.2278,
|
| 7167 |
+
"step": 968000
|
| 7168 |
+
},
|
| 7169 |
+
{
|
| 7170 |
+
"epoch": 14.8,
|
| 7171 |
+
"learning_rate": 1.0367505283690547e-05,
|
| 7172 |
+
"loss": 0.2276,
|
| 7173 |
+
"step": 969000
|
| 7174 |
+
},
|
| 7175 |
+
{
|
| 7176 |
+
"epoch": 14.81,
|
| 7177 |
+
"learning_rate": 1.0344196821849202e-05,
|
| 7178 |
+
"loss": 0.2279,
|
| 7179 |
+
"step": 970000
|
| 7180 |
+
},
|
| 7181 |
+
{
|
| 7182 |
+
"epoch": 14.81,
|
| 7183 |
+
"eval_runtime": 0.7534,
|
| 7184 |
+
"eval_samples_per_second": 1327.252,
|
| 7185 |
+
"eval_steps_per_second": 21.236,
|
| 7186 |
+
"step": 970000
|
| 7187 |
}
|
| 7188 |
],
|
| 7189 |
"max_steps": 1000000,
|
| 7190 |
"num_train_epochs": 16,
|
| 7191 |
+
"total_flos": 6.7997161209895905e+22,
|
| 7192 |
"trial_name": null,
|
| 7193 |
"trial_params": null
|
| 7194 |
}
|
pytorch_model.bin
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 449471589
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:02e43adcfc82aa259193d845850177a25c81c1a4194053ab16836d335061826a
|
| 3 |
size 449471589
|