Training in progress, step 30000
Browse files- last-checkpoint/optimizer.pt +1 -1
- last-checkpoint/pytorch_model.bin +1 -1
- last-checkpoint/rng_state_0.pth +1 -1
- last-checkpoint/rng_state_1.pth +1 -1
- last-checkpoint/rng_state_2.pth +1 -1
- last-checkpoint/rng_state_3.pth +1 -1
- last-checkpoint/scheduler.pt +1 -1
- last-checkpoint/trainer_state.json +77 -3
- pytorch_model.bin +1 -1
last-checkpoint/optimizer.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 893438545
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:f4cc35e78f4b2875a4e7bc2823bb00b03a35ef0a895f4019b9ee2bc553e87589
|
| 3 |
size 893438545
|
last-checkpoint/pytorch_model.bin
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 449471589
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:72eceef87bfa3bd9a4ae124be6a652f67324b3717ebee85984495f36f82da2ae
|
| 3 |
size 449471589
|
last-checkpoint/rng_state_0.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14503
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:e5fa93e01617d6f205944a831ee335dee07ebb08b04eafcf9562f4cb23a4302a
|
| 3 |
size 14503
|
last-checkpoint/rng_state_1.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14503
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:39fcc0dca710a70aacc882ac08d1de25fb93138ed23a4e1e7de926a24206dd2c
|
| 3 |
size 14503
|
last-checkpoint/rng_state_2.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14439
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:bcc311eb216d0f91d79274b66b1cc6a0546d030081f7d13ccdc6a5a80b2096b7
|
| 3 |
size 14439
|
last-checkpoint/rng_state_3.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14503
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:10eef73b54f0ee2610594db97b5e21337619a39459c4b5d554e4fc3069d6003e
|
| 3 |
size 14503
|
last-checkpoint/scheduler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 623
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:0eff7bfd19bb5de4a804b312a4895c5e9ec017a31baa7a463d8d86ec7115b34c
|
| 3 |
size 623
|
last-checkpoint/trainer_state.json
CHANGED
|
@@ -1,8 +1,8 @@
|
|
| 1 |
{
|
| 2 |
"best_metric": null,
|
| 3 |
"best_model_checkpoint": null,
|
| 4 |
-
"epoch": 0.
|
| 5 |
-
"global_step":
|
| 6 |
"is_hyper_param_search": false,
|
| 7 |
"is_local_process_zero": true,
|
| 8 |
"is_world_process_zero": true,
|
|
@@ -154,11 +154,85 @@
|
|
| 154 |
"eval_samples_per_second": 982.441,
|
| 155 |
"eval_steps_per_second": 15.719,
|
| 156 |
"step": 20000
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 157 |
}
|
| 158 |
],
|
| 159 |
"max_steps": 1000000,
|
| 160 |
"num_train_epochs": 16,
|
| 161 |
-
"total_flos":
|
| 162 |
"trial_name": null,
|
| 163 |
"trial_params": null
|
| 164 |
}
|
|
|
|
| 1 |
{
|
| 2 |
"best_metric": null,
|
| 3 |
"best_model_checkpoint": null,
|
| 4 |
+
"epoch": 0.4581061890146136,
|
| 5 |
+
"global_step": 30000,
|
| 6 |
"is_hyper_param_search": false,
|
| 7 |
"is_local_process_zero": true,
|
| 8 |
"is_world_process_zero": true,
|
|
|
|
| 154 |
"eval_samples_per_second": 982.441,
|
| 155 |
"eval_steps_per_second": 15.719,
|
| 156 |
"step": 20000
|
| 157 |
+
},
|
| 158 |
+
{
|
| 159 |
+
"epoch": 0.32,
|
| 160 |
+
"learning_rate": 6.299999999999999e-05,
|
| 161 |
+
"loss": 0.6074,
|
| 162 |
+
"step": 21000
|
| 163 |
+
},
|
| 164 |
+
{
|
| 165 |
+
"epoch": 0.34,
|
| 166 |
+
"learning_rate": 6.599999999999999e-05,
|
| 167 |
+
"loss": 0.6039,
|
| 168 |
+
"step": 22000
|
| 169 |
+
},
|
| 170 |
+
{
|
| 171 |
+
"epoch": 0.35,
|
| 172 |
+
"learning_rate": 6.9e-05,
|
| 173 |
+
"loss": 0.6005,
|
| 174 |
+
"step": 23000
|
| 175 |
+
},
|
| 176 |
+
{
|
| 177 |
+
"epoch": 0.37,
|
| 178 |
+
"learning_rate": 7.199999999999999e-05,
|
| 179 |
+
"loss": 0.5968,
|
| 180 |
+
"step": 24000
|
| 181 |
+
},
|
| 182 |
+
{
|
| 183 |
+
"epoch": 0.38,
|
| 184 |
+
"learning_rate": 7.5e-05,
|
| 185 |
+
"loss": 0.5932,
|
| 186 |
+
"step": 25000
|
| 187 |
+
},
|
| 188 |
+
{
|
| 189 |
+
"epoch": 0.38,
|
| 190 |
+
"eval_runtime": 1.1249,
|
| 191 |
+
"eval_samples_per_second": 888.989,
|
| 192 |
+
"eval_steps_per_second": 14.224,
|
| 193 |
+
"step": 25000
|
| 194 |
+
},
|
| 195 |
+
{
|
| 196 |
+
"epoch": 0.4,
|
| 197 |
+
"learning_rate": 7.8e-05,
|
| 198 |
+
"loss": 0.5912,
|
| 199 |
+
"step": 26000
|
| 200 |
+
},
|
| 201 |
+
{
|
| 202 |
+
"epoch": 0.41,
|
| 203 |
+
"learning_rate": 8.1e-05,
|
| 204 |
+
"loss": 0.58,
|
| 205 |
+
"step": 27000
|
| 206 |
+
},
|
| 207 |
+
{
|
| 208 |
+
"epoch": 0.43,
|
| 209 |
+
"learning_rate": 8.4e-05,
|
| 210 |
+
"loss": 0.5698,
|
| 211 |
+
"step": 28000
|
| 212 |
+
},
|
| 213 |
+
{
|
| 214 |
+
"epoch": 0.44,
|
| 215 |
+
"learning_rate": 8.699999999999999e-05,
|
| 216 |
+
"loss": 0.5639,
|
| 217 |
+
"step": 29000
|
| 218 |
+
},
|
| 219 |
+
{
|
| 220 |
+
"epoch": 0.46,
|
| 221 |
+
"learning_rate": 8.999999999999999e-05,
|
| 222 |
+
"loss": 0.5601,
|
| 223 |
+
"step": 30000
|
| 224 |
+
},
|
| 225 |
+
{
|
| 226 |
+
"epoch": 0.46,
|
| 227 |
+
"eval_runtime": 1.0096,
|
| 228 |
+
"eval_samples_per_second": 990.512,
|
| 229 |
+
"eval_steps_per_second": 15.848,
|
| 230 |
+
"step": 30000
|
| 231 |
}
|
| 232 |
],
|
| 233 |
"max_steps": 1000000,
|
| 234 |
"num_train_epochs": 16,
|
| 235 |
+
"total_flos": 2.1030078309104144e+21,
|
| 236 |
"trial_name": null,
|
| 237 |
"trial_params": null
|
| 238 |
}
|
pytorch_model.bin
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 449471589
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:72eceef87bfa3bd9a4ae124be6a652f67324b3717ebee85984495f36f82da2ae
|
| 3 |
size 449471589
|