Training in progress, step 590000
Browse files- last-checkpoint/optimizer.pt +1 -1
- last-checkpoint/pytorch_model.bin +1 -1
- last-checkpoint/rng_state_0.pth +2 -2
- last-checkpoint/rng_state_1.pth +2 -2
- last-checkpoint/rng_state_2.pth +1 -1
- last-checkpoint/rng_state_3.pth +2 -2
- last-checkpoint/scheduler.pt +1 -1
- last-checkpoint/trainer_state.json +77 -3
- pytorch_model.bin +1 -1
last-checkpoint/optimizer.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 893439185
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:ccf7cac5a21a11d7f667ccf92a06d4ca4f6e11529886f97a81cee946646a7850
|
| 3 |
size 893439185
|
last-checkpoint/pytorch_model.bin
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 449471589
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:6dfc2dd7ec4e8c1ccc6143bbc6f13f079f3e7e48a8bbfe20e040b742b8d0a1df
|
| 3 |
size 449471589
|
last-checkpoint/rng_state_0.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:2f3d7d4e29826fad65749e3d1c71437aab80b1c0083abdcf52c0c2924f0e5e69
|
| 3 |
+
size 14439
|
last-checkpoint/rng_state_1.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:17829d91164c4f44a41a5ead32db30cb5aee27cae3b78beefecb92d0468bafdc
|
| 3 |
+
size 14503
|
last-checkpoint/rng_state_2.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14503
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:a63261dcff7584c8844400c561ac2e5b385953523dee796cb56c42af169cadd9
|
| 3 |
size 14503
|
last-checkpoint/rng_state_3.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:b41591d972d5c0406eef4b610a30a04b97cf79722da627acb09c85c762f5f46f
|
| 3 |
+
size 14503
|
last-checkpoint/scheduler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 623
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:bf17ad9f6f8f5230f179dea9ee53bd043cf1c420455dc89092d4dbf5e8505225
|
| 3 |
size 623
|
last-checkpoint/trainer_state.json
CHANGED
|
@@ -1,8 +1,8 @@
|
|
| 1 |
{
|
| 2 |
"best_metric": null,
|
| 3 |
"best_model_checkpoint": null,
|
| 4 |
-
"epoch":
|
| 5 |
-
"global_step":
|
| 6 |
"is_hyper_param_search": false,
|
| 7 |
"is_local_process_zero": true,
|
| 8 |
"is_world_process_zero": true,
|
|
@@ -4298,11 +4298,85 @@
|
|
| 4298 |
"eval_samples_per_second": 932.263,
|
| 4299 |
"eval_steps_per_second": 14.916,
|
| 4300 |
"step": 580000
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 4301 |
}
|
| 4302 |
],
|
| 4303 |
"max_steps": 1000000,
|
| 4304 |
"num_train_epochs": 16,
|
| 4305 |
-
"total_flos": 4.
|
| 4306 |
"trial_name": null,
|
| 4307 |
"trial_params": null
|
| 4308 |
}
|
|
|
|
| 1 |
{
|
| 2 |
"best_metric": null,
|
| 3 |
"best_model_checkpoint": null,
|
| 4 |
+
"epoch": 9.0094217172874,
|
| 5 |
+
"global_step": 590000,
|
| 6 |
"is_hyper_param_search": false,
|
| 7 |
"is_local_process_zero": true,
|
| 8 |
"is_world_process_zero": true,
|
|
|
|
| 4298 |
"eval_samples_per_second": 932.263,
|
| 4299 |
"eval_steps_per_second": 14.916,
|
| 4300 |
"step": 580000
|
| 4301 |
+
},
|
| 4302 |
+
{
|
| 4303 |
+
"epoch": 8.87,
|
| 4304 |
+
"learning_rate": 6.71107648105703e-05,
|
| 4305 |
+
"loss": 0.2564,
|
| 4306 |
+
"step": 581000
|
| 4307 |
+
},
|
| 4308 |
+
{
|
| 4309 |
+
"epoch": 8.89,
|
| 4310 |
+
"learning_rate": 6.688330797899925e-05,
|
| 4311 |
+
"loss": 0.2562,
|
| 4312 |
+
"step": 582000
|
| 4313 |
+
},
|
| 4314 |
+
{
|
| 4315 |
+
"epoch": 8.9,
|
| 4316 |
+
"learning_rate": 6.665599458947072e-05,
|
| 4317 |
+
"loss": 0.2562,
|
| 4318 |
+
"step": 583000
|
| 4319 |
+
},
|
| 4320 |
+
{
|
| 4321 |
+
"epoch": 8.92,
|
| 4322 |
+
"learning_rate": 6.642882712784742e-05,
|
| 4323 |
+
"loss": 0.2561,
|
| 4324 |
+
"step": 584000
|
| 4325 |
+
},
|
| 4326 |
+
{
|
| 4327 |
+
"epoch": 8.93,
|
| 4328 |
+
"learning_rate": 6.620180807839639e-05,
|
| 4329 |
+
"loss": 0.2561,
|
| 4330 |
+
"step": 585000
|
| 4331 |
+
},
|
| 4332 |
+
{
|
| 4333 |
+
"epoch": 8.93,
|
| 4334 |
+
"eval_runtime": 0.9936,
|
| 4335 |
+
"eval_samples_per_second": 1006.405,
|
| 4336 |
+
"eval_steps_per_second": 16.102,
|
| 4337 |
+
"step": 585000
|
| 4338 |
+
},
|
| 4339 |
+
{
|
| 4340 |
+
"epoch": 8.95,
|
| 4341 |
+
"learning_rate": 6.597493992376152e-05,
|
| 4342 |
+
"loss": 0.2557,
|
| 4343 |
+
"step": 586000
|
| 4344 |
+
},
|
| 4345 |
+
{
|
| 4346 |
+
"epoch": 8.96,
|
| 4347 |
+
"learning_rate": 6.574822514493664e-05,
|
| 4348 |
+
"loss": 0.2554,
|
| 4349 |
+
"step": 587000
|
| 4350 |
+
},
|
| 4351 |
+
{
|
| 4352 |
+
"epoch": 8.98,
|
| 4353 |
+
"learning_rate": 6.552166622123824e-05,
|
| 4354 |
+
"loss": 0.2554,
|
| 4355 |
+
"step": 588000
|
| 4356 |
+
},
|
| 4357 |
+
{
|
| 4358 |
+
"epoch": 8.99,
|
| 4359 |
+
"learning_rate": 6.52952656302784e-05,
|
| 4360 |
+
"loss": 0.2556,
|
| 4361 |
+
"step": 589000
|
| 4362 |
+
},
|
| 4363 |
+
{
|
| 4364 |
+
"epoch": 9.01,
|
| 4365 |
+
"learning_rate": 6.506902584793773e-05,
|
| 4366 |
+
"loss": 0.2553,
|
| 4367 |
+
"step": 590000
|
| 4368 |
+
},
|
| 4369 |
+
{
|
| 4370 |
+
"epoch": 9.01,
|
| 4371 |
+
"eval_runtime": 0.9015,
|
| 4372 |
+
"eval_samples_per_second": 1109.201,
|
| 4373 |
+
"eval_steps_per_second": 17.747,
|
| 4374 |
+
"step": 590000
|
| 4375 |
}
|
| 4376 |
],
|
| 4377 |
"max_steps": 1000000,
|
| 4378 |
"num_train_epochs": 16,
|
| 4379 |
+
"total_flos": 4.135909487177143e+22,
|
| 4380 |
"trial_name": null,
|
| 4381 |
"trial_params": null
|
| 4382 |
}
|
pytorch_model.bin
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 449471589
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:6dfc2dd7ec4e8c1ccc6143bbc6f13f079f3e7e48a8bbfe20e040b742b8d0a1df
|
| 3 |
size 449471589
|