Training in progress, step 600000
Browse files- last-checkpoint/optimizer.pt +1 -1
- last-checkpoint/pytorch_model.bin +1 -1
- last-checkpoint/rng_state_0.pth +1 -1
- last-checkpoint/rng_state_1.pth +2 -2
- last-checkpoint/rng_state_2.pth +1 -1
- last-checkpoint/rng_state_3.pth +1 -1
- last-checkpoint/scheduler.pt +1 -1
- last-checkpoint/trainer_state.json +77 -3
- pytorch_model.bin +1 -1
last-checkpoint/optimizer.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 893439185
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:fa3e03e9ecd0026fc79482638e67bfbb968ad2fb2a2251138fea448958ad2549
|
| 3 |
size 893439185
|
last-checkpoint/pytorch_model.bin
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 449471589
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:ff751ae05b56a482e47a728b40ba33717079b6b3d77d92ae8823ddd2e524f99f
|
| 3 |
size 449471589
|
last-checkpoint/rng_state_0.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14439
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:4e165b16ec4c4b7c229152423019cf42f78fde1837556dbc7f0e0a0d03eb92f2
|
| 3 |
size 14439
|
last-checkpoint/rng_state_1.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:d6acb016c9e03d8943408e9c25efa54b6209d293a5e14d07d93d97c6c5df995b
|
| 3 |
+
size 14567
|
last-checkpoint/rng_state_2.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14503
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:a372c6638d81ac9bbd86bc3b09ec3143aa46879e63bc5f10b74bd9c0aea9c32b
|
| 3 |
size 14503
|
last-checkpoint/rng_state_3.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14503
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:6a1ca703723e30170efc3dee97c3c7534e4adefa45396ecce1b5c543326f570d
|
| 3 |
size 14503
|
last-checkpoint/scheduler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 623
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:34904d09dca1d79fb910d11916faed0bf8107fa6c955fc65b114b5f30425510a
|
| 3 |
size 623
|
last-checkpoint/trainer_state.json
CHANGED
|
@@ -1,8 +1,8 @@
|
|
| 1 |
{
|
| 2 |
"best_metric": null,
|
| 3 |
"best_model_checkpoint": null,
|
| 4 |
-
"epoch": 9.
|
| 5 |
-
"global_step":
|
| 6 |
"is_hyper_param_search": false,
|
| 7 |
"is_local_process_zero": true,
|
| 8 |
"is_world_process_zero": true,
|
|
@@ -4372,11 +4372,85 @@
|
|
| 4372 |
"eval_samples_per_second": 1109.201,
|
| 4373 |
"eval_steps_per_second": 17.747,
|
| 4374 |
"step": 590000
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 4375 |
}
|
| 4376 |
],
|
| 4377 |
"max_steps": 1000000,
|
| 4378 |
"num_train_epochs": 16,
|
| 4379 |
-
"total_flos": 4.
|
| 4380 |
"trial_name": null,
|
| 4381 |
"trial_params": null
|
| 4382 |
}
|
|
|
|
| 1 |
{
|
| 2 |
"best_metric": null,
|
| 3 |
"best_model_checkpoint": null,
|
| 4 |
+
"epoch": 9.162123780292271,
|
| 5 |
+
"global_step": 600000,
|
| 6 |
"is_hyper_param_search": false,
|
| 7 |
"is_local_process_zero": true,
|
| 8 |
"is_world_process_zero": true,
|
|
|
|
| 4372 |
"eval_samples_per_second": 1109.201,
|
| 4373 |
"eval_steps_per_second": 17.747,
|
| 4374 |
"step": 590000
|
| 4375 |
+
},
|
| 4376 |
+
{
|
| 4377 |
+
"epoch": 9.02,
|
| 4378 |
+
"learning_rate": 6.484294934833822e-05,
|
| 4379 |
+
"loss": 0.2552,
|
| 4380 |
+
"step": 591000
|
| 4381 |
+
},
|
| 4382 |
+
{
|
| 4383 |
+
"epoch": 9.04,
|
| 4384 |
+
"learning_rate": 6.461703860381628e-05,
|
| 4385 |
+
"loss": 0.2551,
|
| 4386 |
+
"step": 592000
|
| 4387 |
+
},
|
| 4388 |
+
{
|
| 4389 |
+
"epoch": 9.06,
|
| 4390 |
+
"learning_rate": 6.439129608489559e-05,
|
| 4391 |
+
"loss": 0.2555,
|
| 4392 |
+
"step": 593000
|
| 4393 |
+
},
|
| 4394 |
+
{
|
| 4395 |
+
"epoch": 9.07,
|
| 4396 |
+
"learning_rate": 6.41657242602602e-05,
|
| 4397 |
+
"loss": 0.2549,
|
| 4398 |
+
"step": 594000
|
| 4399 |
+
},
|
| 4400 |
+
{
|
| 4401 |
+
"epoch": 9.09,
|
| 4402 |
+
"learning_rate": 6.39403255967274e-05,
|
| 4403 |
+
"loss": 0.255,
|
| 4404 |
+
"step": 595000
|
| 4405 |
+
},
|
| 4406 |
+
{
|
| 4407 |
+
"epoch": 9.09,
|
| 4408 |
+
"eval_runtime": 1.1107,
|
| 4409 |
+
"eval_samples_per_second": 900.319,
|
| 4410 |
+
"eval_steps_per_second": 14.405,
|
| 4411 |
+
"step": 595000
|
| 4412 |
+
},
|
| 4413 |
+
{
|
| 4414 |
+
"epoch": 9.1,
|
| 4415 |
+
"learning_rate": 6.371510255922088e-05,
|
| 4416 |
+
"loss": 0.2545,
|
| 4417 |
+
"step": 596000
|
| 4418 |
+
},
|
| 4419 |
+
{
|
| 4420 |
+
"epoch": 9.12,
|
| 4421 |
+
"learning_rate": 6.349005761074372e-05,
|
| 4422 |
+
"loss": 0.2547,
|
| 4423 |
+
"step": 597000
|
| 4424 |
+
},
|
| 4425 |
+
{
|
| 4426 |
+
"epoch": 9.13,
|
| 4427 |
+
"learning_rate": 6.326519321235139e-05,
|
| 4428 |
+
"loss": 0.2546,
|
| 4429 |
+
"step": 598000
|
| 4430 |
+
},
|
| 4431 |
+
{
|
| 4432 |
+
"epoch": 9.15,
|
| 4433 |
+
"learning_rate": 6.304051182312496e-05,
|
| 4434 |
+
"loss": 0.2549,
|
| 4435 |
+
"step": 599000
|
| 4436 |
+
},
|
| 4437 |
+
{
|
| 4438 |
+
"epoch": 9.16,
|
| 4439 |
+
"learning_rate": 6.281601590014407e-05,
|
| 4440 |
+
"loss": 0.2546,
|
| 4441 |
+
"step": 600000
|
| 4442 |
+
},
|
| 4443 |
+
{
|
| 4444 |
+
"epoch": 9.16,
|
| 4445 |
+
"eval_runtime": 1.0772,
|
| 4446 |
+
"eval_samples_per_second": 928.316,
|
| 4447 |
+
"eval_steps_per_second": 14.853,
|
| 4448 |
+
"step": 600000
|
| 4449 |
}
|
| 4450 |
],
|
| 4451 |
"max_steps": 1000000,
|
| 4452 |
"num_train_epochs": 16,
|
| 4453 |
+
"total_flos": 4.20600974820749e+22,
|
| 4454 |
"trial_name": null,
|
| 4455 |
"trial_params": null
|
| 4456 |
}
|
pytorch_model.bin
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 449471589
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:ff751ae05b56a482e47a728b40ba33717079b6b3d77d92ae8823ddd2e524f99f
|
| 3 |
size 449471589
|