Training in progress, step 830000
Browse files- last-checkpoint/optimizer.pt +1 -1
- last-checkpoint/pytorch_model.bin +1 -1
- last-checkpoint/rng_state_0.pth +1 -1
- last-checkpoint/rng_state_1.pth +1 -1
- last-checkpoint/rng_state_2.pth +1 -1
- last-checkpoint/rng_state_3.pth +1 -1
- last-checkpoint/rng_state_4.pth +1 -1
- last-checkpoint/rng_state_5.pth +1 -1
- last-checkpoint/rng_state_6.pth +1 -1
- last-checkpoint/rng_state_7.pth +1 -1
- last-checkpoint/scheduler.pt +1 -1
- last-checkpoint/trainer_state.json +203 -3
- pytorch_model.bin +1 -1
last-checkpoint/optimizer.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 893439185
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:f349542b4533abe4453e0adeb6aff6cd875b986f4117c2f333ebbbb94148a468
|
| 3 |
size 893439185
|
last-checkpoint/pytorch_model.bin
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 449471589
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:9d8915b38c77403d5b3caf94070565cc919cba4e372d557eb5c40dbe89ac1681
|
| 3 |
size 449471589
|
last-checkpoint/rng_state_0.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14503
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:87068f53b7dee620f289e61f6508b8ae2aca3d5b7cb2a5e745862a635059c762
|
| 3 |
size 14503
|
last-checkpoint/rng_state_1.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14503
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:87068f53b7dee620f289e61f6508b8ae2aca3d5b7cb2a5e745862a635059c762
|
| 3 |
size 14503
|
last-checkpoint/rng_state_2.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14503
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:87068f53b7dee620f289e61f6508b8ae2aca3d5b7cb2a5e745862a635059c762
|
| 3 |
size 14503
|
last-checkpoint/rng_state_3.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14503
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:87068f53b7dee620f289e61f6508b8ae2aca3d5b7cb2a5e745862a635059c762
|
| 3 |
size 14503
|
last-checkpoint/rng_state_4.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14503
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:87068f53b7dee620f289e61f6508b8ae2aca3d5b7cb2a5e745862a635059c762
|
| 3 |
size 14503
|
last-checkpoint/rng_state_5.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14503
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:87068f53b7dee620f289e61f6508b8ae2aca3d5b7cb2a5e745862a635059c762
|
| 3 |
size 14503
|
last-checkpoint/rng_state_6.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14503
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:87068f53b7dee620f289e61f6508b8ae2aca3d5b7cb2a5e745862a635059c762
|
| 3 |
size 14503
|
last-checkpoint/rng_state_7.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14503
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:87068f53b7dee620f289e61f6508b8ae2aca3d5b7cb2a5e745862a635059c762
|
| 3 |
size 14503
|
last-checkpoint/scheduler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 623
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:b44f4d1ea700e774f5dee0343ba4324675c77c29852dd54fec6a281d849ccd3b
|
| 3 |
size 623
|
last-checkpoint/trainer_state.json
CHANGED
|
@@ -1,8 +1,8 @@
|
|
| 1 |
{
|
| 2 |
"best_metric": null,
|
| 3 |
"best_model_checkpoint": null,
|
| 4 |
-
"epoch": 8.
|
| 5 |
-
"global_step":
|
| 6 |
"is_hyper_param_search": false,
|
| 7 |
"is_local_process_zero": true,
|
| 8 |
"is_world_process_zero": true,
|
|
@@ -16406,11 +16406,211 @@
|
|
| 16406 |
"eval_samples_per_second": 871.505,
|
| 16407 |
"eval_steps_per_second": 13.659,
|
| 16408 |
"step": 820000
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 16409 |
}
|
| 16410 |
],
|
| 16411 |
"max_steps": 1000000,
|
| 16412 |
"num_train_epochs": 12,
|
| 16413 |
-
"total_flos": 5.
|
| 16414 |
"trial_name": null,
|
| 16415 |
"trial_params": null
|
| 16416 |
}
|
|
|
|
| 1 |
{
|
| 2 |
"best_metric": null,
|
| 3 |
"best_model_checkpoint": null,
|
| 4 |
+
"epoch": 8.899746829796014,
|
| 5 |
+
"global_step": 830000,
|
| 6 |
"is_hyper_param_search": false,
|
| 7 |
"is_local_process_zero": true,
|
| 8 |
"is_world_process_zero": true,
|
|
|
|
| 16406 |
"eval_samples_per_second": 871.505,
|
| 16407 |
"eval_steps_per_second": 13.659,
|
| 16408 |
"step": 820000
|
| 16409 |
+
},
|
| 16410 |
+
{
|
| 16411 |
+
"epoch": 8.79,
|
| 16412 |
+
"learning_rate": 2.1974562300613417e-05,
|
| 16413 |
+
"loss": 0.186,
|
| 16414 |
+
"step": 820500
|
| 16415 |
+
},
|
| 16416 |
+
{
|
| 16417 |
+
"epoch": 8.8,
|
| 16418 |
+
"learning_rate": 2.1909901420919184e-05,
|
| 16419 |
+
"loss": 0.1856,
|
| 16420 |
+
"step": 821000
|
| 16421 |
+
},
|
| 16422 |
+
{
|
| 16423 |
+
"epoch": 8.8,
|
| 16424 |
+
"eval_loss": 0.17747129499912262,
|
| 16425 |
+
"eval_runtime": 2.664,
|
| 16426 |
+
"eval_samples_per_second": 862.246,
|
| 16427 |
+
"eval_steps_per_second": 13.514,
|
| 16428 |
+
"step": 821000
|
| 16429 |
+
},
|
| 16430 |
+
{
|
| 16431 |
+
"epoch": 8.8,
|
| 16432 |
+
"learning_rate": 2.1845399357336326e-05,
|
| 16433 |
+
"loss": 0.186,
|
| 16434 |
+
"step": 821500
|
| 16435 |
+
},
|
| 16436 |
+
{
|
| 16437 |
+
"epoch": 8.81,
|
| 16438 |
+
"learning_rate": 2.1781056286210997e-05,
|
| 16439 |
+
"loss": 0.186,
|
| 16440 |
+
"step": 822000
|
| 16441 |
+
},
|
| 16442 |
+
{
|
| 16443 |
+
"epoch": 8.81,
|
| 16444 |
+
"eval_loss": 0.1773909628391266,
|
| 16445 |
+
"eval_runtime": 2.5828,
|
| 16446 |
+
"eval_samples_per_second": 889.354,
|
| 16447 |
+
"eval_steps_per_second": 13.939,
|
| 16448 |
+
"step": 822000
|
| 16449 |
+
},
|
| 16450 |
+
{
|
| 16451 |
+
"epoch": 8.82,
|
| 16452 |
+
"learning_rate": 2.1716872383454674e-05,
|
| 16453 |
+
"loss": 0.1861,
|
| 16454 |
+
"step": 822500
|
| 16455 |
+
},
|
| 16456 |
+
{
|
| 16457 |
+
"epoch": 8.82,
|
| 16458 |
+
"learning_rate": 2.1652847824543744e-05,
|
| 16459 |
+
"loss": 0.1856,
|
| 16460 |
+
"step": 823000
|
| 16461 |
+
},
|
| 16462 |
+
{
|
| 16463 |
+
"epoch": 8.82,
|
| 16464 |
+
"eval_loss": 0.1759449690580368,
|
| 16465 |
+
"eval_runtime": 2.6867,
|
| 16466 |
+
"eval_samples_per_second": 854.948,
|
| 16467 |
+
"eval_steps_per_second": 13.399,
|
| 16468 |
+
"step": 823000
|
| 16469 |
+
},
|
| 16470 |
+
{
|
| 16471 |
+
"epoch": 8.83,
|
| 16472 |
+
"learning_rate": 2.1588982784518853e-05,
|
| 16473 |
+
"loss": 0.1862,
|
| 16474 |
+
"step": 823500
|
| 16475 |
+
},
|
| 16476 |
+
{
|
| 16477 |
+
"epoch": 8.83,
|
| 16478 |
+
"learning_rate": 2.1525277437984636e-05,
|
| 16479 |
+
"loss": 0.1857,
|
| 16480 |
+
"step": 824000
|
| 16481 |
+
},
|
| 16482 |
+
{
|
| 16483 |
+
"epoch": 8.83,
|
| 16484 |
+
"eval_loss": 0.1774652898311615,
|
| 16485 |
+
"eval_runtime": 2.6123,
|
| 16486 |
+
"eval_samples_per_second": 879.304,
|
| 16487 |
+
"eval_steps_per_second": 13.781,
|
| 16488 |
+
"step": 824000
|
| 16489 |
+
},
|
| 16490 |
+
{
|
| 16491 |
+
"epoch": 8.84,
|
| 16492 |
+
"learning_rate": 2.1461731959109053e-05,
|
| 16493 |
+
"loss": 0.186,
|
| 16494 |
+
"step": 824500
|
| 16495 |
+
},
|
| 16496 |
+
{
|
| 16497 |
+
"epoch": 8.84,
|
| 16498 |
+
"learning_rate": 2.1398346521623e-05,
|
| 16499 |
+
"loss": 0.1857,
|
| 16500 |
+
"step": 825000
|
| 16501 |
+
},
|
| 16502 |
+
{
|
| 16503 |
+
"epoch": 8.84,
|
| 16504 |
+
"eval_loss": 0.17699038982391357,
|
| 16505 |
+
"eval_runtime": 2.654,
|
| 16506 |
+
"eval_samples_per_second": 865.476,
|
| 16507 |
+
"eval_steps_per_second": 13.564,
|
| 16508 |
+
"step": 825000
|
| 16509 |
+
},
|
| 16510 |
+
{
|
| 16511 |
+
"epoch": 8.85,
|
| 16512 |
+
"learning_rate": 2.1335121298819867e-05,
|
| 16513 |
+
"loss": 0.1859,
|
| 16514 |
+
"step": 825500
|
| 16515 |
+
},
|
| 16516 |
+
{
|
| 16517 |
+
"epoch": 8.86,
|
| 16518 |
+
"learning_rate": 2.1272056463554978e-05,
|
| 16519 |
+
"loss": 0.1862,
|
| 16520 |
+
"step": 826000
|
| 16521 |
+
},
|
| 16522 |
+
{
|
| 16523 |
+
"epoch": 8.86,
|
| 16524 |
+
"eval_loss": 0.17693667113780975,
|
| 16525 |
+
"eval_runtime": 2.6428,
|
| 16526 |
+
"eval_samples_per_second": 869.15,
|
| 16527 |
+
"eval_steps_per_second": 13.622,
|
| 16528 |
+
"step": 826000
|
| 16529 |
+
},
|
| 16530 |
+
{
|
| 16531 |
+
"epoch": 8.86,
|
| 16532 |
+
"learning_rate": 2.1209152188245214e-05,
|
| 16533 |
+
"loss": 0.1858,
|
| 16534 |
+
"step": 826500
|
| 16535 |
+
},
|
| 16536 |
+
{
|
| 16537 |
+
"epoch": 8.87,
|
| 16538 |
+
"learning_rate": 2.114640864486845e-05,
|
| 16539 |
+
"loss": 0.1857,
|
| 16540 |
+
"step": 827000
|
| 16541 |
+
},
|
| 16542 |
+
{
|
| 16543 |
+
"epoch": 8.87,
|
| 16544 |
+
"eval_loss": 0.1788521409034729,
|
| 16545 |
+
"eval_runtime": 2.6742,
|
| 16546 |
+
"eval_samples_per_second": 858.952,
|
| 16547 |
+
"eval_steps_per_second": 13.462,
|
| 16548 |
+
"step": 827000
|
| 16549 |
+
},
|
| 16550 |
+
{
|
| 16551 |
+
"epoch": 8.87,
|
| 16552 |
+
"learning_rate": 2.1083826004963102e-05,
|
| 16553 |
+
"loss": 0.1859,
|
| 16554 |
+
"step": 827500
|
| 16555 |
+
},
|
| 16556 |
+
{
|
| 16557 |
+
"epoch": 8.88,
|
| 16558 |
+
"learning_rate": 2.1021404439627775e-05,
|
| 16559 |
+
"loss": 0.1855,
|
| 16560 |
+
"step": 828000
|
| 16561 |
+
},
|
| 16562 |
+
{
|
| 16563 |
+
"epoch": 8.88,
|
| 16564 |
+
"eval_loss": 0.17763476073741913,
|
| 16565 |
+
"eval_runtime": 2.5581,
|
| 16566 |
+
"eval_samples_per_second": 897.942,
|
| 16567 |
+
"eval_steps_per_second": 14.073,
|
| 16568 |
+
"step": 828000
|
| 16569 |
+
},
|
| 16570 |
+
{
|
| 16571 |
+
"epoch": 8.88,
|
| 16572 |
+
"learning_rate": 2.09591441195206e-05,
|
| 16573 |
+
"loss": 0.1856,
|
| 16574 |
+
"step": 828500
|
| 16575 |
+
},
|
| 16576 |
+
{
|
| 16577 |
+
"epoch": 8.89,
|
| 16578 |
+
"learning_rate": 2.089704521485896e-05,
|
| 16579 |
+
"loss": 0.1858,
|
| 16580 |
+
"step": 829000
|
| 16581 |
+
},
|
| 16582 |
+
{
|
| 16583 |
+
"epoch": 8.89,
|
| 16584 |
+
"eval_loss": 0.17711400985717773,
|
| 16585 |
+
"eval_runtime": 2.6039,
|
| 16586 |
+
"eval_samples_per_second": 882.145,
|
| 16587 |
+
"eval_steps_per_second": 13.826,
|
| 16588 |
+
"step": 829000
|
| 16589 |
+
},
|
| 16590 |
+
{
|
| 16591 |
+
"epoch": 8.89,
|
| 16592 |
+
"learning_rate": 2.083510789541883e-05,
|
| 16593 |
+
"loss": 0.1852,
|
| 16594 |
+
"step": 829500
|
| 16595 |
+
},
|
| 16596 |
+
{
|
| 16597 |
+
"epoch": 8.9,
|
| 16598 |
+
"learning_rate": 2.0773332330534513e-05,
|
| 16599 |
+
"loss": 0.1857,
|
| 16600 |
+
"step": 830000
|
| 16601 |
+
},
|
| 16602 |
+
{
|
| 16603 |
+
"epoch": 8.9,
|
| 16604 |
+
"eval_loss": 0.17438167333602905,
|
| 16605 |
+
"eval_runtime": 2.6569,
|
| 16606 |
+
"eval_samples_per_second": 864.555,
|
| 16607 |
+
"eval_steps_per_second": 13.55,
|
| 16608 |
+
"step": 830000
|
| 16609 |
}
|
| 16610 |
],
|
| 16611 |
"max_steps": 1000000,
|
| 16612 |
"num_train_epochs": 12,
|
| 16613 |
+
"total_flos": 5.818277853938688e+22,
|
| 16614 |
"trial_name": null,
|
| 16615 |
"trial_params": null
|
| 16616 |
}
|
pytorch_model.bin
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 449471589
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:9d8915b38c77403d5b3caf94070565cc919cba4e372d557eb5c40dbe89ac1681
|
| 3 |
size 449471589
|