Training in progress, step 880000
Browse files- last-checkpoint/optimizer.pt +1 -1
- last-checkpoint/pytorch_model.bin +1 -1
- last-checkpoint/rng_state_0.pth +1 -1
- last-checkpoint/rng_state_1.pth +1 -1
- last-checkpoint/rng_state_2.pth +1 -1
- last-checkpoint/rng_state_3.pth +1 -1
- last-checkpoint/rng_state_4.pth +1 -1
- last-checkpoint/rng_state_5.pth +1 -1
- last-checkpoint/rng_state_6.pth +1 -1
- last-checkpoint/rng_state_7.pth +1 -1
- last-checkpoint/scheduler.pt +1 -1
- last-checkpoint/trainer_state.json +203 -3
- pytorch_model.bin +1 -1
last-checkpoint/optimizer.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 893439185
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:86cf27fbaeb2a38de0ef33258b77f6fefbd96bfd63b67353f72569cf9236a376
|
| 3 |
size 893439185
|
last-checkpoint/pytorch_model.bin
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 449471589
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:8ff45cd407febf926d10bda98ff4d352e6977480876fc00eacce9c1938f55c43
|
| 3 |
size 449471589
|
last-checkpoint/rng_state_0.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14503
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:044c8fd4cb46d527c7cfde8c2f060ed7dc755348a5dbf6882b40d299eabc87d2
|
| 3 |
size 14503
|
last-checkpoint/rng_state_1.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14503
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:044c8fd4cb46d527c7cfde8c2f060ed7dc755348a5dbf6882b40d299eabc87d2
|
| 3 |
size 14503
|
last-checkpoint/rng_state_2.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14503
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:044c8fd4cb46d527c7cfde8c2f060ed7dc755348a5dbf6882b40d299eabc87d2
|
| 3 |
size 14503
|
last-checkpoint/rng_state_3.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14503
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:044c8fd4cb46d527c7cfde8c2f060ed7dc755348a5dbf6882b40d299eabc87d2
|
| 3 |
size 14503
|
last-checkpoint/rng_state_4.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14503
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:044c8fd4cb46d527c7cfde8c2f060ed7dc755348a5dbf6882b40d299eabc87d2
|
| 3 |
size 14503
|
last-checkpoint/rng_state_5.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14503
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:044c8fd4cb46d527c7cfde8c2f060ed7dc755348a5dbf6882b40d299eabc87d2
|
| 3 |
size 14503
|
last-checkpoint/rng_state_6.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14503
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:044c8fd4cb46d527c7cfde8c2f060ed7dc755348a5dbf6882b40d299eabc87d2
|
| 3 |
size 14503
|
last-checkpoint/rng_state_7.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14503
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:044c8fd4cb46d527c7cfde8c2f060ed7dc755348a5dbf6882b40d299eabc87d2
|
| 3 |
size 14503
|
last-checkpoint/scheduler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 623
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:ebaa6261431616bb924fa3611c1e782327703255936f9b7e34a1eda29c117895
|
| 3 |
size 623
|
last-checkpoint/trainer_state.json
CHANGED
|
@@ -1,8 +1,8 @@
|
|
| 1 |
{
|
| 2 |
"best_metric": null,
|
| 3 |
"best_model_checkpoint": null,
|
| 4 |
-
"epoch": 9.
|
| 5 |
-
"global_step":
|
| 6 |
"is_hyper_param_search": false,
|
| 7 |
"is_local_process_zero": true,
|
| 8 |
"is_world_process_zero": true,
|
|
@@ -17406,11 +17406,211 @@
|
|
| 17406 |
"eval_samples_per_second": 841.196,
|
| 17407 |
"eval_steps_per_second": 13.184,
|
| 17408 |
"step": 870000
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 17409 |
}
|
| 17410 |
],
|
| 17411 |
"max_steps": 1000000,
|
| 17412 |
"num_train_epochs": 12,
|
| 17413 |
-
"total_flos": 6.
|
| 17414 |
"trial_name": null,
|
| 17415 |
"trial_params": null
|
| 17416 |
}
|
|
|
|
| 1 |
{
|
| 2 |
"best_metric": null,
|
| 3 |
"best_model_checkpoint": null,
|
| 4 |
+
"epoch": 9.457390450910632,
|
| 5 |
+
"global_step": 880000,
|
| 6 |
"is_hyper_param_search": false,
|
| 7 |
"is_local_process_zero": true,
|
| 8 |
"is_world_process_zero": true,
|
|
|
|
| 17406 |
"eval_samples_per_second": 841.196,
|
| 17407 |
"eval_steps_per_second": 13.184,
|
| 17408 |
"step": 870000
|
| 17409 |
+
},
|
| 17410 |
+
{
|
| 17411 |
+
"epoch": 9.35,
|
| 17412 |
+
"learning_rate": 1.6321390014277996e-05,
|
| 17413 |
+
"loss": 0.1831,
|
| 17414 |
+
"step": 870500
|
| 17415 |
+
},
|
| 17416 |
+
{
|
| 17417 |
+
"epoch": 9.36,
|
| 17418 |
+
"learning_rate": 1.6273411576885517e-05,
|
| 17419 |
+
"loss": 0.1836,
|
| 17420 |
+
"step": 871000
|
| 17421 |
+
},
|
| 17422 |
+
{
|
| 17423 |
+
"epoch": 9.36,
|
| 17424 |
+
"eval_loss": 0.17539818584918976,
|
| 17425 |
+
"eval_runtime": 2.6712,
|
| 17426 |
+
"eval_samples_per_second": 859.901,
|
| 17427 |
+
"eval_steps_per_second": 13.477,
|
| 17428 |
+
"step": 871000
|
| 17429 |
+
},
|
| 17430 |
+
{
|
| 17431 |
+
"epoch": 9.36,
|
| 17432 |
+
"learning_rate": 1.6225607365552378e-05,
|
| 17433 |
+
"loss": 0.1831,
|
| 17434 |
+
"step": 871500
|
| 17435 |
+
},
|
| 17436 |
+
{
|
| 17437 |
+
"epoch": 9.37,
|
| 17438 |
+
"learning_rate": 1.617797751097349e-05,
|
| 17439 |
+
"loss": 0.1832,
|
| 17440 |
+
"step": 872000
|
| 17441 |
+
},
|
| 17442 |
+
{
|
| 17443 |
+
"epoch": 9.37,
|
| 17444 |
+
"eval_loss": 0.1717691868543625,
|
| 17445 |
+
"eval_runtime": 2.6798,
|
| 17446 |
+
"eval_samples_per_second": 857.157,
|
| 17447 |
+
"eval_steps_per_second": 13.434,
|
| 17448 |
+
"step": 872000
|
| 17449 |
+
},
|
| 17450 |
+
{
|
| 17451 |
+
"epoch": 9.37,
|
| 17452 |
+
"learning_rate": 1.6130522143367032e-05,
|
| 17453 |
+
"loss": 0.1832,
|
| 17454 |
+
"step": 872500
|
| 17455 |
+
},
|
| 17456 |
+
{
|
| 17457 |
+
"epoch": 9.38,
|
| 17458 |
+
"learning_rate": 1.608324139247421e-05,
|
| 17459 |
+
"loss": 0.1835,
|
| 17460 |
+
"step": 873000
|
| 17461 |
+
},
|
| 17462 |
+
{
|
| 17463 |
+
"epoch": 9.38,
|
| 17464 |
+
"eval_loss": 0.1719122976064682,
|
| 17465 |
+
"eval_runtime": 2.6225,
|
| 17466 |
+
"eval_samples_per_second": 875.898,
|
| 17467 |
+
"eval_steps_per_second": 13.728,
|
| 17468 |
+
"step": 873000
|
| 17469 |
+
},
|
| 17470 |
+
{
|
| 17471 |
+
"epoch": 9.38,
|
| 17472 |
+
"learning_rate": 1.6036135387558756e-05,
|
| 17473 |
+
"loss": 0.1831,
|
| 17474 |
+
"step": 873500
|
| 17475 |
+
},
|
| 17476 |
+
{
|
| 17477 |
+
"epoch": 9.39,
|
| 17478 |
+
"learning_rate": 1.5989204257406693e-05,
|
| 17479 |
+
"loss": 0.1833,
|
| 17480 |
+
"step": 874000
|
| 17481 |
+
},
|
| 17482 |
+
{
|
| 17483 |
+
"epoch": 9.39,
|
| 17484 |
+
"eval_loss": 0.17478306591510773,
|
| 17485 |
+
"eval_runtime": 2.6101,
|
| 17486 |
+
"eval_samples_per_second": 880.046,
|
| 17487 |
+
"eval_steps_per_second": 13.793,
|
| 17488 |
+
"step": 874000
|
| 17489 |
+
},
|
| 17490 |
+
{
|
| 17491 |
+
"epoch": 9.4,
|
| 17492 |
+
"learning_rate": 1.594244813032595e-05,
|
| 17493 |
+
"loss": 0.1829,
|
| 17494 |
+
"step": 874500
|
| 17495 |
+
},
|
| 17496 |
+
{
|
| 17497 |
+
"epoch": 9.4,
|
| 17498 |
+
"learning_rate": 1.5895867134145974e-05,
|
| 17499 |
+
"loss": 0.1829,
|
| 17500 |
+
"step": 875000
|
| 17501 |
+
},
|
| 17502 |
+
{
|
| 17503 |
+
"epoch": 9.4,
|
| 17504 |
+
"eval_loss": 0.17394264042377472,
|
| 17505 |
+
"eval_runtime": 2.5878,
|
| 17506 |
+
"eval_samples_per_second": 887.623,
|
| 17507 |
+
"eval_steps_per_second": 13.911,
|
| 17508 |
+
"step": 875000
|
| 17509 |
+
},
|
| 17510 |
+
{
|
| 17511 |
+
"epoch": 9.41,
|
| 17512 |
+
"learning_rate": 1.5849461396217467e-05,
|
| 17513 |
+
"loss": 0.1834,
|
| 17514 |
+
"step": 875500
|
| 17515 |
+
},
|
| 17516 |
+
{
|
| 17517 |
+
"epoch": 9.41,
|
| 17518 |
+
"learning_rate": 1.5803231043411912e-05,
|
| 17519 |
+
"loss": 0.1827,
|
| 17520 |
+
"step": 876000
|
| 17521 |
+
},
|
| 17522 |
+
{
|
| 17523 |
+
"epoch": 9.41,
|
| 17524 |
+
"eval_loss": 0.17351944744586945,
|
| 17525 |
+
"eval_runtime": 2.6686,
|
| 17526 |
+
"eval_samples_per_second": 860.761,
|
| 17527 |
+
"eval_steps_per_second": 13.49,
|
| 17528 |
+
"step": 876000
|
| 17529 |
+
},
|
| 17530 |
+
{
|
| 17531 |
+
"epoch": 9.42,
|
| 17532 |
+
"learning_rate": 1.575717620212132e-05,
|
| 17533 |
+
"loss": 0.183,
|
| 17534 |
+
"step": 876500
|
| 17535 |
+
},
|
| 17536 |
+
{
|
| 17537 |
+
"epoch": 9.42,
|
| 17538 |
+
"learning_rate": 1.5711296998257902e-05,
|
| 17539 |
+
"loss": 0.1832,
|
| 17540 |
+
"step": 877000
|
| 17541 |
+
},
|
| 17542 |
+
{
|
| 17543 |
+
"epoch": 9.42,
|
| 17544 |
+
"eval_loss": 0.17347006499767303,
|
| 17545 |
+
"eval_runtime": 2.7428,
|
| 17546 |
+
"eval_samples_per_second": 837.47,
|
| 17547 |
+
"eval_steps_per_second": 13.125,
|
| 17548 |
+
"step": 877000
|
| 17549 |
+
},
|
| 17550 |
+
{
|
| 17551 |
+
"epoch": 9.43,
|
| 17552 |
+
"learning_rate": 1.5665593557253623e-05,
|
| 17553 |
+
"loss": 0.1833,
|
| 17554 |
+
"step": 877500
|
| 17555 |
+
},
|
| 17556 |
+
{
|
| 17557 |
+
"epoch": 9.44,
|
| 17558 |
+
"learning_rate": 1.562006600405996e-05,
|
| 17559 |
+
"loss": 0.1829,
|
| 17560 |
+
"step": 878000
|
| 17561 |
+
},
|
| 17562 |
+
{
|
| 17563 |
+
"epoch": 9.44,
|
| 17564 |
+
"eval_loss": 0.1734461635351181,
|
| 17565 |
+
"eval_runtime": 2.6113,
|
| 17566 |
+
"eval_samples_per_second": 879.646,
|
| 17567 |
+
"eval_steps_per_second": 13.786,
|
| 17568 |
+
"step": 878000
|
| 17569 |
+
},
|
| 17570 |
+
{
|
| 17571 |
+
"epoch": 9.44,
|
| 17572 |
+
"learning_rate": 1.5574714463147512e-05,
|
| 17573 |
+
"loss": 0.1831,
|
| 17574 |
+
"step": 878500
|
| 17575 |
+
},
|
| 17576 |
+
{
|
| 17577 |
+
"epoch": 9.45,
|
| 17578 |
+
"learning_rate": 1.5529539058505624e-05,
|
| 17579 |
+
"loss": 0.183,
|
| 17580 |
+
"step": 879000
|
| 17581 |
+
},
|
| 17582 |
+
{
|
| 17583 |
+
"epoch": 9.45,
|
| 17584 |
+
"eval_loss": 0.17375677824020386,
|
| 17585 |
+
"eval_runtime": 2.5315,
|
| 17586 |
+
"eval_samples_per_second": 907.374,
|
| 17587 |
+
"eval_steps_per_second": 14.221,
|
| 17588 |
+
"step": 879000
|
| 17589 |
+
},
|
| 17590 |
+
{
|
| 17591 |
+
"epoch": 9.45,
|
| 17592 |
+
"learning_rate": 1.5484539913642175e-05,
|
| 17593 |
+
"loss": 0.1826,
|
| 17594 |
+
"step": 879500
|
| 17595 |
+
},
|
| 17596 |
+
{
|
| 17597 |
+
"epoch": 9.46,
|
| 17598 |
+
"learning_rate": 1.543971715158307e-05,
|
| 17599 |
+
"loss": 0.1828,
|
| 17600 |
+
"step": 880000
|
| 17601 |
+
},
|
| 17602 |
+
{
|
| 17603 |
+
"epoch": 9.46,
|
| 17604 |
+
"eval_loss": 0.17431409657001495,
|
| 17605 |
+
"eval_runtime": 2.6398,
|
| 17606 |
+
"eval_samples_per_second": 870.144,
|
| 17607 |
+
"eval_steps_per_second": 13.637,
|
| 17608 |
+
"step": 880000
|
| 17609 |
}
|
| 17610 |
],
|
| 17611 |
"max_steps": 1000000,
|
| 17612 |
"num_train_epochs": 12,
|
| 17613 |
+
"total_flos": 6.168773682642908e+22,
|
| 17614 |
"trial_name": null,
|
| 17615 |
"trial_params": null
|
| 17616 |
}
|
pytorch_model.bin
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 449471589
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:8ff45cd407febf926d10bda98ff4d352e6977480876fc00eacce9c1938f55c43
|
| 3 |
size 449471589
|