Training in progress, step 970000
Browse files- last-checkpoint/optimizer.pt +1 -1
- last-checkpoint/pytorch_model.bin +1 -1
- last-checkpoint/rng_state_0.pth +1 -1
- last-checkpoint/rng_state_1.pth +1 -1
- last-checkpoint/rng_state_2.pth +1 -1
- last-checkpoint/rng_state_3.pth +1 -1
- last-checkpoint/rng_state_4.pth +1 -1
- last-checkpoint/rng_state_5.pth +1 -1
- last-checkpoint/rng_state_6.pth +1 -1
- last-checkpoint/rng_state_7.pth +1 -1
- last-checkpoint/scheduler.pt +1 -1
- last-checkpoint/trainer_state.json +123 -3
- pytorch_model.bin +1 -1
last-checkpoint/optimizer.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 893441093
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:e51baa4ddc0d5650abf5371aac2f77196b05031ccca7029b3d99ba99af85e57f
|
| 3 |
size 893441093
|
last-checkpoint/pytorch_model.bin
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 449450757
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:acf04012905f76240c2902acedd8866c3a784e83992a5f4e0dc380bf807380dc
|
| 3 |
size 449450757
|
last-checkpoint/rng_state_0.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14583
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:7a45db7e85e08c084e49c40cab0c2c6092d92f81b5fa24290a645085ef74f75b
|
| 3 |
size 14583
|
last-checkpoint/rng_state_1.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14583
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:56d647c17f4fed38d972bfade7f44a26e438ac9b6b775a7bbc225c5be1e112bd
|
| 3 |
size 14583
|
last-checkpoint/rng_state_2.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14583
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:0e9cbd943c7dcfb1555090abbcd45a86173e47d10be2fa2e7308539ca596dff0
|
| 3 |
size 14583
|
last-checkpoint/rng_state_3.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14583
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:5917abb04845a366f52356ca50f06ea044267bd039a587ed19cc120ed161e748
|
| 3 |
size 14583
|
last-checkpoint/rng_state_4.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14583
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:582d77403f5af050452c09ec279770dab4724f234e767ab55c84c502beea2905
|
| 3 |
size 14583
|
last-checkpoint/rng_state_5.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14583
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:94c6bb99cfe9f0c710fe2cc6cec0d5d888a917b4fa016be56cafcfbbd47bac76
|
| 3 |
size 14583
|
last-checkpoint/rng_state_6.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14583
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:6f81952f7451d63a6ff6bf67269698e7e674adab210fce43113020157f4cf03d
|
| 3 |
size 14583
|
last-checkpoint/rng_state_7.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14583
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:ee4f92b2c66061ec16f42f6ff8db5a75108eff8cc62884e9d5c3c7875be42d2c
|
| 3 |
size 14583
|
last-checkpoint/scheduler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 627
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:bf98a74aa6df8eeab9552258d949bc73dcd837ff3b88682e5ebe82858a949936
|
| 3 |
size 627
|
last-checkpoint/trainer_state.json
CHANGED
|
@@ -1,8 +1,8 @@
|
|
| 1 |
{
|
| 2 |
"best_metric": null,
|
| 3 |
"best_model_checkpoint": null,
|
| 4 |
-
"epoch": 1.
|
| 5 |
-
"global_step":
|
| 6 |
"is_hyper_param_search": false,
|
| 7 |
"is_local_process_zero": true,
|
| 8 |
"is_world_process_zero": true,
|
|
@@ -11526,11 +11526,131 @@
|
|
| 11526 |
"learning_rate": 1.0611515147111736e-05,
|
| 11527 |
"loss": 0.2832,
|
| 11528 |
"step": 960000
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 11529 |
}
|
| 11530 |
],
|
| 11531 |
"max_steps": 1000000,
|
| 11532 |
"num_train_epochs": 2,
|
| 11533 |
-
"total_flos": 6.
|
| 11534 |
"trial_name": null,
|
| 11535 |
"trial_params": null
|
| 11536 |
}
|
|
|
|
| 1 |
{
|
| 2 |
"best_metric": null,
|
| 3 |
"best_model_checkpoint": null,
|
| 4 |
+
"epoch": 1.4783125134525394,
|
| 5 |
+
"global_step": 970000,
|
| 6 |
"is_hyper_param_search": false,
|
| 7 |
"is_local_process_zero": true,
|
| 8 |
"is_world_process_zero": true,
|
|
|
|
| 11526 |
"learning_rate": 1.0611515147111736e-05,
|
| 11527 |
"loss": 0.2832,
|
| 11528 |
"step": 960000
|
| 11529 |
+
},
|
| 11530 |
+
{
|
| 11531 |
+
"epoch": 1.46,
|
| 11532 |
+
"learning_rate": 1.0596344426086501e-05,
|
| 11533 |
+
"loss": 0.2831,
|
| 11534 |
+
"step": 960500
|
| 11535 |
+
},
|
| 11536 |
+
{
|
| 11537 |
+
"epoch": 1.46,
|
| 11538 |
+
"learning_rate": 1.0581363452005424e-05,
|
| 11539 |
+
"loss": 0.2836,
|
| 11540 |
+
"step": 961000
|
| 11541 |
+
},
|
| 11542 |
+
{
|
| 11543 |
+
"epoch": 1.46,
|
| 11544 |
+
"learning_rate": 1.0566572265825932e-05,
|
| 11545 |
+
"loss": 0.2835,
|
| 11546 |
+
"step": 961500
|
| 11547 |
+
},
|
| 11548 |
+
{
|
| 11549 |
+
"epoch": 1.46,
|
| 11550 |
+
"learning_rate": 1.0551970907986557e-05,
|
| 11551 |
+
"loss": 0.2835,
|
| 11552 |
+
"step": 962000
|
| 11553 |
+
},
|
| 11554 |
+
{
|
| 11555 |
+
"epoch": 1.46,
|
| 11556 |
+
"learning_rate": 1.0537559418406849e-05,
|
| 11557 |
+
"loss": 0.2838,
|
| 11558 |
+
"step": 962500
|
| 11559 |
+
},
|
| 11560 |
+
{
|
| 11561 |
+
"epoch": 1.46,
|
| 11562 |
+
"learning_rate": 1.0523337836487271e-05,
|
| 11563 |
+
"loss": 0.2829,
|
| 11564 |
+
"step": 963000
|
| 11565 |
+
},
|
| 11566 |
+
{
|
| 11567 |
+
"epoch": 1.47,
|
| 11568 |
+
"learning_rate": 1.0509306201109092e-05,
|
| 11569 |
+
"loss": 0.2835,
|
| 11570 |
+
"step": 963500
|
| 11571 |
+
},
|
| 11572 |
+
{
|
| 11573 |
+
"epoch": 1.47,
|
| 11574 |
+
"learning_rate": 1.0495464550634267e-05,
|
| 11575 |
+
"loss": 0.284,
|
| 11576 |
+
"step": 964000
|
| 11577 |
+
},
|
| 11578 |
+
{
|
| 11579 |
+
"epoch": 1.47,
|
| 11580 |
+
"learning_rate": 1.0481812922905339e-05,
|
| 11581 |
+
"loss": 0.2837,
|
| 11582 |
+
"step": 964500
|
| 11583 |
+
},
|
| 11584 |
+
{
|
| 11585 |
+
"epoch": 1.47,
|
| 11586 |
+
"learning_rate": 1.046835135524533e-05,
|
| 11587 |
+
"loss": 0.2834,
|
| 11588 |
+
"step": 965000
|
| 11589 |
+
},
|
| 11590 |
+
{
|
| 11591 |
+
"epoch": 1.47,
|
| 11592 |
+
"learning_rate": 1.0455079884457653e-05,
|
| 11593 |
+
"loss": 0.2832,
|
| 11594 |
+
"step": 965500
|
| 11595 |
+
},
|
| 11596 |
+
{
|
| 11597 |
+
"epoch": 1.47,
|
| 11598 |
+
"learning_rate": 1.044199854682601e-05,
|
| 11599 |
+
"loss": 0.2837,
|
| 11600 |
+
"step": 966000
|
| 11601 |
+
},
|
| 11602 |
+
{
|
| 11603 |
+
"epoch": 1.47,
|
| 11604 |
+
"learning_rate": 1.0429107378114277e-05,
|
| 11605 |
+
"loss": 0.2834,
|
| 11606 |
+
"step": 966500
|
| 11607 |
+
},
|
| 11608 |
+
{
|
| 11609 |
+
"epoch": 1.47,
|
| 11610 |
+
"learning_rate": 1.0416406413566414e-05,
|
| 11611 |
+
"loss": 0.2833,
|
| 11612 |
+
"step": 967000
|
| 11613 |
+
},
|
| 11614 |
+
{
|
| 11615 |
+
"epoch": 1.47,
|
| 11616 |
+
"learning_rate": 1.0403895687906366e-05,
|
| 11617 |
+
"loss": 0.2839,
|
| 11618 |
+
"step": 967500
|
| 11619 |
+
},
|
| 11620 |
+
{
|
| 11621 |
+
"epoch": 1.47,
|
| 11622 |
+
"learning_rate": 1.0391575235337991e-05,
|
| 11623 |
+
"loss": 0.2828,
|
| 11624 |
+
"step": 968000
|
| 11625 |
+
},
|
| 11626 |
+
{
|
| 11627 |
+
"epoch": 1.48,
|
| 11628 |
+
"learning_rate": 1.0379445089544929e-05,
|
| 11629 |
+
"loss": 0.2837,
|
| 11630 |
+
"step": 968500
|
| 11631 |
+
},
|
| 11632 |
+
{
|
| 11633 |
+
"epoch": 1.48,
|
| 11634 |
+
"learning_rate": 1.0367505283690547e-05,
|
| 11635 |
+
"loss": 0.2832,
|
| 11636 |
+
"step": 969000
|
| 11637 |
+
},
|
| 11638 |
+
{
|
| 11639 |
+
"epoch": 1.48,
|
| 11640 |
+
"learning_rate": 1.0355755850417803e-05,
|
| 11641 |
+
"loss": 0.283,
|
| 11642 |
+
"step": 969500
|
| 11643 |
+
},
|
| 11644 |
+
{
|
| 11645 |
+
"epoch": 1.48,
|
| 11646 |
+
"learning_rate": 1.0344196821849202e-05,
|
| 11647 |
+
"loss": 0.2821,
|
| 11648 |
+
"step": 970000
|
| 11649 |
}
|
| 11650 |
],
|
| 11651 |
"max_steps": 1000000,
|
| 11652 |
"num_train_epochs": 2,
|
| 11653 |
+
"total_flos": 6.557885636027719e+22,
|
| 11654 |
"trial_name": null,
|
| 11655 |
"trial_params": null
|
| 11656 |
}
|
pytorch_model.bin
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 449450757
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:acf04012905f76240c2902acedd8866c3a784e83992a5f4e0dc380bf807380dc
|
| 3 |
size 449450757
|