Training in progress, step 910000
Browse files- last-checkpoint/optimizer.pt +1 -1
- last-checkpoint/pytorch_model.bin +1 -1
- last-checkpoint/rng_state_0.pth +2 -2
- last-checkpoint/rng_state_1.pth +1 -1
- last-checkpoint/rng_state_2.pth +1 -1
- last-checkpoint/rng_state_3.pth +1 -1
- last-checkpoint/scheduler.pt +1 -1
- last-checkpoint/trainer_state.json +77 -3
- pytorch_model.bin +1 -1
last-checkpoint/optimizer.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 893439185
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:dcd026b7ddbbed9854cf4a848e8d10565ecae8551444b93cf80516c2a3075094
|
| 3 |
size 893439185
|
last-checkpoint/pytorch_model.bin
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 449471589
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:8c1213c78566dba45a4dbe4cbf12671b2071e92687a82a7bd671a18dd4693188
|
| 3 |
size 449471589
|
last-checkpoint/rng_state_0.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:23bff7c83371f4786566c176465b52f06fe8e3280f8cace7df94d70a8a5b2164
|
| 3 |
+
size 14503
|
last-checkpoint/rng_state_1.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14503
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:7220d15ed34c785cde537b4220883e048554a1408ecd7fe46faf0069997fc88e
|
| 3 |
size 14503
|
last-checkpoint/rng_state_2.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14503
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:52361dad9976f354f626771e1b0dbf34f1017e0982c95cf304b85aa55d90473b
|
| 3 |
size 14503
|
last-checkpoint/rng_state_3.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14503
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:242dcb89ecc86bf6b74de9a6782f0aca020ba83ca594d7f0c60637dcb70148d3
|
| 3 |
size 14503
|
last-checkpoint/scheduler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 623
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:68bd93c5df85b9ef6b8dfb004005413abc49b194d979c692716ee25211f1498f
|
| 3 |
size 623
|
last-checkpoint/trainer_state.json
CHANGED
|
@@ -1,8 +1,8 @@
|
|
| 1 |
{
|
| 2 |
"best_metric": null,
|
| 3 |
"best_model_checkpoint": null,
|
| 4 |
-
"epoch": 13.
|
| 5 |
-
"global_step":
|
| 6 |
"is_hyper_param_search": false,
|
| 7 |
"is_local_process_zero": true,
|
| 8 |
"is_world_process_zero": true,
|
|
@@ -6666,11 +6666,85 @@
|
|
| 6666 |
"eval_samples_per_second": 1325.982,
|
| 6667 |
"eval_steps_per_second": 21.216,
|
| 6668 |
"step": 900000
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 6669 |
}
|
| 6670 |
],
|
| 6671 |
"max_steps": 1000000,
|
| 6672 |
"num_train_epochs": 16,
|
| 6673 |
-
"total_flos": 6.
|
| 6674 |
"trial_name": null,
|
| 6675 |
"trial_params": null
|
| 6676 |
}
|
|
|
|
| 1 |
{
|
| 2 |
"best_metric": null,
|
| 3 |
"best_model_checkpoint": null,
|
| 4 |
+
"epoch": 13.89588773344328,
|
| 5 |
+
"global_step": 910000,
|
| 6 |
"is_hyper_param_search": false,
|
| 7 |
"is_local_process_zero": true,
|
| 8 |
"is_world_process_zero": true,
|
|
|
|
| 6666 |
"eval_samples_per_second": 1325.982,
|
| 6667 |
"eval_steps_per_second": 21.216,
|
| 6668 |
"step": 900000
|
| 6669 |
+
},
|
| 6670 |
+
{
|
| 6671 |
+
"epoch": 13.76,
|
| 6672 |
+
"learning_rate": 1.3717991925554562e-05,
|
| 6673 |
+
"loss": 0.2308,
|
| 6674 |
+
"step": 901000
|
| 6675 |
+
},
|
| 6676 |
+
{
|
| 6677 |
+
"epoch": 13.77,
|
| 6678 |
+
"learning_rate": 1.3643915619574529e-05,
|
| 6679 |
+
"loss": 0.2305,
|
| 6680 |
+
"step": 902000
|
| 6681 |
+
},
|
| 6682 |
+
{
|
| 6683 |
+
"epoch": 13.79,
|
| 6684 |
+
"learning_rate": 1.35705649731021e-05,
|
| 6685 |
+
"loss": 0.2304,
|
| 6686 |
+
"step": 903000
|
| 6687 |
+
},
|
| 6688 |
+
{
|
| 6689 |
+
"epoch": 13.8,
|
| 6690 |
+
"learning_rate": 1.3497940788288195e-05,
|
| 6691 |
+
"loss": 0.2301,
|
| 6692 |
+
"step": 904000
|
| 6693 |
+
},
|
| 6694 |
+
{
|
| 6695 |
+
"epoch": 13.82,
|
| 6696 |
+
"learning_rate": 1.3426043859339253e-05,
|
| 6697 |
+
"loss": 0.2304,
|
| 6698 |
+
"step": 905000
|
| 6699 |
+
},
|
| 6700 |
+
{
|
| 6701 |
+
"epoch": 13.82,
|
| 6702 |
+
"eval_runtime": 0.9802,
|
| 6703 |
+
"eval_samples_per_second": 1020.243,
|
| 6704 |
+
"eval_steps_per_second": 16.324,
|
| 6705 |
+
"step": 905000
|
| 6706 |
+
},
|
| 6707 |
+
{
|
| 6708 |
+
"epoch": 13.83,
|
| 6709 |
+
"learning_rate": 1.3354874972508582e-05,
|
| 6710 |
+
"loss": 0.2302,
|
| 6711 |
+
"step": 906000
|
| 6712 |
+
},
|
| 6713 |
+
{
|
| 6714 |
+
"epoch": 13.85,
|
| 6715 |
+
"learning_rate": 1.3284434906087695e-05,
|
| 6716 |
+
"loss": 0.2303,
|
| 6717 |
+
"step": 907000
|
| 6718 |
+
},
|
| 6719 |
+
{
|
| 6720 |
+
"epoch": 13.87,
|
| 6721 |
+
"learning_rate": 1.3214724430397915e-05,
|
| 6722 |
+
"loss": 0.2304,
|
| 6723 |
+
"step": 908000
|
| 6724 |
+
},
|
| 6725 |
+
{
|
| 6726 |
+
"epoch": 13.88,
|
| 6727 |
+
"learning_rate": 1.314574430778182e-05,
|
| 6728 |
+
"loss": 0.2302,
|
| 6729 |
+
"step": 909000
|
| 6730 |
+
},
|
| 6731 |
+
{
|
| 6732 |
+
"epoch": 13.9,
|
| 6733 |
+
"learning_rate": 1.3077495292594966e-05,
|
| 6734 |
+
"loss": 0.2305,
|
| 6735 |
+
"step": 910000
|
| 6736 |
+
},
|
| 6737 |
+
{
|
| 6738 |
+
"epoch": 13.9,
|
| 6739 |
+
"eval_runtime": 0.7262,
|
| 6740 |
+
"eval_samples_per_second": 1377.03,
|
| 6741 |
+
"eval_steps_per_second": 22.032,
|
| 6742 |
+
"step": 910000
|
| 6743 |
}
|
| 6744 |
],
|
| 6745 |
"max_steps": 1000000,
|
| 6746 |
"num_train_epochs": 16,
|
| 6747 |
+
"total_flos": 6.379115211875656e+22,
|
| 6748 |
"trial_name": null,
|
| 6749 |
"trial_params": null
|
| 6750 |
}
|
pytorch_model.bin
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 449471589
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:8c1213c78566dba45a4dbe4cbf12671b2071e92687a82a7bd671a18dd4693188
|
| 3 |
size 449471589
|