Training in progress, step 1306, checkpoint
Browse files- last-checkpoint/optimizer_0/.metadata +0 -0
- last-checkpoint/optimizer_0/__0_0.distcp +1 -1
- last-checkpoint/optimizer_0/__1_0.distcp +1 -1
- last-checkpoint/optimizer_0/__2_0.distcp +1 -1
- last-checkpoint/optimizer_0/__3_0.distcp +1 -1
- last-checkpoint/pytorch_model_fsdp_0/.metadata +0 -0
- last-checkpoint/pytorch_model_fsdp_0/__0_0.distcp +1 -1
- last-checkpoint/pytorch_model_fsdp_0/__1_0.distcp +1 -1
- last-checkpoint/pytorch_model_fsdp_0/__2_0.distcp +1 -1
- last-checkpoint/pytorch_model_fsdp_0/__3_0.distcp +1 -1
- last-checkpoint/rng_state_0.pth +1 -1
- last-checkpoint/rng_state_1.pth +1 -1
- last-checkpoint/rng_state_2.pth +1 -1
- last-checkpoint/rng_state_3.pth +1 -1
- last-checkpoint/scheduler.pt +1 -1
- last-checkpoint/trainer_state.json +46 -4
last-checkpoint/optimizer_0/.metadata
CHANGED
|
Binary files a/last-checkpoint/optimizer_0/.metadata and b/last-checkpoint/optimizer_0/.metadata differ
|
|
|
last-checkpoint/optimizer_0/__0_0.distcp
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 13934748
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:16ba4d36be29c47761e4de822f2a56f3f1055d47e1e1f14d2488c6f83eddeec5
|
| 3 |
size 13934748
|
last-checkpoint/optimizer_0/__1_0.distcp
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 13999412
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:6e6479e4d97665e28705f11762ea10d92ed3b015451419fbda2316d5d8b06c73
|
| 3 |
size 13999412
|
last-checkpoint/optimizer_0/__2_0.distcp
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 13990904
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:7eaf25285a1507f4859bd6e1d4e28599283b7616a349f7a68095dd221ca407fb
|
| 3 |
size 13990904
|
last-checkpoint/optimizer_0/__3_0.distcp
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 13990904
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:24253fe25c07b2675104a5e19811341ecd3d26067b978ca553e4ce6c22669504
|
| 3 |
size 13990904
|
last-checkpoint/pytorch_model_fsdp_0/.metadata
CHANGED
|
Binary files a/last-checkpoint/pytorch_model_fsdp_0/.metadata and b/last-checkpoint/pytorch_model_fsdp_0/.metadata differ
|
|
|
last-checkpoint/pytorch_model_fsdp_0/__0_0.distcp
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 6966784
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:ac126ceaf5112e8e4b4de50777b385824f0aae9304c36217aac6a5eb68d490a5
|
| 3 |
size 6966784
|
last-checkpoint/pytorch_model_fsdp_0/__1_0.distcp
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 6966784
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:b7688ad663685a9025f78d869af514fc8027af746b690ee11df3209317548241
|
| 3 |
size 6966784
|
last-checkpoint/pytorch_model_fsdp_0/__2_0.distcp
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 6966784
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:f3842b87888cf0e1be7e33db60d6a70e76fe57f5600b71df71fcc44946743a05
|
| 3 |
size 6966784
|
last-checkpoint/pytorch_model_fsdp_0/__3_0.distcp
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 6966784
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:52d83d2a347e7151765828045f9d61b506f1e7fecdc1832b06fe6a81ecb6a04b
|
| 3 |
size 6966784
|
last-checkpoint/rng_state_0.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14960
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:2103551eb3bb592775eb464494702c4cad71ff0b6af0e4e6c99b289b56b204ad
|
| 3 |
size 14960
|
last-checkpoint/rng_state_1.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14960
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:b97f6488d0e04c6e47d5c627ac69513ec7b0ea439086b82abe71f5696931884c
|
| 3 |
size 14960
|
last-checkpoint/rng_state_2.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14960
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:c5116ddf741fb875690434076a78462204512b021145f27059e0edc50c3027b9
|
| 3 |
size 14960
|
last-checkpoint/rng_state_3.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14960
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:6598ea402d6825b4746034f32ba2ae771f302f2bde0d4492fa6a600836a7a44e
|
| 3 |
size 14960
|
last-checkpoint/scheduler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1064
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:530a7f21d17686b5d16f06aafaf55f3d145270ec70fa92cf1f5dd1bd2ccad51b
|
| 3 |
size 1064
|
last-checkpoint/trainer_state.json
CHANGED
|
@@ -1,9 +1,9 @@
|
|
| 1 |
{
|
| 2 |
"best_metric": null,
|
| 3 |
"best_model_checkpoint": null,
|
| 4 |
-
"epoch": 0.
|
| 5 |
"eval_steps": 20,
|
| 6 |
-
"global_step":
|
| 7 |
"is_hyper_param_search": false,
|
| 8 |
"is_local_process_zero": true,
|
| 9 |
"is_world_process_zero": true,
|
|
@@ -9899,6 +9899,48 @@
|
|
| 9899 |
"eval_samples_per_second": 6.942,
|
| 9900 |
"eval_steps_per_second": 0.231,
|
| 9901 |
"step": 1300
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 9902 |
}
|
| 9903 |
],
|
| 9904 |
"logging_steps": 1,
|
|
@@ -9913,12 +9955,12 @@
|
|
| 9913 |
"should_evaluate": false,
|
| 9914 |
"should_log": false,
|
| 9915 |
"should_save": true,
|
| 9916 |
-
"should_training_stop":
|
| 9917 |
},
|
| 9918 |
"attributes": {}
|
| 9919 |
}
|
| 9920 |
},
|
| 9921 |
-
"total_flos":
|
| 9922 |
"train_batch_size": 8,
|
| 9923 |
"trial_name": null,
|
| 9924 |
"trial_params": null
|
|
|
|
| 1 |
{
|
| 2 |
"best_metric": null,
|
| 3 |
"best_model_checkpoint": null,
|
| 4 |
+
"epoch": 0.9996172981247609,
|
| 5 |
"eval_steps": 20,
|
| 6 |
+
"global_step": 1306,
|
| 7 |
"is_hyper_param_search": false,
|
| 8 |
"is_local_process_zero": true,
|
| 9 |
"is_world_process_zero": true,
|
|
|
|
| 9899 |
"eval_samples_per_second": 6.942,
|
| 9900 |
"eval_steps_per_second": 0.231,
|
| 9901 |
"step": 1300
|
| 9902 |
+
},
|
| 9903 |
+
{
|
| 9904 |
+
"epoch": 0.995790279372369,
|
| 9905 |
+
"grad_norm": 7.007760047912598,
|
| 9906 |
+
"learning_rate": 8.935678962196381e-10,
|
| 9907 |
+
"loss": 0.2438,
|
| 9908 |
+
"step": 1301
|
| 9909 |
+
},
|
| 9910 |
+
{
|
| 9911 |
+
"epoch": 0.9965556831228473,
|
| 9912 |
+
"grad_norm": 6.088259696960449,
|
| 9913 |
+
"learning_rate": 5.718865197423817e-10,
|
| 9914 |
+
"loss": 0.3118,
|
| 9915 |
+
"step": 1302
|
| 9916 |
+
},
|
| 9917 |
+
{
|
| 9918 |
+
"epoch": 0.9973210868733257,
|
| 9919 |
+
"grad_norm": 8.319058418273926,
|
| 9920 |
+
"learning_rate": 3.2168750880634537e-10,
|
| 9921 |
+
"loss": 0.36,
|
| 9922 |
+
"step": 1303
|
| 9923 |
+
},
|
| 9924 |
+
{
|
| 9925 |
+
"epoch": 0.9980864906238041,
|
| 9926 |
+
"grad_norm": 7.24370813369751,
|
| 9927 |
+
"learning_rate": 1.4297265199414434e-10,
|
| 9928 |
+
"loss": 0.3103,
|
| 9929 |
+
"step": 1304
|
| 9930 |
+
},
|
| 9931 |
+
{
|
| 9932 |
+
"epoch": 0.9988518943742825,
|
| 9933 |
+
"grad_norm": 5.21471643447876,
|
| 9934 |
+
"learning_rate": 3.57432268771607e-11,
|
| 9935 |
+
"loss": 0.2336,
|
| 9936 |
+
"step": 1305
|
| 9937 |
+
},
|
| 9938 |
+
{
|
| 9939 |
+
"epoch": 0.9996172981247609,
|
| 9940 |
+
"grad_norm": 4.997439384460449,
|
| 9941 |
+
"learning_rate": 0.0,
|
| 9942 |
+
"loss": 0.2676,
|
| 9943 |
+
"step": 1306
|
| 9944 |
}
|
| 9945 |
],
|
| 9946 |
"logging_steps": 1,
|
|
|
|
| 9955 |
"should_evaluate": false,
|
| 9956 |
"should_log": false,
|
| 9957 |
"should_save": true,
|
| 9958 |
+
"should_training_stop": true
|
| 9959 |
},
|
| 9960 |
"attributes": {}
|
| 9961 |
}
|
| 9962 |
},
|
| 9963 |
+
"total_flos": 2.006617662863442e+17,
|
| 9964 |
"train_batch_size": 8,
|
| 9965 |
"trial_name": null,
|
| 9966 |
"trial_params": null
|