Training in progress, step 980000
Browse files- last-checkpoint/optimizer.pt +1 -1
- last-checkpoint/pytorch_model.bin +1 -1
- last-checkpoint/rng_state_0.pth +1 -1
- last-checkpoint/rng_state_1.pth +1 -1
- last-checkpoint/rng_state_2.pth +2 -2
- last-checkpoint/rng_state_3.pth +1 -1
- last-checkpoint/scheduler.pt +1 -1
- last-checkpoint/trainer_state.json +77 -3
- pytorch_model.bin +1 -1
last-checkpoint/optimizer.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 893439185
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:90e6f74ca02156084fa05f854168c4cbdee8fc0fa6687cea7dfffc7ceaa970ef
|
| 3 |
size 893439185
|
last-checkpoint/pytorch_model.bin
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 449471589
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:44a8bb7d1ad03b47ab97301f2bf5aa4416e913d62ffabd09bdd937d55c43233d
|
| 3 |
size 449471589
|
last-checkpoint/rng_state_0.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14503
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:2693c812482df2fdf768d0c19e41d192e583b64a43dbe767a2677f629f2520e7
|
| 3 |
size 14503
|
last-checkpoint/rng_state_1.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14503
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:39770cef90df6052fa5bdc49403a83d0e05cc2d3766019022596476c4a73f3b2
|
| 3 |
size 14503
|
last-checkpoint/rng_state_2.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:708a00e78f0bdafbd2eb890af573c704006de2c61a1f639e3fb47ce38e039820
|
| 3 |
+
size 14503
|
last-checkpoint/rng_state_3.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14503
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:c3d789d948e2afb641edc41de23d3e0ac8454e4ca3cace740853515e0185e05d
|
| 3 |
size 14503
|
last-checkpoint/scheduler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 623
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:b1f60f9446cba0320cf9ced93c4b14816af8d6988d011f7cc2f5b01e8ada101d
|
| 3 |
size 623
|
last-checkpoint/trainer_state.json
CHANGED
|
@@ -1,8 +1,8 @@
|
|
| 1 |
{
|
| 2 |
"best_metric": null,
|
| 3 |
"best_model_checkpoint": null,
|
| 4 |
-
"epoch": 14.
|
| 5 |
-
"global_step":
|
| 6 |
"is_hyper_param_search": false,
|
| 7 |
"is_local_process_zero": true,
|
| 8 |
"is_world_process_zero": true,
|
|
@@ -7184,11 +7184,85 @@
|
|
| 7184 |
"eval_samples_per_second": 1327.252,
|
| 7185 |
"eval_steps_per_second": 21.236,
|
| 7186 |
"step": 970000
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 7187 |
}
|
| 7188 |
],
|
| 7189 |
"max_steps": 1000000,
|
| 7190 |
"num_train_epochs": 16,
|
| 7191 |
-
"total_flos": 6.
|
| 7192 |
"trial_name": null,
|
| 7193 |
"trial_params": null
|
| 7194 |
}
|
|
|
|
| 1 |
{
|
| 2 |
"best_metric": null,
|
| 3 |
"best_model_checkpoint": null,
|
| 4 |
+
"epoch": 14.964802174477377,
|
| 5 |
+
"global_step": 980000,
|
| 6 |
"is_hyper_param_search": false,
|
| 7 |
"is_local_process_zero": true,
|
| 8 |
"is_world_process_zero": true,
|
|
|
|
| 7184 |
"eval_samples_per_second": 1327.252,
|
| 7185 |
"eval_steps_per_second": 21.236,
|
| 7186 |
"step": 970000
|
| 7187 |
+
},
|
| 7188 |
+
{
|
| 7189 |
+
"epoch": 14.83,
|
| 7190 |
+
"learning_rate": 1.032165010471157e-05,
|
| 7191 |
+
"loss": 0.2277,
|
| 7192 |
+
"step": 971000
|
| 7193 |
+
},
|
| 7194 |
+
{
|
| 7195 |
+
"epoch": 14.84,
|
| 7196 |
+
"learning_rate": 1.0299865378844936e-05,
|
| 7197 |
+
"loss": 0.2275,
|
| 7198 |
+
"step": 972000
|
| 7199 |
+
},
|
| 7200 |
+
{
|
| 7201 |
+
"epoch": 14.86,
|
| 7202 |
+
"learning_rate": 1.0278842882483569e-05,
|
| 7203 |
+
"loss": 0.2275,
|
| 7204 |
+
"step": 973000
|
| 7205 |
+
},
|
| 7206 |
+
{
|
| 7207 |
+
"epoch": 14.87,
|
| 7208 |
+
"learning_rate": 1.025858284552612e-05,
|
| 7209 |
+
"loss": 0.2276,
|
| 7210 |
+
"step": 974000
|
| 7211 |
+
},
|
| 7212 |
+
{
|
| 7213 |
+
"epoch": 14.89,
|
| 7214 |
+
"learning_rate": 1.023908548953311e-05,
|
| 7215 |
+
"loss": 0.2275,
|
| 7216 |
+
"step": 975000
|
| 7217 |
+
},
|
| 7218 |
+
{
|
| 7219 |
+
"epoch": 14.89,
|
| 7220 |
+
"eval_runtime": 0.7861,
|
| 7221 |
+
"eval_samples_per_second": 1272.066,
|
| 7222 |
+
"eval_steps_per_second": 20.353,
|
| 7223 |
+
"step": 975000
|
| 7224 |
+
},
|
| 7225 |
+
{
|
| 7226 |
+
"epoch": 14.9,
|
| 7227 |
+
"learning_rate": 1.02203510277245e-05,
|
| 7228 |
+
"loss": 0.2276,
|
| 7229 |
+
"step": 976000
|
| 7230 |
+
},
|
| 7231 |
+
{
|
| 7232 |
+
"epoch": 14.92,
|
| 7233 |
+
"learning_rate": 1.0202379664977364e-05,
|
| 7234 |
+
"loss": 0.2272,
|
| 7235 |
+
"step": 977000
|
| 7236 |
+
},
|
| 7237 |
+
{
|
| 7238 |
+
"epoch": 14.93,
|
| 7239 |
+
"learning_rate": 1.018517159782365e-05,
|
| 7240 |
+
"loss": 0.2274,
|
| 7241 |
+
"step": 978000
|
| 7242 |
+
},
|
| 7243 |
+
{
|
| 7244 |
+
"epoch": 14.95,
|
| 7245 |
+
"learning_rate": 1.0168727014448004e-05,
|
| 7246 |
+
"loss": 0.2272,
|
| 7247 |
+
"step": 979000
|
| 7248 |
+
},
|
| 7249 |
+
{
|
| 7250 |
+
"epoch": 14.96,
|
| 7251 |
+
"learning_rate": 1.0153046094685783e-05,
|
| 7252 |
+
"loss": 0.227,
|
| 7253 |
+
"step": 980000
|
| 7254 |
+
},
|
| 7255 |
+
{
|
| 7256 |
+
"epoch": 14.96,
|
| 7257 |
+
"eval_runtime": 0.7489,
|
| 7258 |
+
"eval_samples_per_second": 1335.226,
|
| 7259 |
+
"eval_steps_per_second": 21.364,
|
| 7260 |
+
"step": 980000
|
| 7261 |
}
|
| 7262 |
],
|
| 7263 |
"max_steps": 1000000,
|
| 7264 |
"num_train_epochs": 16,
|
| 7265 |
+
"total_flos": 6.869816382019938e+22,
|
| 7266 |
"trial_name": null,
|
| 7267 |
"trial_params": null
|
| 7268 |
}
|
pytorch_model.bin
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 449471589
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:44a8bb7d1ad03b47ab97301f2bf5aa4416e913d62ffabd09bdd937d55c43233d
|
| 3 |
size 449471589
|