Training in progress, step 370000
Browse files- last-checkpoint/optimizer.pt +1 -1
- last-checkpoint/pytorch_model.bin +1 -1
- last-checkpoint/rng_state_0.pth +1 -1
- last-checkpoint/rng_state_1.pth +2 -2
- last-checkpoint/rng_state_2.pth +1 -1
- last-checkpoint/rng_state_3.pth +1 -1
- last-checkpoint/rng_state_4.pth +2 -2
- last-checkpoint/rng_state_5.pth +1 -1
- last-checkpoint/rng_state_6.pth +1 -1
- last-checkpoint/rng_state_7.pth +1 -1
- last-checkpoint/scheduler.pt +1 -1
- last-checkpoint/trainer_state.json +203 -3
- pytorch_model.bin +1 -1
last-checkpoint/optimizer.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 202194449
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:f9c8cce7fc16d49d0f7371660f21497251dd70fb6a0863ced538e8e9c69a0ca9
|
| 3 |
size 202194449
|
last-checkpoint/pytorch_model.bin
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 102501541
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:bb573bc42125377ae261521a84adbf685b2347ffb43eb62b536c886544fe2d46
|
| 3 |
size 102501541
|
last-checkpoint/rng_state_0.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14503
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:7ee12284053cf83e8530cb65257ff01350f611fa2a8f8b92c00004c4644ba728
|
| 3 |
size 14503
|
last-checkpoint/rng_state_1.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:8c588aab0631e1cbe9b9bc91bd8fdb267f40c2efed21da72dbc11089c341db1a
|
| 3 |
+
size 14567
|
last-checkpoint/rng_state_2.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14503
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:d81b56af08f24fc8d507e3068f145577049df6c261f921cf212def6001138c25
|
| 3 |
size 14503
|
last-checkpoint/rng_state_3.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14503
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:0f3d36fad09b43ee50c94cb8bbf636c2e82070e751a5d530c5d483d7fea96111
|
| 3 |
size 14503
|
last-checkpoint/rng_state_4.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:738ab6b372718dc5257fa8c5c82fc8480566553405b2c1fe74ac6bb9bd72e750
|
| 3 |
+
size 14567
|
last-checkpoint/rng_state_5.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14503
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:a56c60b33853d8e6230ccd6985b78412b5623c0d98e6fcecc17ad9dcdf56630f
|
| 3 |
size 14503
|
last-checkpoint/rng_state_6.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14503
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:87c77a0d4fbb407266eb2a3608ccf0fffdba85f7cd116784ad470dea58425561
|
| 3 |
size 14503
|
last-checkpoint/rng_state_7.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14503
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:0880cbd756484509ced83a375e5248ed2190c14baa2ebe717f828a8c21b981bd
|
| 3 |
size 14503
|
last-checkpoint/scheduler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 623
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:580594f4768ecd1bd92e87dca92e874365a397305161c1f8781b79f1f0b613ba
|
| 3 |
size 623
|
last-checkpoint/trainer_state.json
CHANGED
|
@@ -1,8 +1,8 @@
|
|
| 1 |
{
|
| 2 |
"best_metric": null,
|
| 3 |
"best_model_checkpoint": null,
|
| 4 |
-
"epoch": 11.
|
| 5 |
-
"global_step":
|
| 6 |
"is_hyper_param_search": false,
|
| 7 |
"is_local_process_zero": true,
|
| 8 |
"is_world_process_zero": true,
|
|
@@ -7206,11 +7206,211 @@
|
|
| 7206 |
"eval_samples_per_second": 1921.599,
|
| 7207 |
"eval_steps_per_second": 30.746,
|
| 7208 |
"step": 360000
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 7209 |
}
|
| 7210 |
],
|
| 7211 |
"max_steps": 500000,
|
| 7212 |
"num_train_epochs": 16,
|
| 7213 |
-
"total_flos": 1.
|
| 7214 |
"trial_name": null,
|
| 7215 |
"trial_params": null
|
| 7216 |
}
|
|
|
|
| 1 |
{
|
| 2 |
"best_metric": null,
|
| 3 |
"best_model_checkpoint": null,
|
| 4 |
+
"epoch": 11.336131621679586,
|
| 5 |
+
"global_step": 370000,
|
| 6 |
"is_hyper_param_search": false,
|
| 7 |
"is_local_process_zero": true,
|
| 8 |
"is_world_process_zero": true,
|
|
|
|
| 7206 |
"eval_samples_per_second": 1921.599,
|
| 7207 |
"eval_steps_per_second": 30.746,
|
| 7208 |
"step": 360000
|
| 7209 |
+
},
|
| 7210 |
+
{
|
| 7211 |
+
"epoch": 11.05,
|
| 7212 |
+
"learning_rate": 6.746049527150238e-05,
|
| 7213 |
+
"loss": 0.3226,
|
| 7214 |
+
"step": 360500
|
| 7215 |
+
},
|
| 7216 |
+
{
|
| 7217 |
+
"epoch": 11.06,
|
| 7218 |
+
"learning_rate": 6.707871421704209e-05,
|
| 7219 |
+
"loss": 0.3225,
|
| 7220 |
+
"step": 361000
|
| 7221 |
+
},
|
| 7222 |
+
{
|
| 7223 |
+
"epoch": 11.06,
|
| 7224 |
+
"eval_loss": 0.7777162790298462,
|
| 7225 |
+
"eval_runtime": 0.5171,
|
| 7226 |
+
"eval_samples_per_second": 1933.691,
|
| 7227 |
+
"eval_steps_per_second": 30.939,
|
| 7228 |
+
"step": 361000
|
| 7229 |
+
},
|
| 7230 |
+
{
|
| 7231 |
+
"epoch": 11.08,
|
| 7232 |
+
"learning_rate": 6.669789465567683e-05,
|
| 7233 |
+
"loss": 0.3226,
|
| 7234 |
+
"step": 361500
|
| 7235 |
+
},
|
| 7236 |
+
{
|
| 7237 |
+
"epoch": 11.09,
|
| 7238 |
+
"learning_rate": 6.631804075198838e-05,
|
| 7239 |
+
"loss": 0.3223,
|
| 7240 |
+
"step": 362000
|
| 7241 |
+
},
|
| 7242 |
+
{
|
| 7243 |
+
"epoch": 11.09,
|
| 7244 |
+
"eval_loss": 0.7790626287460327,
|
| 7245 |
+
"eval_runtime": 0.512,
|
| 7246 |
+
"eval_samples_per_second": 1953.298,
|
| 7247 |
+
"eval_steps_per_second": 31.253,
|
| 7248 |
+
"step": 362000
|
| 7249 |
+
},
|
| 7250 |
+
{
|
| 7251 |
+
"epoch": 11.11,
|
| 7252 |
+
"learning_rate": 6.593915665999816e-05,
|
| 7253 |
+
"loss": 0.3221,
|
| 7254 |
+
"step": 362500
|
| 7255 |
+
},
|
| 7256 |
+
{
|
| 7257 |
+
"epoch": 11.12,
|
| 7258 |
+
"learning_rate": 6.55612465231219e-05,
|
| 7259 |
+
"loss": 0.3222,
|
| 7260 |
+
"step": 363000
|
| 7261 |
+
},
|
| 7262 |
+
{
|
| 7263 |
+
"epoch": 11.12,
|
| 7264 |
+
"eval_loss": 0.7765858173370361,
|
| 7265 |
+
"eval_runtime": 0.5305,
|
| 7266 |
+
"eval_samples_per_second": 1884.91,
|
| 7267 |
+
"eval_steps_per_second": 30.159,
|
| 7268 |
+
"step": 363000
|
| 7269 |
+
},
|
| 7270 |
+
{
|
| 7271 |
+
"epoch": 11.14,
|
| 7272 |
+
"learning_rate": 6.518431447412434e-05,
|
| 7273 |
+
"loss": 0.3219,
|
| 7274 |
+
"step": 363500
|
| 7275 |
+
},
|
| 7276 |
+
{
|
| 7277 |
+
"epoch": 11.15,
|
| 7278 |
+
"learning_rate": 6.480836463507392e-05,
|
| 7279 |
+
"loss": 0.322,
|
| 7280 |
+
"step": 364000
|
| 7281 |
+
},
|
| 7282 |
+
{
|
| 7283 |
+
"epoch": 11.15,
|
| 7284 |
+
"eval_loss": 0.7808003425598145,
|
| 7285 |
+
"eval_runtime": 0.5024,
|
| 7286 |
+
"eval_samples_per_second": 1990.262,
|
| 7287 |
+
"eval_steps_per_second": 31.844,
|
| 7288 |
+
"step": 364000
|
| 7289 |
+
},
|
| 7290 |
+
{
|
| 7291 |
+
"epoch": 11.17,
|
| 7292 |
+
"learning_rate": 6.443340111729786e-05,
|
| 7293 |
+
"loss": 0.3219,
|
| 7294 |
+
"step": 364500
|
| 7295 |
+
},
|
| 7296 |
+
{
|
| 7297 |
+
"epoch": 11.18,
|
| 7298 |
+
"learning_rate": 6.405942802133713e-05,
|
| 7299 |
+
"loss": 0.322,
|
| 7300 |
+
"step": 365000
|
| 7301 |
+
},
|
| 7302 |
+
{
|
| 7303 |
+
"epoch": 11.18,
|
| 7304 |
+
"eval_loss": 0.7744137048721313,
|
| 7305 |
+
"eval_runtime": 0.5239,
|
| 7306 |
+
"eval_samples_per_second": 1908.642,
|
| 7307 |
+
"eval_steps_per_second": 30.538,
|
| 7308 |
+
"step": 365000
|
| 7309 |
+
},
|
| 7310 |
+
{
|
| 7311 |
+
"epoch": 11.2,
|
| 7312 |
+
"learning_rate": 6.36864494369016e-05,
|
| 7313 |
+
"loss": 0.3218,
|
| 7314 |
+
"step": 365500
|
| 7315 |
+
},
|
| 7316 |
+
{
|
| 7317 |
+
"epoch": 11.21,
|
| 7318 |
+
"learning_rate": 6.331446944282534e-05,
|
| 7319 |
+
"loss": 0.3217,
|
| 7320 |
+
"step": 366000
|
| 7321 |
+
},
|
| 7322 |
+
{
|
| 7323 |
+
"epoch": 11.21,
|
| 7324 |
+
"eval_loss": 0.7770714163780212,
|
| 7325 |
+
"eval_runtime": 0.5069,
|
| 7326 |
+
"eval_samples_per_second": 1972.691,
|
| 7327 |
+
"eval_steps_per_second": 31.563,
|
| 7328 |
+
"step": 366000
|
| 7329 |
+
},
|
| 7330 |
+
{
|
| 7331 |
+
"epoch": 11.23,
|
| 7332 |
+
"learning_rate": 6.294349210702188e-05,
|
| 7333 |
+
"loss": 0.3219,
|
| 7334 |
+
"step": 366500
|
| 7335 |
+
},
|
| 7336 |
+
{
|
| 7337 |
+
"epoch": 11.24,
|
| 7338 |
+
"learning_rate": 6.257352148643998e-05,
|
| 7339 |
+
"loss": 0.3217,
|
| 7340 |
+
"step": 367000
|
| 7341 |
+
},
|
| 7342 |
+
{
|
| 7343 |
+
"epoch": 11.24,
|
| 7344 |
+
"eval_loss": 0.7741234302520752,
|
| 7345 |
+
"eval_runtime": 0.5107,
|
| 7346 |
+
"eval_samples_per_second": 1958.008,
|
| 7347 |
+
"eval_steps_per_second": 31.328,
|
| 7348 |
+
"step": 367000
|
| 7349 |
+
},
|
| 7350 |
+
{
|
| 7351 |
+
"epoch": 11.26,
|
| 7352 |
+
"learning_rate": 6.220456162701908e-05,
|
| 7353 |
+
"loss": 0.3215,
|
| 7354 |
+
"step": 367500
|
| 7355 |
+
},
|
| 7356 |
+
{
|
| 7357 |
+
"epoch": 11.27,
|
| 7358 |
+
"learning_rate": 6.183661656364515e-05,
|
| 7359 |
+
"loss": 0.3216,
|
| 7360 |
+
"step": 368000
|
| 7361 |
+
},
|
| 7362 |
+
{
|
| 7363 |
+
"epoch": 11.27,
|
| 7364 |
+
"eval_loss": 0.7758086919784546,
|
| 7365 |
+
"eval_runtime": 0.5088,
|
| 7366 |
+
"eval_samples_per_second": 1965.315,
|
| 7367 |
+
"eval_steps_per_second": 31.445,
|
| 7368 |
+
"step": 368000
|
| 7369 |
+
},
|
| 7370 |
+
{
|
| 7371 |
+
"epoch": 11.29,
|
| 7372 |
+
"learning_rate": 6.146969032010631e-05,
|
| 7373 |
+
"loss": 0.3214,
|
| 7374 |
+
"step": 368500
|
| 7375 |
+
},
|
| 7376 |
+
{
|
| 7377 |
+
"epoch": 11.31,
|
| 7378 |
+
"learning_rate": 6.110378690904928e-05,
|
| 7379 |
+
"loss": 0.3216,
|
| 7380 |
+
"step": 369000
|
| 7381 |
+
},
|
| 7382 |
+
{
|
| 7383 |
+
"epoch": 11.31,
|
| 7384 |
+
"eval_loss": 0.7766358256340027,
|
| 7385 |
+
"eval_runtime": 0.5033,
|
| 7386 |
+
"eval_samples_per_second": 1986.831,
|
| 7387 |
+
"eval_steps_per_second": 31.789,
|
| 7388 |
+
"step": 369000
|
| 7389 |
+
},
|
| 7390 |
+
{
|
| 7391 |
+
"epoch": 11.32,
|
| 7392 |
+
"learning_rate": 6.073891033193507e-05,
|
| 7393 |
+
"loss": 0.3214,
|
| 7394 |
+
"step": 369500
|
| 7395 |
+
},
|
| 7396 |
+
{
|
| 7397 |
+
"epoch": 11.34,
|
| 7398 |
+
"learning_rate": 6.037506457899553e-05,
|
| 7399 |
+
"loss": 0.3213,
|
| 7400 |
+
"step": 370000
|
| 7401 |
+
},
|
| 7402 |
+
{
|
| 7403 |
+
"epoch": 11.34,
|
| 7404 |
+
"eval_loss": 0.7805209159851074,
|
| 7405 |
+
"eval_runtime": 0.5089,
|
| 7406 |
+
"eval_samples_per_second": 1964.929,
|
| 7407 |
+
"eval_steps_per_second": 31.439,
|
| 7408 |
+
"step": 370000
|
| 7409 |
}
|
| 7410 |
],
|
| 7411 |
"max_steps": 500000,
|
| 7412 |
"num_train_epochs": 16,
|
| 7413 |
+
"total_flos": 1.1820936267184766e+22,
|
| 7414 |
"trial_name": null,
|
| 7415 |
"trial_params": null
|
| 7416 |
}
|
pytorch_model.bin
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 102501541
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:bb573bc42125377ae261521a84adbf685b2347ffb43eb62b536c886544fe2d46
|
| 3 |
size 102501541
|