Training in progress, step 620000
Browse files- last-checkpoint/optimizer.pt +1 -1
- last-checkpoint/pytorch_model.bin +1 -1
- last-checkpoint/rng_state_0.pth +1 -1
- last-checkpoint/rng_state_1.pth +1 -1
- last-checkpoint/rng_state_2.pth +1 -1
- last-checkpoint/rng_state_3.pth +1 -1
- last-checkpoint/rng_state_4.pth +1 -1
- last-checkpoint/rng_state_5.pth +1 -1
- last-checkpoint/rng_state_6.pth +1 -1
- last-checkpoint/rng_state_7.pth +1 -1
- last-checkpoint/scheduler.pt +1 -1
- last-checkpoint/trainer_state.json +123 -3
- pytorch_model.bin +1 -1
last-checkpoint/optimizer.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 893441093
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:9a097619eaa9ef720984e4ddd9dde8f3b697ba4e3a54ad0e09caff9a338f70f3
|
| 3 |
size 893441093
|
last-checkpoint/pytorch_model.bin
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 449450757
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:6297a4aa090b90aa3635b3517c4b127894ad2c42e14fd6d228c6743ce17aee7d
|
| 3 |
size 449450757
|
last-checkpoint/rng_state_0.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14583
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:7ef2eb325ca73aeb9167731426720e192b4dc476427d6c01affc7b3b2a3e583b
|
| 3 |
size 14583
|
last-checkpoint/rng_state_1.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14583
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:3ef8f093c13597a9033ad0961b449077de3ab17c6b5e598e7ffe900737a37b62
|
| 3 |
size 14583
|
last-checkpoint/rng_state_2.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14583
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:e20445f0abf4410bec575adab612ba675a2a9e22a555ffb2b2fb85961556a332
|
| 3 |
size 14583
|
last-checkpoint/rng_state_3.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14583
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:20bb77018a33c479ea4b69c28339611c22b8e8641554d8590d8198df56a8bc21
|
| 3 |
size 14583
|
last-checkpoint/rng_state_4.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14583
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:34450b80230704bfaa6dfc6f8672078f09277102e090e32e505412e8b0a06323
|
| 3 |
size 14583
|
last-checkpoint/rng_state_5.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14583
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:0f073590871f68cf52e503ea996a2b041d76f54fc155c63b39985473c3a2c6e9
|
| 3 |
size 14583
|
last-checkpoint/rng_state_6.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14583
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:eb10ba8d357eb68fad272ff5bc8fb10ce9e5818ee9a6bd185f2331209c9c5eee
|
| 3 |
size 14583
|
last-checkpoint/rng_state_7.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14583
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:9bdc5c8f1d9880eb7b2e1404af5eeae63b870215c24de51ca47db7d2d9d87809
|
| 3 |
size 14583
|
last-checkpoint/scheduler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 627
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:5e0be0a1dddd0483c31953c97f497ac534ef42fa519c13cb1ceaab964eeaafea
|
| 3 |
size 627
|
last-checkpoint/trainer_state.json
CHANGED
|
@@ -1,8 +1,8 @@
|
|
| 1 |
{
|
| 2 |
"best_metric": null,
|
| 3 |
"best_model_checkpoint": null,
|
| 4 |
-
"epoch": 1.
|
| 5 |
-
"global_step":
|
| 6 |
"is_hyper_param_search": false,
|
| 7 |
"is_local_process_zero": true,
|
| 8 |
"is_world_process_zero": true,
|
|
@@ -7326,11 +7326,131 @@
|
|
| 7326 |
"learning_rate": 6.058179638667089e-05,
|
| 7327 |
"loss": 0.3027,
|
| 7328 |
"step": 610000
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 7329 |
}
|
| 7330 |
],
|
| 7331 |
"max_steps": 1000000,
|
| 7332 |
"num_train_epochs": 2,
|
| 7333 |
-
"total_flos": 4.
|
| 7334 |
"trial_name": null,
|
| 7335 |
"trial_params": null
|
| 7336 |
}
|
|
|
|
| 1 |
{
|
| 2 |
"best_metric": null,
|
| 3 |
"best_model_checkpoint": null,
|
| 4 |
+
"epoch": 1.2394100408205695,
|
| 5 |
+
"global_step": 620000,
|
| 6 |
"is_hyper_param_search": false,
|
| 7 |
"is_local_process_zero": true,
|
| 8 |
"is_world_process_zero": true,
|
|
|
|
| 7326 |
"learning_rate": 6.058179638667089e-05,
|
| 7327 |
"loss": 0.3027,
|
| 7328 |
"step": 610000
|
| 7329 |
+
},
|
| 7330 |
+
{
|
| 7331 |
+
"epoch": 1.22,
|
| 7332 |
+
"learning_rate": 6.047062257725395e-05,
|
| 7333 |
+
"loss": 0.3035,
|
| 7334 |
+
"step": 610500
|
| 7335 |
+
},
|
| 7336 |
+
{
|
| 7337 |
+
"epoch": 1.22,
|
| 7338 |
+
"learning_rate": 6.035950216040917e-05,
|
| 7339 |
+
"loss": 0.303,
|
| 7340 |
+
"step": 611000
|
| 7341 |
+
},
|
| 7342 |
+
{
|
| 7343 |
+
"epoch": 1.22,
|
| 7344 |
+
"learning_rate": 6.0248435439935516e-05,
|
| 7345 |
+
"loss": 0.3031,
|
| 7346 |
+
"step": 611500
|
| 7347 |
+
},
|
| 7348 |
+
{
|
| 7349 |
+
"epoch": 1.22,
|
| 7350 |
+
"learning_rate": 6.0137422719485145e-05,
|
| 7351 |
+
"loss": 0.3032,
|
| 7352 |
+
"step": 612000
|
| 7353 |
+
},
|
| 7354 |
+
{
|
| 7355 |
+
"epoch": 1.22,
|
| 7356 |
+
"learning_rate": 6.0026464302562636e-05,
|
| 7357 |
+
"loss": 0.303,
|
| 7358 |
+
"step": 612500
|
| 7359 |
+
},
|
| 7360 |
+
{
|
| 7361 |
+
"epoch": 1.23,
|
| 7362 |
+
"learning_rate": 5.991556049252401e-05,
|
| 7363 |
+
"loss": 0.303,
|
| 7364 |
+
"step": 613000
|
| 7365 |
+
},
|
| 7366 |
+
{
|
| 7367 |
+
"epoch": 1.23,
|
| 7368 |
+
"learning_rate": 5.980471159257609e-05,
|
| 7369 |
+
"loss": 0.3031,
|
| 7370 |
+
"step": 613500
|
| 7371 |
+
},
|
| 7372 |
+
{
|
| 7373 |
+
"epoch": 1.23,
|
| 7374 |
+
"learning_rate": 5.969391790577551e-05,
|
| 7375 |
+
"loss": 0.3026,
|
| 7376 |
+
"step": 614000
|
| 7377 |
+
},
|
| 7378 |
+
{
|
| 7379 |
+
"epoch": 1.23,
|
| 7380 |
+
"learning_rate": 5.958317973502798e-05,
|
| 7381 |
+
"loss": 0.3026,
|
| 7382 |
+
"step": 614500
|
| 7383 |
+
},
|
| 7384 |
+
{
|
| 7385 |
+
"epoch": 1.23,
|
| 7386 |
+
"learning_rate": 5.947249738308747e-05,
|
| 7387 |
+
"loss": 0.3024,
|
| 7388 |
+
"step": 615000
|
| 7389 |
+
},
|
| 7390 |
+
{
|
| 7391 |
+
"epoch": 1.23,
|
| 7392 |
+
"learning_rate": 5.9361871152555254e-05,
|
| 7393 |
+
"loss": 0.3031,
|
| 7394 |
+
"step": 615500
|
| 7395 |
+
},
|
| 7396 |
+
{
|
| 7397 |
+
"epoch": 1.23,
|
| 7398 |
+
"learning_rate": 5.925130134587924e-05,
|
| 7399 |
+
"loss": 0.3021,
|
| 7400 |
+
"step": 616000
|
| 7401 |
+
},
|
| 7402 |
+
{
|
| 7403 |
+
"epoch": 1.23,
|
| 7404 |
+
"learning_rate": 5.914078826535307e-05,
|
| 7405 |
+
"loss": 0.3021,
|
| 7406 |
+
"step": 616500
|
| 7407 |
+
},
|
| 7408 |
+
{
|
| 7409 |
+
"epoch": 1.23,
|
| 7410 |
+
"learning_rate": 5.903033221311528e-05,
|
| 7411 |
+
"loss": 0.3023,
|
| 7412 |
+
"step": 617000
|
| 7413 |
+
},
|
| 7414 |
+
{
|
| 7415 |
+
"epoch": 1.23,
|
| 7416 |
+
"learning_rate": 5.891993349114847e-05,
|
| 7417 |
+
"loss": 0.3025,
|
| 7418 |
+
"step": 617500
|
| 7419 |
+
},
|
| 7420 |
+
{
|
| 7421 |
+
"epoch": 1.24,
|
| 7422 |
+
"learning_rate": 5.880959240127858e-05,
|
| 7423 |
+
"loss": 0.3021,
|
| 7424 |
+
"step": 618000
|
| 7425 |
+
},
|
| 7426 |
+
{
|
| 7427 |
+
"epoch": 1.24,
|
| 7428 |
+
"learning_rate": 5.86993092451739e-05,
|
| 7429 |
+
"loss": 0.3022,
|
| 7430 |
+
"step": 618500
|
| 7431 |
+
},
|
| 7432 |
+
{
|
| 7433 |
+
"epoch": 1.24,
|
| 7434 |
+
"learning_rate": 5.858908432434438e-05,
|
| 7435 |
+
"loss": 0.3021,
|
| 7436 |
+
"step": 619000
|
| 7437 |
+
},
|
| 7438 |
+
{
|
| 7439 |
+
"epoch": 1.24,
|
| 7440 |
+
"learning_rate": 5.847891794014074e-05,
|
| 7441 |
+
"loss": 0.3017,
|
| 7442 |
+
"step": 619500
|
| 7443 |
+
},
|
| 7444 |
+
{
|
| 7445 |
+
"epoch": 1.24,
|
| 7446 |
+
"learning_rate": 5.8368810393753684e-05,
|
| 7447 |
+
"loss": 0.3021,
|
| 7448 |
+
"step": 620000
|
| 7449 |
}
|
| 7450 |
],
|
| 7451 |
"max_steps": 1000000,
|
| 7452 |
"num_train_epochs": 2,
|
| 7453 |
+
"total_flos": 4.191638959928546e+22,
|
| 7454 |
"trial_name": null,
|
| 7455 |
"trial_params": null
|
| 7456 |
}
|
pytorch_model.bin
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 449450757
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:6297a4aa090b90aa3635b3517c4b127894ad2c42e14fd6d228c6743ce17aee7d
|
| 3 |
size 449450757
|