Training in progress, step 630000
Browse files- last-checkpoint/optimizer.pt +1 -1
- last-checkpoint/pytorch_model.bin +1 -1
- last-checkpoint/rng_state_0.pth +1 -1
- last-checkpoint/rng_state_1.pth +1 -1
- last-checkpoint/rng_state_2.pth +1 -1
- last-checkpoint/rng_state_3.pth +1 -1
- last-checkpoint/rng_state_4.pth +1 -1
- last-checkpoint/rng_state_5.pth +1 -1
- last-checkpoint/rng_state_6.pth +1 -1
- last-checkpoint/rng_state_7.pth +1 -1
- last-checkpoint/scheduler.pt +1 -1
- last-checkpoint/trainer_state.json +123 -3
- pytorch_model.bin +1 -1
last-checkpoint/optimizer.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 893441093
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:017625820feef9696ace4a5cbefe218b931336a9991e2245257a1d1342a0f729
|
| 3 |
size 893441093
|
last-checkpoint/pytorch_model.bin
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 449450757
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:2d36d94ab11c86f651fecbc7a217529f6f250ac924b506ffb7d29aa9de0ca5bc
|
| 3 |
size 449450757
|
last-checkpoint/rng_state_0.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14583
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:d31d0ded159b5f3b1a8c1ce1b7b826e4fbdda0cc5ba59eaba62ee8809a462e8f
|
| 3 |
size 14583
|
last-checkpoint/rng_state_1.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14583
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:398f2318b4ded20f61f24fd00e4055ea625eaa86f27bce6d1e31ca0965b80a81
|
| 3 |
size 14583
|
last-checkpoint/rng_state_2.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14583
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:cbb6422a1fad93ed77bc18b18a2c8499aa6774c77ffada9e53f436cd9d13ca0c
|
| 3 |
size 14583
|
last-checkpoint/rng_state_3.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14583
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:ce446a79d89ee06223f9e2ae5f2f4290f0fb0ec1cfcfeee86b4b4ba2420ef30e
|
| 3 |
size 14583
|
last-checkpoint/rng_state_4.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14583
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:5fbfbffabb738f483b0924b2fcbbbf4a31bdaab1f9760b5a622efffc7c59e8d2
|
| 3 |
size 14583
|
last-checkpoint/rng_state_5.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14583
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:6a6b1230026b3f360d114a9d0f5608343d3dbe5979744e0c2b45d14032617ff1
|
| 3 |
size 14583
|
last-checkpoint/rng_state_6.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14583
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:c92afb5535b80215526b380f6cb7f75fa76f1d0853152e112df8d84246f00fed
|
| 3 |
size 14583
|
last-checkpoint/rng_state_7.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14583
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:c5fe487d9251494c826a0bd20a1c2515c3d527bc1906f192546685af4384e7fe
|
| 3 |
size 14583
|
last-checkpoint/scheduler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 627
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:5538e16e2cb8a022511fa1c4ff3a30d17572708626a194d4d5db3edb9bc5de72
|
| 3 |
size 627
|
last-checkpoint/trainer_state.json
CHANGED
|
@@ -1,8 +1,8 @@
|
|
| 1 |
{
|
| 2 |
"best_metric": null,
|
| 3 |
"best_model_checkpoint": null,
|
| 4 |
-
"epoch": 1.
|
| 5 |
-
"global_step":
|
| 6 |
"is_hyper_param_search": false,
|
| 7 |
"is_local_process_zero": true,
|
| 8 |
"is_world_process_zero": true,
|
|
@@ -7446,11 +7446,131 @@
|
|
| 7446 |
"learning_rate": 5.8368810393753684e-05,
|
| 7447 |
"loss": 0.3021,
|
| 7448 |
"step": 620000
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 7449 |
}
|
| 7450 |
],
|
| 7451 |
"max_steps": 1000000,
|
| 7452 |
"num_train_epochs": 2,
|
| 7453 |
-
"total_flos": 4.
|
| 7454 |
"trial_name": null,
|
| 7455 |
"trial_params": null
|
| 7456 |
}
|
|
|
|
| 1 |
{
|
| 2 |
"best_metric": null,
|
| 3 |
"best_model_checkpoint": null,
|
| 4 |
+
"epoch": 1.2594005253499334,
|
| 5 |
+
"global_step": 630000,
|
| 6 |
"is_hyper_param_search": false,
|
| 7 |
"is_local_process_zero": true,
|
| 8 |
"is_world_process_zero": true,
|
|
|
|
| 7446 |
"learning_rate": 5.8368810393753684e-05,
|
| 7447 |
"loss": 0.3021,
|
| 7448 |
"step": 620000
|
| 7449 |
+
},
|
| 7450 |
+
{
|
| 7451 |
+
"epoch": 1.24,
|
| 7452 |
+
"learning_rate": 5.8258761986213015e-05,
|
| 7453 |
+
"loss": 0.3027,
|
| 7454 |
+
"step": 620500
|
| 7455 |
+
},
|
| 7456 |
+
{
|
| 7457 |
+
"epoch": 1.24,
|
| 7458 |
+
"learning_rate": 5.814877301838688e-05,
|
| 7459 |
+
"loss": 0.3021,
|
| 7460 |
+
"step": 621000
|
| 7461 |
+
},
|
| 7462 |
+
{
|
| 7463 |
+
"epoch": 1.24,
|
| 7464 |
+
"learning_rate": 5.803884379098094e-05,
|
| 7465 |
+
"loss": 0.3022,
|
| 7466 |
+
"step": 621500
|
| 7467 |
+
},
|
| 7468 |
+
{
|
| 7469 |
+
"epoch": 1.24,
|
| 7470 |
+
"learning_rate": 5.7928974604537494e-05,
|
| 7471 |
+
"loss": 0.3022,
|
| 7472 |
+
"step": 622000
|
| 7473 |
+
},
|
| 7474 |
+
{
|
| 7475 |
+
"epoch": 1.24,
|
| 7476 |
+
"learning_rate": 5.781916575943469e-05,
|
| 7477 |
+
"loss": 0.3022,
|
| 7478 |
+
"step": 622500
|
| 7479 |
+
},
|
| 7480 |
+
{
|
| 7481 |
+
"epoch": 1.25,
|
| 7482 |
+
"learning_rate": 5.770941755588573e-05,
|
| 7483 |
+
"loss": 0.3023,
|
| 7484 |
+
"step": 623000
|
| 7485 |
+
},
|
| 7486 |
+
{
|
| 7487 |
+
"epoch": 1.25,
|
| 7488 |
+
"learning_rate": 5.7599730293938e-05,
|
| 7489 |
+
"loss": 0.302,
|
| 7490 |
+
"step": 623500
|
| 7491 |
+
},
|
| 7492 |
+
{
|
| 7493 |
+
"epoch": 1.25,
|
| 7494 |
+
"learning_rate": 5.749010427347233e-05,
|
| 7495 |
+
"loss": 0.3021,
|
| 7496 |
+
"step": 624000
|
| 7497 |
+
},
|
| 7498 |
+
{
|
| 7499 |
+
"epoch": 1.25,
|
| 7500 |
+
"learning_rate": 5.738053979420199e-05,
|
| 7501 |
+
"loss": 0.3019,
|
| 7502 |
+
"step": 624500
|
| 7503 |
+
},
|
| 7504 |
+
{
|
| 7505 |
+
"epoch": 1.25,
|
| 7506 |
+
"learning_rate": 5.7271037155672156e-05,
|
| 7507 |
+
"loss": 0.3015,
|
| 7508 |
+
"step": 625000
|
| 7509 |
+
},
|
| 7510 |
+
{
|
| 7511 |
+
"epoch": 1.25,
|
| 7512 |
+
"learning_rate": 5.716159665725883e-05,
|
| 7513 |
+
"loss": 0.3016,
|
| 7514 |
+
"step": 625500
|
| 7515 |
+
},
|
| 7516 |
+
{
|
| 7517 |
+
"epoch": 1.25,
|
| 7518 |
+
"learning_rate": 5.7052218598168154e-05,
|
| 7519 |
+
"loss": 0.3017,
|
| 7520 |
+
"step": 626000
|
| 7521 |
+
},
|
| 7522 |
+
{
|
| 7523 |
+
"epoch": 1.25,
|
| 7524 |
+
"learning_rate": 5.69429032774356e-05,
|
| 7525 |
+
"loss": 0.3021,
|
| 7526 |
+
"step": 626500
|
| 7527 |
+
},
|
| 7528 |
+
{
|
| 7529 |
+
"epoch": 1.25,
|
| 7530 |
+
"learning_rate": 5.6833650993925016e-05,
|
| 7531 |
+
"loss": 0.3015,
|
| 7532 |
+
"step": 627000
|
| 7533 |
+
},
|
| 7534 |
+
{
|
| 7535 |
+
"epoch": 1.25,
|
| 7536 |
+
"learning_rate": 5.6724462046328025e-05,
|
| 7537 |
+
"loss": 0.3021,
|
| 7538 |
+
"step": 627500
|
| 7539 |
+
},
|
| 7540 |
+
{
|
| 7541 |
+
"epoch": 1.26,
|
| 7542 |
+
"learning_rate": 5.661533673316303e-05,
|
| 7543 |
+
"loss": 0.3026,
|
| 7544 |
+
"step": 628000
|
| 7545 |
+
},
|
| 7546 |
+
{
|
| 7547 |
+
"epoch": 1.26,
|
| 7548 |
+
"learning_rate": 5.6506275352774447e-05,
|
| 7549 |
+
"loss": 0.3009,
|
| 7550 |
+
"step": 628500
|
| 7551 |
+
},
|
| 7552 |
+
{
|
| 7553 |
+
"epoch": 1.26,
|
| 7554 |
+
"learning_rate": 5.639727820333198e-05,
|
| 7555 |
+
"loss": 0.3017,
|
| 7556 |
+
"step": 629000
|
| 7557 |
+
},
|
| 7558 |
+
{
|
| 7559 |
+
"epoch": 1.26,
|
| 7560 |
+
"learning_rate": 5.62883455828296e-05,
|
| 7561 |
+
"loss": 0.3016,
|
| 7562 |
+
"step": 629500
|
| 7563 |
+
},
|
| 7564 |
+
{
|
| 7565 |
+
"epoch": 1.26,
|
| 7566 |
+
"learning_rate": 5.617947778908498e-05,
|
| 7567 |
+
"loss": 0.3015,
|
| 7568 |
+
"step": 630000
|
| 7569 |
}
|
| 7570 |
],
|
| 7571 |
"max_steps": 1000000,
|
| 7572 |
"num_train_epochs": 2,
|
| 7573 |
+
"total_flos": 4.2592477348671294e+22,
|
| 7574 |
"trial_name": null,
|
| 7575 |
"trial_params": null
|
| 7576 |
}
|
pytorch_model.bin
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 449450757
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:2d36d94ab11c86f651fecbc7a217529f6f250ac924b506ffb7d29aa9de0ca5bc
|
| 3 |
size 449450757
|