Training in progress, step 640000
Browse files- last-checkpoint/optimizer.pt +1 -1
- last-checkpoint/pytorch_model.bin +1 -1
- last-checkpoint/rng_state_0.pth +1 -1
- last-checkpoint/rng_state_1.pth +1 -1
- last-checkpoint/rng_state_2.pth +1 -1
- last-checkpoint/rng_state_3.pth +1 -1
- last-checkpoint/rng_state_4.pth +1 -1
- last-checkpoint/rng_state_5.pth +1 -1
- last-checkpoint/rng_state_6.pth +1 -1
- last-checkpoint/rng_state_7.pth +1 -1
- last-checkpoint/scheduler.pt +1 -1
- last-checkpoint/trainer_state.json +123 -3
- pytorch_model.bin +1 -1
last-checkpoint/optimizer.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 893441093
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:570fd9971dd127676195908f1f0168c560d379e06053db1ec1c6889a24e76909
|
| 3 |
size 893441093
|
last-checkpoint/pytorch_model.bin
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 449450757
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:407afab53633fc482bbe780f5224c6b1388fc7b7dd3f17aa73388222d02bc81c
|
| 3 |
size 449450757
|
last-checkpoint/rng_state_0.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14583
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:069c7b8d28935c1bdaf707018f31232b5c5d0b17ca264ac835e0cab62f47f60b
|
| 3 |
size 14583
|
last-checkpoint/rng_state_1.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14583
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:acfb6c5ca1e2a8aae6849b592c5e4c4b839246ca00f42f46fa8da24fee6f7051
|
| 3 |
size 14583
|
last-checkpoint/rng_state_2.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14583
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:ed15d29f5bdaa33109b0c66a9aa2dbc57339a469e3f71f40bec5ec342e0d6d49
|
| 3 |
size 14583
|
last-checkpoint/rng_state_3.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14583
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:962d2f5974f30660f555e22cdf0c12b334de1b8fc49a6a5192e63c3a6ee6eebe
|
| 3 |
size 14583
|
last-checkpoint/rng_state_4.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14583
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:fa2e736f0ce5f395a825ebebcf342c762745706534807a9b43b2a6a713704726
|
| 3 |
size 14583
|
last-checkpoint/rng_state_5.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14583
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:0bff10bd0517565104b7a365f7830fc50ca6a2c535ddf94460fc2737ad38c9a7
|
| 3 |
size 14583
|
last-checkpoint/rng_state_6.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14583
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:24fffbd4923bc1f675f8117f531217edf35f82264eb436b97401dab9e4eeeaa0
|
| 3 |
size 14583
|
last-checkpoint/rng_state_7.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14583
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:d716524e67d0b69cb1b5ffc99aa56ed5a73b186c4b6bfbd6ff0ef38267147113
|
| 3 |
size 14583
|
last-checkpoint/scheduler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 627
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:03a35091ba68234fa026466686321e8ce53cfe05ba57973184932ffc7464e369
|
| 3 |
size 627
|
last-checkpoint/trainer_state.json
CHANGED
|
@@ -1,8 +1,8 @@
|
|
| 1 |
{
|
| 2 |
"best_metric": null,
|
| 3 |
"best_model_checkpoint": null,
|
| 4 |
-
"epoch": 1.
|
| 5 |
-
"global_step":
|
| 6 |
"is_hyper_param_search": false,
|
| 7 |
"is_local_process_zero": true,
|
| 8 |
"is_world_process_zero": true,
|
|
@@ -7566,11 +7566,131 @@
|
|
| 7566 |
"learning_rate": 5.617947778908498e-05,
|
| 7567 |
"loss": 0.3015,
|
| 7568 |
"step": 630000
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 7569 |
}
|
| 7570 |
],
|
| 7571 |
"max_steps": 1000000,
|
| 7572 |
"num_train_epochs": 2,
|
| 7573 |
-
"total_flos": 4.
|
| 7574 |
"trial_name": null,
|
| 7575 |
"trial_params": null
|
| 7576 |
}
|
|
|
|
| 1 |
{
|
| 2 |
"best_metric": null,
|
| 3 |
"best_model_checkpoint": null,
|
| 4 |
+
"epoch": 1.2793910098792973,
|
| 5 |
+
"global_step": 640000,
|
| 6 |
"is_hyper_param_search": false,
|
| 7 |
"is_local_process_zero": true,
|
| 8 |
"is_world_process_zero": true,
|
|
|
|
| 7566 |
"learning_rate": 5.617947778908498e-05,
|
| 7567 |
"loss": 0.3015,
|
| 7568 |
"step": 630000
|
| 7569 |
+
},
|
| 7570 |
+
{
|
| 7571 |
+
"epoch": 1.26,
|
| 7572 |
+
"learning_rate": 5.60706751197385e-05,
|
| 7573 |
+
"loss": 0.3014,
|
| 7574 |
+
"step": 630500
|
| 7575 |
+
},
|
| 7576 |
+
{
|
| 7577 |
+
"epoch": 1.26,
|
| 7578 |
+
"learning_rate": 5.596193787225254e-05,
|
| 7579 |
+
"loss": 0.3008,
|
| 7580 |
+
"step": 631000
|
| 7581 |
+
},
|
| 7582 |
+
{
|
| 7583 |
+
"epoch": 1.26,
|
| 7584 |
+
"learning_rate": 5.585326634391049e-05,
|
| 7585 |
+
"loss": 0.3008,
|
| 7586 |
+
"step": 631500
|
| 7587 |
+
},
|
| 7588 |
+
{
|
| 7589 |
+
"epoch": 1.26,
|
| 7590 |
+
"learning_rate": 5.574466083181624e-05,
|
| 7591 |
+
"loss": 0.3014,
|
| 7592 |
+
"step": 632000
|
| 7593 |
+
},
|
| 7594 |
+
{
|
| 7595 |
+
"epoch": 1.26,
|
| 7596 |
+
"learning_rate": 5.563612163289308e-05,
|
| 7597 |
+
"loss": 0.3008,
|
| 7598 |
+
"step": 632500
|
| 7599 |
+
},
|
| 7600 |
+
{
|
| 7601 |
+
"epoch": 1.27,
|
| 7602 |
+
"learning_rate": 5.552764904388305e-05,
|
| 7603 |
+
"loss": 0.3016,
|
| 7604 |
+
"step": 633000
|
| 7605 |
+
},
|
| 7606 |
+
{
|
| 7607 |
+
"epoch": 1.27,
|
| 7608 |
+
"learning_rate": 5.541924336134609e-05,
|
| 7609 |
+
"loss": 0.3014,
|
| 7610 |
+
"step": 633500
|
| 7611 |
+
},
|
| 7612 |
+
{
|
| 7613 |
+
"epoch": 1.27,
|
| 7614 |
+
"learning_rate": 5.5310904881659116e-05,
|
| 7615 |
+
"loss": 0.301,
|
| 7616 |
+
"step": 634000
|
| 7617 |
+
},
|
| 7618 |
+
{
|
| 7619 |
+
"epoch": 1.27,
|
| 7620 |
+
"learning_rate": 5.5202633901015464e-05,
|
| 7621 |
+
"loss": 0.3008,
|
| 7622 |
+
"step": 634500
|
| 7623 |
+
},
|
| 7624 |
+
{
|
| 7625 |
+
"epoch": 1.27,
|
| 7626 |
+
"learning_rate": 5.5094430715423835e-05,
|
| 7627 |
+
"loss": 0.3017,
|
| 7628 |
+
"step": 635000
|
| 7629 |
+
},
|
| 7630 |
+
{
|
| 7631 |
+
"epoch": 1.27,
|
| 7632 |
+
"learning_rate": 5.4986295620707626e-05,
|
| 7633 |
+
"loss": 0.3005,
|
| 7634 |
+
"step": 635500
|
| 7635 |
+
},
|
| 7636 |
+
{
|
| 7637 |
+
"epoch": 1.27,
|
| 7638 |
+
"learning_rate": 5.487822891250406e-05,
|
| 7639 |
+
"loss": 0.3004,
|
| 7640 |
+
"step": 636000
|
| 7641 |
+
},
|
| 7642 |
+
{
|
| 7643 |
+
"epoch": 1.27,
|
| 7644 |
+
"learning_rate": 5.477023088626334e-05,
|
| 7645 |
+
"loss": 0.3008,
|
| 7646 |
+
"step": 636500
|
| 7647 |
+
},
|
| 7648 |
+
{
|
| 7649 |
+
"epoch": 1.27,
|
| 7650 |
+
"learning_rate": 5.4662301837247985e-05,
|
| 7651 |
+
"loss": 0.301,
|
| 7652 |
+
"step": 637000
|
| 7653 |
+
},
|
| 7654 |
+
{
|
| 7655 |
+
"epoch": 1.27,
|
| 7656 |
+
"learning_rate": 5.45544420605319e-05,
|
| 7657 |
+
"loss": 0.3008,
|
| 7658 |
+
"step": 637500
|
| 7659 |
+
},
|
| 7660 |
+
{
|
| 7661 |
+
"epoch": 1.28,
|
| 7662 |
+
"learning_rate": 5.4446651850999604e-05,
|
| 7663 |
+
"loss": 0.3012,
|
| 7664 |
+
"step": 638000
|
| 7665 |
+
},
|
| 7666 |
+
{
|
| 7667 |
+
"epoch": 1.28,
|
| 7668 |
+
"learning_rate": 5.433893150334538e-05,
|
| 7669 |
+
"loss": 0.3009,
|
| 7670 |
+
"step": 638500
|
| 7671 |
+
},
|
| 7672 |
+
{
|
| 7673 |
+
"epoch": 1.28,
|
| 7674 |
+
"learning_rate": 5.4231281312072544e-05,
|
| 7675 |
+
"loss": 0.301,
|
| 7676 |
+
"step": 639000
|
| 7677 |
+
},
|
| 7678 |
+
{
|
| 7679 |
+
"epoch": 1.28,
|
| 7680 |
+
"learning_rate": 5.4123701571492636e-05,
|
| 7681 |
+
"loss": 0.3009,
|
| 7682 |
+
"step": 639500
|
| 7683 |
+
},
|
| 7684 |
+
{
|
| 7685 |
+
"epoch": 1.28,
|
| 7686 |
+
"learning_rate": 5.401619257572453e-05,
|
| 7687 |
+
"loss": 0.3007,
|
| 7688 |
+
"step": 640000
|
| 7689 |
}
|
| 7690 |
],
|
| 7691 |
"max_steps": 1000000,
|
| 7692 |
"num_train_epochs": 2,
|
| 7693 |
+
"total_flos": 4.326855661422541e+22,
|
| 7694 |
"trial_name": null,
|
| 7695 |
"trial_params": null
|
| 7696 |
}
|
pytorch_model.bin
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 449450757
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:407afab53633fc482bbe780f5224c6b1388fc7b7dd3f17aa73388222d02bc81c
|
| 3 |
size 449450757
|