Training in progress, step 840000
Browse files- last-checkpoint/optimizer.pt +1 -1
- last-checkpoint/pytorch_model.bin +1 -1
- last-checkpoint/rng_state_0.pth +1 -1
- last-checkpoint/rng_state_1.pth +1 -1
- last-checkpoint/rng_state_2.pth +1 -1
- last-checkpoint/rng_state_3.pth +1 -1
- last-checkpoint/rng_state_4.pth +1 -1
- last-checkpoint/rng_state_5.pth +1 -1
- last-checkpoint/rng_state_6.pth +1 -1
- last-checkpoint/rng_state_7.pth +1 -1
- last-checkpoint/scheduler.pt +1 -1
- last-checkpoint/trainer_state.json +123 -3
- pytorch_model.bin +1 -1
last-checkpoint/optimizer.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 893441093
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:b2f81c933b26cfeb60d53ba82d975294e2c7358973e2715677db9ca7fd31945d
|
| 3 |
size 893441093
|
last-checkpoint/pytorch_model.bin
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 449450757
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:80793dabaaad0486a9c6c7d32363ba477ae4d785ddabb640496bf016359dc491
|
| 3 |
size 449450757
|
last-checkpoint/rng_state_0.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14583
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:c09ff3d1788e565e5a086e252ccf0ede212b045e4e5f4392a44c6ea6f0987dd6
|
| 3 |
size 14583
|
last-checkpoint/rng_state_1.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14583
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:e058e2238a38cee98eacc9109fd883ace95c4833f253ace4bd37e2704c0fe5af
|
| 3 |
size 14583
|
last-checkpoint/rng_state_2.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14583
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:9431567146b6a803c38f6863bbd8c9115e688967dc8f725b32605962fde389b3
|
| 3 |
size 14583
|
last-checkpoint/rng_state_3.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14583
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:65f8725dca368138ac60071ebf1967a52a0bdc41ecaaff24531fe8b99b9ccb52
|
| 3 |
size 14583
|
last-checkpoint/rng_state_4.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14583
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:2eda6d197ffa3fe6958e94125c5fa0490a4afe5ac2f8a51ad2a4931b09364f04
|
| 3 |
size 14583
|
last-checkpoint/rng_state_5.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14583
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:fa3a1be70ad51e1c8a5b547f1989a2e92d51a9ec27c3a4490875ff9354ff3dda
|
| 3 |
size 14583
|
last-checkpoint/rng_state_6.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14583
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:8bc51de7c0af3e5027c4a852a232459cf39ee9a71ea51b7603a1f5327ee5a020
|
| 3 |
size 14583
|
last-checkpoint/rng_state_7.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14583
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:e4a63b1c76ff6ed1a203a2dff4664ff326fc59ea9cbb507ef4f3897d7810fb84
|
| 3 |
size 14583
|
last-checkpoint/scheduler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 627
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:90777e10c619a178822dbd35785dbd74396ff21ef94c6855b7e97b44a2c700b9
|
| 3 |
size 627
|
last-checkpoint/trainer_state.json
CHANGED
|
@@ -1,8 +1,8 @@
|
|
| 1 |
{
|
| 2 |
"best_metric": null,
|
| 3 |
"best_model_checkpoint": null,
|
| 4 |
-
"epoch": 1.
|
| 5 |
-
"global_step":
|
| 6 |
"is_hyper_param_search": false,
|
| 7 |
"is_local_process_zero": true,
|
| 8 |
"is_world_process_zero": true,
|
|
@@ -9966,11 +9966,131 @@
|
|
| 9966 |
"learning_rate": 2.0773332330534513e-05,
|
| 9967 |
"loss": 0.2887,
|
| 9968 |
"step": 830000
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 9969 |
}
|
| 9970 |
],
|
| 9971 |
"max_steps": 1000000,
|
| 9972 |
"num_train_epochs": 2,
|
| 9973 |
-
"total_flos": 5.
|
| 9974 |
"trial_name": null,
|
| 9975 |
"trial_params": null
|
| 9976 |
}
|
|
|
|
| 1 |
{
|
| 2 |
"best_metric": null,
|
| 3 |
"best_model_checkpoint": null,
|
| 4 |
+
"epoch": 1.2192265686657473,
|
| 5 |
+
"global_step": 840000,
|
| 6 |
"is_hyper_param_search": false,
|
| 7 |
"is_local_process_zero": true,
|
| 8 |
"is_world_process_zero": true,
|
|
|
|
| 9966 |
"learning_rate": 2.0773332330534513e-05,
|
| 9967 |
"loss": 0.2887,
|
| 9968 |
"step": 830000
|
| 9969 |
+
},
|
| 9970 |
+
{
|
| 9971 |
+
"epoch": 1.2,
|
| 9972 |
+
"learning_rate": 2.0711718689098057e-05,
|
| 9973 |
+
"loss": 0.2886,
|
| 9974 |
+
"step": 830500
|
| 9975 |
+
},
|
| 9976 |
+
{
|
| 9977 |
+
"epoch": 1.2,
|
| 9978 |
+
"learning_rate": 2.0650267139558772e-05,
|
| 9979 |
+
"loss": 0.2887,
|
| 9980 |
+
"step": 831000
|
| 9981 |
+
},
|
| 9982 |
+
{
|
| 9983 |
+
"epoch": 1.2,
|
| 9984 |
+
"learning_rate": 2.058897784992289e-05,
|
| 9985 |
+
"loss": 0.2885,
|
| 9986 |
+
"step": 831500
|
| 9987 |
+
},
|
| 9988 |
+
{
|
| 9989 |
+
"epoch": 1.2,
|
| 9990 |
+
"learning_rate": 2.052785098775293e-05,
|
| 9991 |
+
"loss": 0.2886,
|
| 9992 |
+
"step": 832000
|
| 9993 |
+
},
|
| 9994 |
+
{
|
| 9995 |
+
"epoch": 1.2,
|
| 9996 |
+
"learning_rate": 2.0466886720167436e-05,
|
| 9997 |
+
"loss": 0.2877,
|
| 9998 |
+
"step": 832500
|
| 9999 |
+
},
|
| 10000 |
+
{
|
| 10001 |
+
"epoch": 1.21,
|
| 10002 |
+
"learning_rate": 2.04060852138404e-05,
|
| 10003 |
+
"loss": 0.2878,
|
| 10004 |
+
"step": 833000
|
| 10005 |
+
},
|
| 10006 |
+
{
|
| 10007 |
+
"epoch": 1.21,
|
| 10008 |
+
"learning_rate": 2.0345446635000783e-05,
|
| 10009 |
+
"loss": 0.2887,
|
| 10010 |
+
"step": 833500
|
| 10011 |
+
},
|
| 10012 |
+
{
|
| 10013 |
+
"epoch": 1.21,
|
| 10014 |
+
"learning_rate": 2.028497114943219e-05,
|
| 10015 |
+
"loss": 0.2888,
|
| 10016 |
+
"step": 834000
|
| 10017 |
+
},
|
| 10018 |
+
{
|
| 10019 |
+
"epoch": 1.21,
|
| 10020 |
+
"learning_rate": 2.022465892247223e-05,
|
| 10021 |
+
"loss": 0.2881,
|
| 10022 |
+
"step": 834500
|
| 10023 |
+
},
|
| 10024 |
+
{
|
| 10025 |
+
"epoch": 1.21,
|
| 10026 |
+
"learning_rate": 2.0164510119012263e-05,
|
| 10027 |
+
"loss": 0.2878,
|
| 10028 |
+
"step": 835000
|
| 10029 |
+
},
|
| 10030 |
+
{
|
| 10031 |
+
"epoch": 1.21,
|
| 10032 |
+
"learning_rate": 2.0104524903496834e-05,
|
| 10033 |
+
"loss": 0.2879,
|
| 10034 |
+
"step": 835500
|
| 10035 |
+
},
|
| 10036 |
+
{
|
| 10037 |
+
"epoch": 1.21,
|
| 10038 |
+
"learning_rate": 2.0044703439923217e-05,
|
| 10039 |
+
"loss": 0.2876,
|
| 10040 |
+
"step": 836000
|
| 10041 |
+
},
|
| 10042 |
+
{
|
| 10043 |
+
"epoch": 1.21,
|
| 10044 |
+
"learning_rate": 1.998504589184101e-05,
|
| 10045 |
+
"loss": 0.2879,
|
| 10046 |
+
"step": 836500
|
| 10047 |
+
},
|
| 10048 |
+
{
|
| 10049 |
+
"epoch": 1.21,
|
| 10050 |
+
"learning_rate": 1.9925552422351654e-05,
|
| 10051 |
+
"loss": 0.2878,
|
| 10052 |
+
"step": 837000
|
| 10053 |
+
},
|
| 10054 |
+
{
|
| 10055 |
+
"epoch": 1.21,
|
| 10056 |
+
"learning_rate": 1.9866223194108028e-05,
|
| 10057 |
+
"loss": 0.2884,
|
| 10058 |
+
"step": 837500
|
| 10059 |
+
},
|
| 10060 |
+
{
|
| 10061 |
+
"epoch": 1.22,
|
| 10062 |
+
"learning_rate": 1.9807058369314016e-05,
|
| 10063 |
+
"loss": 0.288,
|
| 10064 |
+
"step": 838000
|
| 10065 |
+
},
|
| 10066 |
+
{
|
| 10067 |
+
"epoch": 1.22,
|
| 10068 |
+
"learning_rate": 1.9748058109723953e-05,
|
| 10069 |
+
"loss": 0.2879,
|
| 10070 |
+
"step": 838500
|
| 10071 |
+
},
|
| 10072 |
+
{
|
| 10073 |
+
"epoch": 1.22,
|
| 10074 |
+
"learning_rate": 1.968922257664231e-05,
|
| 10075 |
+
"loss": 0.2878,
|
| 10076 |
+
"step": 839000
|
| 10077 |
+
},
|
| 10078 |
+
{
|
| 10079 |
+
"epoch": 1.22,
|
| 10080 |
+
"learning_rate": 1.9630551930923155e-05,
|
| 10081 |
+
"loss": 0.288,
|
| 10082 |
+
"step": 839500
|
| 10083 |
+
},
|
| 10084 |
+
{
|
| 10085 |
+
"epoch": 1.22,
|
| 10086 |
+
"learning_rate": 1.9572046332969825e-05,
|
| 10087 |
+
"loss": 0.2881,
|
| 10088 |
+
"step": 840000
|
| 10089 |
}
|
| 10090 |
],
|
| 10091 |
"max_steps": 1000000,
|
| 10092 |
"num_train_epochs": 2,
|
| 10093 |
+
"total_flos": 5.678984079326211e+22,
|
| 10094 |
"trial_name": null,
|
| 10095 |
"trial_params": null
|
| 10096 |
}
|
pytorch_model.bin
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 449450757
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:80793dabaaad0486a9c6c7d32363ba477ae4d785ddabb640496bf016359dc491
|
| 3 |
size 449450757
|