Training in progress, step 670000
Browse files- last-checkpoint/optimizer.pt +1 -1
- last-checkpoint/pytorch_model.bin +1 -1
- last-checkpoint/rng_state_0.pth +1 -1
- last-checkpoint/rng_state_1.pth +1 -1
- last-checkpoint/rng_state_2.pth +1 -1
- last-checkpoint/rng_state_3.pth +1 -1
- last-checkpoint/rng_state_4.pth +1 -1
- last-checkpoint/rng_state_5.pth +1 -1
- last-checkpoint/rng_state_6.pth +1 -1
- last-checkpoint/rng_state_7.pth +1 -1
- last-checkpoint/scheduler.pt +1 -1
- last-checkpoint/trainer_state.json +123 -3
- pytorch_model.bin +1 -1
last-checkpoint/optimizer.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 893441093
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:ed5bbd83e76bfaea16133e5f4d584916d5b8420b3bb185b8e5801362569d4f69
|
| 3 |
size 893441093
|
last-checkpoint/pytorch_model.bin
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 449450757
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:2d66a36b6e02c5a5390d3c9dec0faf002f0a21fa7c7b5ef13a837f052f84e013
|
| 3 |
size 449450757
|
last-checkpoint/rng_state_0.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14583
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:12cb7e1a11524752d8ec0a2746c2da7c87cd4d3afc083cf4a0df43b88ed43337
|
| 3 |
size 14583
|
last-checkpoint/rng_state_1.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14583
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:bd3471c82ac0fc930f64e5adbb6702a0e555d4edfcc1c2dab4ff36db308349b1
|
| 3 |
size 14583
|
last-checkpoint/rng_state_2.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14583
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:49fa69a09ea23ef88cf7df6a3190bd2ee20d350293163871b5d1dbdf1a735794
|
| 3 |
size 14583
|
last-checkpoint/rng_state_3.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14583
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:0dac01a61c51f51dbd4c3cc5d50cf7d5af4a9f263667ab541575cfd5deab9645
|
| 3 |
size 14583
|
last-checkpoint/rng_state_4.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14583
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:221b7d8fdca30ab22892af203b971ac82533d02ad7492e4e8b5068d84fa6a3ca
|
| 3 |
size 14583
|
last-checkpoint/rng_state_5.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14583
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:548649d9f5f2f112c52d8a0f4a7c44c0a8f1f18e8bb96cd91be7066faf617949
|
| 3 |
size 14583
|
last-checkpoint/rng_state_6.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14583
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:426865c62dd7c19a97ed19d06fe6fa6770f12ecb7a2997d9a0820ed2f9c93c21
|
| 3 |
size 14583
|
last-checkpoint/rng_state_7.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14583
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:e886c38b9308720d6c044b1f01de3ee4919b1d3a6edb19ef015bd4926793ada4
|
| 3 |
size 14583
|
last-checkpoint/scheduler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 627
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:50de55c4a72c38e6722f7cb77ebe9f35ce412c17a797d93c631371b39d861204
|
| 3 |
size 627
|
last-checkpoint/trainer_state.json
CHANGED
|
@@ -1,8 +1,8 @@
|
|
| 1 |
{
|
| 2 |
"best_metric": null,
|
| 3 |
"best_model_checkpoint": null,
|
| 4 |
-
"epoch": 1.
|
| 5 |
-
"global_step":
|
| 6 |
"is_hyper_param_search": false,
|
| 7 |
"is_local_process_zero": true,
|
| 8 |
"is_world_process_zero": true,
|
|
@@ -7926,11 +7926,131 @@
|
|
| 7926 |
"learning_rate": 4.97771953363055e-05,
|
| 7927 |
"loss": 0.299,
|
| 7928 |
"step": 660000
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 7929 |
}
|
| 7930 |
],
|
| 7931 |
"max_steps": 1000000,
|
| 7932 |
"num_train_epochs": 2,
|
| 7933 |
-
"total_flos": 4.
|
| 7934 |
"trial_name": null,
|
| 7935 |
"trial_params": null
|
| 7936 |
}
|
|
|
|
| 1 |
{
|
| 2 |
"best_metric": null,
|
| 3 |
"best_model_checkpoint": null,
|
| 4 |
+
"epoch": 1.3393624634673895,
|
| 5 |
+
"global_step": 670000,
|
| 6 |
"is_hyper_param_search": false,
|
| 7 |
"is_local_process_zero": true,
|
| 8 |
"is_world_process_zero": true,
|
|
|
|
| 7926 |
"learning_rate": 4.97771953363055e-05,
|
| 7927 |
"loss": 0.299,
|
| 7928 |
"step": 660000
|
| 7929 |
+
},
|
| 7930 |
+
{
|
| 7931 |
+
"epoch": 1.32,
|
| 7932 |
+
"learning_rate": 4.967283762863444e-05,
|
| 7933 |
+
"loss": 0.2994,
|
| 7934 |
+
"step": 660500
|
| 7935 |
+
},
|
| 7936 |
+
{
|
| 7937 |
+
"epoch": 1.32,
|
| 7938 |
+
"learning_rate": 4.956856283426728e-05,
|
| 7939 |
+
"loss": 0.2994,
|
| 7940 |
+
"step": 661000
|
| 7941 |
+
},
|
| 7942 |
+
{
|
| 7943 |
+
"epoch": 1.32,
|
| 7944 |
+
"learning_rate": 4.946437123828732e-05,
|
| 7945 |
+
"loss": 0.2982,
|
| 7946 |
+
"step": 661500
|
| 7947 |
+
},
|
| 7948 |
+
{
|
| 7949 |
+
"epoch": 1.32,
|
| 7950 |
+
"learning_rate": 4.936026312555037e-05,
|
| 7951 |
+
"loss": 0.2992,
|
| 7952 |
+
"step": 662000
|
| 7953 |
+
},
|
| 7954 |
+
{
|
| 7955 |
+
"epoch": 1.32,
|
| 7956 |
+
"learning_rate": 4.925623878068408e-05,
|
| 7957 |
+
"loss": 0.2984,
|
| 7958 |
+
"step": 662500
|
| 7959 |
+
},
|
| 7960 |
+
{
|
| 7961 |
+
"epoch": 1.33,
|
| 7962 |
+
"learning_rate": 4.915229848808698e-05,
|
| 7963 |
+
"loss": 0.2994,
|
| 7964 |
+
"step": 663000
|
| 7965 |
+
},
|
| 7966 |
+
{
|
| 7967 |
+
"epoch": 1.33,
|
| 7968 |
+
"learning_rate": 4.904844253192795e-05,
|
| 7969 |
+
"loss": 0.2989,
|
| 7970 |
+
"step": 663500
|
| 7971 |
+
},
|
| 7972 |
+
{
|
| 7973 |
+
"epoch": 1.33,
|
| 7974 |
+
"learning_rate": 4.8944671196145136e-05,
|
| 7975 |
+
"loss": 0.2991,
|
| 7976 |
+
"step": 664000
|
| 7977 |
+
},
|
| 7978 |
+
{
|
| 7979 |
+
"epoch": 1.33,
|
| 7980 |
+
"learning_rate": 4.884098476444539e-05,
|
| 7981 |
+
"loss": 0.2984,
|
| 7982 |
+
"step": 664500
|
| 7983 |
+
},
|
| 7984 |
+
{
|
| 7985 |
+
"epoch": 1.33,
|
| 7986 |
+
"learning_rate": 4.8737383520303546e-05,
|
| 7987 |
+
"loss": 0.2984,
|
| 7988 |
+
"step": 665000
|
| 7989 |
+
},
|
| 7990 |
+
{
|
| 7991 |
+
"epoch": 1.33,
|
| 7992 |
+
"learning_rate": 4.8633867746961356e-05,
|
| 7993 |
+
"loss": 0.2988,
|
| 7994 |
+
"step": 665500
|
| 7995 |
+
},
|
| 7996 |
+
{
|
| 7997 |
+
"epoch": 1.33,
|
| 7998 |
+
"learning_rate": 4.853043772742709e-05,
|
| 7999 |
+
"loss": 0.2986,
|
| 8000 |
+
"step": 666000
|
| 8001 |
+
},
|
| 8002 |
+
{
|
| 8003 |
+
"epoch": 1.33,
|
| 8004 |
+
"learning_rate": 4.8427093744474364e-05,
|
| 8005 |
+
"loss": 0.299,
|
| 8006 |
+
"step": 666500
|
| 8007 |
+
},
|
| 8008 |
+
{
|
| 8009 |
+
"epoch": 1.33,
|
| 8010 |
+
"learning_rate": 4.832383608064172e-05,
|
| 8011 |
+
"loss": 0.2992,
|
| 8012 |
+
"step": 667000
|
| 8013 |
+
},
|
| 8014 |
+
{
|
| 8015 |
+
"epoch": 1.33,
|
| 8016 |
+
"learning_rate": 4.822066501823172e-05,
|
| 8017 |
+
"loss": 0.299,
|
| 8018 |
+
"step": 667500
|
| 8019 |
+
},
|
| 8020 |
+
{
|
| 8021 |
+
"epoch": 1.34,
|
| 8022 |
+
"learning_rate": 4.811758083931005e-05,
|
| 8023 |
+
"loss": 0.2984,
|
| 8024 |
+
"step": 668000
|
| 8025 |
+
},
|
| 8026 |
+
{
|
| 8027 |
+
"epoch": 1.34,
|
| 8028 |
+
"learning_rate": 4.8014583825704976e-05,
|
| 8029 |
+
"loss": 0.2982,
|
| 8030 |
+
"step": 668500
|
| 8031 |
+
},
|
| 8032 |
+
{
|
| 8033 |
+
"epoch": 1.34,
|
| 8034 |
+
"learning_rate": 4.791167425900632e-05,
|
| 8035 |
+
"loss": 0.2988,
|
| 8036 |
+
"step": 669000
|
| 8037 |
+
},
|
| 8038 |
+
{
|
| 8039 |
+
"epoch": 1.34,
|
| 8040 |
+
"learning_rate": 4.780885242056493e-05,
|
| 8041 |
+
"loss": 0.2983,
|
| 8042 |
+
"step": 669500
|
| 8043 |
+
},
|
| 8044 |
+
{
|
| 8045 |
+
"epoch": 1.34,
|
| 8046 |
+
"learning_rate": 4.770611859149185e-05,
|
| 8047 |
+
"loss": 0.2987,
|
| 8048 |
+
"step": 670000
|
| 8049 |
}
|
| 8050 |
],
|
| 8051 |
"max_steps": 1000000,
|
| 8052 |
"num_train_epochs": 2,
|
| 8053 |
+
"total_flos": 4.529677636130487e+22,
|
| 8054 |
"trial_name": null,
|
| 8055 |
"trial_params": null
|
| 8056 |
}
|
pytorch_model.bin
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 449450757
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:2d66a36b6e02c5a5390d3c9dec0faf002f0a21fa7c7b5ef13a837f052f84e013
|
| 3 |
size 449450757
|