Training in progress, step 910000
Browse files- last-checkpoint/optimizer.pt +1 -1
- last-checkpoint/pytorch_model.bin +1 -1
- last-checkpoint/rng_state_0.pth +1 -1
- last-checkpoint/rng_state_1.pth +1 -1
- last-checkpoint/rng_state_2.pth +1 -1
- last-checkpoint/rng_state_3.pth +1 -1
- last-checkpoint/rng_state_4.pth +1 -1
- last-checkpoint/rng_state_5.pth +1 -1
- last-checkpoint/rng_state_6.pth +1 -1
- last-checkpoint/rng_state_7.pth +1 -1
- last-checkpoint/scheduler.pt +1 -1
- last-checkpoint/trainer_state.json +203 -3
- pytorch_model.bin +1 -1
last-checkpoint/optimizer.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 893439185
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:286afc89953ac85c207ddee00ff74005ad2f262dcd53bfa64635a387b524e2f5
|
| 3 |
size 893439185
|
last-checkpoint/pytorch_model.bin
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 449471589
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:1b52f3b8fcfa70b1731fae94d573cc6b63207a962d882488f83af9b17655c7c7
|
| 3 |
size 449471589
|
last-checkpoint/rng_state_0.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14503
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:8ff175a5d87dd6b1bf5f6c9e51e5ee8b16642953afdff5a8deb87c35e540e812
|
| 3 |
size 14503
|
last-checkpoint/rng_state_1.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14503
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:8ff175a5d87dd6b1bf5f6c9e51e5ee8b16642953afdff5a8deb87c35e540e812
|
| 3 |
size 14503
|
last-checkpoint/rng_state_2.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14503
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:8ff175a5d87dd6b1bf5f6c9e51e5ee8b16642953afdff5a8deb87c35e540e812
|
| 3 |
size 14503
|
last-checkpoint/rng_state_3.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14503
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:8ff175a5d87dd6b1bf5f6c9e51e5ee8b16642953afdff5a8deb87c35e540e812
|
| 3 |
size 14503
|
last-checkpoint/rng_state_4.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14503
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:8ff175a5d87dd6b1bf5f6c9e51e5ee8b16642953afdff5a8deb87c35e540e812
|
| 3 |
size 14503
|
last-checkpoint/rng_state_5.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14503
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:8ff175a5d87dd6b1bf5f6c9e51e5ee8b16642953afdff5a8deb87c35e540e812
|
| 3 |
size 14503
|
last-checkpoint/rng_state_6.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14503
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:8ff175a5d87dd6b1bf5f6c9e51e5ee8b16642953afdff5a8deb87c35e540e812
|
| 3 |
size 14503
|
last-checkpoint/rng_state_7.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14503
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:8ff175a5d87dd6b1bf5f6c9e51e5ee8b16642953afdff5a8deb87c35e540e812
|
| 3 |
size 14503
|
last-checkpoint/scheduler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 623
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:68bd93c5df85b9ef6b8dfb004005413abc49b194d979c692716ee25211f1498f
|
| 3 |
size 623
|
last-checkpoint/trainer_state.json
CHANGED
|
@@ -1,8 +1,8 @@
|
|
| 1 |
{
|
| 2 |
"best_metric": null,
|
| 3 |
"best_model_checkpoint": null,
|
| 4 |
-
"epoch": 9.
|
| 5 |
-
"global_step":
|
| 6 |
"is_hyper_param_search": false,
|
| 7 |
"is_local_process_zero": true,
|
| 8 |
"is_world_process_zero": true,
|
|
@@ -18006,11 +18006,211 @@
|
|
| 18006 |
"eval_samples_per_second": 854.803,
|
| 18007 |
"eval_steps_per_second": 13.397,
|
| 18008 |
"step": 900000
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 18009 |
}
|
| 18010 |
],
|
| 18011 |
"max_steps": 1000000,
|
| 18012 |
"num_train_epochs": 12,
|
| 18013 |
-
"total_flos": 6.
|
| 18014 |
"trial_name": null,
|
| 18015 |
"trial_params": null
|
| 18016 |
}
|
|
|
|
| 1 |
{
|
| 2 |
"best_metric": null,
|
| 3 |
"best_model_checkpoint": null,
|
| 4 |
+
"epoch": 9.791976623579403,
|
| 5 |
+
"global_step": 910000,
|
| 6 |
"is_hyper_param_search": false,
|
| 7 |
"is_local_process_zero": true,
|
| 8 |
"is_world_process_zero": true,
|
|
|
|
| 18006 |
"eval_samples_per_second": 854.803,
|
| 18007 |
"eval_steps_per_second": 13.397,
|
| 18008 |
"step": 900000
|
| 18009 |
+
},
|
| 18010 |
+
{
|
| 18011 |
+
"epoch": 9.69,
|
| 18012 |
+
"learning_rate": 1.3755301948017599e-05,
|
| 18013 |
+
"loss": 0.1822,
|
| 18014 |
+
"step": 900500
|
| 18015 |
+
},
|
| 18016 |
+
{
|
| 18017 |
+
"epoch": 9.69,
|
| 18018 |
+
"learning_rate": 1.3717991925554562e-05,
|
| 18019 |
+
"loss": 0.1821,
|
| 18020 |
+
"step": 901000
|
| 18021 |
+
},
|
| 18022 |
+
{
|
| 18023 |
+
"epoch": 9.69,
|
| 18024 |
+
"eval_loss": 0.17332369089126587,
|
| 18025 |
+
"eval_runtime": 2.5587,
|
| 18026 |
+
"eval_samples_per_second": 897.731,
|
| 18027 |
+
"eval_steps_per_second": 14.07,
|
| 18028 |
+
"step": 901000
|
| 18029 |
+
},
|
| 18030 |
+
{
|
| 18031 |
+
"epoch": 9.7,
|
| 18032 |
+
"learning_rate": 1.368086311557062e-05,
|
| 18033 |
+
"loss": 0.1821,
|
| 18034 |
+
"step": 901500
|
| 18035 |
+
},
|
| 18036 |
+
{
|
| 18037 |
+
"epoch": 9.7,
|
| 18038 |
+
"learning_rate": 1.3643915619574529e-05,
|
| 18039 |
+
"loss": 0.1821,
|
| 18040 |
+
"step": 902000
|
| 18041 |
+
},
|
| 18042 |
+
{
|
| 18043 |
+
"epoch": 9.7,
|
| 18044 |
+
"eval_loss": 0.1713598072528839,
|
| 18045 |
+
"eval_runtime": 2.7303,
|
| 18046 |
+
"eval_samples_per_second": 841.295,
|
| 18047 |
+
"eval_steps_per_second": 13.185,
|
| 18048 |
+
"step": 902000
|
| 18049 |
+
},
|
| 18050 |
+
{
|
| 18051 |
+
"epoch": 9.71,
|
| 18052 |
+
"learning_rate": 1.3607149538579341e-05,
|
| 18053 |
+
"loss": 0.182,
|
| 18054 |
+
"step": 902500
|
| 18055 |
+
},
|
| 18056 |
+
{
|
| 18057 |
+
"epoch": 9.71,
|
| 18058 |
+
"learning_rate": 1.35705649731021e-05,
|
| 18059 |
+
"loss": 0.1816,
|
| 18060 |
+
"step": 903000
|
| 18061 |
+
},
|
| 18062 |
+
{
|
| 18063 |
+
"epoch": 9.71,
|
| 18064 |
+
"eval_loss": 0.17178992927074432,
|
| 18065 |
+
"eval_runtime": 2.6316,
|
| 18066 |
+
"eval_samples_per_second": 872.853,
|
| 18067 |
+
"eval_steps_per_second": 13.68,
|
| 18068 |
+
"step": 903000
|
| 18069 |
+
},
|
| 18070 |
+
{
|
| 18071 |
+
"epoch": 9.72,
|
| 18072 |
+
"learning_rate": 1.3534162023163642e-05,
|
| 18073 |
+
"loss": 0.1821,
|
| 18074 |
+
"step": 903500
|
| 18075 |
+
},
|
| 18076 |
+
{
|
| 18077 |
+
"epoch": 9.73,
|
| 18078 |
+
"learning_rate": 1.3497940788288195e-05,
|
| 18079 |
+
"loss": 0.182,
|
| 18080 |
+
"step": 904000
|
| 18081 |
+
},
|
| 18082 |
+
{
|
| 18083 |
+
"epoch": 9.73,
|
| 18084 |
+
"eval_loss": 0.17401227355003357,
|
| 18085 |
+
"eval_runtime": 2.6109,
|
| 18086 |
+
"eval_samples_per_second": 879.785,
|
| 18087 |
+
"eval_steps_per_second": 13.789,
|
| 18088 |
+
"step": 904000
|
| 18089 |
+
},
|
| 18090 |
+
{
|
| 18091 |
+
"epoch": 9.73,
|
| 18092 |
+
"learning_rate": 1.3461901367503262e-05,
|
| 18093 |
+
"loss": 0.1816,
|
| 18094 |
+
"step": 904500
|
| 18095 |
+
},
|
| 18096 |
+
{
|
| 18097 |
+
"epoch": 9.74,
|
| 18098 |
+
"learning_rate": 1.3426043859339253e-05,
|
| 18099 |
+
"loss": 0.1822,
|
| 18100 |
+
"step": 905000
|
| 18101 |
+
},
|
| 18102 |
+
{
|
| 18103 |
+
"epoch": 9.74,
|
| 18104 |
+
"eval_loss": 0.16998076438903809,
|
| 18105 |
+
"eval_runtime": 2.6306,
|
| 18106 |
+
"eval_samples_per_second": 873.188,
|
| 18107 |
+
"eval_steps_per_second": 13.685,
|
| 18108 |
+
"step": 905000
|
| 18109 |
+
},
|
| 18110 |
+
{
|
| 18111 |
+
"epoch": 9.74,
|
| 18112 |
+
"learning_rate": 1.3390368361829197e-05,
|
| 18113 |
+
"loss": 0.1818,
|
| 18114 |
+
"step": 905500
|
| 18115 |
+
},
|
| 18116 |
+
{
|
| 18117 |
+
"epoch": 9.75,
|
| 18118 |
+
"learning_rate": 1.3354874972508582e-05,
|
| 18119 |
+
"loss": 0.1815,
|
| 18120 |
+
"step": 906000
|
| 18121 |
+
},
|
| 18122 |
+
{
|
| 18123 |
+
"epoch": 9.75,
|
| 18124 |
+
"eval_loss": 0.17435437440872192,
|
| 18125 |
+
"eval_runtime": 2.7558,
|
| 18126 |
+
"eval_samples_per_second": 833.52,
|
| 18127 |
+
"eval_steps_per_second": 13.063,
|
| 18128 |
+
"step": 906000
|
| 18129 |
+
},
|
| 18130 |
+
{
|
| 18131 |
+
"epoch": 9.75,
|
| 18132 |
+
"learning_rate": 1.3319563788414934e-05,
|
| 18133 |
+
"loss": 0.182,
|
| 18134 |
+
"step": 906500
|
| 18135 |
+
},
|
| 18136 |
+
{
|
| 18137 |
+
"epoch": 9.76,
|
| 18138 |
+
"learning_rate": 1.3284434906087695e-05,
|
| 18139 |
+
"loss": 0.1823,
|
| 18140 |
+
"step": 907000
|
| 18141 |
+
},
|
| 18142 |
+
{
|
| 18143 |
+
"epoch": 9.76,
|
| 18144 |
+
"eval_loss": 0.17397646605968475,
|
| 18145 |
+
"eval_runtime": 2.6707,
|
| 18146 |
+
"eval_samples_per_second": 860.067,
|
| 18147 |
+
"eval_steps_per_second": 13.479,
|
| 18148 |
+
"step": 907000
|
| 18149 |
+
},
|
| 18150 |
+
{
|
| 18151 |
+
"epoch": 9.76,
|
| 18152 |
+
"learning_rate": 1.3249488421567911e-05,
|
| 18153 |
+
"loss": 0.1818,
|
| 18154 |
+
"step": 907500
|
| 18155 |
+
},
|
| 18156 |
+
{
|
| 18157 |
+
"epoch": 9.77,
|
| 18158 |
+
"learning_rate": 1.3214724430397915e-05,
|
| 18159 |
+
"loss": 0.1817,
|
| 18160 |
+
"step": 908000
|
| 18161 |
+
},
|
| 18162 |
+
{
|
| 18163 |
+
"epoch": 9.77,
|
| 18164 |
+
"eval_loss": 0.17298473417758942,
|
| 18165 |
+
"eval_runtime": 2.659,
|
| 18166 |
+
"eval_samples_per_second": 863.865,
|
| 18167 |
+
"eval_steps_per_second": 13.539,
|
| 18168 |
+
"step": 908000
|
| 18169 |
+
},
|
| 18170 |
+
{
|
| 18171 |
+
"epoch": 9.78,
|
| 18172 |
+
"learning_rate": 1.3180143027621145e-05,
|
| 18173 |
+
"loss": 0.1819,
|
| 18174 |
+
"step": 908500
|
| 18175 |
+
},
|
| 18176 |
+
{
|
| 18177 |
+
"epoch": 9.78,
|
| 18178 |
+
"learning_rate": 1.314574430778182e-05,
|
| 18179 |
+
"loss": 0.1817,
|
| 18180 |
+
"step": 909000
|
| 18181 |
+
},
|
| 18182 |
+
{
|
| 18183 |
+
"epoch": 9.78,
|
| 18184 |
+
"eval_loss": 0.17148981988430023,
|
| 18185 |
+
"eval_runtime": 2.7078,
|
| 18186 |
+
"eval_samples_per_second": 848.287,
|
| 18187 |
+
"eval_steps_per_second": 13.295,
|
| 18188 |
+
"step": 909000
|
| 18189 |
+
},
|
| 18190 |
+
{
|
| 18191 |
+
"epoch": 9.79,
|
| 18192 |
+
"learning_rate": 1.311152836492473e-05,
|
| 18193 |
+
"loss": 0.1817,
|
| 18194 |
+
"step": 909500
|
| 18195 |
+
},
|
| 18196 |
+
{
|
| 18197 |
+
"epoch": 9.79,
|
| 18198 |
+
"learning_rate": 1.3077495292594966e-05,
|
| 18199 |
+
"loss": 0.1817,
|
| 18200 |
+
"step": 910000
|
| 18201 |
+
},
|
| 18202 |
+
{
|
| 18203 |
+
"epoch": 9.79,
|
| 18204 |
+
"eval_loss": 0.17322474718093872,
|
| 18205 |
+
"eval_runtime": 2.6678,
|
| 18206 |
+
"eval_samples_per_second": 861.024,
|
| 18207 |
+
"eval_steps_per_second": 13.495,
|
| 18208 |
+
"step": 910000
|
| 18209 |
}
|
| 18210 |
],
|
| 18211 |
"max_steps": 1000000,
|
| 18212 |
"num_train_epochs": 12,
|
| 18213 |
+
"total_flos": 6.3790744657339496e+22,
|
| 18214 |
"trial_name": null,
|
| 18215 |
"trial_params": null
|
| 18216 |
}
|
pytorch_model.bin
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 449471589
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:1b52f3b8fcfa70b1731fae94d573cc6b63207a962d882488f83af9b17655c7c7
|
| 3 |
size 449471589
|