Training in progress, step 940000
Browse files- last-checkpoint/optimizer.pt +1 -1
- last-checkpoint/pytorch_model.bin +1 -1
- last-checkpoint/rng_state_0.pth +1 -1
- last-checkpoint/rng_state_1.pth +1 -1
- last-checkpoint/rng_state_2.pth +1 -1
- last-checkpoint/rng_state_3.pth +1 -1
- last-checkpoint/rng_state_4.pth +1 -1
- last-checkpoint/rng_state_5.pth +1 -1
- last-checkpoint/rng_state_6.pth +1 -1
- last-checkpoint/rng_state_7.pth +1 -1
- last-checkpoint/scheduler.pt +1 -1
- last-checkpoint/trainer_state.json +123 -3
- pytorch_model.bin +1 -1
last-checkpoint/optimizer.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 893441093
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:eecce8a24d008f6560478b115a93f60ad26f968d3ddf31f980be259930161927
|
| 3 |
size 893441093
|
last-checkpoint/pytorch_model.bin
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 449450757
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:3e98104480ffe165c63b6085beb91814af4abc00786a31fbe9ca7364388e7fd7
|
| 3 |
size 449450757
|
last-checkpoint/rng_state_0.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14583
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:6e5a9ccad5520e92d8772f310d7fdda3e07cfbb13ef5c7d62c7867e7bebc124b
|
| 3 |
size 14583
|
last-checkpoint/rng_state_1.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14583
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:c122c7c46f7a52340c6d76de6ecac3033b4eb22c4f622df7095c80bbdc58bbda
|
| 3 |
size 14583
|
last-checkpoint/rng_state_2.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14583
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:ed89074af6bf56092de6f7f69bec6b0962f68e0ef26b7f849107336565f843e4
|
| 3 |
size 14583
|
last-checkpoint/rng_state_3.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14583
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:3c180309b549d4feb0af8c96f555ed5574acd58bfc58b660812ae5e9d9e08c50
|
| 3 |
size 14583
|
last-checkpoint/rng_state_4.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14583
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:f0bb0af833997f5b0c50461291524b2fc678bcc9d26f6e79d5bfe28f62abb339
|
| 3 |
size 14583
|
last-checkpoint/rng_state_5.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14583
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:824645f1c25785303da3dc203bf2689aba1f62a78c6bdfef5a484af4a0860aef
|
| 3 |
size 14583
|
last-checkpoint/rng_state_6.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14583
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:e242b745198753e15159abe0972314bc254dd07db526f1dc4d19cc5c285dfb8e
|
| 3 |
size 14583
|
last-checkpoint/rng_state_7.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14583
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:a0e565516de72353a91a8afad67f1979b57e142666bec47e27765241642f3d4f
|
| 3 |
size 14583
|
last-checkpoint/scheduler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 627
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:a619368d8fa94ba3412b069e884d3c02325231635774dd381c11c0f2a15299d7
|
| 3 |
size 627
|
last-checkpoint/trainer_state.json
CHANGED
|
@@ -1,8 +1,8 @@
|
|
| 1 |
{
|
| 2 |
"best_metric": null,
|
| 3 |
"best_model_checkpoint": null,
|
| 4 |
-
"epoch": 1.
|
| 5 |
-
"global_step":
|
| 6 |
"is_hyper_param_search": false,
|
| 7 |
"is_local_process_zero": true,
|
| 8 |
"is_world_process_zero": true,
|
|
@@ -11166,11 +11166,131 @@
|
|
| 11166 |
"learning_rate": 1.1867138192639601e-05,
|
| 11167 |
"loss": 0.2842,
|
| 11168 |
"step": 930000
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 11169 |
}
|
| 11170 |
],
|
| 11171 |
"max_steps": 1000000,
|
| 11172 |
"num_train_epochs": 2,
|
| 11173 |
-
"total_flos": 6.
|
| 11174 |
"trial_name": null,
|
| 11175 |
"trial_params": null
|
| 11176 |
}
|
|
|
|
| 1 |
{
|
| 2 |
"best_metric": null,
|
| 3 |
"best_model_checkpoint": null,
|
| 4 |
+
"epoch": 1.418523449270972,
|
| 5 |
+
"global_step": 940000,
|
| 6 |
"is_hyper_param_search": false,
|
| 7 |
"is_local_process_zero": true,
|
| 8 |
"is_world_process_zero": true,
|
|
|
|
| 11166 |
"learning_rate": 1.1867138192639601e-05,
|
| 11167 |
"loss": 0.2842,
|
| 11168 |
"step": 930000
|
| 11169 |
+
},
|
| 11170 |
+
{
|
| 11171 |
+
"epoch": 1.4,
|
| 11172 |
+
"learning_rate": 1.1840677154152987e-05,
|
| 11173 |
+
"loss": 0.2848,
|
| 11174 |
+
"step": 930500
|
| 11175 |
+
},
|
| 11176 |
+
{
|
| 11177 |
+
"epoch": 1.4,
|
| 11178 |
+
"learning_rate": 1.1814402460652382e-05,
|
| 11179 |
+
"loss": 0.2842,
|
| 11180 |
+
"step": 931000
|
| 11181 |
+
},
|
| 11182 |
+
{
|
| 11183 |
+
"epoch": 1.4,
|
| 11184 |
+
"learning_rate": 1.178831418397181e-05,
|
| 11185 |
+
"loss": 0.2839,
|
| 11186 |
+
"step": 931500
|
| 11187 |
+
},
|
| 11188 |
+
{
|
| 11189 |
+
"epoch": 1.4,
|
| 11190 |
+
"learning_rate": 1.176241239543558e-05,
|
| 11191 |
+
"loss": 0.2842,
|
| 11192 |
+
"step": 932000
|
| 11193 |
+
},
|
| 11194 |
+
{
|
| 11195 |
+
"epoch": 1.4,
|
| 11196 |
+
"learning_rate": 1.173669716585822e-05,
|
| 11197 |
+
"loss": 0.2847,
|
| 11198 |
+
"step": 932500
|
| 11199 |
+
},
|
| 11200 |
+
{
|
| 11201 |
+
"epoch": 1.4,
|
| 11202 |
+
"learning_rate": 1.171116856554418e-05,
|
| 11203 |
+
"loss": 0.2836,
|
| 11204 |
+
"step": 933000
|
| 11205 |
+
},
|
| 11206 |
+
{
|
| 11207 |
+
"epoch": 1.41,
|
| 11208 |
+
"learning_rate": 1.168582666428768e-05,
|
| 11209 |
+
"loss": 0.284,
|
| 11210 |
+
"step": 933500
|
| 11211 |
+
},
|
| 11212 |
+
{
|
| 11213 |
+
"epoch": 1.41,
|
| 11214 |
+
"learning_rate": 1.1660671531372517e-05,
|
| 11215 |
+
"loss": 0.2837,
|
| 11216 |
+
"step": 934000
|
| 11217 |
+
},
|
| 11218 |
+
{
|
| 11219 |
+
"epoch": 1.41,
|
| 11220 |
+
"learning_rate": 1.1635703235571846e-05,
|
| 11221 |
+
"loss": 0.2848,
|
| 11222 |
+
"step": 934500
|
| 11223 |
+
},
|
| 11224 |
+
{
|
| 11225 |
+
"epoch": 1.41,
|
| 11226 |
+
"learning_rate": 1.1610921845148052e-05,
|
| 11227 |
+
"loss": 0.2845,
|
| 11228 |
+
"step": 935000
|
| 11229 |
+
},
|
| 11230 |
+
{
|
| 11231 |
+
"epoch": 1.41,
|
| 11232 |
+
"learning_rate": 1.1586327427852503e-05,
|
| 11233 |
+
"loss": 0.2847,
|
| 11234 |
+
"step": 935500
|
| 11235 |
+
},
|
| 11236 |
+
{
|
| 11237 |
+
"epoch": 1.41,
|
| 11238 |
+
"learning_rate": 1.156192005092539e-05,
|
| 11239 |
+
"loss": 0.2843,
|
| 11240 |
+
"step": 936000
|
| 11241 |
+
},
|
| 11242 |
+
{
|
| 11243 |
+
"epoch": 1.41,
|
| 11244 |
+
"learning_rate": 1.153769978109557e-05,
|
| 11245 |
+
"loss": 0.2836,
|
| 11246 |
+
"step": 936500
|
| 11247 |
+
},
|
| 11248 |
+
{
|
| 11249 |
+
"epoch": 1.41,
|
| 11250 |
+
"learning_rate": 1.1513666684580308e-05,
|
| 11251 |
+
"loss": 0.2847,
|
| 11252 |
+
"step": 937000
|
| 11253 |
+
},
|
| 11254 |
+
{
|
| 11255 |
+
"epoch": 1.41,
|
| 11256 |
+
"learning_rate": 1.1489820827085185e-05,
|
| 11257 |
+
"loss": 0.2839,
|
| 11258 |
+
"step": 937500
|
| 11259 |
+
},
|
| 11260 |
+
{
|
| 11261 |
+
"epoch": 1.41,
|
| 11262 |
+
"learning_rate": 1.1466162273803876e-05,
|
| 11263 |
+
"loss": 0.2844,
|
| 11264 |
+
"step": 938000
|
| 11265 |
+
},
|
| 11266 |
+
{
|
| 11267 |
+
"epoch": 1.42,
|
| 11268 |
+
"learning_rate": 1.144269108941795e-05,
|
| 11269 |
+
"loss": 0.284,
|
| 11270 |
+
"step": 938500
|
| 11271 |
+
},
|
| 11272 |
+
{
|
| 11273 |
+
"epoch": 1.42,
|
| 11274 |
+
"learning_rate": 1.1419407338096732e-05,
|
| 11275 |
+
"loss": 0.285,
|
| 11276 |
+
"step": 939000
|
| 11277 |
+
},
|
| 11278 |
+
{
|
| 11279 |
+
"epoch": 1.42,
|
| 11280 |
+
"learning_rate": 1.1396311083497103e-05,
|
| 11281 |
+
"loss": 0.2841,
|
| 11282 |
+
"step": 939500
|
| 11283 |
+
},
|
| 11284 |
+
{
|
| 11285 |
+
"epoch": 1.42,
|
| 11286 |
+
"learning_rate": 1.1373402388763346e-05,
|
| 11287 |
+
"loss": 0.2834,
|
| 11288 |
+
"step": 940000
|
| 11289 |
}
|
| 11290 |
],
|
| 11291 |
"max_steps": 1000000,
|
| 11292 |
"num_train_epochs": 2,
|
| 11293 |
+
"total_flos": 6.355066839128279e+22,
|
| 11294 |
"trial_name": null,
|
| 11295 |
"trial_params": null
|
| 11296 |
}
|
pytorch_model.bin
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 449450757
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:3e98104480ffe165c63b6085beb91814af4abc00786a31fbe9ca7364388e7fd7
|
| 3 |
size 449450757
|