Training in progress, step 860000
Browse files- last-checkpoint/optimizer.pt +1 -1
- last-checkpoint/pytorch_model.bin +1 -1
- last-checkpoint/rng_state_0.pth +1 -1
- last-checkpoint/rng_state_1.pth +1 -1
- last-checkpoint/rng_state_2.pth +1 -1
- last-checkpoint/rng_state_3.pth +1 -1
- last-checkpoint/rng_state_4.pth +1 -1
- last-checkpoint/rng_state_5.pth +1 -1
- last-checkpoint/rng_state_6.pth +1 -1
- last-checkpoint/rng_state_7.pth +1 -1
- last-checkpoint/scheduler.pt +1 -1
- last-checkpoint/trainer_state.json +123 -3
- pytorch_model.bin +1 -1
last-checkpoint/optimizer.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 893441093
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:33c2dd3be5413f957dd0bc28a15d74ed06d6494592da1e0b0b7a9b5cff98543f
|
| 3 |
size 893441093
|
last-checkpoint/pytorch_model.bin
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 449450757
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:a71f43fa4af460f6d355ec42e55c1a209c0d00e00e1e71a83db48fda6386bd08
|
| 3 |
size 449450757
|
last-checkpoint/rng_state_0.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14583
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:3e95df6a4d037778eea05d14b6a856440647a0d0d1cd96b14fe0089db3d2ca05
|
| 3 |
size 14583
|
last-checkpoint/rng_state_1.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14583
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:3e461a2edfce20979aaf8643fc972588013ea8710a13005a233ed1e93cff795c
|
| 3 |
size 14583
|
last-checkpoint/rng_state_2.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14583
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:1ce93d230c2b3e0029a85fd7329dfec40cd7b3ce89657776854f91684f3563c3
|
| 3 |
size 14583
|
last-checkpoint/rng_state_3.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14583
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:4e389a444854e3a8498410c2bcb75d082ab0efa10fdcbb6bf4f0273b2bb86e5d
|
| 3 |
size 14583
|
last-checkpoint/rng_state_4.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14583
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:5700265db9033170469a10a97e864c396caa87c59262a13775cece6d18e13d0d
|
| 3 |
size 14583
|
last-checkpoint/rng_state_5.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14583
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:b0f1d070f7247d43e02284bd789b296fa89aaa5d91db9a24f1226ebc2a6cb000
|
| 3 |
size 14583
|
last-checkpoint/rng_state_6.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14583
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:8b4a68e81a6718c76544b602cd99c9a2dc4ab2f4f768d0dfcb99f521ee1fd60e
|
| 3 |
size 14583
|
last-checkpoint/rng_state_7.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14583
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:a84068892fd087eddda1aa0aa2f97caf64ae34a34d55297ff6867be4553c6f26
|
| 3 |
size 14583
|
last-checkpoint/scheduler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 627
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:31e18eaa468ae4a1684f551cb018df5d6a9cb8eadcd4fdd17b09a3c12ad301d8
|
| 3 |
size 627
|
last-checkpoint/trainer_state.json
CHANGED
|
@@ -1,8 +1,8 @@
|
|
| 1 |
{
|
| 2 |
"best_metric": null,
|
| 3 |
"best_model_checkpoint": null,
|
| 4 |
-
"epoch": 1.
|
| 5 |
-
"global_step":
|
| 6 |
"is_hyper_param_search": false,
|
| 7 |
"is_local_process_zero": true,
|
| 8 |
"is_world_process_zero": true,
|
|
@@ -10206,11 +10206,131 @@
|
|
| 10206 |
"learning_rate": 1.8436837415545772e-05,
|
| 10207 |
"loss": 0.2874,
|
| 10208 |
"step": 850000
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 10209 |
}
|
| 10210 |
],
|
| 10211 |
"max_steps": 1000000,
|
| 10212 |
"num_train_epochs": 2,
|
| 10213 |
-
"total_flos": 5.
|
| 10214 |
"trial_name": null,
|
| 10215 |
"trial_params": null
|
| 10216 |
}
|
|
|
|
| 1 |
{
|
| 2 |
"best_metric": null,
|
| 3 |
"best_model_checkpoint": null,
|
| 4 |
+
"epoch": 1.2590859447867921,
|
| 5 |
+
"global_step": 860000,
|
| 6 |
"is_hyper_param_search": false,
|
| 7 |
"is_local_process_zero": true,
|
| 8 |
"is_world_process_zero": true,
|
|
|
|
| 10206 |
"learning_rate": 1.8436837415545772e-05,
|
| 10207 |
"loss": 0.2874,
|
| 10208 |
"step": 850000
|
| 10209 |
+
},
|
| 10210 |
+
{
|
| 10211 |
+
"epoch": 1.24,
|
| 10212 |
+
"learning_rate": 1.838183407087156e-05,
|
| 10213 |
+
"loss": 0.2878,
|
| 10214 |
+
"step": 850500
|
| 10215 |
+
},
|
| 10216 |
+
{
|
| 10217 |
+
"epoch": 1.24,
|
| 10218 |
+
"learning_rate": 1.8326999187910095e-05,
|
| 10219 |
+
"loss": 0.2872,
|
| 10220 |
+
"step": 851000
|
| 10221 |
+
},
|
| 10222 |
+
{
|
| 10223 |
+
"epoch": 1.24,
|
| 10224 |
+
"learning_rate": 1.8272332916577875e-05,
|
| 10225 |
+
"loss": 0.2867,
|
| 10226 |
+
"step": 851500
|
| 10227 |
+
},
|
| 10228 |
+
{
|
| 10229 |
+
"epoch": 1.24,
|
| 10230 |
+
"learning_rate": 1.8217835406330415e-05,
|
| 10231 |
+
"loss": 0.2874,
|
| 10232 |
+
"step": 852000
|
| 10233 |
+
},
|
| 10234 |
+
{
|
| 10235 |
+
"epoch": 1.24,
|
| 10236 |
+
"learning_rate": 1.81635068061618e-05,
|
| 10237 |
+
"loss": 0.2873,
|
| 10238 |
+
"step": 852500
|
| 10239 |
+
},
|
| 10240 |
+
{
|
| 10241 |
+
"epoch": 1.25,
|
| 10242 |
+
"learning_rate": 1.810934726460436e-05,
|
| 10243 |
+
"loss": 0.2875,
|
| 10244 |
+
"step": 853000
|
| 10245 |
+
},
|
| 10246 |
+
{
|
| 10247 |
+
"epoch": 1.25,
|
| 10248 |
+
"learning_rate": 1.80553569297282e-05,
|
| 10249 |
+
"loss": 0.2873,
|
| 10250 |
+
"step": 853500
|
| 10251 |
+
},
|
| 10252 |
+
{
|
| 10253 |
+
"epoch": 1.25,
|
| 10254 |
+
"learning_rate": 1.800153594914084e-05,
|
| 10255 |
+
"loss": 0.2875,
|
| 10256 |
+
"step": 854000
|
| 10257 |
+
},
|
| 10258 |
+
{
|
| 10259 |
+
"epoch": 1.25,
|
| 10260 |
+
"learning_rate": 1.7947884469986816e-05,
|
| 10261 |
+
"loss": 0.2872,
|
| 10262 |
+
"step": 854500
|
| 10263 |
+
},
|
| 10264 |
+
{
|
| 10265 |
+
"epoch": 1.25,
|
| 10266 |
+
"learning_rate": 1.7894402638947176e-05,
|
| 10267 |
+
"loss": 0.287,
|
| 10268 |
+
"step": 855000
|
| 10269 |
+
},
|
| 10270 |
+
{
|
| 10271 |
+
"epoch": 1.25,
|
| 10272 |
+
"learning_rate": 1.7841090602239237e-05,
|
| 10273 |
+
"loss": 0.2872,
|
| 10274 |
+
"step": 855500
|
| 10275 |
+
},
|
| 10276 |
+
{
|
| 10277 |
+
"epoch": 1.25,
|
| 10278 |
+
"learning_rate": 1.778794850561604e-05,
|
| 10279 |
+
"loss": 0.2871,
|
| 10280 |
+
"step": 856000
|
| 10281 |
+
},
|
| 10282 |
+
{
|
| 10283 |
+
"epoch": 1.25,
|
| 10284 |
+
"learning_rate": 1.7734976494366073e-05,
|
| 10285 |
+
"loss": 0.2873,
|
| 10286 |
+
"step": 856500
|
| 10287 |
+
},
|
| 10288 |
+
{
|
| 10289 |
+
"epoch": 1.25,
|
| 10290 |
+
"learning_rate": 1.7682174713312805e-05,
|
| 10291 |
+
"loss": 0.2871,
|
| 10292 |
+
"step": 857000
|
| 10293 |
+
},
|
| 10294 |
+
{
|
| 10295 |
+
"epoch": 1.25,
|
| 10296 |
+
"learning_rate": 1.7629543306814255e-05,
|
| 10297 |
+
"loss": 0.2875,
|
| 10298 |
+
"step": 857500
|
| 10299 |
+
},
|
| 10300 |
+
{
|
| 10301 |
+
"epoch": 1.26,
|
| 10302 |
+
"learning_rate": 1.75770824187627e-05,
|
| 10303 |
+
"loss": 0.287,
|
| 10304 |
+
"step": 858000
|
| 10305 |
+
},
|
| 10306 |
+
{
|
| 10307 |
+
"epoch": 1.26,
|
| 10308 |
+
"learning_rate": 1.7524792192584186e-05,
|
| 10309 |
+
"loss": 0.2865,
|
| 10310 |
+
"step": 858500
|
| 10311 |
+
},
|
| 10312 |
+
{
|
| 10313 |
+
"epoch": 1.26,
|
| 10314 |
+
"learning_rate": 1.747267277123821e-05,
|
| 10315 |
+
"loss": 0.2867,
|
| 10316 |
+
"step": 859000
|
| 10317 |
+
},
|
| 10318 |
+
{
|
| 10319 |
+
"epoch": 1.26,
|
| 10320 |
+
"learning_rate": 1.74207242972173e-05,
|
| 10321 |
+
"loss": 0.2862,
|
| 10322 |
+
"step": 859500
|
| 10323 |
+
},
|
| 10324 |
+
{
|
| 10325 |
+
"epoch": 1.26,
|
| 10326 |
+
"learning_rate": 1.7368946912546556e-05,
|
| 10327 |
+
"loss": 0.2878,
|
| 10328 |
+
"step": 860000
|
| 10329 |
}
|
| 10330 |
],
|
| 10331 |
"max_steps": 1000000,
|
| 10332 |
"num_train_epochs": 2,
|
| 10333 |
+
"total_flos": 5.8141983412238066e+22,
|
| 10334 |
"trial_name": null,
|
| 10335 |
"trial_params": null
|
| 10336 |
}
|
pytorch_model.bin
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 449450757
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:a71f43fa4af460f6d355ec42e55c1a209c0d00e00e1e71a83db48fda6386bd08
|
| 3 |
size 449450757
|