Training in progress, step 700000
Browse files- last-checkpoint/optimizer.pt +1 -1
- last-checkpoint/pytorch_model.bin +1 -1
- last-checkpoint/rng_state_0.pth +1 -1
- last-checkpoint/rng_state_1.pth +1 -1
- last-checkpoint/rng_state_2.pth +1 -1
- last-checkpoint/rng_state_3.pth +1 -1
- last-checkpoint/rng_state_4.pth +1 -1
- last-checkpoint/rng_state_5.pth +1 -1
- last-checkpoint/rng_state_6.pth +1 -1
- last-checkpoint/rng_state_7.pth +1 -1
- last-checkpoint/scheduler.pt +1 -1
- last-checkpoint/trainer_state.json +123 -3
- pytorch_model.bin +1 -1
last-checkpoint/optimizer.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 893441093
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:81ece12c6f9c13f5471c677109a4bde83e1a050417df99ad047004ba4276bba6
|
| 3 |
size 893441093
|
last-checkpoint/pytorch_model.bin
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 449450757
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:bfb425b9bd2d7db89a175b6be9f6f3add2f5419aac09b70a86f8357b95b72148
|
| 3 |
size 449450757
|
last-checkpoint/rng_state_0.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14583
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:513fd308756b62d456cf51dbabd0e5432e6e9f801b69e6b4147b7910a92409c3
|
| 3 |
size 14583
|
last-checkpoint/rng_state_1.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14583
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:f1b41b44339013cf2c800ebd15e56c0ab490da939473759245613333c4ad094b
|
| 3 |
size 14583
|
last-checkpoint/rng_state_2.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14583
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:8acf2390c55c3fcc1c7f54edfdb7e51d825ccf03ad02c2710478f24d67d8d927
|
| 3 |
size 14583
|
last-checkpoint/rng_state_3.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14583
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:b884538c1c7cf25c9c76b4e2aeb5b233c0e82af8266b74f74badb8738101de61
|
| 3 |
size 14583
|
last-checkpoint/rng_state_4.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14583
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:d0611954260b29d5933679a0b205628fe7afa2763d89a93117a665d8810ddfaa
|
| 3 |
size 14583
|
last-checkpoint/rng_state_5.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14583
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:1a2570d894279bdec746170684ab6ee38cfa6adc0692ab4c8e2d19fca72b235d
|
| 3 |
size 14583
|
last-checkpoint/rng_state_6.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14583
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:c360a23dde048e054ef7310a763eb2729b26bcfc6d980f3d3e175d2d2287e150
|
| 3 |
size 14583
|
last-checkpoint/rng_state_7.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14583
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:ded6c10a8ae184d010b20213595dce955d5ae9ca4fec0187e6e124f4763508bf
|
| 3 |
size 14583
|
last-checkpoint/scheduler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 627
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:afc49fe155c033502b3ff00fe8f2d949db5aba4e89748d4722dc58fa6f673d45
|
| 3 |
size 627
|
last-checkpoint/trainer_state.json
CHANGED
|
@@ -1,8 +1,8 @@
|
|
| 1 |
{
|
| 2 |
"best_metric": null,
|
| 3 |
"best_model_checkpoint": null,
|
| 4 |
-
"epoch": 1.
|
| 5 |
-
"global_step":
|
| 6 |
"is_hyper_param_search": false,
|
| 7 |
"is_local_process_zero": true,
|
| 8 |
"is_world_process_zero": true,
|
|
@@ -8286,11 +8286,131 @@
|
|
| 8286 |
"learning_rate": 4.3672129835314955e-05,
|
| 8287 |
"loss": 0.2971,
|
| 8288 |
"step": 690000
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 8289 |
}
|
| 8290 |
],
|
| 8291 |
"max_steps": 1000000,
|
| 8292 |
"num_train_epochs": 2,
|
| 8293 |
-
"total_flos": 4.
|
| 8294 |
"trial_name": null,
|
| 8295 |
"trial_params": null
|
| 8296 |
}
|
|
|
|
| 1 |
{
|
| 2 |
"best_metric": null,
|
| 3 |
"best_model_checkpoint": null,
|
| 4 |
+
"epoch": 1.3993339170554817,
|
| 5 |
+
"global_step": 700000,
|
| 6 |
"is_hyper_param_search": false,
|
| 7 |
"is_local_process_zero": true,
|
| 8 |
"is_world_process_zero": true,
|
|
|
|
| 8286 |
"learning_rate": 4.3672129835314955e-05,
|
| 8287 |
"loss": 0.2971,
|
| 8288 |
"step": 690000
|
| 8289 |
+
},
|
| 8290 |
+
{
|
| 8291 |
+
"epoch": 1.38,
|
| 8292 |
+
"learning_rate": 4.3573243363356916e-05,
|
| 8293 |
+
"loss": 0.297,
|
| 8294 |
+
"step": 690500
|
| 8295 |
+
},
|
| 8296 |
+
{
|
| 8297 |
+
"epoch": 1.38,
|
| 8298 |
+
"learning_rate": 4.347445648076057e-05,
|
| 8299 |
+
"loss": 0.2969,
|
| 8300 |
+
"step": 691000
|
| 8301 |
+
},
|
| 8302 |
+
{
|
| 8303 |
+
"epoch": 1.38,
|
| 8304 |
+
"learning_rate": 4.337576945760554e-05,
|
| 8305 |
+
"loss": 0.2978,
|
| 8306 |
+
"step": 691500
|
| 8307 |
+
},
|
| 8308 |
+
{
|
| 8309 |
+
"epoch": 1.38,
|
| 8310 |
+
"learning_rate": 4.327718256369826e-05,
|
| 8311 |
+
"loss": 0.2965,
|
| 8312 |
+
"step": 692000
|
| 8313 |
+
},
|
| 8314 |
+
{
|
| 8315 |
+
"epoch": 1.38,
|
| 8316 |
+
"learning_rate": 4.317869606857162e-05,
|
| 8317 |
+
"loss": 0.2973,
|
| 8318 |
+
"step": 692500
|
| 8319 |
+
},
|
| 8320 |
+
{
|
| 8321 |
+
"epoch": 1.39,
|
| 8322 |
+
"learning_rate": 4.3080310241483885e-05,
|
| 8323 |
+
"loss": 0.2967,
|
| 8324 |
+
"step": 693000
|
| 8325 |
+
},
|
| 8326 |
+
{
|
| 8327 |
+
"epoch": 1.39,
|
| 8328 |
+
"learning_rate": 4.298202535141818e-05,
|
| 8329 |
+
"loss": 0.2974,
|
| 8330 |
+
"step": 693500
|
| 8331 |
+
},
|
| 8332 |
+
{
|
| 8333 |
+
"epoch": 1.39,
|
| 8334 |
+
"learning_rate": 4.2883841667081675e-05,
|
| 8335 |
+
"loss": 0.2967,
|
| 8336 |
+
"step": 694000
|
| 8337 |
+
},
|
| 8338 |
+
{
|
| 8339 |
+
"epoch": 1.39,
|
| 8340 |
+
"learning_rate": 4.2785759456904745e-05,
|
| 8341 |
+
"loss": 0.2966,
|
| 8342 |
+
"step": 694500
|
| 8343 |
+
},
|
| 8344 |
+
{
|
| 8345 |
+
"epoch": 1.39,
|
| 8346 |
+
"learning_rate": 4.268777898904044e-05,
|
| 8347 |
+
"loss": 0.2969,
|
| 8348 |
+
"step": 695000
|
| 8349 |
+
},
|
| 8350 |
+
{
|
| 8351 |
+
"epoch": 1.39,
|
| 8352 |
+
"learning_rate": 4.2589900531363606e-05,
|
| 8353 |
+
"loss": 0.2967,
|
| 8354 |
+
"step": 695500
|
| 8355 |
+
},
|
| 8356 |
+
{
|
| 8357 |
+
"epoch": 1.39,
|
| 8358 |
+
"learning_rate": 4.2492124351470214e-05,
|
| 8359 |
+
"loss": 0.2962,
|
| 8360 |
+
"step": 696000
|
| 8361 |
+
},
|
| 8362 |
+
{
|
| 8363 |
+
"epoch": 1.39,
|
| 8364 |
+
"learning_rate": 4.239445071667666e-05,
|
| 8365 |
+
"loss": 0.297,
|
| 8366 |
+
"step": 696500
|
| 8367 |
+
},
|
| 8368 |
+
{
|
| 8369 |
+
"epoch": 1.39,
|
| 8370 |
+
"learning_rate": 4.2296879894018835e-05,
|
| 8371 |
+
"loss": 0.2966,
|
| 8372 |
+
"step": 697000
|
| 8373 |
+
},
|
| 8374 |
+
{
|
| 8375 |
+
"epoch": 1.39,
|
| 8376 |
+
"learning_rate": 4.219941215025171e-05,
|
| 8377 |
+
"loss": 0.2971,
|
| 8378 |
+
"step": 697500
|
| 8379 |
+
},
|
| 8380 |
+
{
|
| 8381 |
+
"epoch": 1.4,
|
| 8382 |
+
"learning_rate": 4.210204775184834e-05,
|
| 8383 |
+
"loss": 0.2973,
|
| 8384 |
+
"step": 698000
|
| 8385 |
+
},
|
| 8386 |
+
{
|
| 8387 |
+
"epoch": 1.4,
|
| 8388 |
+
"learning_rate": 4.2004786964999304e-05,
|
| 8389 |
+
"loss": 0.2962,
|
| 8390 |
+
"step": 698500
|
| 8391 |
+
},
|
| 8392 |
+
{
|
| 8393 |
+
"epoch": 1.4,
|
| 8394 |
+
"learning_rate": 4.190763005561186e-05,
|
| 8395 |
+
"loss": 0.2964,
|
| 8396 |
+
"step": 699000
|
| 8397 |
+
},
|
| 8398 |
+
{
|
| 8399 |
+
"epoch": 1.4,
|
| 8400 |
+
"learning_rate": 4.1810577289309266e-05,
|
| 8401 |
+
"loss": 0.2968,
|
| 8402 |
+
"step": 699500
|
| 8403 |
+
},
|
| 8404 |
+
{
|
| 8405 |
+
"epoch": 1.4,
|
| 8406 |
+
"learning_rate": 4.171362893143013e-05,
|
| 8407 |
+
"loss": 0.2965,
|
| 8408 |
+
"step": 700000
|
| 8409 |
}
|
| 8410 |
],
|
| 8411 |
"max_steps": 1000000,
|
| 8412 |
"num_train_epochs": 2,
|
| 8413 |
+
"total_flos": 4.732499294618889e+22,
|
| 8414 |
"trial_name": null,
|
| 8415 |
"trial_params": null
|
| 8416 |
}
|
pytorch_model.bin
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 449450757
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:bfb425b9bd2d7db89a175b6be9f6f3add2f5419aac09b70a86f8357b95b72148
|
| 3 |
size 449450757
|