Training in progress, step 330000
Browse files- last-checkpoint/optimizer.pt +1 -1
- last-checkpoint/pytorch_model.bin +1 -1
- last-checkpoint/rng_state_0.pth +1 -1
- last-checkpoint/rng_state_1.pth +2 -2
- last-checkpoint/rng_state_2.pth +1 -1
- last-checkpoint/rng_state_3.pth +2 -2
- last-checkpoint/rng_state_4.pth +1 -1
- last-checkpoint/rng_state_5.pth +2 -2
- last-checkpoint/rng_state_6.pth +1 -1
- last-checkpoint/rng_state_7.pth +1 -1
- last-checkpoint/scheduler.pt +1 -1
- last-checkpoint/trainer_state.json +203 -3
- pytorch_model.bin +1 -1
last-checkpoint/optimizer.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 202194449
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:a22ef790a0c2f4f62e66619584ea27ceb6ef6b7c3985ca2d9f4be0901d33fb7e
|
| 3 |
size 202194449
|
last-checkpoint/pytorch_model.bin
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 102501541
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:91c8cb7bfe62bed4ad754b48993f10b7da9c98b6075ef529d78d9d2d0a013fff
|
| 3 |
size 102501541
|
last-checkpoint/rng_state_0.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14503
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:7d377ee122e46b8fa76b1d2b74be365b9135d1e8e6cede7fa0fe3ff751bb7334
|
| 3 |
size 14503
|
last-checkpoint/rng_state_1.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:7a10a546cee3b4518e4cb32764590234884d4cbc2be79dd9856924e21cdd3fd8
|
| 3 |
+
size 14567
|
last-checkpoint/rng_state_2.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14503
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:a33a6a869db9bf49441e17af5679e56376e6e829633250eded5937f47c2020aa
|
| 3 |
size 14503
|
last-checkpoint/rng_state_3.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:03c277499f6406d581d5a424d585bc71aa0cb5fc2c33d575b72b1f53a009a2c5
|
| 3 |
+
size 14567
|
last-checkpoint/rng_state_4.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14503
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:b59b474b08b06647b657ae7d97ae9d06a436e1b6da58f15374f2b25278144cf8
|
| 3 |
size 14503
|
last-checkpoint/rng_state_5.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:6d0b7236bb0eefa8365b0f6eccaff3b57014a0d4fca20545eaa932daadd82f47
|
| 3 |
+
size 14503
|
last-checkpoint/rng_state_6.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14439
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:1ec7b72e871e355904aa3d42031ed74fefb8d0ce9e7a1a82df41ead92a13bbee
|
| 3 |
size 14439
|
last-checkpoint/rng_state_7.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14503
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:3a490aaf46199ae217198d1b46bb5a0022da5901da5b5a0d9b96059f1a94a188
|
| 3 |
size 14503
|
last-checkpoint/scheduler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 623
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:636cb28fce30ad56f68aface20193360fd815697da4c2ec39f5ca647b5e6b45b
|
| 3 |
size 623
|
last-checkpoint/trainer_state.json
CHANGED
|
@@ -1,8 +1,8 @@
|
|
| 1 |
{
|
| 2 |
"best_metric": null,
|
| 3 |
"best_model_checkpoint": null,
|
| 4 |
-
"epoch":
|
| 5 |
-
"global_step":
|
| 6 |
"is_hyper_param_search": false,
|
| 7 |
"is_local_process_zero": true,
|
| 8 |
"is_world_process_zero": true,
|
|
@@ -6406,11 +6406,211 @@
|
|
| 6406 |
"eval_samples_per_second": 2004.192,
|
| 6407 |
"eval_steps_per_second": 32.067,
|
| 6408 |
"step": 320000
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 6409 |
}
|
| 6410 |
],
|
| 6411 |
"max_steps": 500000,
|
| 6412 |
"num_train_epochs": 16,
|
| 6413 |
-
"total_flos": 1.
|
| 6414 |
"trial_name": null,
|
| 6415 |
"trial_params": null
|
| 6416 |
}
|
|
|
|
| 1 |
{
|
| 2 |
"best_metric": null,
|
| 3 |
"best_model_checkpoint": null,
|
| 4 |
+
"epoch": 10.110603878795306,
|
| 5 |
+
"global_step": 330000,
|
| 6 |
"is_hyper_param_search": false,
|
| 7 |
"is_local_process_zero": true,
|
| 8 |
"is_world_process_zero": true,
|
|
|
|
| 6406 |
"eval_samples_per_second": 2004.192,
|
| 6407 |
"eval_steps_per_second": 32.067,
|
| 6408 |
"step": 320000
|
| 6409 |
+
},
|
| 6410 |
+
{
|
| 6411 |
+
"epoch": 9.82,
|
| 6412 |
+
"learning_rate": 0.00010073144655927253,
|
| 6413 |
+
"loss": 0.3273,
|
| 6414 |
+
"step": 320500
|
| 6415 |
+
},
|
| 6416 |
+
{
|
| 6417 |
+
"epoch": 9.83,
|
| 6418 |
+
"learning_rate": 0.0001002870876838929,
|
| 6419 |
+
"loss": 0.3276,
|
| 6420 |
+
"step": 321000
|
| 6421 |
+
},
|
| 6422 |
+
{
|
| 6423 |
+
"epoch": 9.83,
|
| 6424 |
+
"eval_loss": 0.7801169157028198,
|
| 6425 |
+
"eval_runtime": 0.505,
|
| 6426 |
+
"eval_samples_per_second": 1980.071,
|
| 6427 |
+
"eval_steps_per_second": 31.681,
|
| 6428 |
+
"step": 321000
|
| 6429 |
+
},
|
| 6430 |
+
{
|
| 6431 |
+
"epoch": 9.85,
|
| 6432 |
+
"learning_rate": 9.984332714015662e-05,
|
| 6433 |
+
"loss": 0.3272,
|
| 6434 |
+
"step": 321500
|
| 6435 |
+
},
|
| 6436 |
+
{
|
| 6437 |
+
"epoch": 9.87,
|
| 6438 |
+
"learning_rate": 9.94001697809578e-05,
|
| 6439 |
+
"loss": 0.3273,
|
| 6440 |
+
"step": 322000
|
| 6441 |
+
},
|
| 6442 |
+
{
|
| 6443 |
+
"epoch": 9.87,
|
| 6444 |
+
"eval_loss": 0.7831940650939941,
|
| 6445 |
+
"eval_runtime": 0.5116,
|
| 6446 |
+
"eval_samples_per_second": 1954.711,
|
| 6447 |
+
"eval_steps_per_second": 31.275,
|
| 6448 |
+
"step": 322000
|
| 6449 |
+
},
|
| 6450 |
+
{
|
| 6451 |
+
"epoch": 9.88,
|
| 6452 |
+
"learning_rate": 9.895762045259445e-05,
|
| 6453 |
+
"loss": 0.3274,
|
| 6454 |
+
"step": 322500
|
| 6455 |
+
},
|
| 6456 |
+
{
|
| 6457 |
+
"epoch": 9.9,
|
| 6458 |
+
"learning_rate": 9.851568399471498e-05,
|
| 6459 |
+
"loss": 0.3277,
|
| 6460 |
+
"step": 323000
|
| 6461 |
+
},
|
| 6462 |
+
{
|
| 6463 |
+
"epoch": 9.9,
|
| 6464 |
+
"eval_loss": 0.7861186265945435,
|
| 6465 |
+
"eval_runtime": 0.4926,
|
| 6466 |
+
"eval_samples_per_second": 2030.062,
|
| 6467 |
+
"eval_steps_per_second": 32.481,
|
| 6468 |
+
"step": 323000
|
| 6469 |
+
},
|
| 6470 |
+
{
|
| 6471 |
+
"epoch": 9.91,
|
| 6472 |
+
"learning_rate": 9.807436524026574e-05,
|
| 6473 |
+
"loss": 0.3275,
|
| 6474 |
+
"step": 323500
|
| 6475 |
+
},
|
| 6476 |
+
{
|
| 6477 |
+
"epoch": 9.93,
|
| 6478 |
+
"learning_rate": 9.763366901543801e-05,
|
| 6479 |
+
"loss": 0.3269,
|
| 6480 |
+
"step": 324000
|
| 6481 |
+
},
|
| 6482 |
+
{
|
| 6483 |
+
"epoch": 9.93,
|
| 6484 |
+
"eval_loss": 0.7829710841178894,
|
| 6485 |
+
"eval_runtime": 0.5448,
|
| 6486 |
+
"eval_samples_per_second": 1835.417,
|
| 6487 |
+
"eval_steps_per_second": 29.367,
|
| 6488 |
+
"step": 324000
|
| 6489 |
+
},
|
| 6490 |
+
{
|
| 6491 |
+
"epoch": 9.94,
|
| 6492 |
+
"learning_rate": 9.719360013961495e-05,
|
| 6493 |
+
"loss": 0.3269,
|
| 6494 |
+
"step": 324500
|
| 6495 |
+
},
|
| 6496 |
+
{
|
| 6497 |
+
"epoch": 9.96,
|
| 6498 |
+
"learning_rate": 9.675416342531944e-05,
|
| 6499 |
+
"loss": 0.3269,
|
| 6500 |
+
"step": 325000
|
| 6501 |
+
},
|
| 6502 |
+
{
|
| 6503 |
+
"epoch": 9.96,
|
| 6504 |
+
"eval_loss": 0.7878097891807556,
|
| 6505 |
+
"eval_runtime": 0.4984,
|
| 6506 |
+
"eval_samples_per_second": 2006.57,
|
| 6507 |
+
"eval_steps_per_second": 32.105,
|
| 6508 |
+
"step": 325000
|
| 6509 |
+
},
|
| 6510 |
+
{
|
| 6511 |
+
"epoch": 9.97,
|
| 6512 |
+
"learning_rate": 9.631536367816086e-05,
|
| 6513 |
+
"loss": 0.3282,
|
| 6514 |
+
"step": 325500
|
| 6515 |
+
},
|
| 6516 |
+
{
|
| 6517 |
+
"epoch": 9.99,
|
| 6518 |
+
"learning_rate": 9.587720569678299e-05,
|
| 6519 |
+
"loss": 0.3267,
|
| 6520 |
+
"step": 326000
|
| 6521 |
+
},
|
| 6522 |
+
{
|
| 6523 |
+
"epoch": 9.99,
|
| 6524 |
+
"eval_loss": 0.7815366387367249,
|
| 6525 |
+
"eval_runtime": 0.5069,
|
| 6526 |
+
"eval_samples_per_second": 1972.941,
|
| 6527 |
+
"eval_steps_per_second": 31.567,
|
| 6528 |
+
"step": 326000
|
| 6529 |
+
},
|
| 6530 |
+
{
|
| 6531 |
+
"epoch": 10.0,
|
| 6532 |
+
"learning_rate": 9.543969427281131e-05,
|
| 6533 |
+
"loss": 0.3268,
|
| 6534 |
+
"step": 326500
|
| 6535 |
+
},
|
| 6536 |
+
{
|
| 6537 |
+
"epoch": 10.02,
|
| 6538 |
+
"learning_rate": 9.500283419080062e-05,
|
| 6539 |
+
"loss": 0.3269,
|
| 6540 |
+
"step": 327000
|
| 6541 |
+
},
|
| 6542 |
+
{
|
| 6543 |
+
"epoch": 10.02,
|
| 6544 |
+
"eval_loss": 0.7789347767829895,
|
| 6545 |
+
"eval_runtime": 0.5192,
|
| 6546 |
+
"eval_samples_per_second": 1925.943,
|
| 6547 |
+
"eval_steps_per_second": 30.815,
|
| 6548 |
+
"step": 327000
|
| 6549 |
+
},
|
| 6550 |
+
{
|
| 6551 |
+
"epoch": 10.03,
|
| 6552 |
+
"learning_rate": 9.45666302281829e-05,
|
| 6553 |
+
"loss": 0.3268,
|
| 6554 |
+
"step": 327500
|
| 6555 |
+
},
|
| 6556 |
+
{
|
| 6557 |
+
"epoch": 10.05,
|
| 6558 |
+
"learning_rate": 9.413108715521467e-05,
|
| 6559 |
+
"loss": 0.3266,
|
| 6560 |
+
"step": 328000
|
| 6561 |
+
},
|
| 6562 |
+
{
|
| 6563 |
+
"epoch": 10.05,
|
| 6564 |
+
"eval_loss": 0.7733815908432007,
|
| 6565 |
+
"eval_runtime": 0.5128,
|
| 6566 |
+
"eval_samples_per_second": 1950.206,
|
| 6567 |
+
"eval_steps_per_second": 31.203,
|
| 6568 |
+
"step": 328000
|
| 6569 |
+
},
|
| 6570 |
+
{
|
| 6571 |
+
"epoch": 10.06,
|
| 6572 |
+
"learning_rate": 9.369620973492525e-05,
|
| 6573 |
+
"loss": 0.3265,
|
| 6574 |
+
"step": 328500
|
| 6575 |
+
},
|
| 6576 |
+
{
|
| 6577 |
+
"epoch": 10.08,
|
| 6578 |
+
"learning_rate": 9.326200272306445e-05,
|
| 6579 |
+
"loss": 0.3262,
|
| 6580 |
+
"step": 329000
|
| 6581 |
+
},
|
| 6582 |
+
{
|
| 6583 |
+
"epoch": 10.08,
|
| 6584 |
+
"eval_loss": 0.7761348485946655,
|
| 6585 |
+
"eval_runtime": 0.5067,
|
| 6586 |
+
"eval_samples_per_second": 1973.572,
|
| 6587 |
+
"eval_steps_per_second": 31.577,
|
| 6588 |
+
"step": 329000
|
| 6589 |
+
},
|
| 6590 |
+
{
|
| 6591 |
+
"epoch": 10.1,
|
| 6592 |
+
"learning_rate": 9.282847086805059e-05,
|
| 6593 |
+
"loss": 0.3267,
|
| 6594 |
+
"step": 329500
|
| 6595 |
+
},
|
| 6596 |
+
{
|
| 6597 |
+
"epoch": 10.11,
|
| 6598 |
+
"learning_rate": 9.239561891091853e-05,
|
| 6599 |
+
"loss": 0.3264,
|
| 6600 |
+
"step": 330000
|
| 6601 |
+
},
|
| 6602 |
+
{
|
| 6603 |
+
"epoch": 10.11,
|
| 6604 |
+
"eval_loss": 0.7795162200927734,
|
| 6605 |
+
"eval_runtime": 0.5194,
|
| 6606 |
+
"eval_samples_per_second": 1925.383,
|
| 6607 |
+
"eval_steps_per_second": 30.806,
|
| 6608 |
+
"step": 330000
|
| 6609 |
}
|
| 6610 |
],
|
| 6611 |
"max_steps": 500000,
|
| 6612 |
"num_train_epochs": 16,
|
| 6613 |
+
"total_flos": 1.0542995700038742e+22,
|
| 6614 |
"trial_name": null,
|
| 6615 |
"trial_params": null
|
| 6616 |
}
|
pytorch_model.bin
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 102501541
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:91c8cb7bfe62bed4ad754b48993f10b7da9c98b6075ef529d78d9d2d0a013fff
|
| 3 |
size 102501541
|