Training in progress, step 340000
Browse files- last-checkpoint/optimizer.pt +1 -1
- last-checkpoint/pytorch_model.bin +1 -1
- last-checkpoint/rng_state_0.pth +2 -2
- last-checkpoint/rng_state_1.pth +2 -2
- last-checkpoint/rng_state_2.pth +1 -1
- last-checkpoint/rng_state_3.pth +2 -2
- last-checkpoint/rng_state_4.pth +1 -1
- last-checkpoint/rng_state_5.pth +2 -2
- last-checkpoint/rng_state_6.pth +2 -2
- last-checkpoint/rng_state_7.pth +1 -1
- last-checkpoint/scheduler.pt +1 -1
- last-checkpoint/trainer_state.json +203 -3
- pytorch_model.bin +1 -1
last-checkpoint/optimizer.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 202194449
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:0bc8c307e09c2ee7140e86493006e9375ce3828ae419bfd73dd8e77040dc9432
|
| 3 |
size 202194449
|
last-checkpoint/pytorch_model.bin
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 102501541
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:db82336fde2ca798cca2552947b8f10e1dae6da490a1f40a713b479f4e821a2d
|
| 3 |
size 102501541
|
last-checkpoint/rng_state_0.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:3c78bed16f0847936846c879bca5d8a536a9a3a59d08245d05fbfcc52edb347e
|
| 3 |
+
size 14439
|
last-checkpoint/rng_state_1.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:2c8b7b2390f7d5ddc6390197696d6aa0925d05d2ef38d7290149a060e461ae44
|
| 3 |
+
size 14439
|
last-checkpoint/rng_state_2.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14503
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:9313e219a1869a99c69734744312a0517c71c6052de358efa7a1080ce122046c
|
| 3 |
size 14503
|
last-checkpoint/rng_state_3.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:416722c1edd0afe1ceb2e8573cb1ac6a96bfc6996efd996f3b5e077d838af66f
|
| 3 |
+
size 14503
|
last-checkpoint/rng_state_4.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14503
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:d09f85aaae937e7534df6bd43ac24aa3578e31af78965f5ed61081b83c1046da
|
| 3 |
size 14503
|
last-checkpoint/rng_state_5.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:8d4e2a6a930e95dd4bd770a07c72d2fac11a8c63fc0f19ad213c91a2d9c6320b
|
| 3 |
+
size 14439
|
last-checkpoint/rng_state_6.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:4c68c66e599d0d64dd47fb2d9ad7cf41ba596f6439d733b5b753368eb3156e13
|
| 3 |
+
size 14503
|
last-checkpoint/rng_state_7.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14503
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:c12b1e861045ff9922be9ad05050d7efb0f92e7378cdd8573f5012b3fb1b1b30
|
| 3 |
size 14503
|
last-checkpoint/scheduler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 623
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:4a0f100ea6da4a9ff03fc2cc9b0cbbba13b42ccc41293a1d6143e5a081a97f70
|
| 3 |
size 623
|
last-checkpoint/trainer_state.json
CHANGED
|
@@ -1,8 +1,8 @@
|
|
| 1 |
{
|
| 2 |
"best_metric": null,
|
| 3 |
"best_model_checkpoint": null,
|
| 4 |
-
"epoch": 10.
|
| 5 |
-
"global_step":
|
| 6 |
"is_hyper_param_search": false,
|
| 7 |
"is_local_process_zero": true,
|
| 8 |
"is_world_process_zero": true,
|
|
@@ -6606,11 +6606,211 @@
|
|
| 6606 |
"eval_samples_per_second": 1925.383,
|
| 6607 |
"eval_steps_per_second": 30.806,
|
| 6608 |
"step": 330000
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 6609 |
}
|
| 6610 |
],
|
| 6611 |
"max_steps": 500000,
|
| 6612 |
"num_train_epochs": 16,
|
| 6613 |
-
"total_flos": 1.
|
| 6614 |
"trial_name": null,
|
| 6615 |
"trial_params": null
|
| 6616 |
}
|
|
|
|
| 1 |
{
|
| 2 |
"best_metric": null,
|
| 3 |
"best_model_checkpoint": null,
|
| 4 |
+
"epoch": 10.416985814516377,
|
| 5 |
+
"global_step": 340000,
|
| 6 |
"is_hyper_param_search": false,
|
| 7 |
"is_local_process_zero": true,
|
| 8 |
"is_world_process_zero": true,
|
|
|
|
| 6606 |
"eval_samples_per_second": 1925.383,
|
| 6607 |
"eval_steps_per_second": 30.806,
|
| 6608 |
"step": 330000
|
| 6609 |
+
},
|
| 6610 |
+
{
|
| 6611 |
+
"epoch": 10.13,
|
| 6612 |
+
"learning_rate": 9.196345158526793e-05,
|
| 6613 |
+
"loss": 0.3267,
|
| 6614 |
+
"step": 330500
|
| 6615 |
+
},
|
| 6616 |
+
{
|
| 6617 |
+
"epoch": 10.14,
|
| 6618 |
+
"learning_rate": 9.153197361721149e-05,
|
| 6619 |
+
"loss": 0.3258,
|
| 6620 |
+
"step": 331000
|
| 6621 |
+
},
|
| 6622 |
+
{
|
| 6623 |
+
"epoch": 10.14,
|
| 6624 |
+
"eval_loss": 0.7773971557617188,
|
| 6625 |
+
"eval_runtime": 0.5057,
|
| 6626 |
+
"eval_samples_per_second": 1977.448,
|
| 6627 |
+
"eval_steps_per_second": 31.639,
|
| 6628 |
+
"step": 331000
|
| 6629 |
+
},
|
| 6630 |
+
{
|
| 6631 |
+
"epoch": 10.16,
|
| 6632 |
+
"learning_rate": 9.110118972532302e-05,
|
| 6633 |
+
"loss": 0.3259,
|
| 6634 |
+
"step": 331500
|
| 6635 |
+
},
|
| 6636 |
+
{
|
| 6637 |
+
"epoch": 10.17,
|
| 6638 |
+
"learning_rate": 9.067110462058634e-05,
|
| 6639 |
+
"loss": 0.3261,
|
| 6640 |
+
"step": 332000
|
| 6641 |
+
},
|
| 6642 |
+
{
|
| 6643 |
+
"epoch": 10.17,
|
| 6644 |
+
"eval_loss": 0.7772080302238464,
|
| 6645 |
+
"eval_runtime": 0.512,
|
| 6646 |
+
"eval_samples_per_second": 1953.287,
|
| 6647 |
+
"eval_steps_per_second": 31.253,
|
| 6648 |
+
"step": 332000
|
| 6649 |
+
},
|
| 6650 |
+
{
|
| 6651 |
+
"epoch": 10.19,
|
| 6652 |
+
"learning_rate": 9.024172300634305e-05,
|
| 6653 |
+
"loss": 0.3258,
|
| 6654 |
+
"step": 332500
|
| 6655 |
+
},
|
| 6656 |
+
{
|
| 6657 |
+
"epoch": 10.2,
|
| 6658 |
+
"learning_rate": 8.981304957824182e-05,
|
| 6659 |
+
"loss": 0.3257,
|
| 6660 |
+
"step": 333000
|
| 6661 |
+
},
|
| 6662 |
+
{
|
| 6663 |
+
"epoch": 10.2,
|
| 6664 |
+
"eval_loss": 0.7775481939315796,
|
| 6665 |
+
"eval_runtime": 0.4889,
|
| 6666 |
+
"eval_samples_per_second": 2045.391,
|
| 6667 |
+
"eval_steps_per_second": 32.726,
|
| 6668 |
+
"step": 333000
|
| 6669 |
+
},
|
| 6670 |
+
{
|
| 6671 |
+
"epoch": 10.22,
|
| 6672 |
+
"learning_rate": 8.938508902418643e-05,
|
| 6673 |
+
"loss": 0.326,
|
| 6674 |
+
"step": 333500
|
| 6675 |
+
},
|
| 6676 |
+
{
|
| 6677 |
+
"epoch": 10.23,
|
| 6678 |
+
"learning_rate": 8.89578460242851e-05,
|
| 6679 |
+
"loss": 0.3277,
|
| 6680 |
+
"step": 334000
|
| 6681 |
+
},
|
| 6682 |
+
{
|
| 6683 |
+
"epoch": 10.23,
|
| 6684 |
+
"eval_loss": 0.7818301916122437,
|
| 6685 |
+
"eval_runtime": 0.5001,
|
| 6686 |
+
"eval_samples_per_second": 1999.531,
|
| 6687 |
+
"eval_steps_per_second": 31.992,
|
| 6688 |
+
"step": 334000
|
| 6689 |
+
},
|
| 6690 |
+
{
|
| 6691 |
+
"epoch": 10.25,
|
| 6692 |
+
"learning_rate": 8.85313252507988e-05,
|
| 6693 |
+
"loss": 0.3257,
|
| 6694 |
+
"step": 334500
|
| 6695 |
+
},
|
| 6696 |
+
{
|
| 6697 |
+
"epoch": 10.26,
|
| 6698 |
+
"learning_rate": 8.810553136809027e-05,
|
| 6699 |
+
"loss": 0.3258,
|
| 6700 |
+
"step": 335000
|
| 6701 |
+
},
|
| 6702 |
+
{
|
| 6703 |
+
"epoch": 10.26,
|
| 6704 |
+
"eval_loss": 0.7805637121200562,
|
| 6705 |
+
"eval_runtime": 0.5114,
|
| 6706 |
+
"eval_samples_per_second": 1955.493,
|
| 6707 |
+
"eval_steps_per_second": 31.288,
|
| 6708 |
+
"step": 335000
|
| 6709 |
+
},
|
| 6710 |
+
{
|
| 6711 |
+
"epoch": 10.28,
|
| 6712 |
+
"learning_rate": 8.76804690325733e-05,
|
| 6713 |
+
"loss": 0.3257,
|
| 6714 |
+
"step": 335500
|
| 6715 |
+
},
|
| 6716 |
+
{
|
| 6717 |
+
"epoch": 10.29,
|
| 6718 |
+
"learning_rate": 8.725614289266137e-05,
|
| 6719 |
+
"loss": 0.3257,
|
| 6720 |
+
"step": 336000
|
| 6721 |
+
},
|
| 6722 |
+
{
|
| 6723 |
+
"epoch": 10.29,
|
| 6724 |
+
"eval_loss": 0.7819052934646606,
|
| 6725 |
+
"eval_runtime": 0.5066,
|
| 6726 |
+
"eval_samples_per_second": 1974.06,
|
| 6727 |
+
"eval_steps_per_second": 31.585,
|
| 6728 |
+
"step": 336000
|
| 6729 |
+
},
|
| 6730 |
+
{
|
| 6731 |
+
"epoch": 10.31,
|
| 6732 |
+
"learning_rate": 8.683255758871734e-05,
|
| 6733 |
+
"loss": 0.3258,
|
| 6734 |
+
"step": 336500
|
| 6735 |
+
},
|
| 6736 |
+
{
|
| 6737 |
+
"epoch": 10.33,
|
| 6738 |
+
"learning_rate": 8.640971775300207e-05,
|
| 6739 |
+
"loss": 0.3259,
|
| 6740 |
+
"step": 337000
|
| 6741 |
+
},
|
| 6742 |
+
{
|
| 6743 |
+
"epoch": 10.33,
|
| 6744 |
+
"eval_loss": 0.7828894257545471,
|
| 6745 |
+
"eval_runtime": 0.4948,
|
| 6746 |
+
"eval_samples_per_second": 2020.994,
|
| 6747 |
+
"eval_steps_per_second": 32.336,
|
| 6748 |
+
"step": 337000
|
| 6749 |
+
},
|
| 6750 |
+
{
|
| 6751 |
+
"epoch": 10.34,
|
| 6752 |
+
"learning_rate": 8.598762800962431e-05,
|
| 6753 |
+
"loss": 0.325,
|
| 6754 |
+
"step": 337500
|
| 6755 |
+
},
|
| 6756 |
+
{
|
| 6757 |
+
"epoch": 10.36,
|
| 6758 |
+
"learning_rate": 8.55662929744899e-05,
|
| 6759 |
+
"loss": 0.3253,
|
| 6760 |
+
"step": 338000
|
| 6761 |
+
},
|
| 6762 |
+
{
|
| 6763 |
+
"epoch": 10.36,
|
| 6764 |
+
"eval_loss": 0.7842022776603699,
|
| 6765 |
+
"eval_runtime": 0.5086,
|
| 6766 |
+
"eval_samples_per_second": 1966.304,
|
| 6767 |
+
"eval_steps_per_second": 31.461,
|
| 6768 |
+
"step": 338000
|
| 6769 |
+
},
|
| 6770 |
+
{
|
| 6771 |
+
"epoch": 10.37,
|
| 6772 |
+
"learning_rate": 8.514571725525124e-05,
|
| 6773 |
+
"loss": 0.325,
|
| 6774 |
+
"step": 338500
|
| 6775 |
+
},
|
| 6776 |
+
{
|
| 6777 |
+
"epoch": 10.39,
|
| 6778 |
+
"learning_rate": 8.47259054512571e-05,
|
| 6779 |
+
"loss": 0.3252,
|
| 6780 |
+
"step": 339000
|
| 6781 |
+
},
|
| 6782 |
+
{
|
| 6783 |
+
"epoch": 10.39,
|
| 6784 |
+
"eval_loss": 0.7787997722625732,
|
| 6785 |
+
"eval_runtime": 0.5114,
|
| 6786 |
+
"eval_samples_per_second": 1955.232,
|
| 6787 |
+
"eval_steps_per_second": 31.284,
|
| 6788 |
+
"step": 339000
|
| 6789 |
+
},
|
| 6790 |
+
{
|
| 6791 |
+
"epoch": 10.4,
|
| 6792 |
+
"learning_rate": 8.430686215350198e-05,
|
| 6793 |
+
"loss": 0.3253,
|
| 6794 |
+
"step": 339500
|
| 6795 |
+
},
|
| 6796 |
+
{
|
| 6797 |
+
"epoch": 10.42,
|
| 6798 |
+
"learning_rate": 8.388859194457636e-05,
|
| 6799 |
+
"loss": 0.3252,
|
| 6800 |
+
"step": 340000
|
| 6801 |
+
},
|
| 6802 |
+
{
|
| 6803 |
+
"epoch": 10.42,
|
| 6804 |
+
"eval_loss": 0.7811650633811951,
|
| 6805 |
+
"eval_runtime": 0.5039,
|
| 6806 |
+
"eval_samples_per_second": 1984.543,
|
| 6807 |
+
"eval_steps_per_second": 31.753,
|
| 6808 |
+
"step": 340000
|
| 6809 |
}
|
| 6810 |
],
|
| 6811 |
"max_steps": 500000,
|
| 6812 |
"num_train_epochs": 16,
|
| 6813 |
+
"total_flos": 1.0862482838813242e+22,
|
| 6814 |
"trial_name": null,
|
| 6815 |
"trial_params": null
|
| 6816 |
}
|
pytorch_model.bin
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 102501541
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:db82336fde2ca798cca2552947b8f10e1dae6da490a1f40a713b479f4e821a2d
|
| 3 |
size 102501541
|