Upload 8 files
Browse files- model.safetensors +1 -1
- optimizer.pt +1 -1
- rng_state.pth +1 -1
- scheduler.pt +1 -1
- trainer_state.json +5 -158
model.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 343679232
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:de097412cad8746b6eb19ea1dc0e2199ef3d2a9a8accae460a9a51bf4e2f8331
|
| 3 |
size 343679232
|
optimizer.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 687473786
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:2f862b26aa8c421663d1c84cd6f1f01648f91e1bd1fbe4fca01c7179e3e57342
|
| 3 |
size 687473786
|
rng_state.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 13990
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:ec859b2ce68dc22239ae89fc89ca51be37f1eb1e4eec3225a7e5f38fec447b57
|
| 3 |
size 13990
|
scheduler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1064
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:56247b3349547ea82644afccfe004506052b457e363254cda8547006aa0a9fb8
|
| 3 |
size 1064
|
trainer_state.json
CHANGED
|
@@ -1,9 +1,9 @@
|
|
| 1 |
{
|
| 2 |
-
"best_metric": 0.
|
| 3 |
-
"best_model_checkpoint": "pokemon_models\\checkpoint-
|
| 4 |
-
"epoch":
|
| 5 |
"eval_steps": 500,
|
| 6 |
-
"global_step":
|
| 7 |
"is_hyper_param_search": false,
|
| 8 |
"is_local_process_zero": true,
|
| 9 |
"is_world_process_zero": true,
|
|
@@ -694,159 +694,6 @@
|
|
| 694 |
"eval_samples_per_second": 10.376,
|
| 695 |
"eval_steps_per_second": 0.649,
|
| 696 |
"step": 1610
|
| 697 |
-
},
|
| 698 |
-
{
|
| 699 |
-
"epoch": 23.14,
|
| 700 |
-
"learning_rate": 1.0555555555555555e-05,
|
| 701 |
-
"loss": 0.5869,
|
| 702 |
-
"step": 1620
|
| 703 |
-
},
|
| 704 |
-
{
|
| 705 |
-
"epoch": 23.43,
|
| 706 |
-
"learning_rate": 1e-05,
|
| 707 |
-
"loss": 0.5809,
|
| 708 |
-
"step": 1640
|
| 709 |
-
},
|
| 710 |
-
{
|
| 711 |
-
"epoch": 23.71,
|
| 712 |
-
"learning_rate": 9.444444444444445e-06,
|
| 713 |
-
"loss": 0.5972,
|
| 714 |
-
"step": 1660
|
| 715 |
-
},
|
| 716 |
-
{
|
| 717 |
-
"epoch": 24.0,
|
| 718 |
-
"learning_rate": 8.88888888888889e-06,
|
| 719 |
-
"loss": 0.5726,
|
| 720 |
-
"step": 1680
|
| 721 |
-
},
|
| 722 |
-
{
|
| 723 |
-
"epoch": 24.0,
|
| 724 |
-
"eval_accuracy": 0.9186773905272565,
|
| 725 |
-
"eval_loss": 0.8351905345916748,
|
| 726 |
-
"eval_runtime": 109.1081,
|
| 727 |
-
"eval_samples_per_second": 10.256,
|
| 728 |
-
"eval_steps_per_second": 0.642,
|
| 729 |
-
"step": 1680
|
| 730 |
-
},
|
| 731 |
-
{
|
| 732 |
-
"epoch": 24.29,
|
| 733 |
-
"learning_rate": 8.333333333333334e-06,
|
| 734 |
-
"loss": 0.5562,
|
| 735 |
-
"step": 1700
|
| 736 |
-
},
|
| 737 |
-
{
|
| 738 |
-
"epoch": 24.57,
|
| 739 |
-
"learning_rate": 7.777777777777777e-06,
|
| 740 |
-
"loss": 0.5687,
|
| 741 |
-
"step": 1720
|
| 742 |
-
},
|
| 743 |
-
{
|
| 744 |
-
"epoch": 24.86,
|
| 745 |
-
"learning_rate": 7.222222222222222e-06,
|
| 746 |
-
"loss": 0.5418,
|
| 747 |
-
"step": 1740
|
| 748 |
-
},
|
| 749 |
-
{
|
| 750 |
-
"epoch": 25.0,
|
| 751 |
-
"eval_accuracy": 0.9240393208221627,
|
| 752 |
-
"eval_loss": 0.8053392767906189,
|
| 753 |
-
"eval_runtime": 111.5634,
|
| 754 |
-
"eval_samples_per_second": 10.03,
|
| 755 |
-
"eval_steps_per_second": 0.627,
|
| 756 |
-
"step": 1750
|
| 757 |
-
},
|
| 758 |
-
{
|
| 759 |
-
"epoch": 25.14,
|
| 760 |
-
"learning_rate": 6.666666666666667e-06,
|
| 761 |
-
"loss": 0.5496,
|
| 762 |
-
"step": 1760
|
| 763 |
-
},
|
| 764 |
-
{
|
| 765 |
-
"epoch": 25.43,
|
| 766 |
-
"learning_rate": 6.111111111111111e-06,
|
| 767 |
-
"loss": 0.5354,
|
| 768 |
-
"step": 1780
|
| 769 |
-
},
|
| 770 |
-
{
|
| 771 |
-
"epoch": 25.71,
|
| 772 |
-
"learning_rate": 5.555555555555556e-06,
|
| 773 |
-
"loss": 0.5564,
|
| 774 |
-
"step": 1800
|
| 775 |
-
},
|
| 776 |
-
{
|
| 777 |
-
"epoch": 26.0,
|
| 778 |
-
"learning_rate": 5e-06,
|
| 779 |
-
"loss": 0.579,
|
| 780 |
-
"step": 1820
|
| 781 |
-
},
|
| 782 |
-
{
|
| 783 |
-
"epoch": 26.0,
|
| 784 |
-
"eval_accuracy": 0.9151027703306523,
|
| 785 |
-
"eval_loss": 0.8136078119277954,
|
| 786 |
-
"eval_runtime": 105.9237,
|
| 787 |
-
"eval_samples_per_second": 10.564,
|
| 788 |
-
"eval_steps_per_second": 0.661,
|
| 789 |
-
"step": 1820
|
| 790 |
-
},
|
| 791 |
-
{
|
| 792 |
-
"epoch": 26.29,
|
| 793 |
-
"learning_rate": 4.444444444444445e-06,
|
| 794 |
-
"loss": 0.5322,
|
| 795 |
-
"step": 1840
|
| 796 |
-
},
|
| 797 |
-
{
|
| 798 |
-
"epoch": 26.57,
|
| 799 |
-
"learning_rate": 3.888888888888889e-06,
|
| 800 |
-
"loss": 0.5529,
|
| 801 |
-
"step": 1860
|
| 802 |
-
},
|
| 803 |
-
{
|
| 804 |
-
"epoch": 26.86,
|
| 805 |
-
"learning_rate": 3.3333333333333333e-06,
|
| 806 |
-
"loss": 0.5077,
|
| 807 |
-
"step": 1880
|
| 808 |
-
},
|
| 809 |
-
{
|
| 810 |
-
"epoch": 27.0,
|
| 811 |
-
"eval_accuracy": 0.9168900804289544,
|
| 812 |
-
"eval_loss": 0.7921976447105408,
|
| 813 |
-
"eval_runtime": 105.3647,
|
| 814 |
-
"eval_samples_per_second": 10.62,
|
| 815 |
-
"eval_steps_per_second": 0.664,
|
| 816 |
-
"step": 1890
|
| 817 |
-
},
|
| 818 |
-
{
|
| 819 |
-
"epoch": 27.14,
|
| 820 |
-
"learning_rate": 2.777777777777778e-06,
|
| 821 |
-
"loss": 0.5443,
|
| 822 |
-
"step": 1900
|
| 823 |
-
},
|
| 824 |
-
{
|
| 825 |
-
"epoch": 27.43,
|
| 826 |
-
"learning_rate": 2.2222222222222225e-06,
|
| 827 |
-
"loss": 0.5181,
|
| 828 |
-
"step": 1920
|
| 829 |
-
},
|
| 830 |
-
{
|
| 831 |
-
"epoch": 27.71,
|
| 832 |
-
"learning_rate": 1.6666666666666667e-06,
|
| 833 |
-
"loss": 0.5318,
|
| 834 |
-
"step": 1940
|
| 835 |
-
},
|
| 836 |
-
{
|
| 837 |
-
"epoch": 28.0,
|
| 838 |
-
"learning_rate": 1.1111111111111112e-06,
|
| 839 |
-
"loss": 0.5138,
|
| 840 |
-
"step": 1960
|
| 841 |
-
},
|
| 842 |
-
{
|
| 843 |
-
"epoch": 28.0,
|
| 844 |
-
"eval_accuracy": 0.9133154602323503,
|
| 845 |
-
"eval_loss": 0.7894989252090454,
|
| 846 |
-
"eval_runtime": 107.1689,
|
| 847 |
-
"eval_samples_per_second": 10.441,
|
| 848 |
-
"eval_steps_per_second": 0.653,
|
| 849 |
-
"step": 1960
|
| 850 |
}
|
| 851 |
],
|
| 852 |
"logging_steps": 20,
|
|
@@ -854,7 +701,7 @@
|
|
| 854 |
"num_input_tokens_seen": 0,
|
| 855 |
"num_train_epochs": 29,
|
| 856 |
"save_steps": 500,
|
| 857 |
-
"total_flos":
|
| 858 |
"train_batch_size": 16,
|
| 859 |
"trial_name": null,
|
| 860 |
"trial_params": null
|
|
|
|
| 1 |
{
|
| 2 |
+
"best_metric": 0.9142091152815014,
|
| 3 |
+
"best_model_checkpoint": "pokemon_models\\checkpoint-1610",
|
| 4 |
+
"epoch": 23.0,
|
| 5 |
"eval_steps": 500,
|
| 6 |
+
"global_step": 1610,
|
| 7 |
"is_hyper_param_search": false,
|
| 8 |
"is_local_process_zero": true,
|
| 9 |
"is_world_process_zero": true,
|
|
|
|
| 694 |
"eval_samples_per_second": 10.376,
|
| 695 |
"eval_steps_per_second": 0.649,
|
| 696 |
"step": 1610
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 697 |
}
|
| 698 |
],
|
| 699 |
"logging_steps": 20,
|
|
|
|
| 701 |
"num_input_tokens_seen": 0,
|
| 702 |
"num_train_epochs": 29,
|
| 703 |
"save_steps": 500,
|
| 704 |
+
"total_flos": 7.982873471516332e+18,
|
| 705 |
"train_batch_size": 16,
|
| 706 |
"trial_name": null,
|
| 707 |
"trial_params": null
|