Training in progress, step 120, checkpoint
Browse files
last-checkpoint/adapter_model.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 45118424
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:e36bb4966b3713f17079f0f0073225f3c17789e78598436f125bc5847c546220
|
| 3 |
size 45118424
|
last-checkpoint/optimizer.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 23159290
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:31631c7141c9eed8d3d67722b7f007bb55e7b4644efb82e4b7c07b72a46d6b5f
|
| 3 |
size 23159290
|
last-checkpoint/rng_state.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14244
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:330e765b24011cd6e18b8db74d77f7195e5780a184071a5df72e72c642350c23
|
| 3 |
size 14244
|
last-checkpoint/scheduler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1064
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:61c2b4927e3039b26d377375be782c03ce853d193f96b5868ccf559441e84af9
|
| 3 |
size 1064
|
last-checkpoint/trainer_state.json
CHANGED
|
@@ -1,9 +1,9 @@
|
|
| 1 |
{
|
| 2 |
"best_metric": 1.1519354581832886,
|
| 3 |
"best_model_checkpoint": "miner_id_besimray/checkpoint-60",
|
| 4 |
-
"epoch": 2.
|
| 5 |
"eval_steps": 20,
|
| 6 |
-
"global_step":
|
| 7 |
"is_hyper_param_search": false,
|
| 8 |
"is_local_process_zero": true,
|
| 9 |
"is_world_process_zero": true,
|
|
@@ -755,6 +755,154 @@
|
|
| 755 |
"eval_samples_per_second": 48.569,
|
| 756 |
"eval_steps_per_second": 4.857,
|
| 757 |
"step": 100
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 758 |
}
|
| 759 |
],
|
| 760 |
"logging_steps": 1,
|
|
@@ -769,7 +917,7 @@
|
|
| 769 |
"early_stopping_threshold": 0.0
|
| 770 |
},
|
| 771 |
"attributes": {
|
| 772 |
-
"early_stopping_patience_counter":
|
| 773 |
}
|
| 774 |
},
|
| 775 |
"TrainerControl": {
|
|
@@ -778,12 +926,12 @@
|
|
| 778 |
"should_evaluate": false,
|
| 779 |
"should_log": false,
|
| 780 |
"should_save": true,
|
| 781 |
-
"should_training_stop":
|
| 782 |
},
|
| 783 |
"attributes": {}
|
| 784 |
}
|
| 785 |
},
|
| 786 |
-
"total_flos": 1.
|
| 787 |
"train_batch_size": 10,
|
| 788 |
"trial_name": null,
|
| 789 |
"trial_params": null
|
|
|
|
| 1 |
{
|
| 2 |
"best_metric": 1.1519354581832886,
|
| 3 |
"best_model_checkpoint": "miner_id_besimray/checkpoint-60",
|
| 4 |
+
"epoch": 2.526315789473684,
|
| 5 |
"eval_steps": 20,
|
| 6 |
+
"global_step": 120,
|
| 7 |
"is_hyper_param_search": false,
|
| 8 |
"is_local_process_zero": true,
|
| 9 |
"is_world_process_zero": true,
|
|
|
|
| 755 |
"eval_samples_per_second": 48.569,
|
| 756 |
"eval_steps_per_second": 4.857,
|
| 757 |
"step": 100
|
| 758 |
+
},
|
| 759 |
+
{
|
| 760 |
+
"epoch": 2.126315789473684,
|
| 761 |
+
"grad_norm": 0.47087791562080383,
|
| 762 |
+
"learning_rate": 5.4600950026045326e-05,
|
| 763 |
+
"loss": 0.994,
|
| 764 |
+
"step": 101
|
| 765 |
+
},
|
| 766 |
+
{
|
| 767 |
+
"epoch": 2.1473684210526316,
|
| 768 |
+
"grad_norm": 0.46321335434913635,
|
| 769 |
+
"learning_rate": 5.261313375270014e-05,
|
| 770 |
+
"loss": 0.8965,
|
| 771 |
+
"step": 102
|
| 772 |
+
},
|
| 773 |
+
{
|
| 774 |
+
"epoch": 2.168421052631579,
|
| 775 |
+
"grad_norm": 0.48722636699676514,
|
| 776 |
+
"learning_rate": 5.0649178193565314e-05,
|
| 777 |
+
"loss": 1.0028,
|
| 778 |
+
"step": 103
|
| 779 |
+
},
|
| 780 |
+
{
|
| 781 |
+
"epoch": 2.1894736842105265,
|
| 782 |
+
"grad_norm": 0.5477016568183899,
|
| 783 |
+
"learning_rate": 4.87100722594094e-05,
|
| 784 |
+
"loss": 0.9755,
|
| 785 |
+
"step": 104
|
| 786 |
+
},
|
| 787 |
+
{
|
| 788 |
+
"epoch": 2.2105263157894735,
|
| 789 |
+
"grad_norm": 0.43870726227760315,
|
| 790 |
+
"learning_rate": 4.6796792348466356e-05,
|
| 791 |
+
"loss": 0.9023,
|
| 792 |
+
"step": 105
|
| 793 |
+
},
|
| 794 |
+
{
|
| 795 |
+
"epoch": 2.231578947368421,
|
| 796 |
+
"grad_norm": 0.4974609911441803,
|
| 797 |
+
"learning_rate": 4.491030185478976e-05,
|
| 798 |
+
"loss": 1.0978,
|
| 799 |
+
"step": 106
|
| 800 |
+
},
|
| 801 |
+
{
|
| 802 |
+
"epoch": 2.2526315789473683,
|
| 803 |
+
"grad_norm": 0.48663774132728577,
|
| 804 |
+
"learning_rate": 4.305155068315481e-05,
|
| 805 |
+
"loss": 1.1326,
|
| 806 |
+
"step": 107
|
| 807 |
+
},
|
| 808 |
+
{
|
| 809 |
+
"epoch": 2.2736842105263158,
|
| 810 |
+
"grad_norm": 0.47879499197006226,
|
| 811 |
+
"learning_rate": 4.12214747707527e-05,
|
| 812 |
+
"loss": 0.8403,
|
| 813 |
+
"step": 108
|
| 814 |
+
},
|
| 815 |
+
{
|
| 816 |
+
"epoch": 2.294736842105263,
|
| 817 |
+
"grad_norm": 0.4391883909702301,
|
| 818 |
+
"learning_rate": 3.942099561591802e-05,
|
| 819 |
+
"loss": 1.0096,
|
| 820 |
+
"step": 109
|
| 821 |
+
},
|
| 822 |
+
{
|
| 823 |
+
"epoch": 2.3157894736842106,
|
| 824 |
+
"grad_norm": 0.5225970149040222,
|
| 825 |
+
"learning_rate": 3.7651019814126654e-05,
|
| 826 |
+
"loss": 0.9684,
|
| 827 |
+
"step": 110
|
| 828 |
+
},
|
| 829 |
+
{
|
| 830 |
+
"epoch": 2.336842105263158,
|
| 831 |
+
"grad_norm": 0.529344379901886,
|
| 832 |
+
"learning_rate": 3.591243860149759e-05,
|
| 833 |
+
"loss": 0.9164,
|
| 834 |
+
"step": 111
|
| 835 |
+
},
|
| 836 |
+
{
|
| 837 |
+
"epoch": 2.3578947368421055,
|
| 838 |
+
"grad_norm": 0.4865782856941223,
|
| 839 |
+
"learning_rate": 3.4206127406028745e-05,
|
| 840 |
+
"loss": 1.0993,
|
| 841 |
+
"step": 112
|
| 842 |
+
},
|
| 843 |
+
{
|
| 844 |
+
"epoch": 2.3789473684210525,
|
| 845 |
+
"grad_norm": 0.4908663332462311,
|
| 846 |
+
"learning_rate": 3.253294540679257e-05,
|
| 847 |
+
"loss": 1.1203,
|
| 848 |
+
"step": 113
|
| 849 |
+
},
|
| 850 |
+
{
|
| 851 |
+
"epoch": 2.4,
|
| 852 |
+
"grad_norm": 0.4688137471675873,
|
| 853 |
+
"learning_rate": 3.089373510131354e-05,
|
| 854 |
+
"loss": 0.8358,
|
| 855 |
+
"step": 114
|
| 856 |
+
},
|
| 857 |
+
{
|
| 858 |
+
"epoch": 2.4210526315789473,
|
| 859 |
+
"grad_norm": 0.5007145404815674,
|
| 860 |
+
"learning_rate": 2.9289321881345254e-05,
|
| 861 |
+
"loss": 1.0975,
|
| 862 |
+
"step": 115
|
| 863 |
+
},
|
| 864 |
+
{
|
| 865 |
+
"epoch": 2.442105263157895,
|
| 866 |
+
"grad_norm": 0.4280741214752197,
|
| 867 |
+
"learning_rate": 2.7720513617260856e-05,
|
| 868 |
+
"loss": 1.0134,
|
| 869 |
+
"step": 116
|
| 870 |
+
},
|
| 871 |
+
{
|
| 872 |
+
"epoch": 2.463157894736842,
|
| 873 |
+
"grad_norm": 0.5474169850349426,
|
| 874 |
+
"learning_rate": 2.6188100251265945e-05,
|
| 875 |
+
"loss": 0.9781,
|
| 876 |
+
"step": 117
|
| 877 |
+
},
|
| 878 |
+
{
|
| 879 |
+
"epoch": 2.4842105263157896,
|
| 880 |
+
"grad_norm": 0.4554167091846466,
|
| 881 |
+
"learning_rate": 2.4692853399638917e-05,
|
| 882 |
+
"loss": 1.082,
|
| 883 |
+
"step": 118
|
| 884 |
+
},
|
| 885 |
+
{
|
| 886 |
+
"epoch": 2.5052631578947366,
|
| 887 |
+
"grad_norm": 0.5812304615974426,
|
| 888 |
+
"learning_rate": 2.323552596419889e-05,
|
| 889 |
+
"loss": 0.9826,
|
| 890 |
+
"step": 119
|
| 891 |
+
},
|
| 892 |
+
{
|
| 893 |
+
"epoch": 2.526315789473684,
|
| 894 |
+
"grad_norm": 0.4756172001361847,
|
| 895 |
+
"learning_rate": 2.181685175319702e-05,
|
| 896 |
+
"loss": 1.1045,
|
| 897 |
+
"step": 120
|
| 898 |
+
},
|
| 899 |
+
{
|
| 900 |
+
"epoch": 2.526315789473684,
|
| 901 |
+
"eval_loss": 1.1679396629333496,
|
| 902 |
+
"eval_runtime": 2.0595,
|
| 903 |
+
"eval_samples_per_second": 48.555,
|
| 904 |
+
"eval_steps_per_second": 4.856,
|
| 905 |
+
"step": 120
|
| 906 |
}
|
| 907 |
],
|
| 908 |
"logging_steps": 1,
|
|
|
|
| 917 |
"early_stopping_threshold": 0.0
|
| 918 |
},
|
| 919 |
"attributes": {
|
| 920 |
+
"early_stopping_patience_counter": 3
|
| 921 |
}
|
| 922 |
},
|
| 923 |
"TrainerControl": {
|
|
|
|
| 926 |
"should_evaluate": false,
|
| 927 |
"should_log": false,
|
| 928 |
"should_save": true,
|
| 929 |
+
"should_training_stop": true
|
| 930 |
},
|
| 931 |
"attributes": {}
|
| 932 |
}
|
| 933 |
},
|
| 934 |
+
"total_flos": 1.214189411500032e+16,
|
| 935 |
"train_batch_size": 10,
|
| 936 |
"trial_name": null,
|
| 937 |
"trial_params": null
|