Training in progress, step 5100, checkpoint
Browse files
last-checkpoint/adapter_model.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 527048968
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:0b8241d490cf7c6d73daa6e58d1e953d37a78b29a116a5819c07ede09f2a18a6
|
| 3 |
size 527048968
|
last-checkpoint/optimizer.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1054135994
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:8a28d22fdc04db2c35d665a1ac5cb8cd349b54d05fc00d81df4548b08481678c
|
| 3 |
size 1054135994
|
last-checkpoint/rng_state.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14244
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:67c6f49cb50d8b09b0f2e9704dcb4986f8fc63f53d3b695322fdb8756b868c02
|
| 3 |
size 14244
|
last-checkpoint/scheduler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1064
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:736f4102e80c412e5bd9ae55e7c4ee4195aa9541999b56cf808f798e57d982a7
|
| 3 |
size 1064
|
last-checkpoint/trainer_state.json
CHANGED
|
@@ -1,9 +1,9 @@
|
|
| 1 |
{
|
| 2 |
"best_metric": 0.7166205048561096,
|
| 3 |
"best_model_checkpoint": "./output/checkpoint-450",
|
| 4 |
-
"epoch":
|
| 5 |
"eval_steps": 150,
|
| 6 |
-
"global_step":
|
| 7 |
"is_hyper_param_search": false,
|
| 8 |
"is_local_process_zero": true,
|
| 9 |
"is_world_process_zero": true,
|
|
@@ -4792,6 +4792,151 @@
|
|
| 4792 |
"EMA_steps_per_second": 23.17,
|
| 4793 |
"epoch": 215.2173913043478,
|
| 4794 |
"step": 4950
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 4795 |
}
|
| 4796 |
],
|
| 4797 |
"logging_steps": 10,
|
|
@@ -4811,7 +4956,7 @@
|
|
| 4811 |
"attributes": {}
|
| 4812 |
}
|
| 4813 |
},
|
| 4814 |
-
"total_flos": 1.
|
| 4815 |
"train_batch_size": 4,
|
| 4816 |
"trial_name": null,
|
| 4817 |
"trial_params": null
|
|
|
|
| 1 |
{
|
| 2 |
"best_metric": 0.7166205048561096,
|
| 3 |
"best_model_checkpoint": "./output/checkpoint-450",
|
| 4 |
+
"epoch": 221.7391304347826,
|
| 5 |
"eval_steps": 150,
|
| 6 |
+
"global_step": 5100,
|
| 7 |
"is_hyper_param_search": false,
|
| 8 |
"is_local_process_zero": true,
|
| 9 |
"is_world_process_zero": true,
|
|
|
|
| 4792 |
"EMA_steps_per_second": 23.17,
|
| 4793 |
"epoch": 215.2173913043478,
|
| 4794 |
"step": 4950
|
| 4795 |
+
},
|
| 4796 |
+
{
|
| 4797 |
+
"epoch": 215.65217391304347,
|
| 4798 |
+
"grad_norm": 2.2454140186309814,
|
| 4799 |
+
"learning_rate": 1.5293145627308224e-06,
|
| 4800 |
+
"loss": 0.2412,
|
| 4801 |
+
"step": 4960
|
| 4802 |
+
},
|
| 4803 |
+
{
|
| 4804 |
+
"epoch": 216.08695652173913,
|
| 4805 |
+
"grad_norm": 2.0021111965179443,
|
| 4806 |
+
"learning_rate": 1.52929440471819e-06,
|
| 4807 |
+
"loss": 0.2214,
|
| 4808 |
+
"step": 4970
|
| 4809 |
+
},
|
| 4810 |
+
{
|
| 4811 |
+
"epoch": 216.52173913043478,
|
| 4812 |
+
"grad_norm": 1.6608623266220093,
|
| 4813 |
+
"learning_rate": 1.5292739437586956e-06,
|
| 4814 |
+
"loss": 0.215,
|
| 4815 |
+
"step": 4980
|
| 4816 |
+
},
|
| 4817 |
+
{
|
| 4818 |
+
"epoch": 216.95652173913044,
|
| 4819 |
+
"grad_norm": 1.7904845476150513,
|
| 4820 |
+
"learning_rate": 1.5292531798604489e-06,
|
| 4821 |
+
"loss": 0.2533,
|
| 4822 |
+
"step": 4990
|
| 4823 |
+
},
|
| 4824 |
+
{
|
| 4825 |
+
"epoch": 217.3913043478261,
|
| 4826 |
+
"grad_norm": 2.007638931274414,
|
| 4827 |
+
"learning_rate": 1.52923211303168e-06,
|
| 4828 |
+
"loss": 0.2257,
|
| 4829 |
+
"step": 5000
|
| 4830 |
+
},
|
| 4831 |
+
{
|
| 4832 |
+
"epoch": 217.82608695652175,
|
| 4833 |
+
"grad_norm": 2.0071678161621094,
|
| 4834 |
+
"learning_rate": 1.5292107432807391e-06,
|
| 4835 |
+
"loss": 0.2142,
|
| 4836 |
+
"step": 5010
|
| 4837 |
+
},
|
| 4838 |
+
{
|
| 4839 |
+
"epoch": 218.2608695652174,
|
| 4840 |
+
"grad_norm": 1.6132997274398804,
|
| 4841 |
+
"learning_rate": 1.5291890706160969e-06,
|
| 4842 |
+
"loss": 0.2562,
|
| 4843 |
+
"step": 5020
|
| 4844 |
+
},
|
| 4845 |
+
{
|
| 4846 |
+
"epoch": 218.69565217391303,
|
| 4847 |
+
"grad_norm": 1.4705387353897095,
|
| 4848 |
+
"learning_rate": 1.5291670950463434e-06,
|
| 4849 |
+
"loss": 0.2503,
|
| 4850 |
+
"step": 5030
|
| 4851 |
+
},
|
| 4852 |
+
{
|
| 4853 |
+
"epoch": 219.1304347826087,
|
| 4854 |
+
"grad_norm": 2.801940679550171,
|
| 4855 |
+
"learning_rate": 1.529144816580189e-06,
|
| 4856 |
+
"loss": 0.1838,
|
| 4857 |
+
"step": 5040
|
| 4858 |
+
},
|
| 4859 |
+
{
|
| 4860 |
+
"epoch": 219.56521739130434,
|
| 4861 |
+
"grad_norm": 2.1510982513427734,
|
| 4862 |
+
"learning_rate": 1.5291222352264644e-06,
|
| 4863 |
+
"loss": 0.2212,
|
| 4864 |
+
"step": 5050
|
| 4865 |
+
},
|
| 4866 |
+
{
|
| 4867 |
+
"epoch": 220.0,
|
| 4868 |
+
"grad_norm": 3.703181028366089,
|
| 4869 |
+
"learning_rate": 1.5290993509941199e-06,
|
| 4870 |
+
"loss": 0.2361,
|
| 4871 |
+
"step": 5060
|
| 4872 |
+
},
|
| 4873 |
+
{
|
| 4874 |
+
"epoch": 220.43478260869566,
|
| 4875 |
+
"grad_norm": 2.0578255653381348,
|
| 4876 |
+
"learning_rate": 1.5290761638922261e-06,
|
| 4877 |
+
"loss": 0.1747,
|
| 4878 |
+
"step": 5070
|
| 4879 |
+
},
|
| 4880 |
+
{
|
| 4881 |
+
"epoch": 220.8695652173913,
|
| 4882 |
+
"grad_norm": 2.1845853328704834,
|
| 4883 |
+
"learning_rate": 1.5290526739299738e-06,
|
| 4884 |
+
"loss": 0.2639,
|
| 4885 |
+
"step": 5080
|
| 4886 |
+
},
|
| 4887 |
+
{
|
| 4888 |
+
"epoch": 221.30434782608697,
|
| 4889 |
+
"grad_norm": 2.506991386413574,
|
| 4890 |
+
"learning_rate": 1.5290288811166734e-06,
|
| 4891 |
+
"loss": 0.2194,
|
| 4892 |
+
"step": 5090
|
| 4893 |
+
},
|
| 4894 |
+
{
|
| 4895 |
+
"epoch": 221.7391304347826,
|
| 4896 |
+
"grad_norm": 2.7278904914855957,
|
| 4897 |
+
"learning_rate": 1.529004785461756e-06,
|
| 4898 |
+
"loss": 0.2112,
|
| 4899 |
+
"step": 5100
|
| 4900 |
+
},
|
| 4901 |
+
{
|
| 4902 |
+
"epoch": 221.7391304347826,
|
| 4903 |
+
"eval_loss": 0.9890514612197876,
|
| 4904 |
+
"eval_runtime": 0.4992,
|
| 4905 |
+
"eval_samples_per_second": 20.032,
|
| 4906 |
+
"eval_steps_per_second": 20.032,
|
| 4907 |
+
"step": 5100
|
| 4908 |
+
},
|
| 4909 |
+
{
|
| 4910 |
+
"Start_State_loss": 0.8609819412231445,
|
| 4911 |
+
"Start_State_runtime": 0.4385,
|
| 4912 |
+
"Start_State_samples_per_second": 22.807,
|
| 4913 |
+
"Start_State_steps_per_second": 22.807,
|
| 4914 |
+
"epoch": 221.7391304347826,
|
| 4915 |
+
"step": 5100
|
| 4916 |
+
},
|
| 4917 |
+
{
|
| 4918 |
+
"Raw_Model_loss": 0.9890514612197876,
|
| 4919 |
+
"Raw_Model_runtime": 0.4364,
|
| 4920 |
+
"Raw_Model_samples_per_second": 22.915,
|
| 4921 |
+
"Raw_Model_steps_per_second": 22.915,
|
| 4922 |
+
"epoch": 221.7391304347826,
|
| 4923 |
+
"step": 5100
|
| 4924 |
+
},
|
| 4925 |
+
{
|
| 4926 |
+
"SWA_loss": 0.8240174055099487,
|
| 4927 |
+
"SWA_runtime": 0.4349,
|
| 4928 |
+
"SWA_samples_per_second": 22.995,
|
| 4929 |
+
"SWA_steps_per_second": 22.995,
|
| 4930 |
+
"epoch": 221.7391304347826,
|
| 4931 |
+
"step": 5100
|
| 4932 |
+
},
|
| 4933 |
+
{
|
| 4934 |
+
"EMA_loss": 0.8594372868537903,
|
| 4935 |
+
"EMA_runtime": 0.4421,
|
| 4936 |
+
"EMA_samples_per_second": 22.619,
|
| 4937 |
+
"EMA_steps_per_second": 22.619,
|
| 4938 |
+
"epoch": 221.7391304347826,
|
| 4939 |
+
"step": 5100
|
| 4940 |
}
|
| 4941 |
],
|
| 4942 |
"logging_steps": 10,
|
|
|
|
| 4956 |
"attributes": {}
|
| 4957 |
}
|
| 4958 |
},
|
| 4959 |
+
"total_flos": 1.3106424730691174e+17,
|
| 4960 |
"train_batch_size": 4,
|
| 4961 |
"trial_name": null,
|
| 4962 |
"trial_params": null
|