Training in progress, step 714, checkpoint
Browse files
last-checkpoint/adapter_model.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 310662536
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:294e8fe234cd6d1f59b14c1140e50fdbdb90a57220e6173c48a127e84e49c4e8
|
| 3 |
size 310662536
|
last-checkpoint/optimizer.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 162452055
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:31fca9108c676adcfdd8ed80962605abd4605ab61e005b4a771e012d23b1c627
|
| 3 |
size 162452055
|
last-checkpoint/scaler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1383
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:cb52ddc7b4d4702afb8bac65566641a27c974b1518c3f2f4987cdc6cc976a909
|
| 3 |
size 1383
|
last-checkpoint/scheduler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1465
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:9e5d5ca4d0f7bab3012c2ed7b0a337ccf31a6ebdeb831e6c0b5b1e71cabedc08
|
| 3 |
size 1465
|
last-checkpoint/trainer_state.json
CHANGED
|
@@ -2,9 +2,9 @@
|
|
| 2 |
"best_global_step": null,
|
| 3 |
"best_metric": null,
|
| 4 |
"best_model_checkpoint": null,
|
| 5 |
-
"epoch": 0
|
| 6 |
"eval_steps": 500,
|
| 7 |
-
"global_step":
|
| 8 |
"is_hyper_param_search": false,
|
| 9 |
"is_local_process_zero": true,
|
| 10 |
"is_world_process_zero": true,
|
|
@@ -4908,6 +4908,104 @@
|
|
| 4908 |
"learning_rate": 4.2313117066290545e-06,
|
| 4909 |
"loss": 0.5183,
|
| 4910 |
"step": 700
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 4911 |
}
|
| 4912 |
],
|
| 4913 |
"logging_steps": 1,
|
|
@@ -4922,12 +5020,12 @@
|
|
| 4922 |
"should_evaluate": false,
|
| 4923 |
"should_log": false,
|
| 4924 |
"should_save": true,
|
| 4925 |
-
"should_training_stop":
|
| 4926 |
},
|
| 4927 |
"attributes": {}
|
| 4928 |
}
|
| 4929 |
},
|
| 4930 |
-
"total_flos": 8.
|
| 4931 |
"train_batch_size": 2,
|
| 4932 |
"trial_name": null,
|
| 4933 |
"trial_params": null
|
|
|
|
| 2 |
"best_global_step": null,
|
| 3 |
"best_metric": null,
|
| 4 |
"best_model_checkpoint": null,
|
| 5 |
+
"epoch": 1.0,
|
| 6 |
"eval_steps": 500,
|
| 7 |
+
"global_step": 714,
|
| 8 |
"is_hyper_param_search": false,
|
| 9 |
"is_local_process_zero": true,
|
| 10 |
"is_world_process_zero": true,
|
|
|
|
| 4908 |
"learning_rate": 4.2313117066290545e-06,
|
| 4909 |
"loss": 0.5183,
|
| 4910 |
"step": 700
|
| 4911 |
+
},
|
| 4912 |
+
{
|
| 4913 |
+
"epoch": 0.9824807288016818,
|
| 4914 |
+
"grad_norm": 1.2964062690734863,
|
| 4915 |
+
"learning_rate": 3.949224259520452e-06,
|
| 4916 |
+
"loss": 0.45,
|
| 4917 |
+
"step": 701
|
| 4918 |
+
},
|
| 4919 |
+
{
|
| 4920 |
+
"epoch": 0.9838822704975473,
|
| 4921 |
+
"grad_norm": 1.2322115898132324,
|
| 4922 |
+
"learning_rate": 3.667136812411848e-06,
|
| 4923 |
+
"loss": 0.3094,
|
| 4924 |
+
"step": 702
|
| 4925 |
+
},
|
| 4926 |
+
{
|
| 4927 |
+
"epoch": 0.9852838121934128,
|
| 4928 |
+
"grad_norm": 1.0808106660842896,
|
| 4929 |
+
"learning_rate": 3.3850493653032446e-06,
|
| 4930 |
+
"loss": 0.2605,
|
| 4931 |
+
"step": 703
|
| 4932 |
+
},
|
| 4933 |
+
{
|
| 4934 |
+
"epoch": 0.9866853538892782,
|
| 4935 |
+
"grad_norm": 1.4192579984664917,
|
| 4936 |
+
"learning_rate": 3.1029619181946405e-06,
|
| 4937 |
+
"loss": 0.3415,
|
| 4938 |
+
"step": 704
|
| 4939 |
+
},
|
| 4940 |
+
{
|
| 4941 |
+
"epoch": 0.9880868955851436,
|
| 4942 |
+
"grad_norm": 1.0820287466049194,
|
| 4943 |
+
"learning_rate": 2.8208744710860367e-06,
|
| 4944 |
+
"loss": 0.2336,
|
| 4945 |
+
"step": 705
|
| 4946 |
+
},
|
| 4947 |
+
{
|
| 4948 |
+
"epoch": 0.9894884372810091,
|
| 4949 |
+
"grad_norm": 1.4534846544265747,
|
| 4950 |
+
"learning_rate": 2.538787023977433e-06,
|
| 4951 |
+
"loss": 0.4909,
|
| 4952 |
+
"step": 706
|
| 4953 |
+
},
|
| 4954 |
+
{
|
| 4955 |
+
"epoch": 0.9908899789768746,
|
| 4956 |
+
"grad_norm": 1.3459112644195557,
|
| 4957 |
+
"learning_rate": 2.2566995768688293e-06,
|
| 4958 |
+
"loss": 0.3371,
|
| 4959 |
+
"step": 707
|
| 4960 |
+
},
|
| 4961 |
+
{
|
| 4962 |
+
"epoch": 0.9922915206727401,
|
| 4963 |
+
"grad_norm": 1.2415499687194824,
|
| 4964 |
+
"learning_rate": 1.974612129760226e-06,
|
| 4965 |
+
"loss": 0.2859,
|
| 4966 |
+
"step": 708
|
| 4967 |
+
},
|
| 4968 |
+
{
|
| 4969 |
+
"epoch": 0.9936930623686054,
|
| 4970 |
+
"grad_norm": 1.022193431854248,
|
| 4971 |
+
"learning_rate": 1.6925246826516223e-06,
|
| 4972 |
+
"loss": 0.3233,
|
| 4973 |
+
"step": 709
|
| 4974 |
+
},
|
| 4975 |
+
{
|
| 4976 |
+
"epoch": 0.9950946040644709,
|
| 4977 |
+
"grad_norm": 1.0108222961425781,
|
| 4978 |
+
"learning_rate": 1.4104372355430184e-06,
|
| 4979 |
+
"loss": 0.2245,
|
| 4980 |
+
"step": 710
|
| 4981 |
+
},
|
| 4982 |
+
{
|
| 4983 |
+
"epoch": 0.9964961457603364,
|
| 4984 |
+
"grad_norm": 1.4070162773132324,
|
| 4985 |
+
"learning_rate": 1.1283497884344147e-06,
|
| 4986 |
+
"loss": 0.4077,
|
| 4987 |
+
"step": 711
|
| 4988 |
+
},
|
| 4989 |
+
{
|
| 4990 |
+
"epoch": 0.9978976874562018,
|
| 4991 |
+
"grad_norm": 1.2831224203109741,
|
| 4992 |
+
"learning_rate": 8.462623413258111e-07,
|
| 4993 |
+
"loss": 0.4383,
|
| 4994 |
+
"step": 712
|
| 4995 |
+
},
|
| 4996 |
+
{
|
| 4997 |
+
"epoch": 0.9992992291520673,
|
| 4998 |
+
"grad_norm": 1.686324119567871,
|
| 4999 |
+
"learning_rate": 5.641748942172073e-07,
|
| 5000 |
+
"loss": 0.5735,
|
| 5001 |
+
"step": 713
|
| 5002 |
+
},
|
| 5003 |
+
{
|
| 5004 |
+
"epoch": 1.0,
|
| 5005 |
+
"grad_norm": 1.848501205444336,
|
| 5006 |
+
"learning_rate": 2.8208744710860366e-07,
|
| 5007 |
+
"loss": 0.429,
|
| 5008 |
+
"step": 714
|
| 5009 |
}
|
| 5010 |
],
|
| 5011 |
"logging_steps": 1,
|
|
|
|
| 5020 |
"should_evaluate": false,
|
| 5021 |
"should_log": false,
|
| 5022 |
"should_save": true,
|
| 5023 |
+
"should_training_stop": true
|
| 5024 |
},
|
| 5025 |
"attributes": {}
|
| 5026 |
}
|
| 5027 |
},
|
| 5028 |
+
"total_flos": 8.19498707616338e+16,
|
| 5029 |
"train_batch_size": 2,
|
| 5030 |
"trial_name": null,
|
| 5031 |
"trial_params": null
|