Training in progress, step 6600, checkpoint
Browse files
last-checkpoint/README.md
CHANGED
|
@@ -286,9 +286,9 @@ print(embeddings.shape)
|
|
| 286 |
# Get the similarity scores for the embeddings
|
| 287 |
similarities = model.similarity(embeddings, embeddings)
|
| 288 |
print(similarities)
|
| 289 |
-
# tensor([[1.0000, 0.
|
| 290 |
-
# [0.
|
| 291 |
-
# [0.
|
| 292 |
```
|
| 293 |
|
| 294 |
<!--
|
|
@@ -1244,6 +1244,10 @@ You can finetune this model on your own dataset.
|
|
| 1244 |
| 0.1113 | 6300 | 0.4175 |
|
| 1245 |
| 0.1122 | 6350 | 0.4387 |
|
| 1246 |
| 0.1131 | 6400 | 0.4554 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1247 |
|
| 1248 |
|
| 1249 |
### Framework Versions
|
|
|
|
| 286 |
# Get the similarity scores for the embeddings
|
| 287 |
similarities = model.similarity(embeddings, embeddings)
|
| 288 |
print(similarities)
|
| 289 |
+
# tensor([[1.0000, 0.6159, 0.1268],
|
| 290 |
+
# [0.6159, 1.0000, 0.1762],
|
| 291 |
+
# [0.1268, 0.1762, 1.0000]])
|
| 292 |
```
|
| 293 |
|
| 294 |
<!--
|
|
|
|
| 1244 |
| 0.1113 | 6300 | 0.4175 |
|
| 1245 |
| 0.1122 | 6350 | 0.4387 |
|
| 1246 |
| 0.1131 | 6400 | 0.4554 |
|
| 1247 |
+
| 0.1140 | 6450 | 0.581 |
|
| 1248 |
+
| 0.1149 | 6500 | 0.4746 |
|
| 1249 |
+
| 0.1157 | 6550 | 0.4511 |
|
| 1250 |
+
| 0.1166 | 6600 | 0.3871 |
|
| 1251 |
|
| 1252 |
|
| 1253 |
### Framework Versions
|
last-checkpoint/model.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 90864192
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:533a4ed7cc131c123cb5372a76db759d112ce54c54aa81a76148675ba2517e16
|
| 3 |
size 90864192
|
last-checkpoint/optimizer.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 180609611
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:e4b208b08280c11c448779e04cc3b8922c613d68c1eab949db3e687b7cc69b1e
|
| 3 |
size 180609611
|
last-checkpoint/rng_state.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14645
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:be44f909c9d108035e113ae97840e629f406742e33dabbb6dac446308a2323aa
|
| 3 |
size 14645
|
last-checkpoint/scaler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1383
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:c456427ce8bd7ccdacb5f3df51e3a128141f61214f0829c4af25f2c18a7d9842
|
| 3 |
size 1383
|
last-checkpoint/scheduler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1465
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:825a93f9b8689a94da1d037071f9688200700d005038804108d953a8cdcd62a0
|
| 3 |
size 1465
|
last-checkpoint/trainer_state.json
CHANGED
|
@@ -2,9 +2,9 @@
|
|
| 2 |
"best_global_step": null,
|
| 3 |
"best_metric": null,
|
| 4 |
"best_model_checkpoint": null,
|
| 5 |
-
"epoch": 0.
|
| 6 |
"eval_steps": 500,
|
| 7 |
-
"global_step":
|
| 8 |
"is_hyper_param_search": false,
|
| 9 |
"is_local_process_zero": true,
|
| 10 |
"is_world_process_zero": true,
|
|
@@ -904,6 +904,34 @@
|
|
| 904 |
"learning_rate": 4.927450864895643e-05,
|
| 905 |
"loss": 0.4554,
|
| 906 |
"step": 6400
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 907 |
}
|
| 908 |
],
|
| 909 |
"logging_steps": 50,
|
|
|
|
| 2 |
"best_global_step": null,
|
| 3 |
"best_metric": null,
|
| 4 |
"best_model_checkpoint": null,
|
| 5 |
+
"epoch": 0.11662631867258044,
|
| 6 |
"eval_steps": 500,
|
| 7 |
+
"global_step": 6600,
|
| 8 |
"is_hyper_param_search": false,
|
| 9 |
"is_local_process_zero": true,
|
| 10 |
"is_world_process_zero": true,
|
|
|
|
| 904 |
"learning_rate": 4.927450864895643e-05,
|
| 905 |
"loss": 0.4554,
|
| 906 |
"step": 6400
|
| 907 |
+
},
|
| 908 |
+
{
|
| 909 |
+
"epoch": 0.11397572052093088,
|
| 910 |
+
"grad_norm": 4.113645553588867,
|
| 911 |
+
"learning_rate": 4.92254226306179e-05,
|
| 912 |
+
"loss": 0.581,
|
| 913 |
+
"step": 6450
|
| 914 |
+
},
|
| 915 |
+
{
|
| 916 |
+
"epoch": 0.11485925323814741,
|
| 917 |
+
"grad_norm": 1.6027569770812988,
|
| 918 |
+
"learning_rate": 4.917633661227936e-05,
|
| 919 |
+
"loss": 0.4746,
|
| 920 |
+
"step": 6500
|
| 921 |
+
},
|
| 922 |
+
{
|
| 923 |
+
"epoch": 0.11574278595536393,
|
| 924 |
+
"grad_norm": 2.0555272102355957,
|
| 925 |
+
"learning_rate": 4.912725059394083e-05,
|
| 926 |
+
"loss": 0.4511,
|
| 927 |
+
"step": 6550
|
| 928 |
+
},
|
| 929 |
+
{
|
| 930 |
+
"epoch": 0.11662631867258044,
|
| 931 |
+
"grad_norm": 2.6827495098114014,
|
| 932 |
+
"learning_rate": 4.9078164575602285e-05,
|
| 933 |
+
"loss": 0.3871,
|
| 934 |
+
"step": 6600
|
| 935 |
}
|
| 936 |
],
|
| 937 |
"logging_steps": 50,
|