Training in progress, step 6400, checkpoint
Browse files
last-checkpoint/README.md
CHANGED
|
@@ -286,9 +286,9 @@ print(embeddings.shape)
|
|
| 286 |
# Get the similarity scores for the embeddings
|
| 287 |
similarities = model.similarity(embeddings, embeddings)
|
| 288 |
print(similarities)
|
| 289 |
-
# tensor([[1.0000, 0.
|
| 290 |
-
# [0.
|
| 291 |
-
# [0.
|
| 292 |
```
|
| 293 |
|
| 294 |
<!--
|
|
@@ -1240,6 +1240,10 @@ You can finetune this model on your own dataset.
|
|
| 1240 |
| 0.1078 | 6100 | 0.3949 |
|
| 1241 |
| 0.1087 | 6150 | 0.4491 |
|
| 1242 |
| 0.1096 | 6200 | 0.435 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1243 |
|
| 1244 |
|
| 1245 |
### Framework Versions
|
|
|
|
| 286 |
# Get the similarity scores for the embeddings
|
| 287 |
similarities = model.similarity(embeddings, embeddings)
|
| 288 |
print(similarities)
|
| 289 |
+
# tensor([[1.0000, 0.6415, 0.0981],
|
| 290 |
+
# [0.6415, 1.0000, 0.1488],
|
| 291 |
+
# [0.0981, 0.1488, 1.0000]])
|
| 292 |
```
|
| 293 |
|
| 294 |
<!--
|
|
|
|
| 1240 |
| 0.1078 | 6100 | 0.3949 |
|
| 1241 |
| 0.1087 | 6150 | 0.4491 |
|
| 1242 |
| 0.1096 | 6200 | 0.435 |
|
| 1243 |
+
| 0.1104 | 6250 | 0.3865 |
|
| 1244 |
+
| 0.1113 | 6300 | 0.4175 |
|
| 1245 |
+
| 0.1122 | 6350 | 0.4387 |
|
| 1246 |
+
| 0.1131 | 6400 | 0.4554 |
|
| 1247 |
|
| 1248 |
|
| 1249 |
### Framework Versions
|
last-checkpoint/model.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 90864192
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:5d95e3d0943240c31da6df746f9f3cca8daffbcbf4c519e4d4baa811b80a13fb
|
| 3 |
size 90864192
|
last-checkpoint/optimizer.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 180609611
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:f29fd6ade8a7591354a162ab7d03eaddfcef75d266f53ddc2969b95870d3ff8d
|
| 3 |
size 180609611
|
last-checkpoint/rng_state.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14645
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:9d3d8144207555c6ec60990d6474d3fd0ec58c08d1eca3fbe5bc862bb1b86096
|
| 3 |
size 14645
|
last-checkpoint/scaler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1383
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:1838e2ba3161fe9cda5ffbc1228be866f2d720c99e25c4fc0f69015d501e1608
|
| 3 |
size 1383
|
last-checkpoint/scheduler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1465
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:526489e33cb7a870777fedd3daf2b089d8bfd92de79a88cf54c04050587dcfea
|
| 3 |
size 1465
|
last-checkpoint/trainer_state.json
CHANGED
|
@@ -2,9 +2,9 @@
|
|
| 2 |
"best_global_step": null,
|
| 3 |
"best_metric": null,
|
| 4 |
"best_model_checkpoint": null,
|
| 5 |
-
"epoch": 0.
|
| 6 |
"eval_steps": 500,
|
| 7 |
-
"global_step":
|
| 8 |
"is_hyper_param_search": false,
|
| 9 |
"is_local_process_zero": true,
|
| 10 |
"is_world_process_zero": true,
|
|
@@ -876,6 +876,34 @@
|
|
| 876 |
"learning_rate": 4.947085272231058e-05,
|
| 877 |
"loss": 0.435,
|
| 878 |
"step": 6200
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 879 |
}
|
| 880 |
],
|
| 881 |
"logging_steps": 50,
|
|
|
|
| 2 |
"best_global_step": null,
|
| 3 |
"best_metric": null,
|
| 4 |
"best_model_checkpoint": null,
|
| 5 |
+
"epoch": 0.11309218780371437,
|
| 6 |
"eval_steps": 500,
|
| 7 |
+
"global_step": 6400,
|
| 8 |
"is_hyper_param_search": false,
|
| 9 |
"is_local_process_zero": true,
|
| 10 |
"is_world_process_zero": true,
|
|
|
|
| 876 |
"learning_rate": 4.947085272231058e-05,
|
| 877 |
"loss": 0.435,
|
| 878 |
"step": 6200
|
| 879 |
+
},
|
| 880 |
+
{
|
| 881 |
+
"epoch": 0.11044158965206481,
|
| 882 |
+
"grad_norm": 2.447551727294922,
|
| 883 |
+
"learning_rate": 4.9421766703972046e-05,
|
| 884 |
+
"loss": 0.3865,
|
| 885 |
+
"step": 6250
|
| 886 |
+
},
|
| 887 |
+
{
|
| 888 |
+
"epoch": 0.11132512236928134,
|
| 889 |
+
"grad_norm": 2.126950740814209,
|
| 890 |
+
"learning_rate": 4.9372680685633504e-05,
|
| 891 |
+
"loss": 0.4175,
|
| 892 |
+
"step": 6300
|
| 893 |
+
},
|
| 894 |
+
{
|
| 895 |
+
"epoch": 0.11220865508649785,
|
| 896 |
+
"grad_norm": 2.22995924949646,
|
| 897 |
+
"learning_rate": 4.932359466729497e-05,
|
| 898 |
+
"loss": 0.4387,
|
| 899 |
+
"step": 6350
|
| 900 |
+
},
|
| 901 |
+
{
|
| 902 |
+
"epoch": 0.11309218780371437,
|
| 903 |
+
"grad_norm": 1.5801736116409302,
|
| 904 |
+
"learning_rate": 4.927450864895643e-05,
|
| 905 |
+
"loss": 0.4554,
|
| 906 |
+
"step": 6400
|
| 907 |
}
|
| 908 |
],
|
| 909 |
"logging_steps": 50,
|