Training in progress, step 6800, checkpoint
Browse files
last-checkpoint/README.md
CHANGED
|
@@ -286,9 +286,9 @@ print(embeddings.shape)
|
|
| 286 |
# Get the similarity scores for the embeddings
|
| 287 |
similarities = model.similarity(embeddings, embeddings)
|
| 288 |
print(similarities)
|
| 289 |
-
# tensor([[1.0000, 0.
|
| 290 |
-
# [0.
|
| 291 |
-
# [0.
|
| 292 |
```
|
| 293 |
|
| 294 |
<!--
|
|
@@ -1248,6 +1248,10 @@ You can finetune this model on your own dataset.
|
|
| 1248 |
| 0.1149 | 6500 | 0.4746 |
|
| 1249 |
| 0.1157 | 6550 | 0.4511 |
|
| 1250 |
| 0.1166 | 6600 | 0.3871 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1251 |
|
| 1252 |
|
| 1253 |
### Framework Versions
|
|
|
|
| 286 |
# Get the similarity scores for the embeddings
|
| 287 |
similarities = model.similarity(embeddings, embeddings)
|
| 288 |
print(similarities)
|
| 289 |
+
# tensor([[1.0000, 0.6386, 0.0907],
|
| 290 |
+
# [0.6386, 1.0000, 0.1293],
|
| 291 |
+
# [0.0907, 0.1293, 1.0000]])
|
| 292 |
```
|
| 293 |
|
| 294 |
<!--
|
|
|
|
| 1248 |
| 0.1149 | 6500 | 0.4746 |
|
| 1249 |
| 0.1157 | 6550 | 0.4511 |
|
| 1250 |
| 0.1166 | 6600 | 0.3871 |
|
| 1251 |
+
| 0.1175 | 6650 | 0.449 |
|
| 1252 |
+
| 0.1184 | 6700 | 0.3458 |
|
| 1253 |
+
| 0.1193 | 6750 | 0.4791 |
|
| 1254 |
+
| 0.1202 | 6800 | 0.4445 |
|
| 1255 |
|
| 1256 |
|
| 1257 |
### Framework Versions
|
last-checkpoint/model.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 90864192
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:b02e9263fae48b1d9ab2f1178fcee32bed4de27687bb815e26abc0ce008c821e
|
| 3 |
size 90864192
|
last-checkpoint/optimizer.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 180609611
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:913066d7b5165875dea05ddec8312156da1e56f82e24e99d791ccdb18b2f2155
|
| 3 |
size 180609611
|
last-checkpoint/rng_state.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14645
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:e882bce39886348a50b26b191125b019e0e62425e804cb094d791f2c1855ea8c
|
| 3 |
size 14645
|
last-checkpoint/scaler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1383
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:50fb3d1e5c722b019233137abea1c664e37622c6ede4dfdabd46503c9bbc26b1
|
| 3 |
size 1383
|
last-checkpoint/scheduler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1465
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:11e206eec8216dd19609780d011982a4e55c6f1b80f70dfd43f28e3b1dc99011
|
| 3 |
size 1465
|
last-checkpoint/trainer_state.json
CHANGED
|
@@ -2,9 +2,9 @@
|
|
| 2 |
"best_global_step": null,
|
| 3 |
"best_metric": null,
|
| 4 |
"best_model_checkpoint": null,
|
| 5 |
-
"epoch": 0.
|
| 6 |
"eval_steps": 500,
|
| 7 |
-
"global_step":
|
| 8 |
"is_hyper_param_search": false,
|
| 9 |
"is_local_process_zero": true,
|
| 10 |
"is_world_process_zero": true,
|
|
@@ -932,6 +932,34 @@
|
|
| 932 |
"learning_rate": 4.9078164575602285e-05,
|
| 933 |
"loss": 0.3871,
|
| 934 |
"step": 6600
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 935 |
}
|
| 936 |
],
|
| 937 |
"logging_steps": 50,
|
|
|
|
| 2 |
"best_global_step": null,
|
| 3 |
"best_metric": null,
|
| 4 |
"best_model_checkpoint": null,
|
| 5 |
+
"epoch": 0.12016044954144652,
|
| 6 |
"eval_steps": 500,
|
| 7 |
+
"global_step": 6800,
|
| 8 |
"is_hyper_param_search": false,
|
| 9 |
"is_local_process_zero": true,
|
| 10 |
"is_world_process_zero": true,
|
|
|
|
| 932 |
"learning_rate": 4.9078164575602285e-05,
|
| 933 |
"loss": 0.3871,
|
| 934 |
"step": 6600
|
| 935 |
+
},
|
| 936 |
+
{
|
| 937 |
+
"epoch": 0.11750985138979697,
|
| 938 |
+
"grad_norm": 1.969202995300293,
|
| 939 |
+
"learning_rate": 4.902907855726375e-05,
|
| 940 |
+
"loss": 0.449,
|
| 941 |
+
"step": 6650
|
| 942 |
+
},
|
| 943 |
+
{
|
| 944 |
+
"epoch": 0.11839338410701349,
|
| 945 |
+
"grad_norm": 1.9535086154937744,
|
| 946 |
+
"learning_rate": 4.8979992538925214e-05,
|
| 947 |
+
"loss": 0.3458,
|
| 948 |
+
"step": 6700
|
| 949 |
+
},
|
| 950 |
+
{
|
| 951 |
+
"epoch": 0.11927691682423,
|
| 952 |
+
"grad_norm": 1.7251821756362915,
|
| 953 |
+
"learning_rate": 4.893090652058668e-05,
|
| 954 |
+
"loss": 0.4791,
|
| 955 |
+
"step": 6750
|
| 956 |
+
},
|
| 957 |
+
{
|
| 958 |
+
"epoch": 0.12016044954144652,
|
| 959 |
+
"grad_norm": 1.7175688743591309,
|
| 960 |
+
"learning_rate": 4.8881820502248144e-05,
|
| 961 |
+
"loss": 0.4445,
|
| 962 |
+
"step": 6800
|
| 963 |
}
|
| 964 |
],
|
| 965 |
"logging_steps": 50,
|