Training in progress, step 4200, checkpoint
Browse files
last-checkpoint/README.md
CHANGED
|
@@ -286,9 +286,9 @@ print(embeddings.shape)
|
|
| 286 |
# Get the similarity scores for the embeddings
|
| 287 |
similarities = model.similarity(embeddings, embeddings)
|
| 288 |
print(similarities)
|
| 289 |
-
# tensor([[1.0000, 0.
|
| 290 |
-
# [0.
|
| 291 |
-
# [0.
|
| 292 |
```
|
| 293 |
|
| 294 |
<!--
|
|
@@ -1304,6 +1304,10 @@ You can finetune this model on your own dataset.
|
|
| 1304 |
| 0.0689 | 3900 | 0.479 |
|
| 1305 |
| 0.0698 | 3950 | 0.5652 |
|
| 1306 |
| 0.0707 | 4000 | 0.5272 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1307 |
|
| 1308 |
|
| 1309 |
### Framework Versions
|
|
|
|
| 286 |
# Get the similarity scores for the embeddings
|
| 287 |
similarities = model.similarity(embeddings, embeddings)
|
| 288 |
print(similarities)
|
| 289 |
+
# tensor([[1.0000, 0.6197, 0.0899],
|
| 290 |
+
# [0.6197, 1.0000, 0.1018],
|
| 291 |
+
# [0.0899, 0.1018, 1.0000]])
|
| 292 |
```
|
| 293 |
|
| 294 |
<!--
|
|
|
|
| 1304 |
| 0.0689 | 3900 | 0.479 |
|
| 1305 |
| 0.0698 | 3950 | 0.5652 |
|
| 1306 |
| 0.0707 | 4000 | 0.5272 |
|
| 1307 |
+
| 0.0716 | 4050 | 0.4904 |
|
| 1308 |
+
| 0.0724 | 4100 | 0.4755 |
|
| 1309 |
+
| 0.0733 | 4150 | 0.4897 |
|
| 1310 |
+
| 0.0742 | 4200 | 0.4679 |
|
| 1311 |
|
| 1312 |
|
| 1313 |
### Framework Versions
|
last-checkpoint/model.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 90864192
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:c6edbab9b964e9ca7b0bab9e7c4f66e7e7bc86ba965571aa4d27edf17e6ee9ae
|
| 3 |
size 90864192
|
last-checkpoint/optimizer.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 180609611
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:49ef51099777f465890db4e01e4666b0ea051323d10a6459c3da580d7d661a3c
|
| 3 |
size 180609611
|
last-checkpoint/rng_state.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14645
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:f6b1fbe40a2f2e085bd8df80fdc83132b78dd273708a845bf0221a8bf75df21c
|
| 3 |
size 14645
|
last-checkpoint/scaler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1383
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:0563f0c122c10b3bb9fb8ef7a029cf4fe15b88c3307945ff50155d3485ee7edd
|
| 3 |
size 1383
|
last-checkpoint/scheduler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1465
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:77c82cc641eba2e585eb912c19a748e90231d19cc7c5365b3984d32e47cc490a
|
| 3 |
size 1465
|
last-checkpoint/trainer_state.json
CHANGED
|
@@ -2,9 +2,9 @@
|
|
| 2 |
"best_global_step": null,
|
| 3 |
"best_metric": null,
|
| 4 |
"best_model_checkpoint": null,
|
| 5 |
-
"epoch": 0.
|
| 6 |
"eval_steps": 500,
|
| 7 |
-
"global_step":
|
| 8 |
"is_hyper_param_search": false,
|
| 9 |
"is_local_process_zero": true,
|
| 10 |
"is_world_process_zero": true,
|
|
@@ -568,6 +568,34 @@
|
|
| 568 |
"learning_rate": 3.5326855123674914e-05,
|
| 569 |
"loss": 0.5272,
|
| 570 |
"step": 4000
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 571 |
}
|
| 572 |
],
|
| 573 |
"logging_steps": 50,
|
|
|
|
| 2 |
"best_global_step": null,
|
| 3 |
"best_metric": null,
|
| 4 |
"best_model_checkpoint": null,
|
| 5 |
+
"epoch": 0.07421674824618756,
|
| 6 |
"eval_steps": 500,
|
| 7 |
+
"global_step": 4200,
|
| 8 |
"is_hyper_param_search": false,
|
| 9 |
"is_local_process_zero": true,
|
| 10 |
"is_world_process_zero": true,
|
|
|
|
| 568 |
"learning_rate": 3.5326855123674914e-05,
|
| 569 |
"loss": 0.5272,
|
| 570 |
"step": 4000
|
| 571 |
+
},
|
| 572 |
+
{
|
| 573 |
+
"epoch": 0.071566150094538,
|
| 574 |
+
"grad_norm": 2.356076717376709,
|
| 575 |
+
"learning_rate": 3.576855123674912e-05,
|
| 576 |
+
"loss": 0.4904,
|
| 577 |
+
"step": 4050
|
| 578 |
+
},
|
| 579 |
+
{
|
| 580 |
+
"epoch": 0.07244968281175453,
|
| 581 |
+
"grad_norm": 1.7549006938934326,
|
| 582 |
+
"learning_rate": 3.621024734982332e-05,
|
| 583 |
+
"loss": 0.4755,
|
| 584 |
+
"step": 4100
|
| 585 |
+
},
|
| 586 |
+
{
|
| 587 |
+
"epoch": 0.07333321552897104,
|
| 588 |
+
"grad_norm": 2.0377912521362305,
|
| 589 |
+
"learning_rate": 3.665194346289753e-05,
|
| 590 |
+
"loss": 0.4897,
|
| 591 |
+
"step": 4150
|
| 592 |
+
},
|
| 593 |
+
{
|
| 594 |
+
"epoch": 0.07421674824618756,
|
| 595 |
+
"grad_norm": 2.4711716175079346,
|
| 596 |
+
"learning_rate": 3.709363957597173e-05,
|
| 597 |
+
"loss": 0.4679,
|
| 598 |
+
"step": 4200
|
| 599 |
}
|
| 600 |
],
|
| 601 |
"logging_steps": 50,
|