Training in progress, epoch 3, checkpoint
Browse files
last-checkpoint/README.md
CHANGED
|
@@ -38,6 +38,21 @@ widget:
|
|
| 38 |
- kids game
|
| 39 |
pipeline_tag: sentence-similarity
|
| 40 |
library_name: sentence-transformers
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 41 |
---
|
| 42 |
|
| 43 |
# SentenceTransformer based on sentence-transformers/all-MiniLM-L6-v2
|
|
@@ -101,9 +116,9 @@ print(embeddings.shape)
|
|
| 101 |
# Get the similarity scores for the embeddings
|
| 102 |
similarities = model.similarity(embeddings, embeddings)
|
| 103 |
print(similarities)
|
| 104 |
-
# tensor([[1.0000, 0.
|
| 105 |
-
# [0.
|
| 106 |
-
# [0.
|
| 107 |
```
|
| 108 |
|
| 109 |
<!--
|
|
@@ -130,6 +145,18 @@ You can finetune this model on your own dataset.
|
|
| 130 |
*List how the model may foreseeably be misused and address what users ought not to do with the model.*
|
| 131 |
-->
|
| 132 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 133 |
<!--
|
| 134 |
## Bias, Risks and Limitations
|
| 135 |
|
|
@@ -337,9 +364,10 @@ You can finetune this model on your own dataset.
|
|
| 337 |
</details>
|
| 338 |
|
| 339 |
### Training Logs
|
| 340 |
-
| Epoch | Step | Training Loss |
|
| 341 |
-
|:------:|:----:|:-------------:|
|
| 342 |
-
| 0.0004 | 1 | 5.3655 |
|
|
|
|
| 343 |
|
| 344 |
|
| 345 |
### Framework Versions
|
|
|
|
| 38 |
- kids game
|
| 39 |
pipeline_tag: sentence-similarity
|
| 40 |
library_name: sentence-transformers
|
| 41 |
+
metrics:
|
| 42 |
+
- cosine_accuracy
|
| 43 |
+
model-index:
|
| 44 |
+
- name: SentenceTransformer based on sentence-transformers/all-MiniLM-L6-v2
|
| 45 |
+
results:
|
| 46 |
+
- task:
|
| 47 |
+
type: triplet
|
| 48 |
+
name: Triplet
|
| 49 |
+
dataset:
|
| 50 |
+
name: Unknown
|
| 51 |
+
type: unknown
|
| 52 |
+
metrics:
|
| 53 |
+
- type: cosine_accuracy
|
| 54 |
+
value: 0.9412940740585327
|
| 55 |
+
name: Cosine Accuracy
|
| 56 |
---
|
| 57 |
|
| 58 |
# SentenceTransformer based on sentence-transformers/all-MiniLM-L6-v2
|
|
|
|
| 116 |
# Get the similarity scores for the embeddings
|
| 117 |
similarities = model.similarity(embeddings, embeddings)
|
| 118 |
print(similarities)
|
| 119 |
+
# tensor([[1.0000, 0.7198, 0.3823],
|
| 120 |
+
# [0.7198, 1.0000, 0.3737],
|
| 121 |
+
# [0.3823, 0.3737, 1.0000]])
|
| 122 |
```
|
| 123 |
|
| 124 |
<!--
|
|
|
|
| 145 |
*List how the model may foreseeably be misused and address what users ought not to do with the model.*
|
| 146 |
-->
|
| 147 |
|
| 148 |
+
## Evaluation
|
| 149 |
+
|
| 150 |
+
### Metrics
|
| 151 |
+
|
| 152 |
+
#### Triplet
|
| 153 |
+
|
| 154 |
+
* Evaluated with [<code>TripletEvaluator</code>](https://sbert.net/docs/package_reference/sentence_transformer/evaluation.html#sentence_transformers.evaluation.TripletEvaluator)
|
| 155 |
+
|
| 156 |
+
| Metric | Value |
|
| 157 |
+
|:--------------------|:-----------|
|
| 158 |
+
| **cosine_accuracy** | **0.9413** |
|
| 159 |
+
|
| 160 |
<!--
|
| 161 |
## Bias, Risks and Limitations
|
| 162 |
|
|
|
|
| 364 |
</details>
|
| 365 |
|
| 366 |
### Training Logs
|
| 367 |
+
| Epoch | Step | Training Loss | Validation Loss | cosine_accuracy |
|
| 368 |
+
|:------:|:----:|:-------------:|:---------------:|:---------------:|
|
| 369 |
+
| 0.0004 | 1 | 5.3655 | - | - |
|
| 370 |
+
| 2.1949 | 5000 | 2.1423 | 0.7694 | 0.9413 |
|
| 371 |
|
| 372 |
|
| 373 |
### Framework Versions
|
last-checkpoint/model.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 90864192
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:9e46149fd09a9867b9acad65acdb71570057411c6a87b5b28cc4922225edf94c
|
| 3 |
size 90864192
|
last-checkpoint/optimizer.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 180607738
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:f309c0f49859e92f45e91d15d010c986b3039d5aee5aa13a7a6a8b652636cbd3
|
| 3 |
size 180607738
|
last-checkpoint/rng_state.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14244
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:d11ae26ad0553937353377362dcdfdfc64b495a56e520ee9d5cafa528daa8602
|
| 3 |
size 14244
|
last-checkpoint/scaler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 988
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:38e75fca916f8178bf9cd33054df9c31b71689bd5bddb2e11917964dcae00b45
|
| 3 |
size 988
|
last-checkpoint/scheduler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1064
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:6b8ed2557d72b721bbe933588bb84b4e8fd67437924faa2318d545f860f51f41
|
| 3 |
size 1064
|
last-checkpoint/trainer_state.json
CHANGED
|
@@ -2,9 +2,9 @@
|
|
| 2 |
"best_global_step": null,
|
| 3 |
"best_metric": null,
|
| 4 |
"best_model_checkpoint": null,
|
| 5 |
-
"epoch":
|
| 6 |
"eval_steps": 5000,
|
| 7 |
-
"global_step":
|
| 8 |
"is_hyper_param_search": false,
|
| 9 |
"is_local_process_zero": true,
|
| 10 |
"is_world_process_zero": true,
|
|
@@ -15,6 +15,22 @@
|
|
| 15 |
"learning_rate": 0.0,
|
| 16 |
"loss": 5.3655,
|
| 17 |
"step": 1
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 18 |
}
|
| 19 |
],
|
| 20 |
"logging_steps": 5000,
|
|
@@ -29,7 +45,7 @@
|
|
| 29 |
"should_evaluate": false,
|
| 30 |
"should_log": false,
|
| 31 |
"should_save": true,
|
| 32 |
-
"should_training_stop": false
|
| 33 |
},
|
| 34 |
"attributes": {}
|
| 35 |
}
|
|
|
|
| 2 |
"best_global_step": null,
|
| 3 |
"best_metric": null,
|
| 4 |
"best_model_checkpoint": null,
|
| 5 |
+
"epoch": 3.0,
|
| 6 |
"eval_steps": 5000,
|
| 7 |
+
"global_step": 6834,
|
| 8 |
"is_hyper_param_search": false,
|
| 9 |
"is_local_process_zero": true,
|
| 10 |
"is_world_process_zero": true,
|
|
|
|
| 15 |
"learning_rate": 0.0,
|
| 16 |
"loss": 5.3655,
|
| 17 |
"step": 1
|
| 18 |
+
},
|
| 19 |
+
{
|
| 20 |
+
"epoch": 2.194907813871817,
|
| 21 |
+
"grad_norm": 5.572308540344238,
|
| 22 |
+
"learning_rate": 2.2396976347232385e-05,
|
| 23 |
+
"loss": 2.1423,
|
| 24 |
+
"step": 5000
|
| 25 |
+
},
|
| 26 |
+
{
|
| 27 |
+
"epoch": 2.194907813871817,
|
| 28 |
+
"eval_cosine_accuracy": 0.9412940740585327,
|
| 29 |
+
"eval_loss": 0.7694418430328369,
|
| 30 |
+
"eval_runtime": 32.5011,
|
| 31 |
+
"eval_samples_per_second": 292.451,
|
| 32 |
+
"eval_steps_per_second": 2.308,
|
| 33 |
+
"step": 5000
|
| 34 |
}
|
| 35 |
],
|
| 36 |
"logging_steps": 5000,
|
|
|
|
| 45 |
"should_evaluate": false,
|
| 46 |
"should_log": false,
|
| 47 |
"should_save": true,
|
| 48 |
+
"should_training_stop": true
|
| 49 |
},
|
| 50 |
"attributes": {}
|
| 51 |
}
|