CocoRoF/ModernBERT-SimCSE-multitask_v03
Browse files
- 2_Dense/model.safetensors +1 -1
- README.md +71 -71
- model.safetensors +1 -1
2_Dense/model.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 2362528
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:952a6c22e6fd47eb3c9872be6da5ff1152332bd8f6c51082eed8e3eb73962f49
|
| 3 |
size 2362528
|
README.md
CHANGED
|
@@ -58,34 +58,34 @@ model-index:
|
|
| 58 |
type: sts_dev
|
| 59 |
metrics:
|
| 60 |
- type: pearson_cosine
|
| 61 |
-
value: 0.
|
| 62 |
name: Pearson Cosine
|
| 63 |
- type: spearman_cosine
|
| 64 |
-
value: 0.
|
| 65 |
name: Spearman Cosine
|
| 66 |
- type: pearson_euclidean
|
| 67 |
-
value: 0.
|
| 68 |
name: Pearson Euclidean
|
| 69 |
- type: spearman_euclidean
|
| 70 |
-
value: 0.
|
| 71 |
name: Spearman Euclidean
|
| 72 |
- type: pearson_manhattan
|
| 73 |
-
value: 0.
|
| 74 |
name: Pearson Manhattan
|
| 75 |
- type: spearman_manhattan
|
| 76 |
-
value: 0.
|
| 77 |
name: Spearman Manhattan
|
| 78 |
- type: pearson_dot
|
| 79 |
-
value: 0.
|
| 80 |
name: Pearson Dot
|
| 81 |
- type: spearman_dot
|
| 82 |
-
value: 0.
|
| 83 |
name: Spearman Dot
|
| 84 |
- type: pearson_max
|
| 85 |
-
value: 0.
|
| 86 |
name: Pearson Max
|
| 87 |
- type: spearman_max
|
| 88 |
-
value: 0.
|
| 89 |
name: Spearman Max
|
| 90 |
---
|
| 91 |
|
|
@@ -136,7 +136,7 @@ Then you can load this model and run inference.
|
|
| 136 |
from sentence_transformers import SentenceTransformer
|
| 137 |
|
| 138 |
# Download from the 🤗 Hub
|
| 139 |
-
model = SentenceTransformer("CocoRoF/ModernBERT-SimCSE-
|
| 140 |
# Run inference
|
| 141 |
sentences = [
|
| 142 |
'버스가 바쁜 길을 따라 운전한다.',
|
|
@@ -186,18 +186,18 @@ You can finetune this model on your own dataset.
|
|
| 186 |
* Dataset: `sts_dev`
|
| 187 |
* Evaluated with [<code>EmbeddingSimilarityEvaluator</code>](https://sbert.net/docs/package_reference/sentence_transformer/evaluation.html#sentence_transformers.evaluation.EmbeddingSimilarityEvaluator)
|
| 188 |
|
| 189 |
-
| Metric | Value
|
| 190 |
-
|
| 191 |
-
| pearson_cosine | 0.
|
| 192 |
-
| spearman_cosine | 0.
|
| 193 |
-
| pearson_euclidean | 0.
|
| 194 |
-
| spearman_euclidean | 0.
|
| 195 |
-
| pearson_manhattan | 0.
|
| 196 |
-
| spearman_manhattan | 0.
|
| 197 |
-
| pearson_dot | 0.
|
| 198 |
-
| spearman_dot | 0.
|
| 199 |
-
| pearson_max | 0.
|
| 200 |
-
| **spearman_max** | **0.
|
| 201 |
|
| 202 |
<!--
|
| 203 |
## Bias, Risks and Limitations
|
|
@@ -271,11 +271,11 @@ You can finetune this model on your own dataset.
|
|
| 271 |
- `per_device_train_batch_size`: 16
|
| 272 |
- `per_device_eval_batch_size`: 16
|
| 273 |
- `gradient_accumulation_steps`: 8
|
| 274 |
-
- `learning_rate`:
|
| 275 |
- `num_train_epochs`: 10.0
|
| 276 |
- `warmup_ratio`: 0.1
|
| 277 |
- `push_to_hub`: True
|
| 278 |
-
- `hub_model_id`: CocoRoF/ModernBERT-SimCSE-
|
| 279 |
- `hub_strategy`: checkpoint
|
| 280 |
- `batch_sampler`: no_duplicates
|
| 281 |
|
|
@@ -293,7 +293,7 @@ You can finetune this model on your own dataset.
|
|
| 293 |
- `gradient_accumulation_steps`: 8
|
| 294 |
- `eval_accumulation_steps`: None
|
| 295 |
- `torch_empty_cache_steps`: None
|
| 296 |
-
- `learning_rate`:
|
| 297 |
- `weight_decay`: 0.0
|
| 298 |
- `adam_beta1`: 0.9
|
| 299 |
- `adam_beta2`: 0.999
|
|
@@ -362,7 +362,7 @@ You can finetune this model on your own dataset.
|
|
| 362 |
- `use_legacy_prediction_loop`: False
|
| 363 |
- `push_to_hub`: True
|
| 364 |
- `resume_from_checkpoint`: None
|
| 365 |
-
- `hub_model_id`: CocoRoF/ModernBERT-SimCSE-
|
| 366 |
- `hub_strategy`: checkpoint
|
| 367 |
- `hub_private_repo`: None
|
| 368 |
- `hub_always_push`: False
|
|
@@ -403,50 +403,50 @@ You can finetune this model on your own dataset.
|
|
| 403 |
### Training Logs
|
| 404 |
| Epoch | Step | Training Loss | Validation Loss | sts_dev_spearman_max |
|
| 405 |
|:------:|:----:|:-------------:|:---------------:|:--------------------:|
|
| 406 |
-
| 0.2228 | 10 | 0.
|
| 407 |
-
| 0.4457 | 20 | 0.
|
| 408 |
-
| 0.6685 | 30 | 0.0305 | 0.
|
| 409 |
-
| 0.8914 | 40 | 0.
|
| 410 |
-
| 1.1337 | 50 | 0.
|
| 411 |
-
| 1.3565 | 60 | 0.
|
| 412 |
-
| 1.5794 | 70 | 0.
|
| 413 |
-
| 1.8022 | 80 | 0.
|
| 414 |
-
| 2.0446 | 90 | 0.
|
| 415 |
-
| 2.2674 | 100 | 0.
|
| 416 |
-
| 2.4903 | 110 | 0.
|
| 417 |
-
| 2.7131 | 120 | 0.
|
| 418 |
-
| 2.9359 | 130 | 0.
|
| 419 |
-
| 3.1783 | 140 | 0.
|
| 420 |
-
| 3.4011 | 150 | 0.
|
| 421 |
-
| 3.6240 | 160 | 0.
|
| 422 |
-
| 3.8468 | 170 | 0.
|
| 423 |
-
| 4.0891 | 180 | 0.
|
| 424 |
-
| 4.3120 | 190 | 0.
|
| 425 |
-
| 4.5348 | 200 | 0.
|
| 426 |
-
| 4.7577 | 210 | 0.
|
| 427 |
-
| 4.9805 | 220 | 0.
|
| 428 |
-
| 5.2228 | 230 | 0.
|
| 429 |
-
| 5.4457 | 240 | 0.
|
| 430 |
-
| 5.6685 | 250 | 0.
|
| 431 |
-
| 5.8914 | 260 | 0.
|
| 432 |
-
| 6.1337 | 270 | 0.
|
| 433 |
-
| 6.3565 | 280 | 0.
|
| 434 |
-
| 6.5794 | 290 | 0.
|
| 435 |
-
| 6.8022 | 300 | 0.
|
| 436 |
-
| 7.0446 | 310 | 0.
|
| 437 |
-
| 7.2674 | 320 | 0.
|
| 438 |
-
| 7.4903 | 330 | 0.
|
| 439 |
-
| 7.7131 | 340 | 0.
|
| 440 |
-
| 7.9359 | 350 | 0.
|
| 441 |
-
| 8.1783 | 360 | 0.
|
| 442 |
-
| 8.4011 | 370 | 0.
|
| 443 |
-
| 8.6240 | 380 | 0.
|
| 444 |
-
| 8.8468 | 390 | 0.
|
| 445 |
-
| 9.0891 | 400 | 0.
|
| 446 |
-
| 9.3120 | 410 | 0.
|
| 447 |
-
| 9.5348 | 420 | 0.
|
| 448 |
-
| 9.7577 | 430 | 0.
|
| 449 |
-
| 9.9805 | 440 | 0.
|
| 450 |
|
| 451 |
|
| 452 |
### Framework Versions
|
|
|
|
| 58 |
type: sts_dev
|
| 59 |
metrics:
|
| 60 |
- type: pearson_cosine
|
| 61 |
+
value: 0.8223949445074785
|
| 62 |
name: Pearson Cosine
|
| 63 |
- type: spearman_cosine
|
| 64 |
+
value: 0.8220107207834706
|
| 65 |
name: Spearman Cosine
|
| 66 |
- type: pearson_euclidean
|
| 67 |
+
value: 0.7785831525283676
|
| 68 |
name: Pearson Euclidean
|
| 69 |
- type: spearman_euclidean
|
| 70 |
+
value: 0.7815628643916452
|
| 71 |
name: Spearman Euclidean
|
| 72 |
- type: pearson_manhattan
|
| 73 |
+
value: 0.7809119630672191
|
| 74 |
name: Pearson Manhattan
|
| 75 |
- type: spearman_manhattan
|
| 76 |
+
value: 0.7846536514745763
|
| 77 |
name: Spearman Manhattan
|
| 78 |
- type: pearson_dot
|
| 79 |
+
value: 0.7543765794886113
|
| 80 |
name: Pearson Dot
|
| 81 |
- type: spearman_dot
|
| 82 |
+
value: 0.7434525191412167
|
| 83 |
name: Spearman Dot
|
| 84 |
- type: pearson_max
|
| 85 |
+
value: 0.8223949445074785
|
| 86 |
name: Pearson Max
|
| 87 |
- type: spearman_max
|
| 88 |
+
value: 0.8220107207834706
|
| 89 |
name: Spearman Max
|
| 90 |
---
|
| 91 |
|
|
|
|
| 136 |
from sentence_transformers import SentenceTransformer
|
| 137 |
|
| 138 |
# Download from the 🤗 Hub
|
| 139 |
+
model = SentenceTransformer("CocoRoF/ModernBERT-SimCSE-multitask_v03")
|
| 140 |
# Run inference
|
| 141 |
sentences = [
|
| 142 |
'버스가 바쁜 길을 따라 운전한다.',
|
|
|
|
| 186 |
* Dataset: `sts_dev`
|
| 187 |
* Evaluated with [<code>EmbeddingSimilarityEvaluator</code>](https://sbert.net/docs/package_reference/sentence_transformer/evaluation.html#sentence_transformers.evaluation.EmbeddingSimilarityEvaluator)
|
| 188 |
|
| 189 |
+
| Metric | Value |
|
| 190 |
+
|:-------------------|:----------|
|
| 191 |
+
| pearson_cosine | 0.8224 |
|
| 192 |
+
| spearman_cosine | 0.822 |
|
| 193 |
+
| pearson_euclidean | 0.7786 |
|
| 194 |
+
| spearman_euclidean | 0.7816 |
|
| 195 |
+
| pearson_manhattan | 0.7809 |
|
| 196 |
+
| spearman_manhattan | 0.7847 |
|
| 197 |
+
| pearson_dot | 0.7544 |
|
| 198 |
+
| spearman_dot | 0.7435 |
|
| 199 |
+
| pearson_max | 0.8224 |
|
| 200 |
+
| **spearman_max** | **0.822** |
|
| 201 |
|
| 202 |
<!--
|
| 203 |
## Bias, Risks and Limitations
|
|
|
|
| 271 |
- `per_device_train_batch_size`: 16
|
| 272 |
- `per_device_eval_batch_size`: 16
|
| 273 |
- `gradient_accumulation_steps`: 8
|
| 274 |
+
- `learning_rate`: 1e-05
|
| 275 |
- `num_train_epochs`: 10.0
|
| 276 |
- `warmup_ratio`: 0.1
|
| 277 |
- `push_to_hub`: True
|
| 278 |
+
- `hub_model_id`: CocoRoF/ModernBERT-SimCSE-multitask_v03
|
| 279 |
- `hub_strategy`: checkpoint
|
| 280 |
- `batch_sampler`: no_duplicates
|
| 281 |
|
|
|
|
| 293 |
- `gradient_accumulation_steps`: 8
|
| 294 |
- `eval_accumulation_steps`: None
|
| 295 |
- `torch_empty_cache_steps`: None
|
| 296 |
+
- `learning_rate`: 1e-05
|
| 297 |
- `weight_decay`: 0.0
|
| 298 |
- `adam_beta1`: 0.9
|
| 299 |
- `adam_beta2`: 0.999
|
|
|
|
| 362 |
- `use_legacy_prediction_loop`: False
|
| 363 |
- `push_to_hub`: True
|
| 364 |
- `resume_from_checkpoint`: None
|
| 365 |
+
- `hub_model_id`: CocoRoF/ModernBERT-SimCSE-multitask_v03
|
| 366 |
- `hub_strategy`: checkpoint
|
| 367 |
- `hub_private_repo`: None
|
| 368 |
- `hub_always_push`: False
|
|
|
|
| 403 |
### Training Logs
|
| 404 |
| Epoch | Step | Training Loss | Validation Loss | sts_dev_spearman_max |
|
| 405 |
|:------:|:----:|:-------------:|:---------------:|:--------------------:|
|
| 406 |
+
| 0.2228 | 10 | 0.0283 | - | - |
|
| 407 |
+
| 0.4457 | 20 | 0.0344 | - | - |
|
| 408 |
+
| 0.6685 | 30 | 0.0305 | 0.0310 | 0.7939 |
|
| 409 |
+
| 0.8914 | 40 | 0.0489 | - | - |
|
| 410 |
+
| 1.1337 | 50 | 0.0382 | - | - |
|
| 411 |
+
| 1.3565 | 60 | 0.0271 | 0.0293 | 0.7994 |
|
| 412 |
+
| 1.5794 | 70 | 0.0344 | - | - |
|
| 413 |
+
| 1.8022 | 80 | 0.0382 | - | - |
|
| 414 |
+
| 2.0446 | 90 | 0.0419 | 0.0280 | 0.8059 |
|
| 415 |
+
| 2.2674 | 100 | 0.0244 | - | - |
|
| 416 |
+
| 2.4903 | 110 | 0.0307 | - | - |
|
| 417 |
+
| 2.7131 | 120 | 0.0291 | 0.0269 | 0.8108 |
|
| 418 |
+
| 2.9359 | 130 | 0.038 | - | - |
|
| 419 |
+
| 3.1783 | 140 | 0.0269 | - | - |
|
| 420 |
+
| 3.4011 | 150 | 0.0268 | 0.0262 | 0.8155 |
|
| 421 |
+
| 3.6240 | 160 | 0.0246 | - | - |
|
| 422 |
+
| 3.8468 | 170 | 0.0313 | - | - |
|
| 423 |
+
| 4.0891 | 180 | 0.0303 | 0.0259 | 0.8185 |
|
| 424 |
+
| 4.3120 | 190 | 0.0198 | - | - |
|
| 425 |
+
| 4.5348 | 200 | 0.0257 | - | - |
|
| 426 |
+
| 4.7577 | 210 | 0.0242 | 0.0255 | 0.8202 |
|
| 427 |
+
| 4.9805 | 220 | 0.0293 | - | - |
|
| 428 |
+
| 5.2228 | 230 | 0.0193 | - | - |
|
| 429 |
+
| 5.4457 | 240 | 0.0222 | 0.0254 | 0.8222 |
|
| 430 |
+
| 5.6685 | 250 | 0.0184 | - | - |
|
| 431 |
+
| 5.8914 | 260 | 0.0243 | - | - |
|
| 432 |
+
| 6.1337 | 270 | 0.0204 | 0.0254 | 0.8235 |
|
| 433 |
+
| 6.3565 | 280 | 0.0147 | - | - |
|
| 434 |
+
| 6.5794 | 290 | 0.0196 | - | - |
|
| 435 |
+
| 6.8022 | 300 | 0.0176 | 0.0253 | 0.8227 |
|
| 436 |
+
| 7.0446 | 310 | 0.0202 | - | - |
|
| 437 |
+
| 7.2674 | 320 | 0.0123 | - | - |
|
| 438 |
+
| 7.4903 | 330 | 0.0151 | 0.0254 | 0.8236 |
|
| 439 |
+
| 7.7131 | 340 | 0.0132 | - | - |
|
| 440 |
+
| 7.9359 | 350 | 0.0158 | - | - |
|
| 441 |
+
| 8.1783 | 360 | 0.0118 | 0.0256 | 0.8240 |
|
| 442 |
+
| 8.4011 | 370 | 0.0115 | - | - |
|
| 443 |
+
| 8.6240 | 380 | 0.0105 | - | - |
|
| 444 |
+
| 8.8468 | 390 | 0.0111 | 0.0256 | 0.8215 |
|
| 445 |
+
| 9.0891 | 400 | 0.011 | - | - |
|
| 446 |
+
| 9.3120 | 410 | 0.0076 | - | - |
|
| 447 |
+
| 9.5348 | 420 | 0.0091 | 0.0256 | 0.8220 |
|
| 448 |
+
| 9.7577 | 430 | 0.0075 | - | - |
|
| 449 |
+
| 9.9805 | 440 | 0.0093 | - | - |
|
| 450 |
|
| 451 |
|
| 452 |
### Framework Versions
|
model.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 735216376
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:c0a6eec0e90768185fb0e3eca583968ac1e1fe92c4787c043214ae4f116edeb1
|
| 3 |
size 735216376
|