x2bee
/

ModernBERT-SimCSE-multitask_v03

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:4b7c0f8a59cf410eb4af4ecd59404fe5c2a8fab817c5e38c5817cd7c7b02c4a4
 size 2362528

 version https://git-lfs.github.com/spec/v1
+oid sha256:952a6c22e6fd47eb3c9872be6da5ff1152332bd8f6c51082eed8e3eb73962f49
 size 2362528

README.md CHANGED Viewed

@@ -58,34 +58,34 @@ model-index:
       type: sts_dev
     metrics:
     - type: pearson_cosine
-      value: 0.805539118357127
       name: Pearson Cosine
     - type: spearman_cosine
-      value: 0.8061033061285413
       name: Spearman Cosine
     - type: pearson_euclidean
-      value: 0.7633523638911596
       name: Pearson Euclidean
     - type: spearman_euclidean
-      value: 0.7628951831481233
       name: Spearman Euclidean
     - type: pearson_manhattan
-      value: 0.7652880535446602
       name: Pearson Manhattan
     - type: spearman_manhattan
-      value: 0.7657560923304267
       name: Spearman Manhattan
     - type: pearson_dot
-      value: 0.7133686434266335
       name: Pearson Dot
     - type: spearman_dot
-      value: 0.7015065203951969
       name: Spearman Dot
     - type: pearson_max
-      value: 0.805539118357127
       name: Pearson Max
     - type: spearman_max
-      value: 0.8061033061285413
       name: Spearman Max
 ---
@@ -136,7 +136,7 @@ Then you can load this model and run inference.
 from sentence_transformers import SentenceTransformer
 # Download from the 🤗 Hub
-model = SentenceTransformer("CocoRoF/ModernBERT-SimCSE-multitask_v02")
 # Run inference
 sentences = [
     '버스가 바쁜 길을 따라 운전한다.',
@@ -186,18 +186,18 @@ You can finetune this model on your own dataset.
 * Dataset: `sts_dev`
 * Evaluated with [<code>EmbeddingSimilarityEvaluator</code>](https://sbert.net/docs/package_reference/sentence_transformer/evaluation.html#sentence_transformers.evaluation.EmbeddingSimilarityEvaluator)
-| Metric             | Value      |
-|:-------------------|:-----------|
-| pearson_cosine     | 0.8055     |
-| spearman_cosine    | 0.8061     |
-| pearson_euclidean  | 0.7634     |
-| spearman_euclidean | 0.7629     |
-| pearson_manhattan  | 0.7653     |
-| spearman_manhattan | 0.7658     |
-| pearson_dot        | 0.7134     |
-| spearman_dot       | 0.7015     |
-| pearson_max        | 0.8055     |
-| **spearman_max**   | **0.8061** |
 <!--
 ## Bias, Risks and Limitations
@@ -271,11 +271,11 @@ You can finetune this model on your own dataset.
 - `per_device_train_batch_size`: 16
 - `per_device_eval_batch_size`: 16
 - `gradient_accumulation_steps`: 8
-- `learning_rate`: 5e-07
 - `num_train_epochs`: 10.0
 - `warmup_ratio`: 0.1
 - `push_to_hub`: True
-- `hub_model_id`: CocoRoF/ModernBERT-SimCSE-multitask_v02
 - `hub_strategy`: checkpoint
 - `batch_sampler`: no_duplicates
@@ -293,7 +293,7 @@ You can finetune this model on your own dataset.
 - `gradient_accumulation_steps`: 8
 - `eval_accumulation_steps`: None
 - `torch_empty_cache_steps`: None
-- `learning_rate`: 5e-07
 - `weight_decay`: 0.0
 - `adam_beta1`: 0.9
 - `adam_beta2`: 0.999
@@ -362,7 +362,7 @@ You can finetune this model on your own dataset.
 - `use_legacy_prediction_loop`: False
 - `push_to_hub`: True
 - `resume_from_checkpoint`: None
-- `hub_model_id`: CocoRoF/ModernBERT-SimCSE-multitask_v02
 - `hub_strategy`: checkpoint
 - `hub_private_repo`: None
 - `hub_always_push`: False
@@ -403,50 +403,50 @@ You can finetune this model on your own dataset.
 ### Training Logs
 | Epoch  | Step | Training Loss | Validation Loss | sts_dev_spearman_max |
 |:------:|:----:|:-------------:|:---------------:|:--------------------:|
-| 0.2228 | 10   | 0.0284        | -               | -                    |
-| 0.4457 | 20   | 0.0346        | -               | -                    |
-| 0.6685 | 30   | 0.0305        | 0.0317          | 0.7927               |
-| 0.8914 | 40   | 0.0495        | -               | -                    |
-| 1.1337 | 50   | 0.04          | -               | -                    |
-| 1.3565 | 60   | 0.0295        | 0.0316          | 0.7930               |
-| 1.5794 | 70   | 0.0352        | -               | -                    |
-| 1.8022 | 80   | 0.042         | -               | -                    |
-| 2.0446 | 90   | 0.0476        | 0.0314          | 0.7933               |
-| 2.2674 | 100  | 0.0289        | -               | -                    |
-| 2.4903 | 110  | 0.0345        | -               | -                    |
-| 2.7131 | 120  | 0.0339        | 0.0311          | 0.7940               |
-| 2.9359 | 130  | 0.0493        | -               | -                    |
-| 3.1783 | 140  | 0.0341        | -               | -                    |
-| 3.4011 | 150  | 0.0332        | 0.0308          | 0.7952               |
-| 3.6240 | 160  | 0.0303        | -               | -                    |
-| 3.8468 | 170  | 0.045         | -               | -                    |
-| 4.0891 | 180  | 0.0422        | 0.0305          | 0.7961               |
-| 4.3120 | 190  | 0.0278        | -               | -                    |
-| 4.5348 | 200  | 0.0338        | -               | -                    |
-| 4.7577 | 210  | 0.0372        | 0.0302          | 0.7968               |
-| 4.9805 | 220  | 0.0469        | -               | -                    |
-| 5.2228 | 230  | 0.0303        | -               | -                    |
-| 5.4457 | 240  | 0.0328        | 0.0297          | 0.7982               |
-| 5.6685 | 250  | 0.0295        | -               | -                    |
-| 5.8914 | 260  | 0.0458        | -               | -                    |
-| 6.1337 | 270  | 0.0363        | 0.0295          | 0.7997               |
-| 6.3565 | 280  | 0.0265        | -               | -                    |
-| 6.5794 | 290  | 0.0341        | -               | -                    |
-| 6.8022 | 300  | 0.0384        | 0.0291          | 0.8007               |
-| 7.0446 | 310  | 0.0431        | -               | -                    |
-| 7.2674 | 320  | 0.0256        | -               | -                    |
-| 7.4903 | 330  | 0.0321        | 0.0287          | 0.8022               |
-| 7.7131 | 340  | 0.0315        | -               | -                    |
-| 7.9359 | 350  | 0.0438        | -               | -                    |
-| 8.1783 | 360  | 0.0301        | 0.0284          | 0.8038               |
-| 8.4011 | 370  | 0.0301        | -               | -                    |
-| 8.6240 | 380  | 0.0285        | -               | -                    |
-| 8.8468 | 390  | 0.0394        | 0.0282          | 0.8049               |
-| 9.0891 | 400  | 0.0374        | -               | -                    |
-| 9.3120 | 410  | 0.0245        | -               | -                    |
-| 9.5348 | 420  | 0.0316        | 0.0279          | 0.8061               |
-| 9.7577 | 430  | 0.0331        | -               | -                    |
-| 9.9805 | 440  | 0.0411        | -               | -                    |
 ### Framework Versions

       type: sts_dev
     metrics:
     - type: pearson_cosine
+      value: 0.8223949445074785
       name: Pearson Cosine
     - type: spearman_cosine
+      value: 0.8220107207834706
       name: Spearman Cosine
     - type: pearson_euclidean
+      value: 0.7785831525283676
       name: Pearson Euclidean
     - type: spearman_euclidean
+      value: 0.7815628643916452
       name: Spearman Euclidean
     - type: pearson_manhattan
+      value: 0.7809119630672191
       name: Pearson Manhattan
     - type: spearman_manhattan
+      value: 0.7846536514745763
       name: Spearman Manhattan
     - type: pearson_dot
+      value: 0.7543765794886113
       name: Pearson Dot
     - type: spearman_dot
+      value: 0.7434525191412167
       name: Spearman Dot
     - type: pearson_max
+      value: 0.8223949445074785
       name: Pearson Max
     - type: spearman_max
+      value: 0.8220107207834706
       name: Spearman Max
 ---
 from sentence_transformers import SentenceTransformer
 # Download from the 🤗 Hub
+model = SentenceTransformer("CocoRoF/ModernBERT-SimCSE-multitask_v03")
 # Run inference
 sentences = [
     '버스가 바쁜 길을 따라 운전한다.',
 * Dataset: `sts_dev`
 * Evaluated with [<code>EmbeddingSimilarityEvaluator</code>](https://sbert.net/docs/package_reference/sentence_transformer/evaluation.html#sentence_transformers.evaluation.EmbeddingSimilarityEvaluator)
+| Metric             | Value     |
+|:-------------------|:----------|
+| pearson_cosine     | 0.8224    |
+| spearman_cosine    | 0.822     |
+| pearson_euclidean  | 0.7786    |
+| spearman_euclidean | 0.7816    |
+| pearson_manhattan  | 0.7809    |
+| spearman_manhattan | 0.7847    |
+| pearson_dot        | 0.7544    |
+| spearman_dot       | 0.7435    |
+| pearson_max        | 0.8224    |
+| **spearman_max**   | **0.822** |
 <!--
 ## Bias, Risks and Limitations
 - `per_device_train_batch_size`: 16
 - `per_device_eval_batch_size`: 16
 - `gradient_accumulation_steps`: 8
+- `learning_rate`: 1e-05
 - `num_train_epochs`: 10.0
 - `warmup_ratio`: 0.1
 - `push_to_hub`: True
+- `hub_model_id`: CocoRoF/ModernBERT-SimCSE-multitask_v03
 - `hub_strategy`: checkpoint
 - `batch_sampler`: no_duplicates
 - `gradient_accumulation_steps`: 8
 - `eval_accumulation_steps`: None
 - `torch_empty_cache_steps`: None
+- `learning_rate`: 1e-05
 - `weight_decay`: 0.0
 - `adam_beta1`: 0.9
 - `adam_beta2`: 0.999
 - `use_legacy_prediction_loop`: False
 - `push_to_hub`: True
 - `resume_from_checkpoint`: None
+- `hub_model_id`: CocoRoF/ModernBERT-SimCSE-multitask_v03
 - `hub_strategy`: checkpoint
 - `hub_private_repo`: None
 - `hub_always_push`: False
 ### Training Logs
 | Epoch  | Step | Training Loss | Validation Loss | sts_dev_spearman_max |
 |:------:|:----:|:-------------:|:---------------:|:--------------------:|
+| 0.2228 | 10   | 0.0283        | -               | -                    |
+| 0.4457 | 20   | 0.0344        | -               | -                    |
+| 0.6685 | 30   | 0.0305        | 0.0310          | 0.7939               |
+| 0.8914 | 40   | 0.0489        | -               | -                    |
+| 1.1337 | 50   | 0.0382        | -               | -                    |
+| 1.3565 | 60   | 0.0271        | 0.0293          | 0.7994               |
+| 1.5794 | 70   | 0.0344        | -               | -                    |
+| 1.8022 | 80   | 0.0382        | -               | -                    |
+| 2.0446 | 90   | 0.0419        | 0.0280          | 0.8059               |
+| 2.2674 | 100  | 0.0244        | -               | -                    |
+| 2.4903 | 110  | 0.0307        | -               | -                    |
+| 2.7131 | 120  | 0.0291        | 0.0269          | 0.8108               |
+| 2.9359 | 130  | 0.038         | -               | -                    |
+| 3.1783 | 140  | 0.0269        | -               | -                    |
+| 3.4011 | 150  | 0.0268        | 0.0262          | 0.8155               |
+| 3.6240 | 160  | 0.0246        | -               | -                    |
+| 3.8468 | 170  | 0.0313        | -               | -                    |
+| 4.0891 | 180  | 0.0303        | 0.0259          | 0.8185               |
+| 4.3120 | 190  | 0.0198        | -               | -                    |
+| 4.5348 | 200  | 0.0257        | -               | -                    |
+| 4.7577 | 210  | 0.0242        | 0.0255          | 0.8202               |
+| 4.9805 | 220  | 0.0293        | -               | -                    |
+| 5.2228 | 230  | 0.0193        | -               | -                    |
+| 5.4457 | 240  | 0.0222        | 0.0254          | 0.8222               |
+| 5.6685 | 250  | 0.0184        | -               | -                    |
+| 5.8914 | 260  | 0.0243        | -               | -                    |
+| 6.1337 | 270  | 0.0204        | 0.0254          | 0.8235               |
+| 6.3565 | 280  | 0.0147        | -               | -                    |
+| 6.5794 | 290  | 0.0196        | -               | -                    |
+| 6.8022 | 300  | 0.0176        | 0.0253          | 0.8227               |
+| 7.0446 | 310  | 0.0202        | -               | -                    |
+| 7.2674 | 320  | 0.0123        | -               | -                    |
+| 7.4903 | 330  | 0.0151        | 0.0254          | 0.8236               |
+| 7.7131 | 340  | 0.0132        | -               | -                    |
+| 7.9359 | 350  | 0.0158        | -               | -                    |
+| 8.1783 | 360  | 0.0118        | 0.0256          | 0.8240               |
+| 8.4011 | 370  | 0.0115        | -               | -                    |
+| 8.6240 | 380  | 0.0105        | -               | -                    |
+| 8.8468 | 390  | 0.0111        | 0.0256          | 0.8215               |
+| 9.0891 | 400  | 0.011         | -               | -                    |
+| 9.3120 | 410  | 0.0076        | -               | -                    |
+| 9.5348 | 420  | 0.0091        | 0.0256          | 0.8220               |
+| 9.7577 | 430  | 0.0075        | -               | -                    |
+| 9.9805 | 440  | 0.0093        | -               | -                    |
 ### Framework Versions

model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:0089cd9e1eb926c27edf4a3eba92fe4d1b1affb5a38446b48a4d318fab339be6
 size 735216376

 version https://git-lfs.github.com/spec/v1
+oid sha256:c0a6eec0e90768185fb0e3eca583968ac1e1fe92c4787c043214ae4f116edeb1
 size 735216376