SBERT FT - 3 epoch

Browse files

Files changed (7) hide show

1_Pooling/config.json +2 -7
README.md +52 -44
config_sentence_transformers.json +6 -6
model.safetensors +1 -1
modules.json +3 -3
sentence_bert_config.json +8 -2
tokenizer_config.json +1 -1

1_Pooling/config.json CHANGED Viewed

@@ -1,10 +1,5 @@
 {
-    "word_embedding_dimension": 768,
-    "pooling_mode_cls_token": false,
-    "pooling_mode_mean_tokens": true,
-    "pooling_mode_max_tokens": false,
-    "pooling_mode_mean_sqrt_len_tokens": false,
-    "pooling_mode_weightedmean_tokens": false,
-    "pooling_mode_lasttoken": false,
     "include_prompt": true
 }

 {
+    "embedding_dimension": 768,
+    "pooling_mode": "mean",
     "include_prompt": true
 }

README.md CHANGED Viewed

@@ -3,7 +3,6 @@ tags:
 - sentence-transformers
 - sentence-similarity
 - feature-extraction
-- dense
 - generated_from_trainer
 - dataset_size:5920
 - loss:MultipleNegativesRankingLoss
@@ -255,7 +254,7 @@ library_name: sentence-transformers
 # SentenceTransformer based on sentence-transformers/all-mpnet-base-v2
-This is a [sentence-transformers](https://www.SBERT.net) model finetuned from [sentence-transformers/all-mpnet-base-v2](https://huggingface.co/sentence-transformers/all-mpnet-base-v2). It maps sentences & paragraphs to a 768-dimensional dense vector space and can be used for semantic textual similarity, semantic search, paraphrase mining, text classification, clustering, and more.
 ## Model Details
@@ -265,6 +264,7 @@ This is a [sentence-transformers](https://www.SBERT.net) model finetuned from [s
 - **Maximum Sequence Length:** 256 tokens
 - **Output Dimensionality:** 768 dimensions
 - **Similarity Function:** Cosine Similarity
 <!-- - **Training Dataset:** Unknown -->
 <!-- - **Language:** Unknown -->
 <!-- - **License:** Unknown -->
@@ -279,9 +279,9 @@ This is a [sentence-transformers](https://www.SBERT.net) model finetuned from [s
 ```
 SentenceTransformer(
-  (0): Transformer({'max_seq_length': 256, 'do_lower_case': False, 'architecture': 'MPNetModel'})
-  (1): Pooling({'word_embedding_dimension': 768, 'pooling_mode_cls_token': False, 'pooling_mode_mean_tokens': True, 'pooling_mode_max_tokens': False, 'pooling_mode_mean_sqrt_len_tokens': False, 'pooling_mode_weightedmean_tokens': False, 'pooling_mode_lasttoken': False, 'include_prompt': True})
-  (2): Normalize()
 )
 ```
@@ -294,7 +294,6 @@ First install the Sentence Transformers library:
 ```bash
 pip install -U sentence-transformers
 ```
 Then you can load this model and run inference.
 ```python
 from sentence_transformers import SentenceTransformer
@@ -314,11 +313,10 @@ print(embeddings.shape)
 # Get the similarity scores for the embeddings
 similarities = model.similarity(embeddings, embeddings)
 print(similarities)
-# tensor([[ 1.0000,  0.8299, -0.1454],
-#         [ 0.8299,  1.0000, -0.0903],
-#         [-0.1454, -0.0903,  1.0000]])
 ```
 <!--
 ### Direct Usage (Transformers)
@@ -379,7 +377,13 @@ You can finetune this model on your own dataset.
   {
       "scale": 20.0,
       "similarity_fct": "cos_sim",
-      "gather_across_devices": false
   }
   ```
@@ -496,36 +500,39 @@ You can finetune this model on your own dataset.
 | Epoch  | Step | Training Loss |
 |:------:|:----:|:-------------:|
 | 0.1351 | 50   | 0.3131        |
-| 0.2703 | 100  | 0.2310        |
-| 0.4054 | 150  | 0.3020        |
-| 0.5405 | 200  | 0.2301        |
-| 0.6757 | 250  | 0.2584        |
-| 0.8108 | 300  | 0.2278        |
-| 0.9459 | 350  | 0.2608        |
-| 1.0811 | 400  | 0.1618        |
-| 1.2162 | 450  | 0.0894        |
-| 1.3514 | 500  | 0.0825        |
-| 1.4865 | 550  | 0.1157        |
-| 1.6216 | 600  | 0.1062        |
-| 1.7568 | 650  | 0.0873        |
-| 1.8919 | 700  | 0.0677        |
-| 2.0270 | 750  | 0.0996        |
-| 2.1622 | 800  | 0.0387        |
-| 2.2973 | 850  | 0.0389        |
-| 2.4324 | 900  | 0.0469        |
-| 2.5676 | 950  | 0.0345        |
-| 2.7027 | 1000 | 0.0407        |
-| 2.8378 | 1050 | 0.0403        |
-| 2.9730 | 1100 | 0.0385        |
 ### Framework Versions
-- Python: 3.12.12
-- Sentence Transformers: 5.2.3
 - Transformers: 5.0.0
 - PyTorch: 2.10.0+cu128
-- Accelerate: 1.12.0
-- Datasets: 4.8.3
 - Tokenizers: 0.22.2
 ## Citation
@@ -547,13 +554,14 @@ You can finetune this model on your own dataset.
 #### MultipleNegativesRankingLoss
 ```bibtex
-@misc{henderson2017efficient,
-    title={Efficient Natural Language Response Suggestion for Smart Reply},
-    author={Matthew Henderson and Rami Al-Rfou and Brian Strope and Yun-hsuan Sung and Laszlo Lukacs and Ruiqi Guo and Sanjiv Kumar and Balint Miklos and Ray Kurzweil},
-    year={2017},
-    eprint={1705.00652},
-    archivePrefix={arXiv},
-    primaryClass={cs.CL}
 }
 ```

 - sentence-transformers
 - sentence-similarity
 - feature-extraction
 - generated_from_trainer
 - dataset_size:5920
 - loss:MultipleNegativesRankingLoss
 # SentenceTransformer based on sentence-transformers/all-mpnet-base-v2
+This is a [sentence-transformers](https://www.SBERT.net) model finetuned from [sentence-transformers/all-mpnet-base-v2](https://huggingface.co/sentence-transformers/all-mpnet-base-v2). It maps sentences & paragraphs to a 768-dimensional dense vector space and can be used for retrieval.
 ## Model Details
 - **Maximum Sequence Length:** 256 tokens
 - **Output Dimensionality:** 768 dimensions
 - **Similarity Function:** Cosine Similarity
+- **Supported Modality:** Text
 <!-- - **Training Dataset:** Unknown -->
 <!-- - **Language:** Unknown -->
 <!-- - **License:** Unknown -->
 ```
 SentenceTransformer(
+  (0): Transformer({'transformer_task': 'feature-extraction', 'modality_config': {'text': {'method': 'forward', 'method_output_name': 'last_hidden_state'}}, 'module_output_name': 'token_embeddings', 'architecture': 'MPNetModel'})
+  (1): Pooling({'embedding_dimension': 768, 'pooling_mode': 'mean', 'include_prompt': True})
+  (2): Normalize({})
 )
 ```
 ```bash
 pip install -U sentence-transformers
 ```
 Then you can load this model and run inference.
 ```python
 from sentence_transformers import SentenceTransformer
 # Get the similarity scores for the embeddings
 similarities = model.similarity(embeddings, embeddings)
 print(similarities)
+# tensor([[ 1.0000,  0.7935, -0.1523],
+#         [ 0.7935,  1.0000, -0.1127],
+#         [-0.1523, -0.1127,  1.0000]])
 ```
 <!--
 ### Direct Usage (Transformers)
   {
       "scale": 20.0,
       "similarity_fct": "cos_sim",
+      "gather_across_devices": false,
+      "directions": [
+          "query_to_doc"
+      ],
+      "partition_mode": "joint",
+      "hardness_mode": null,
+      "hardness_strength": 0.0
   }
   ```
 | Epoch  | Step | Training Loss |
 |:------:|:----:|:-------------:|
 | 0.1351 | 50   | 0.3131        |
+| 0.2703 | 100  | 0.2313        |
+| 0.4054 | 150  | 0.3017        |
+| 0.5405 | 200  | 0.2300        |
+| 0.6757 | 250  | 0.2556        |
+| 0.8108 | 300  | 0.2238        |
+| 0.9459 | 350  | 0.2625        |
+| 1.0811 | 400  | 0.1785        |
+| 1.2162 | 450  | 0.0880        |
+| 1.3514 | 500  | 0.0839        |
+| 1.4865 | 550  | 0.1220        |
+| 1.6216 | 600  | 0.0988        |
+| 1.7568 | 650  | 0.0837        |
+| 1.8919 | 700  | 0.0685        |
+| 2.0270 | 750  | 0.0924        |
+| 2.1622 | 800  | 0.0381        |
+| 2.2973 | 850  | 0.0368        |
+| 2.4324 | 900  | 0.0475        |
+| 2.5676 | 950  | 0.0324        |
+| 2.7027 | 1000 | 0.0439        |
+| 2.8378 | 1050 | 0.0421        |
+| 2.9730 | 1100 | 0.0403        |
+### Training Time
+- **Training**: 16.8 minutes
 ### Framework Versions
+- Python: 3.12.13
+- Sentence Transformers: 5.4.0
 - Transformers: 5.0.0
 - PyTorch: 2.10.0+cu128
+- Accelerate: 1.13.0
+- Datasets: 4.8.5
 - Tokenizers: 0.22.2
 ## Citation
 #### MultipleNegativesRankingLoss
 ```bibtex
+@misc{oord2019representationlearningcontrastivepredictive,
+      title={Representation Learning with Contrastive Predictive Coding},
+      author={Aaron van den Oord and Yazhe Li and Oriol Vinyals},
+      year={2019},
+      eprint={1807.03748},
+      archivePrefix={arXiv},
+      primaryClass={cs.LG},
+      url={https://arxiv.org/abs/1807.03748},
 }
 ```

config_sentence_transformers.json CHANGED Viewed

@@ -1,14 +1,14 @@
 {
   "__version__": {
-    "sentence_transformers": "5.2.3",
-    "transformers": "5.0.0",
-    "pytorch": "2.10.0+cu128"
   },
   "model_type": "SentenceTransformer",
   "prompts": {
-    "query": "",
-    "document": ""
   },
-  "default_prompt_name": null,
   "similarity_fn_name": "cosine"
 }

 {
   "__version__": {
+    "pytorch": "2.10.0+cu128",
+    "sentence_transformers": "5.4.0",
+    "transformers": "5.0.0"
   },
+  "default_prompt_name": null,
   "model_type": "SentenceTransformer",
   "prompts": {
+    "document": "",
+    "query": ""
   },
   "similarity_fn_name": "cosine"
 }

model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:9a3f6b54fac9c258f46cc46422c22b4047b304b34d1da086c41b6bad4d916e51
 size 437967648

 version https://git-lfs.github.com/spec/v1
+oid sha256:887a4612dd0e340d62ec36181ebec7bd101bec723ceec585be1fa7c7868ee2c0
 size 437967648

modules.json CHANGED Viewed

@@ -3,18 +3,18 @@
     "idx": 0,
     "name": "0",
     "path": "",
-    "type": "sentence_transformers.models.Transformer"
   },
   {
     "idx": 1,
     "name": "1",
     "path": "1_Pooling",
-    "type": "sentence_transformers.models.Pooling"
   },
   {
     "idx": 2,
     "name": "2",
     "path": "2_Normalize",
-    "type": "sentence_transformers.models.Normalize"
   }
 ]

     "idx": 0,
     "name": "0",
     "path": "",
+    "type": "sentence_transformers.base.modules.transformer.Transformer"
   },
   {
     "idx": 1,
     "name": "1",
     "path": "1_Pooling",
+    "type": "sentence_transformers.sentence_transformer.modules.pooling.Pooling"
   },
   {
     "idx": 2,
     "name": "2",
     "path": "2_Normalize",
+    "type": "sentence_transformers.sentence_transformer.modules.normalize.Normalize"
   }
 ]

sentence_bert_config.json CHANGED Viewed

@@ -1,4 +1,10 @@
 {
-    "max_seq_length": 256,
-    "do_lower_case": false
 }

 {
+    "transformer_task": "feature-extraction",
+    "modality_config": {
+        "text": {
+            "method": "forward",
+            "method_output_name": "last_hidden_state"
+        }
+    },
+    "module_output_name": "token_embeddings"
 }

tokenizer_config.json CHANGED Viewed

@@ -6,7 +6,7 @@
   "eos_token": "</s>",
   "is_local": false,
   "mask_token": "<mask>",
-  "model_max_length": 384,
   "pad_token": "<pad>",
   "sep_token": "</s>",
   "strip_accents": null,

   "eos_token": "</s>",
   "is_local": false,
   "mask_token": "<mask>",
+  "model_max_length": 256,
   "pad_token": "<pad>",
   "sep_token": "</s>",
   "strip_accents": null,