Upload folder using huggingface_hub

Browse files

Files changed (6)

1_Pooling/config.json +8 -8
README.md +27 -21
config_sentence_transformers.json +7 -3
model.safetensors +3 -0
sentence_bert_config.json +2 -2
tokenizer_config.json +7 -0

1_Pooling/config.json CHANGED Viewed

@@ -1,10 +1,10 @@
 {
-  "word_embedding_dimension": 1024,
-  "pooling_mode_cls_token": false,
-  "pooling_mode_mean_tokens": true,
-  "pooling_mode_max_tokens": false,
-  "pooling_mode_mean_sqrt_len_tokens": false,
-  "pooling_mode_weightedmean_tokens": false,
-  "pooling_mode_lasttoken": false,
-  "include_prompt": true
 }

 {
+    "word_embedding_dimension": 1024,
+    "pooling_mode_cls_token": false,
+    "pooling_mode_mean_tokens": true,
+    "pooling_mode_max_tokens": false,
+    "pooling_mode_mean_sqrt_len_tokens": false,
+    "pooling_mode_weightedmean_tokens": false,
+    "pooling_mode_lasttoken": false,
+    "include_prompt": true
 }

README.md CHANGED Viewed

@@ -3,10 +3,11 @@ tags:
 - sentence-transformers
 - sentence-similarity
 - feature-extraction
 - generated_from_trainer
 - dataset_size:50000
 - loss:MultipleNegativesRankingLoss
-base_model: intfloat/e5-large-v2
 widget:
 - source_sentence: AVS Video Editor AVS Video Editor is a video editing software published
     by Online Media Technologies Ltd. It is a part of AVS4YOU software suite which
@@ -143,15 +144,15 @@ pipeline_tag: sentence-similarity
 library_name: sentence-transformers
 ---
-# SentenceTransformer based on intfloat/e5-large-v2
-This is a [sentence-transformers](https://www.SBERT.net) model finetuned from [intfloat/e5-large-v2](https://huggingface.co/intfloat/e5-large-v2). It maps sentences & paragraphs to a 1024-dimensional dense vector space and can be used for semantic textual similarity, semantic search, paraphrase mining, text classification, clustering, and more.
 ## Model Details
 ### Model Description
 - **Model Type:** Sentence Transformer
-- **Base model:** [intfloat/e5-large-v2](https://huggingface.co/intfloat/e5-large-v2) <!-- at revision f169b11e22de13617baa190a028a32f3493550b6 -->
 - **Maximum Sequence Length:** 512 tokens
 - **Output Dimensionality:** 1024 dimensions
 - **Similarity Function:** Cosine Similarity
@@ -169,7 +170,7 @@ This is a [sentence-transformers](https://www.SBERT.net) model finetuned from [i
 ```
 SentenceTransformer(
-  (0): Transformer({'max_seq_length': 512, 'do_lower_case': False}) with Transformer model: BertModel
   (1): Pooling({'word_embedding_dimension': 1024, 'pooling_mode_cls_token': False, 'pooling_mode_mean_tokens': True, 'pooling_mode_max_tokens': False, 'pooling_mode_mean_sqrt_len_tokens': False, 'pooling_mode_weightedmean_tokens': False, 'pooling_mode_lasttoken': False, 'include_prompt': True})
   (2): Normalize()
 )
@@ -203,8 +204,10 @@ print(embeddings.shape)
 # Get the similarity scores for the embeddings
 similarities = model.similarity(embeddings, embeddings)
-print(similarities.shape)
-# [3, 3]
 ```
 <!--
@@ -266,7 +269,8 @@ You can finetune this model on your own dataset.
   ```json
   {
       "scale": 20.0,
-      "similarity_fct": "cos_sim"
   }
   ```
@@ -393,29 +397,31 @@ You can finetune this model on your own dataset.
 - `prompts`: None
 - `batch_sampler`: batch_sampler
 - `multi_dataset_batch_sampler`: round_robin
 </details>
 ### Training Logs
 | Epoch | Step | Training Loss |
 |:-----:|:----:|:-------------:|
-| 0.08  | 500  | 0.3751        |
-| 0.16  | 1000 | 0.1414        |
-| 0.24  | 1500 | 0.1219        |
-| 0.32  | 2000 | 0.0979        |
-| 0.4   | 2500 | 0.083         |
-| 0.48  | 3000 | 0.067         |
-| 0.56  | 3500 | 0.0645        |
-| 0.64  | 4000 | 0.0578        |
-| 0.72  | 4500 | 0.0454        |
-| 0.8   | 5000 | 0.0404        |
-| 0.88  | 5500 | 0.0419        |
-| 0.96  | 6000 | 0.0402        |
 ### Framework Versions
 - Python: 3.11.13
-- Sentence Transformers: 4.1.0
 - Transformers: 4.52.4
 - PyTorch: 2.6.0+cu124
 - Accelerate: 1.8.1

 - sentence-transformers
 - sentence-similarity
 - feature-extraction
+- dense
 - generated_from_trainer
 - dataset_size:50000
 - loss:MultipleNegativesRankingLoss
+base_model: Yash911/e5-finetuned
 widget:
 - source_sentence: AVS Video Editor AVS Video Editor is a video editing software published
     by Online Media Technologies Ltd. It is a part of AVS4YOU software suite which
 library_name: sentence-transformers
 ---
+# SentenceTransformer based on Yash911/e5-finetuned
+This is a [sentence-transformers](https://www.SBERT.net) model finetuned from [Yash911/e5-finetuned](https://huggingface.co/Yash911/e5-finetuned). It maps sentences & paragraphs to a 1024-dimensional dense vector space and can be used for semantic textual similarity, semantic search, paraphrase mining, text classification, clustering, and more.
 ## Model Details
 ### Model Description
 - **Model Type:** Sentence Transformer
+- **Base model:** [Yash911/e5-finetuned](https://huggingface.co/Yash911/e5-finetuned) <!-- at revision 3225106ac46f6cb6475281b1d428aad055924db7 -->
 - **Maximum Sequence Length:** 512 tokens
 - **Output Dimensionality:** 1024 dimensions
 - **Similarity Function:** Cosine Similarity
 ```
 SentenceTransformer(
+  (0): Transformer({'max_seq_length': 512, 'do_lower_case': False, 'architecture': 'BertModel'})
   (1): Pooling({'word_embedding_dimension': 1024, 'pooling_mode_cls_token': False, 'pooling_mode_mean_tokens': True, 'pooling_mode_max_tokens': False, 'pooling_mode_mean_sqrt_len_tokens': False, 'pooling_mode_weightedmean_tokens': False, 'pooling_mode_lasttoken': False, 'include_prompt': True})
   (2): Normalize()
 )
 # Get the similarity scores for the embeddings
 similarities = model.similarity(embeddings, embeddings)
+print(similarities)
+# tensor([[ 1.0000,  0.6894, -0.0088],
+#         [ 0.6894,  1.0000, -0.0543],
+#         [-0.0088, -0.0543,  1.0000]])
 ```
 <!--
   ```json
   {
       "scale": 20.0,
+      "similarity_fct": "cos_sim",
+      "gather_across_devices": false
   }
   ```
 - `prompts`: None
 - `batch_sampler`: batch_sampler
 - `multi_dataset_batch_sampler`: round_robin
+- `router_mapping`: {}
+- `learning_rate_mapping`: {}
 </details>
 ### Training Logs
 | Epoch | Step | Training Loss |
 |:-----:|:----:|:-------------:|
+| 0.08  | 500  | 0.0323        |
+| 0.16  | 1000 | 0.0356        |
+| 0.24  | 1500 | 0.0426        |
+| 0.32  | 2000 | 0.0451        |
+| 0.4   | 2500 | 0.0306        |
+| 0.48  | 3000 | 0.0341        |
+| 0.56  | 3500 | 0.0374        |
+| 0.64  | 4000 | 0.0291        |
+| 0.72  | 4500 | 0.0266        |
+| 0.8   | 5000 | 0.0214        |
+| 0.88  | 5500 | 0.0331        |
+| 0.96  | 6000 | 0.0281        |
 ### Framework Versions
 - Python: 3.11.13
+- Sentence Transformers: 5.1.0
 - Transformers: 4.52.4
 - PyTorch: 2.6.0+cu124
 - Accelerate: 1.8.1

config_sentence_transformers.json CHANGED Viewed

@@ -1,10 +1,14 @@
 {
   "__version__": {
-    "sentence_transformers": "4.1.0",
     "transformers": "4.52.4",
     "pytorch": "2.6.0+cu124"
   },
-  "prompts": {},
   "default_prompt_name": null,
-  "similarity_fn_name": "cosine"
 }

 {
   "__version__": {
+    "sentence_transformers": "5.1.0",
     "transformers": "4.52.4",
     "pytorch": "2.6.0+cu124"
   },
+  "prompts": {
+    "query": "",
+    "document": ""
+  },
   "default_prompt_name": null,
+  "similarity_fn_name": "cosine",
+  "model_type": "SentenceTransformer"
 }

model.safetensors ADDED Viewed

@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:5e9435891aaf138c9519d918f0dfd6b9d607abfeafec2575a06d3137bd292a0c
+size 1340612432

sentence_bert_config.json CHANGED Viewed

@@ -1,4 +1,4 @@
 {
-  "max_seq_length": 512,
-  "do_lower_case": false
 }

 {
+    "max_seq_length": 512,
+    "do_lower_case": false
 }

tokenizer_config.json CHANGED Viewed

@@ -46,11 +46,18 @@
   "do_lower_case": true,
   "extra_special_tokens": {},
   "mask_token": "[MASK]",
   "model_max_length": 512,
   "pad_token": "[PAD]",
   "sep_token": "[SEP]",
   "strip_accents": null,
   "tokenize_chinese_chars": true,
   "tokenizer_class": "BertTokenizer",
   "unk_token": "[UNK]"
 }

   "do_lower_case": true,
   "extra_special_tokens": {},
   "mask_token": "[MASK]",
+  "max_length": 512,
   "model_max_length": 512,
+  "pad_to_multiple_of": null,
   "pad_token": "[PAD]",
+  "pad_token_type_id": 0,
+  "padding_side": "right",
   "sep_token": "[SEP]",
+  "stride": 0,
   "strip_accents": null,
   "tokenize_chinese_chars": true,
   "tokenizer_class": "BertTokenizer",
+  "truncation_side": "right",
+  "truncation_strategy": "longest_first",
   "unk_token": "[UNK]"
 }