Commit ee120ba (verified) by radoslavralev
Parent(s): 589a913

Add new SentenceTransformer model
1_Pooling/config.json CHANGED
@@ -1,7 +1,7 @@
 {
-    "word_embedding_dimension": 768,
-    "pooling_mode_cls_token": true,
-    "pooling_mode_mean_tokens": false,
+    "word_embedding_dimension": 384,
+    "pooling_mode_cls_token": false,
+    "pooling_mode_mean_tokens": true,
     "pooling_mode_max_tokens": false,
     "pooling_mode_mean_sqrt_len_tokens": false,
     "pooling_mode_weightedmean_tokens": false,
README.md CHANGED
@@ -7,7 +7,7 @@ tags:
 - generated_from_trainer
 - dataset_size:359997
 - loss:MultipleNegativesRankingLoss
-base_model: Alibaba-NLP/gte-modernbert-base
+base_model: sentence-transformers/all-MiniLM-L6-v2
 widget:
 - source_sentence: When do you use Ms. or Mrs.? Is one for a married woman and one
     for one that's not married? Which one is for what?
@@ -60,7 +60,7 @@ metrics:
 - cosine_mrr@10
 - cosine_map@100
 model-index:
-- name: SentenceTransformer based on Alibaba-NLP/gte-modernbert-base
+- name: SentenceTransformer based on sentence-transformers/all-MiniLM-L6-v2
   results:
   - task:
       type: information-retrieval
@@ -70,49 +70,49 @@ model-index:
       type: NanoMSMARCO
     metrics:
     - type: cosine_accuracy@1
-      value: 0.38
+      value: 0.22
      name: Cosine Accuracy@1
     - type: cosine_accuracy@3
-      value: 0.62
+      value: 0.5
      name: Cosine Accuracy@3
     - type: cosine_accuracy@5
-      value: 0.72
+      value: 0.62
      name: Cosine Accuracy@5
     - type: cosine_accuracy@10
-      value: 0.78
+      value: 0.74
      name: Cosine Accuracy@10
     - type: cosine_precision@1
-      value: 0.38
+      value: 0.22
      name: Cosine Precision@1
     - type: cosine_precision@3
-      value: 0.20666666666666667
+      value: 0.16666666666666663
      name: Cosine Precision@3
     - type: cosine_precision@5
-      value: 0.14400000000000002
+      value: 0.124
      name: Cosine Precision@5
     - type: cosine_precision@10
-      value: 0.078
+      value: 0.07400000000000001
      name: Cosine Precision@10
     - type: cosine_recall@1
-      value: 0.38
+      value: 0.22
      name: Cosine Recall@1
     - type: cosine_recall@3
-      value: 0.62
+      value: 0.5
      name: Cosine Recall@3
     - type: cosine_recall@5
-      value: 0.72
+      value: 0.62
      name: Cosine Recall@5
     - type: cosine_recall@10
-      value: 0.78
+      value: 0.74
      name: Cosine Recall@10
     - type: cosine_ndcg@10
-      value: 0.5792677770404034
+      value: 0.47667177266958005
      name: Cosine Ndcg@10
     - type: cosine_mrr@10
-      value: 0.5150238095238094
+      value: 0.39240476190476187
      name: Cosine Mrr@10
     - type: cosine_map@100
-      value: 0.5260186479155519
+      value: 0.406991563991564
      name: Cosine Map@100
   - task:
       type: information-retrieval
@@ -122,49 +122,49 @@ model-index:
       type: NanoNQ
     metrics:
     - type: cosine_accuracy@1
-      value: 0.38
+      value: 0.28
      name: Cosine Accuracy@1
     - type: cosine_accuracy@3
-      value: 0.58
+      value: 0.46
      name: Cosine Accuracy@3
     - type: cosine_accuracy@5
-      value: 0.66
+      value: 0.56
      name: Cosine Accuracy@5
     - type: cosine_accuracy@10
-      value: 0.74
+      value: 0.64
      name: Cosine Accuracy@10
     - type: cosine_precision@1
-      value: 0.38
+      value: 0.28
      name: Cosine Precision@1
     - type: cosine_precision@3
-      value: 0.2
+      value: 0.15999999999999998
      name: Cosine Precision@3
     - type: cosine_precision@5
-      value: 0.14
+      value: 0.11600000000000002
      name: Cosine Precision@5
     - type: cosine_precision@10
-      value: 0.078
+      value: 0.066
      name: Cosine Precision@10
     - type: cosine_recall@1
-      value: 0.36
+      value: 0.27
      name: Cosine Recall@1
     - type: cosine_recall@3
-      value: 0.54
+      value: 0.45
      name: Cosine Recall@3
     - type: cosine_recall@5
-      value: 0.62
+      value: 0.54
      name: Cosine Recall@5
     - type: cosine_recall@10
-      value: 0.7
+      value: 0.61
      name: Cosine Recall@10
     - type: cosine_ndcg@10
-      value: 0.5417937853620868
+      value: 0.4442430372694745
      name: Cosine Ndcg@10
     - type: cosine_mrr@10
-      value: 0.5033571428571428
+      value: 0.39785714285714285
      name: Cosine Mrr@10
     - type: cosine_map@100
-      value: 0.4942594774374801
+      value: 0.39869586832265574
      name: Cosine Map@100
   - task:
       type: nano-beir
@@ -174,63 +174,63 @@ model-index:
       type: NanoBEIR_mean
     metrics:
     - type: cosine_accuracy@1
-      value: 0.38
+      value: 0.25
      name: Cosine Accuracy@1
     - type: cosine_accuracy@3
-      value: 0.6
+      value: 0.48
      name: Cosine Accuracy@3
     - type: cosine_accuracy@5
-      value: 0.69
+      value: 0.5900000000000001
      name: Cosine Accuracy@5
     - type: cosine_accuracy@10
-      value: 0.76
+      value: 0.69
      name: Cosine Accuracy@10
     - type: cosine_precision@1
-      value: 0.38
+      value: 0.25
      name: Cosine Precision@1
     - type: cosine_precision@3
-      value: 0.20333333333333334
+      value: 0.1633333333333333
      name: Cosine Precision@3
     - type: cosine_precision@5
-      value: 0.14200000000000002
+      value: 0.12000000000000001
      name: Cosine Precision@5
     - type: cosine_precision@10
-      value: 0.078
+      value: 0.07
      name: Cosine Precision@10
     - type: cosine_recall@1
-      value: 0.37
+      value: 0.245
      name: Cosine Recall@1
     - type: cosine_recall@3
-      value: 0.5800000000000001
+      value: 0.475
      name: Cosine Recall@3
     - type: cosine_recall@5
-      value: 0.6699999999999999
+      value: 0.5800000000000001
      name: Cosine Recall@5
     - type: cosine_recall@10
-      value: 0.74
+      value: 0.675
      name: Cosine Recall@10
     - type: cosine_ndcg@10
-      value: 0.5605307812012451
+      value: 0.46045740496952725
      name: Cosine Ndcg@10
     - type: cosine_mrr@10
-      value: 0.5091904761904762
+      value: 0.39513095238095236
      name: Cosine Mrr@10
     - type: cosine_map@100
-      value: 0.510139062676516
+      value: 0.4028437161571099
      name: Cosine Map@100
 ---
 
-# SentenceTransformer based on Alibaba-NLP/gte-modernbert-base
+# SentenceTransformer based on sentence-transformers/all-MiniLM-L6-v2
 
-This is a [sentence-transformers](https://www.SBERT.net) model finetuned from [Alibaba-NLP/gte-modernbert-base](https://huggingface.co/Alibaba-NLP/gte-modernbert-base). It maps sentences & paragraphs to a 768-dimensional dense vector space and can be used for semantic textual similarity, semantic search, paraphrase mining, text classification, clustering, and more.
+This is a [sentence-transformers](https://www.SBERT.net) model finetuned from [sentence-transformers/all-MiniLM-L6-v2](https://huggingface.co/sentence-transformers/all-MiniLM-L6-v2). It maps sentences & paragraphs to a 384-dimensional dense vector space and can be used for semantic textual similarity, semantic search, paraphrase mining, text classification, clustering, and more.
 
 ## Model Details
 
 ### Model Description
 - **Model Type:** Sentence Transformer
-- **Base model:** [Alibaba-NLP/gte-modernbert-base](https://huggingface.co/Alibaba-NLP/gte-modernbert-base) <!-- at revision e7f32e3c00f91d699e8c43b53106206bcc72bb22 -->
+- **Base model:** [sentence-transformers/all-MiniLM-L6-v2](https://huggingface.co/sentence-transformers/all-MiniLM-L6-v2) <!-- at revision c9745ed1d9f207416be6d2e6f8de32d1f16199bf -->
 - **Maximum Sequence Length:** 128 tokens
-- **Output Dimensionality:** 768 dimensions
+- **Output Dimensionality:** 384 dimensions
 - **Similarity Function:** Cosine Similarity
 <!-- - **Training Dataset:** Unknown -->
 <!-- - **Language:** Unknown -->
@@ -246,8 +246,9 @@ This is a [sentence-transformers](https://www.SBERT.net) model finetuned from [A
 
 ```
 SentenceTransformer(
-  (0): Transformer({'max_seq_length': 128, 'do_lower_case': False, 'architecture': 'ModernBertModel'})
-  (1): Pooling({'word_embedding_dimension': 768, 'pooling_mode_cls_token': True, 'pooling_mode_mean_tokens': False, 'pooling_mode_max_tokens': False, 'pooling_mode_mean_sqrt_len_tokens': False, 'pooling_mode_weightedmean_tokens': False, 'pooling_mode_lasttoken': False, 'include_prompt': True})
+  (0): Transformer({'max_seq_length': 128, 'do_lower_case': False, 'architecture': 'BertModel'})
+  (1): Pooling({'word_embedding_dimension': 384, 'pooling_mode_cls_token': False, 'pooling_mode_mean_tokens': True, 'pooling_mode_max_tokens': False, 'pooling_mode_mean_sqrt_len_tokens': False, 'pooling_mode_weightedmean_tokens': False, 'pooling_mode_lasttoken': False, 'include_prompt': True})
+  (2): Normalize()
 )
 ```
 
@@ -275,14 +276,14 @@ sentences = [
 ]
 embeddings = model.encode(sentences)
 print(embeddings.shape)
-# [3, 768]
+# [3, 384]
 
 # Get the similarity scores for the embeddings
 similarities = model.similarity(embeddings, embeddings)
 print(similarities)
-# tensor([[ 1.0000,  0.9926, -0.0086],
-#         [ 0.9926,  1.0000, -0.0135],
-#         [-0.0086, -0.0135,  1.0000]])
+# tensor([[1.0000, 0.9894, 0.0074],
+#         [0.9894, 1.0000, 0.0136],
+#         [0.0074, 0.0136, 1.0000]])
 ```
 
 <!--
@@ -320,21 +321,21 @@ You can finetune this model on your own dataset.
 
 | Metric              | NanoMSMARCO | NanoNQ     |
 |:--------------------|:------------|:-----------|
-| cosine_accuracy@1   | 0.38        | 0.38       |
-| cosine_accuracy@3   | 0.62        | 0.58       |
-| cosine_accuracy@5   | 0.72        | 0.66       |
-| cosine_accuracy@10  | 0.78        | 0.74       |
-| cosine_precision@1  | 0.38        | 0.38       |
-| cosine_precision@3  | 0.2067      | 0.2        |
-| cosine_precision@5  | 0.144       | 0.14       |
-| cosine_precision@10 | 0.078       | 0.078      |
-| cosine_recall@1     | 0.38        | 0.36       |
-| cosine_recall@3     | 0.62        | 0.54       |
-| cosine_recall@5     | 0.72        | 0.62       |
-| cosine_recall@10    | 0.78        | 0.7        |
-| **cosine_ndcg@10**  | **0.5793**  | **0.5418** |
-| cosine_mrr@10       | 0.515       | 0.5034     |
-| cosine_map@100      | 0.526       | 0.4943     |
+| cosine_accuracy@1   | 0.22        | 0.28       |
+| cosine_accuracy@3   | 0.5         | 0.46       |
+| cosine_accuracy@5   | 0.62        | 0.56       |
+| cosine_accuracy@10  | 0.74        | 0.64       |
+| cosine_precision@1  | 0.22        | 0.28       |
+| cosine_precision@3  | 0.1667      | 0.16       |
+| cosine_precision@5  | 0.124       | 0.116      |
+| cosine_precision@10 | 0.074       | 0.066      |
+| cosine_recall@1     | 0.22        | 0.27       |
+| cosine_recall@3     | 0.5         | 0.45       |
+| cosine_recall@5     | 0.62        | 0.54       |
+| cosine_recall@10    | 0.74        | 0.61       |
+| **cosine_ndcg@10**  | **0.4767**  | **0.4442** |
+| cosine_mrr@10       | 0.3924      | 0.3979     |
+| cosine_map@100      | 0.407       | 0.3987     |
 
 #### Nano BEIR
 
@@ -352,21 +353,21 @@ You can finetune this model on your own dataset.
 
 | Metric              | Value      |
 |:--------------------|:-----------|
-| cosine_accuracy@1   | 0.38       |
-| cosine_accuracy@3   | 0.6        |
-| cosine_accuracy@5   | 0.69       |
-| cosine_accuracy@10  | 0.76       |
-| cosine_precision@1  | 0.38       |
-| cosine_precision@3  | 0.2033     |
-| cosine_precision@5  | 0.142      |
-| cosine_precision@10 | 0.078      |
-| cosine_recall@1     | 0.37       |
-| cosine_recall@3     | 0.58       |
-| cosine_recall@5     | 0.67       |
-| cosine_recall@10    | 0.74       |
-| **cosine_ndcg@10**  | **0.5605** |
-| cosine_mrr@10       | 0.5092     |
-| cosine_map@100      | 0.5101     |
+| cosine_accuracy@1   | 0.25       |
+| cosine_accuracy@3   | 0.48       |
+| cosine_accuracy@5   | 0.59       |
+| cosine_accuracy@10  | 0.69       |
+| cosine_precision@1  | 0.25       |
+| cosine_precision@3  | 0.1633     |
+| cosine_precision@5  | 0.12       |
+| cosine_precision@10 | 0.07       |
+| cosine_recall@1     | 0.245      |
+| cosine_recall@3     | 0.475      |
+| cosine_recall@5     | 0.58       |
+| cosine_recall@10    | 0.675      |
+| **cosine_ndcg@10**  | **0.4605** |
+| cosine_mrr@10       | 0.3951     |
+| cosine_map@100      | 0.4028     |
 
 <!--
 ## Bias, Risks and Limitations
@@ -389,10 +390,10 @@ You can finetune this model on your own dataset.
 * Size: 359,997 training samples
 * Columns: <code>anchor</code>, <code>positive</code>, and <code>negative</code>
 * Approximate statistics based on the first 1000 samples:
-  |         | anchor                                                                            | positive                                                                           | negative                                                                           |
-  |:--------|:----------------------------------------------------------------------------------|:-----------------------------------------------------------------------------------|:-----------------------------------------------------------------------------------|
-  | type    | string                                                                            | string                                                                             | string                                                                             |
-  | details | <ul><li>min: 4 tokens</li><li>mean: 15.4 tokens</li><li>max: 47 tokens</li></ul>  | <ul><li>min: 4 tokens</li><li>mean: 15.47 tokens</li><li>max: 47 tokens</li></ul>  | <ul><li>min: 5 tokens</li><li>mean: 16.9 tokens</li><li>max: 125 tokens</li></ul>  |
+  |         | anchor                                                                             | positive                                                                           | negative                                                                            |
+  |:--------|:------------------------------------------------------------------------------------|:------------------------------------------------------------------------------------|:-------------------------------------------------------------------------------------|
+  | type    | string                                                                             | string                                                                             | string                                                                              |
+  | details | <ul><li>min: 4 tokens</li><li>mean: 15.46 tokens</li><li>max: 49 tokens</li></ul> | <ul><li>min: 4 tokens</li><li>mean: 15.52 tokens</li><li>max: 49 tokens</li></ul> | <ul><li>min: 5 tokens</li><li>mean: 16.99 tokens</li><li>max: 128 tokens</li></ul> |
 * Samples:
   | anchor | positive | negative |
   |:--------------------------------------------------------------------------------|:-----------------------------------------------------------------------------------------------------|:-------------------------------------------------------------------------------------------------------------------------|
@@ -418,7 +419,7 @@ You can finetune this model on your own dataset.
   |         | anchor | positive | negative |
   |:--------|:-------|:---------|:---------|
   | type    | string | string   | string   |
-  | details | <ul><li>min: 6 tokens</li><li>mean: 15.68 tokens</li><li>max: 72 tokens</li></ul> | <ul><li>min: 6 tokens</li><li>mean: 15.75 tokens</li><li>max: 72 tokens</li></ul> | <ul><li>min: 6 tokens</li><li>mean: 16.95 tokens</li><li>max: 78 tokens</li></ul> |
+  | details | <ul><li>min: 6 tokens</li><li>mean: 15.71 tokens</li><li>max: 65 tokens</li></ul> | <ul><li>min: 6 tokens</li><li>mean: 15.79 tokens</li><li>max: 65 tokens</li></ul> | <ul><li>min: 6 tokens</li><li>mean: 16.97 tokens</li><li>max: 78 tokens</li></ul> |
 * Samples:
   | anchor | positive | negative |
   |:------------------------------------------------------------------------------------------------------------------|:------------------------------------------------------------------------------------------------------------------|:------------------------------------------------------------------------------------------------------------------------------------------|
@@ -583,27 +584,27 @@ You can finetune this model on your own dataset.
 ### Training Logs
 | Epoch  | Step | Training Loss | Validation Loss | NanoMSMARCO_cosine_ndcg@10 | NanoNQ_cosine_ndcg@10 | NanoBEIR_mean_cosine_ndcg@10 |
 |:------:|:----:|:-------------:|:---------------:|:--------------------------:|:---------------------:|:----------------------------:|
-| 0      | 0    | -             | 2.1886          | 0.6530                     | 0.6552                | 0.6541                       |
-| 0.0889 | 250  | 0.9475        | 0.4116          | 0.6233                     | 0.6439                | 0.6336                       |
-| 0.1778 | 500  | 0.3963        | 0.3836          | 0.6137                     | 0.6372                | 0.6254                       |
-| 0.2667 | 750  | 0.3776        | 0.3707          | 0.6243                     | 0.6259                | 0.6251                       |
-| 0.3556 | 1000 | 0.3675        | 0.3638          | 0.6250                     | 0.5981                | 0.6116                       |
-| 0.4445 | 1250 | 0.358         | 0.3581          | 0.6170                     | 0.6045                | 0.6108                       |
-| 0.5334 | 1500 | 0.3575        | 0.3544          | 0.6049                     | 0.5821                | 0.5935                       |
-| 0.6223 | 1750 | 0.3521        | 0.3513          | 0.5835                     | 0.5619                | 0.5727                       |
-| 0.7112 | 2000 | 0.3489        | 0.3486          | 0.5955                     | 0.5576                | 0.5765                       |
-| 0.8001 | 2250 | 0.3465        | 0.3463          | 0.6037                     | 0.5786                | 0.5911                       |
-| 0.8890 | 2500 | 0.3461        | 0.3440          | 0.5884                     | 0.5691                | 0.5788                       |
-| 0.9780 | 2750 | 0.3446        | 0.3428          | 0.5809                     | 0.5627                | 0.5718                       |
-| 1.0669 | 3000 | 0.328         | 0.3423          | 0.5701                     | 0.5599                | 0.5650                       |
-| 1.1558 | 3250 | 0.3235        | 0.3416          | 0.5691                     | 0.5419                | 0.5555                       |
-| 1.2447 | 3500 | 0.3221        | 0.3406          | 0.5694                     | 0.5534                | 0.5614                       |
-| 1.3336 | 3750 | 0.3221        | 0.3397          | 0.5736                     | 0.5519                | 0.5628                       |
-| 1.4225 | 4000 | 0.3196        | 0.3391          | 0.5811                     | 0.5416                | 0.5613                       |
-| 1.5114 | 4250 | 0.3201        | 0.3386          | 0.5525                     | 0.5538                | 0.5532                       |
-| 1.6003 | 4500 | 0.321         | 0.3384          | 0.5801                     | 0.5380                | 0.5591                       |
-| 1.6892 | 4750 | 0.3192        | 0.3382          | 0.5799                     | 0.5474                | 0.5636                       |
-| 1.7781 | 5000 | 0.3203        | 0.3379          | 0.5793                     | 0.5418                | 0.5605                       |
+| 0      | 0    | -             | 0.5501          | 0.5540                     | 0.5931                | 0.5735                       |
+| 0.0889 | 250  | 0.6218        | 0.4360          | 0.5499                     | 0.5725                | 0.5612                       |
+| 0.1778 | 500  | 0.557         | 0.4231          | 0.5414                     | 0.5239                | 0.5326                       |
+| 0.2667 | 750  | 0.5359        | 0.4146          | 0.5188                     | 0.5189                | 0.5188                       |
+| 0.3556 | 1000 | 0.5213        | 0.4095          | 0.4998                     | 0.5138                | 0.5068                       |
+| 0.4445 | 1250 | 0.51          | 0.4058          | 0.5021                     | 0.4988                | 0.5005                       |
+| 0.5334 | 1500 | 0.5086        | 0.4030          | 0.5040                     | 0.4970                | 0.5005                       |
+| 0.6223 | 1750 | 0.5031        | 0.4002          | 0.4963                     | 0.4997                | 0.4980                       |
+| 0.7112 | 2000 | 0.4964        | 0.3979          | 0.5033                     | 0.4880                | 0.4956                       |
+| 0.8001 | 2250 | 0.4927        | 0.3960          | 0.5077                     | 0.4881                | 0.4979                       |
+| 0.8890 | 2500 | 0.4925        | 0.3946          | 0.4939                     | 0.4826                | 0.4882                       |
+| 0.9780 | 2750 | 0.4889        | 0.3936          | 0.4953                     | 0.4778                | 0.4865                       |
+| 1.0669 | 3000 | 0.4819        | 0.3917          | 0.4838                     | 0.4723                | 0.4781                       |
+| 1.1558 | 3250 | 0.4798        | 0.3910          | 0.4900                     | 0.4587                | 0.4743                       |
+| 1.2447 | 3500 | 0.4773        | 0.3905          | 0.4888                     | 0.4557                | 0.4723                       |
+| 1.3336 | 3750 | 0.476         | 0.3899          | 0.4782                     | 0.4512                | 0.4647                       |
+| 1.4225 | 4000 | 0.4738        | 0.3891          | 0.4873                     | 0.4508                | 0.4691                       |
+| 1.5114 | 4250 | 0.4727        | 0.3887          | 0.4849                     | 0.4464                | 0.4657                       |
+| 1.6003 | 4500 | 0.4737        | 0.3887          | 0.4772                     | 0.4482                | 0.4627                       |
+| 1.6892 | 4750 | 0.4722        | 0.3884          | 0.4810                     | 0.4432                | 0.4621                       |
+| 1.7781 | 5000 | 0.4739        | 0.3883          | 0.4767                     | 0.4442                | 0.4605                       |
 
 
 ### Framework Versions
config_sentence_transformers.json CHANGED
@@ -4,11 +4,11 @@
     "transformers": "4.57.3",
     "pytorch": "2.9.1+cu128"
   },
+  "model_type": "SentenceTransformer",
   "prompts": {
     "query": "",
     "document": ""
   },
   "default_prompt_name": null,
-  "similarity_fn_name": "cosine",
-  "model_type": "SentenceTransformer"
+  "similarity_fn_name": "cosine"
 }
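This hunk only reorders keys: `model_type` moves ahead of `prompts`, and `similarity_fn_name` becomes the last entry, losing its trailing comma; no values change. `similarity_fn_name: "cosine"` is what `model.similarity(...)` in the README dispatches on. A quick way to confirm it after loading, where the repository id is a placeholder since the commit page does not show it (illustrative only):

```python
from sentence_transformers import SentenceTransformer

# "path-or-id-of-this-repo" is a placeholder for the actual repository id.
model = SentenceTransformer("path-or-id-of-this-repo")
print(model.similarity_fn_name)  # expected: "cosine"
```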
modules.json CHANGED
@@ -10,5 +10,11 @@
     "name": "1",
     "path": "1_Pooling",
     "type": "sentence_transformers.models.Pooling"
+  },
+  {
+    "idx": 2,
+    "name": "2",
+    "path": "2_Normalize",
+    "type": "sentence_transformers.models.Normalize"
   }
 ]
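The new third module, `Normalize`, L2-normalizes each sentence embedding, so cosine similarity between the model's outputs reduces to a plain dot product. A small sketch of that equivalence (illustrative only, not part of this commit):

```python
import torch
import torch.nn.functional as F

# With unit-norm embeddings, the dot product equals cosine similarity.
embeddings = torch.randn(3, 384)
normalized = F.normalize(embeddings, p=2, dim=1)  # what the Normalize module applies
dot = normalized @ normalized.T
cos = F.cosine_similarity(embeddings.unsqueeze(1), embeddings.unsqueeze(0), dim=-1)
print(torch.allclose(dot, cos, atol=1e-6))  # True
```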