radoslavralev committed on
Commit
7a6298f
·
verified ·
1 Parent(s): 50d711c

Add new SentenceTransformer model

Browse files
1_Pooling/config.json CHANGED
@@ -1,7 +1,7 @@
1
  {
2
- "word_embedding_dimension": 384,
3
- "pooling_mode_cls_token": false,
4
- "pooling_mode_mean_tokens": true,
5
  "pooling_mode_max_tokens": false,
6
  "pooling_mode_mean_sqrt_len_tokens": false,
7
  "pooling_mode_weightedmean_tokens": false,
 
1
  {
2
+ "word_embedding_dimension": 768,
3
+ "pooling_mode_cls_token": true,
4
+ "pooling_mode_mean_tokens": false,
5
  "pooling_mode_max_tokens": false,
6
  "pooling_mode_mean_sqrt_len_tokens": false,
7
  "pooling_mode_weightedmean_tokens": false,
README.md CHANGED
@@ -7,7 +7,7 @@ tags:
7
  - generated_from_trainer
8
  - dataset_size:111470
9
  - loss:MultipleNegativesRankingLoss
10
- base_model: sentence-transformers/all-MiniLM-L12-v2
11
  widget:
12
  - source_sentence: why are some rocks radioactive
13
  sentences:
@@ -106,7 +106,7 @@ metrics:
106
  - cosine_mrr@10
107
  - cosine_map@100
108
  model-index:
109
- - name: SentenceTransformer based on sentence-transformers/all-MiniLM-L12-v2
110
  results:
111
  - task:
112
  type: information-retrieval
@@ -116,49 +116,49 @@ model-index:
116
  type: NanoMSMARCO
117
  metrics:
118
  - type: cosine_accuracy@1
119
- value: 0.36
120
  name: Cosine Accuracy@1
121
  - type: cosine_accuracy@3
122
- value: 0.58
123
  name: Cosine Accuracy@3
124
  - type: cosine_accuracy@5
125
- value: 0.62
126
  name: Cosine Accuracy@5
127
  - type: cosine_accuracy@10
128
- value: 0.7
129
  name: Cosine Accuracy@10
130
  - type: cosine_precision@1
131
- value: 0.36
132
  name: Cosine Precision@1
133
  - type: cosine_precision@3
134
- value: 0.19333333333333333
135
  name: Cosine Precision@3
136
  - type: cosine_precision@5
137
- value: 0.124
138
  name: Cosine Precision@5
139
  - type: cosine_precision@10
140
- value: 0.07
141
  name: Cosine Precision@10
142
  - type: cosine_recall@1
143
- value: 0.36
144
  name: Cosine Recall@1
145
  - type: cosine_recall@3
146
- value: 0.58
147
  name: Cosine Recall@3
148
  - type: cosine_recall@5
149
- value: 0.62
150
  name: Cosine Recall@5
151
  - type: cosine_recall@10
152
- value: 0.7
153
  name: Cosine Recall@10
154
  - type: cosine_ndcg@10
155
- value: 0.5293969846378502
156
  name: Cosine Ndcg@10
157
  - type: cosine_mrr@10
158
- value: 0.47507936507936505
159
  name: Cosine Mrr@10
160
  - type: cosine_map@100
161
- value: 0.4876720668963073
162
  name: Cosine Map@100
163
  - task:
164
  type: information-retrieval
@@ -168,49 +168,49 @@ model-index:
168
  type: NanoNQ
169
  metrics:
170
  - type: cosine_accuracy@1
171
- value: 0.4
172
  name: Cosine Accuracy@1
173
  - type: cosine_accuracy@3
174
  value: 0.56
175
  name: Cosine Accuracy@3
176
  - type: cosine_accuracy@5
177
- value: 0.58
178
  name: Cosine Accuracy@5
179
  - type: cosine_accuracy@10
180
  value: 0.74
181
  name: Cosine Accuracy@10
182
  - type: cosine_precision@1
183
- value: 0.4
184
  name: Cosine Precision@1
185
  - type: cosine_precision@3
186
- value: 0.19333333333333333
187
  name: Cosine Precision@3
188
  - type: cosine_precision@5
189
- value: 0.12
190
  name: Cosine Precision@5
191
  - type: cosine_precision@10
192
- value: 0.07600000000000001
193
  name: Cosine Precision@10
194
  - type: cosine_recall@1
195
- value: 0.37
196
  name: Cosine Recall@1
197
  - type: cosine_recall@3
198
- value: 0.53
199
  name: Cosine Recall@3
200
  - type: cosine_recall@5
201
- value: 0.55
202
  name: Cosine Recall@5
203
  - type: cosine_recall@10
204
- value: 0.7
205
  name: Cosine Recall@10
206
  - type: cosine_ndcg@10
207
- value: 0.5283136453383426
208
  name: Cosine Ndcg@10
209
  - type: cosine_mrr@10
210
- value: 0.49621428571428566
211
  name: Cosine Mrr@10
212
  - type: cosine_map@100
213
- value: 0.47570813068007134
214
  name: Cosine Map@100
215
  - task:
216
  type: nano-beir
@@ -220,63 +220,63 @@ model-index:
220
  type: NanoBEIR_mean
221
  metrics:
222
  - type: cosine_accuracy@1
223
- value: 0.38
224
  name: Cosine Accuracy@1
225
  - type: cosine_accuracy@3
226
- value: 0.5700000000000001
227
  name: Cosine Accuracy@3
228
  - type: cosine_accuracy@5
229
- value: 0.6
230
  name: Cosine Accuracy@5
231
  - type: cosine_accuracy@10
232
- value: 0.72
233
  name: Cosine Accuracy@10
234
  - type: cosine_precision@1
235
- value: 0.38
236
  name: Cosine Precision@1
237
  - type: cosine_precision@3
238
- value: 0.19333333333333333
239
  name: Cosine Precision@3
240
  - type: cosine_precision@5
241
- value: 0.122
242
  name: Cosine Precision@5
243
  - type: cosine_precision@10
244
- value: 0.07300000000000001
245
  name: Cosine Precision@10
246
  - type: cosine_recall@1
247
- value: 0.365
248
  name: Cosine Recall@1
249
  - type: cosine_recall@3
250
- value: 0.5549999999999999
251
  name: Cosine Recall@3
252
  - type: cosine_recall@5
253
- value: 0.585
254
  name: Cosine Recall@5
255
  - type: cosine_recall@10
256
- value: 0.7
257
  name: Cosine Recall@10
258
  - type: cosine_ndcg@10
259
- value: 0.5288553149880963
260
  name: Cosine Ndcg@10
261
  - type: cosine_mrr@10
262
- value: 0.48564682539682535
263
  name: Cosine Mrr@10
264
  - type: cosine_map@100
265
- value: 0.48169009878818936
266
  name: Cosine Map@100
267
  ---
268
 
269
- # SentenceTransformer based on sentence-transformers/all-MiniLM-L12-v2
270
 
271
- This is a [sentence-transformers](https://www.SBERT.net) model finetuned from [sentence-transformers/all-MiniLM-L12-v2](https://huggingface.co/sentence-transformers/all-MiniLM-L12-v2). It maps sentences & paragraphs to a 384-dimensional dense vector space and can be used for semantic textual similarity, semantic search, paraphrase mining, text classification, clustering, and more.
272
 
273
  ## Model Details
274
 
275
  ### Model Description
276
  - **Model Type:** Sentence Transformer
277
- - **Base model:** [sentence-transformers/all-MiniLM-L12-v2](https://huggingface.co/sentence-transformers/all-MiniLM-L12-v2) <!-- at revision 936af83a2ecce5fe87a09109ff5cbcefe073173a -->
278
  - **Maximum Sequence Length:** 128 tokens
279
- - **Output Dimensionality:** 384 dimensions
280
  - **Similarity Function:** Cosine Similarity
281
  <!-- - **Training Dataset:** Unknown -->
282
  <!-- - **Language:** Unknown -->
@@ -292,9 +292,8 @@ This is a [sentence-transformers](https://www.SBERT.net) model finetuned from [s
292
 
293
  ```
294
  SentenceTransformer(
295
- (0): Transformer({'max_seq_length': 128, 'do_lower_case': False, 'architecture': 'BertModel'})
296
- (1): Pooling({'word_embedding_dimension': 384, 'pooling_mode_cls_token': False, 'pooling_mode_mean_tokens': True, 'pooling_mode_max_tokens': False, 'pooling_mode_mean_sqrt_len_tokens': False, 'pooling_mode_weightedmean_tokens': False, 'pooling_mode_lasttoken': False, 'include_prompt': True})
297
- (2): Normalize()
298
  )
299
  ```
300
 
@@ -322,14 +321,14 @@ sentences = [
322
  ]
323
  embeddings = model.encode(sentences)
324
  print(embeddings.shape)
325
- # [3, 384]
326
 
327
  # Get the similarity scores for the embeddings
328
  similarities = model.similarity(embeddings, embeddings)
329
  print(similarities)
330
- # tensor([[1.0000, 1.0000, 0.9401],
331
- # [1.0000, 1.0000, 0.9401],
332
- # [0.9401, 0.9401, 1.0000]])
333
  ```
334
 
335
  <!--
@@ -367,21 +366,21 @@ You can finetune this model on your own dataset.
367
 
368
  | Metric | NanoMSMARCO | NanoNQ |
369
  |:--------------------|:------------|:-----------|
370
- | cosine_accuracy@1 | 0.36 | 0.4 |
371
- | cosine_accuracy@3 | 0.58 | 0.56 |
372
- | cosine_accuracy@5 | 0.62 | 0.58 |
373
- | cosine_accuracy@10 | 0.7 | 0.74 |
374
- | cosine_precision@1 | 0.36 | 0.4 |
375
- | cosine_precision@3 | 0.1933 | 0.1933 |
376
- | cosine_precision@5 | 0.124 | 0.12 |
377
- | cosine_precision@10 | 0.07 | 0.076 |
378
- | cosine_recall@1 | 0.36 | 0.37 |
379
- | cosine_recall@3 | 0.58 | 0.53 |
380
- | cosine_recall@5 | 0.62 | 0.55 |
381
- | cosine_recall@10 | 0.7 | 0.7 |
382
- | **cosine_ndcg@10** | **0.5294** | **0.5283** |
383
- | cosine_mrr@10 | 0.4751 | 0.4962 |
384
- | cosine_map@100 | 0.4877 | 0.4757 |
385
 
386
  #### Nano BEIR
387
 
@@ -399,21 +398,21 @@ You can finetune this model on your own dataset.
399
 
400
  | Metric | Value |
401
  |:--------------------|:-----------|
402
- | cosine_accuracy@1 | 0.38 |
403
- | cosine_accuracy@3 | 0.57 |
404
- | cosine_accuracy@5 | 0.6 |
405
- | cosine_accuracy@10 | 0.72 |
406
- | cosine_precision@1 | 0.38 |
407
- | cosine_precision@3 | 0.1933 |
408
- | cosine_precision@5 | 0.122 |
409
- | cosine_precision@10 | 0.073 |
410
- | cosine_recall@1 | 0.365 |
411
- | cosine_recall@3 | 0.555 |
412
- | cosine_recall@5 | 0.585 |
413
- | cosine_recall@10 | 0.7 |
414
- | **cosine_ndcg@10** | **0.5289** |
415
- | cosine_mrr@10 | 0.4856 |
416
- | cosine_map@100 | 0.4817 |
417
 
418
  <!--
419
  ## Bias, Risks and Limitations
@@ -439,7 +438,7 @@ You can finetune this model on your own dataset.
439
  | | anchor | positive | negative |
440
  |:--------|:----------------------------------------------------------------------------------|:-----------------------------------------------------------------------------------|:-----------------------------------------------------------------------------------|
441
  | type | string | string | string |
442
- | details | <ul><li>min: 4 tokens</li><li>mean: 10.95 tokens</li><li>max: 60 tokens</li></ul> | <ul><li>min: 6 tokens</li><li>mean: 67.57 tokens</li><li>max: 128 tokens</li></ul> | <ul><li>min: 7 tokens</li><li>mean: 66.64 tokens</li><li>max: 128 tokens</li></ul> |
443
  * Samples:
444
  | anchor | positive | negative |
445
  |:----------------------------------------------------------------------|:-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|:---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
@@ -465,7 +464,7 @@ You can finetune this model on your own dataset.
465
  | | anchor | positive | negative |
466
  |:--------|:----------------------------------------------------------------------------------|:-----------------------------------------------------------------------------------|:-----------------------------------------------------------------------------------|
467
  | type | string | string | string |
468
- | details | <ul><li>min: 4 tokens</li><li>mean: 11.11 tokens</li><li>max: 66 tokens</li></ul> | <ul><li>min: 7 tokens</li><li>mean: 67.99 tokens</li><li>max: 128 tokens</li></ul> | <ul><li>min: 7 tokens</li><li>mean: 66.08 tokens</li><li>max: 128 tokens</li></ul> |
469
  * Samples:
470
  | anchor | positive | negative |
471
  |:----------------------------------------------------------------------------------------------------------|:--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|:--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
@@ -487,9 +486,9 @@ You can finetune this model on your own dataset.
487
  - `eval_strategy`: steps
488
  - `per_device_train_batch_size`: 128
489
  - `per_device_eval_batch_size`: 128
490
- - `learning_rate`: 8e-05
491
- - `weight_decay`: 0.005
492
- - `max_steps`: 3375
493
  - `warmup_ratio`: 0.1
494
  - `fp16`: True
495
  - `dataloader_drop_last`: True
@@ -516,14 +515,14 @@ You can finetune this model on your own dataset.
516
  - `gradient_accumulation_steps`: 1
517
  - `eval_accumulation_steps`: None
518
  - `torch_empty_cache_steps`: None
519
- - `learning_rate`: 8e-05
520
- - `weight_decay`: 0.005
521
  - `adam_beta1`: 0.9
522
  - `adam_beta2`: 0.999
523
  - `adam_epsilon`: 1e-08
524
  - `max_grad_norm`: 1.0
525
  - `num_train_epochs`: 3.0
526
- - `max_steps`: 3375
527
  - `lr_scheduler_type`: linear
528
  - `lr_scheduler_kwargs`: {}
529
  - `warmup_ratio`: 0.1
@@ -630,20 +629,9 @@ You can finetune this model on your own dataset.
630
  ### Training Logs
631
  | Epoch | Step | Training Loss | Validation Loss | NanoMSMARCO_cosine_ndcg@10 | NanoNQ_cosine_ndcg@10 | NanoBEIR_mean_cosine_ndcg@10 |
632
  |:------:|:----:|:-------------:|:---------------:|:--------------------------:|:---------------------:|:----------------------------:|
633
- | 0 | 0 | - | 1.1142 | 0.5887 | 0.5786 | 0.5836 |
634
- | 0.2874 | 250 | 1.0308 | 0.8274 | 0.5370 | 0.5295 | 0.5332 |
635
- | 0.5747 | 500 | 0.9418 | 0.8097 | 0.5222 | 0.5367 | 0.5295 |
636
- | 0.8621 | 750 | 0.9198 | 0.7884 | 0.5348 | 0.5194 | 0.5271 |
637
- | 1.1494 | 1000 | 0.8563 | 0.7848 | 0.5172 | 0.5148 | 0.5160 |
638
- | 1.4368 | 1250 | 0.8147 | 0.7826 | 0.5236 | 0.4794 | 0.5015 |
639
- | 1.7241 | 1500 | 0.8074 | 0.7742 | 0.5312 | 0.5117 | 0.5214 |
640
- | 2.0115 | 1750 | 0.8021 | 0.7775 | 0.5468 | 0.4985 | 0.5226 |
641
- | 2.2989 | 2000 | 0.7212 | 0.7763 | 0.5045 | 0.4867 | 0.4956 |
642
- | 2.5862 | 2250 | 0.7197 | 0.7796 | 0.5123 | 0.4800 | 0.4962 |
643
- | 2.8736 | 2500 | 0.7116 | 0.7781 | 0.5451 | 0.5259 | 0.5355 |
644
- | 3.1609 | 2750 | 0.6905 | 0.7827 | 0.5065 | 0.5346 | 0.5205 |
645
- | 3.4483 | 3000 | 0.6656 | 0.7844 | 0.5287 | 0.5066 | 0.5176 |
646
- | 3.7356 | 3250 | 0.6632 | 0.7836 | 0.5294 | 0.5283 | 0.5289 |
647
 
648
 
649
  ### Framework Versions
 
7
  - generated_from_trainer
8
  - dataset_size:111470
9
  - loss:MultipleNegativesRankingLoss
10
+ base_model: Alibaba-NLP/gte-modernbert-base
11
  widget:
12
  - source_sentence: why are some rocks radioactive
13
  sentences:
 
106
  - cosine_mrr@10
107
  - cosine_map@100
108
  model-index:
109
+ - name: SentenceTransformer based on Alibaba-NLP/gte-modernbert-base
110
  results:
111
  - task:
112
  type: information-retrieval
 
116
  type: NanoMSMARCO
117
  metrics:
118
  - type: cosine_accuracy@1
119
+ value: 0.42
120
  name: Cosine Accuracy@1
121
  - type: cosine_accuracy@3
122
+ value: 0.64
123
  name: Cosine Accuracy@3
124
  - type: cosine_accuracy@5
125
+ value: 0.78
126
  name: Cosine Accuracy@5
127
  - type: cosine_accuracy@10
128
+ value: 0.84
129
  name: Cosine Accuracy@10
130
  - type: cosine_precision@1
131
+ value: 0.42
132
  name: Cosine Precision@1
133
  - type: cosine_precision@3
134
+ value: 0.21333333333333332
135
  name: Cosine Precision@3
136
  - type: cosine_precision@5
137
+ value: 0.156
138
  name: Cosine Precision@5
139
  - type: cosine_precision@10
140
+ value: 0.08399999999999999
141
  name: Cosine Precision@10
142
  - type: cosine_recall@1
143
+ value: 0.42
144
  name: Cosine Recall@1
145
  - type: cosine_recall@3
146
+ value: 0.64
147
  name: Cosine Recall@3
148
  - type: cosine_recall@5
149
+ value: 0.78
150
  name: Cosine Recall@5
151
  - type: cosine_recall@10
152
+ value: 0.84
153
  name: Cosine Recall@10
154
  - type: cosine_ndcg@10
155
+ value: 0.6273713143801162
156
  name: Cosine Ndcg@10
157
  - type: cosine_mrr@10
158
+ value: 0.5593571428571429
159
  name: Cosine Mrr@10
160
  - type: cosine_map@100
161
+ value: 0.567451526639622
162
  name: Cosine Map@100
163
  - task:
164
  type: information-retrieval
 
168
  type: NanoNQ
169
  metrics:
170
  - type: cosine_accuracy@1
171
+ value: 0.44
172
  name: Cosine Accuracy@1
173
  - type: cosine_accuracy@3
174
  value: 0.56
175
  name: Cosine Accuracy@3
176
  - type: cosine_accuracy@5
177
+ value: 0.62
178
  name: Cosine Accuracy@5
179
  - type: cosine_accuracy@10
180
  value: 0.74
181
  name: Cosine Accuracy@10
182
  - type: cosine_precision@1
183
+ value: 0.44
184
  name: Cosine Precision@1
185
  - type: cosine_precision@3
186
+ value: 0.18666666666666665
187
  name: Cosine Precision@3
188
  - type: cosine_precision@5
189
+ value: 0.128
190
  name: Cosine Precision@5
191
  - type: cosine_precision@10
192
+ value: 0.08
193
  name: Cosine Precision@10
194
  - type: cosine_recall@1
195
+ value: 0.4
196
  name: Cosine Recall@1
197
  - type: cosine_recall@3
198
+ value: 0.52
199
  name: Cosine Recall@3
200
  - type: cosine_recall@5
201
+ value: 0.59
202
  name: Cosine Recall@5
203
  - type: cosine_recall@10
204
+ value: 0.71
205
  name: Cosine Recall@10
206
  - type: cosine_ndcg@10
207
+ value: 0.5468372621429358
208
  name: Cosine Ndcg@10
209
  - type: cosine_mrr@10
210
+ value: 0.5185555555555555
211
  name: Cosine Mrr@10
212
  - type: cosine_map@100
213
+ value: 0.49953000242452567
214
  name: Cosine Map@100
215
  - task:
216
  type: nano-beir
 
220
  type: NanoBEIR_mean
221
  metrics:
222
  - type: cosine_accuracy@1
223
+ value: 0.43
224
  name: Cosine Accuracy@1
225
  - type: cosine_accuracy@3
226
+ value: 0.6000000000000001
227
  name: Cosine Accuracy@3
228
  - type: cosine_accuracy@5
229
+ value: 0.7
230
  name: Cosine Accuracy@5
231
  - type: cosine_accuracy@10
232
+ value: 0.79
233
  name: Cosine Accuracy@10
234
  - type: cosine_precision@1
235
+ value: 0.43
236
  name: Cosine Precision@1
237
  - type: cosine_precision@3
238
+ value: 0.19999999999999998
239
  name: Cosine Precision@3
240
  - type: cosine_precision@5
241
+ value: 0.14200000000000002
242
  name: Cosine Precision@5
243
  - type: cosine_precision@10
244
+ value: 0.08199999999999999
245
  name: Cosine Precision@10
246
  - type: cosine_recall@1
247
+ value: 0.41000000000000003
248
  name: Cosine Recall@1
249
  - type: cosine_recall@3
250
+ value: 0.5800000000000001
251
  name: Cosine Recall@3
252
  - type: cosine_recall@5
253
+ value: 0.685
254
  name: Cosine Recall@5
255
  - type: cosine_recall@10
256
+ value: 0.7749999999999999
257
  name: Cosine Recall@10
258
  - type: cosine_ndcg@10
259
+ value: 0.587104288261526
260
  name: Cosine Ndcg@10
261
  - type: cosine_mrr@10
262
+ value: 0.5389563492063492
263
  name: Cosine Mrr@10
264
  - type: cosine_map@100
265
+ value: 0.5334907645320738
266
  name: Cosine Map@100
267
  ---
268
 
269
+ # SentenceTransformer based on Alibaba-NLP/gte-modernbert-base
270
 
271
+ This is a [sentence-transformers](https://www.SBERT.net) model finetuned from [Alibaba-NLP/gte-modernbert-base](https://huggingface.co/Alibaba-NLP/gte-modernbert-base). It maps sentences & paragraphs to a 768-dimensional dense vector space and can be used for semantic textual similarity, semantic search, paraphrase mining, text classification, clustering, and more.
272
 
273
  ## Model Details
274
 
275
  ### Model Description
276
  - **Model Type:** Sentence Transformer
277
+ - **Base model:** [Alibaba-NLP/gte-modernbert-base](https://huggingface.co/Alibaba-NLP/gte-modernbert-base) <!-- at revision e7f32e3c00f91d699e8c43b53106206bcc72bb22 -->
278
  - **Maximum Sequence Length:** 128 tokens
279
+ - **Output Dimensionality:** 768 dimensions
280
  - **Similarity Function:** Cosine Similarity
281
  <!-- - **Training Dataset:** Unknown -->
282
  <!-- - **Language:** Unknown -->
 
292
 
293
  ```
294
  SentenceTransformer(
295
+ (0): Transformer({'max_seq_length': 128, 'do_lower_case': False, 'architecture': 'ModernBertModel'})
296
+ (1): Pooling({'word_embedding_dimension': 768, 'pooling_mode_cls_token': True, 'pooling_mode_mean_tokens': False, 'pooling_mode_max_tokens': False, 'pooling_mode_mean_sqrt_len_tokens': False, 'pooling_mode_weightedmean_tokens': False, 'pooling_mode_lasttoken': False, 'include_prompt': True})
 
297
  )
298
  ```
299
 
 
321
  ]
322
  embeddings = model.encode(sentences)
323
  print(embeddings.shape)
324
+ # [3, 768]
325
 
326
  # Get the similarity scores for the embeddings
327
  similarities = model.similarity(embeddings, embeddings)
328
  print(similarities)
329
+ # tensor([[1.0000, 1.0000, 0.3177],
330
+ # [1.0000, 1.0000, 0.3177],
331
+ # [0.3177, 0.3177, 1.0000]])
332
  ```
333
 
334
  <!--
 
366
 
367
  | Metric | NanoMSMARCO | NanoNQ |
368
  |:--------------------|:------------|:-----------|
369
+ | cosine_accuracy@1 | 0.42 | 0.44 |
370
+ | cosine_accuracy@3 | 0.64 | 0.56 |
371
+ | cosine_accuracy@5 | 0.78 | 0.62 |
372
+ | cosine_accuracy@10 | 0.84 | 0.74 |
373
+ | cosine_precision@1 | 0.42 | 0.44 |
374
+ | cosine_precision@3 | 0.2133 | 0.1867 |
375
+ | cosine_precision@5 | 0.156 | 0.128 |
376
+ | cosine_precision@10 | 0.084 | 0.08 |
377
+ | cosine_recall@1 | 0.42 | 0.4 |
378
+ | cosine_recall@3 | 0.64 | 0.52 |
379
+ | cosine_recall@5 | 0.78 | 0.59 |
380
+ | cosine_recall@10 | 0.84 | 0.71 |
381
+ | **cosine_ndcg@10** | **0.6274** | **0.5468** |
382
+ | cosine_mrr@10 | 0.5594 | 0.5186 |
383
+ | cosine_map@100 | 0.5675 | 0.4995 |
384
 
385
  #### Nano BEIR
386
 
 
398
 
399
  | Metric | Value |
400
  |:--------------------|:-----------|
401
+ | cosine_accuracy@1 | 0.43 |
402
+ | cosine_accuracy@3 | 0.6 |
403
+ | cosine_accuracy@5 | 0.7 |
404
+ | cosine_accuracy@10 | 0.79 |
405
+ | cosine_precision@1 | 0.43 |
406
+ | cosine_precision@3 | 0.2 |
407
+ | cosine_precision@5 | 0.142 |
408
+ | cosine_precision@10 | 0.082 |
409
+ | cosine_recall@1 | 0.41 |
410
+ | cosine_recall@3 | 0.58 |
411
+ | cosine_recall@5 | 0.685 |
412
+ | cosine_recall@10 | 0.775 |
413
+ | **cosine_ndcg@10** | **0.5871** |
414
+ | cosine_mrr@10 | 0.539 |
415
+ | cosine_map@100 | 0.5335 |
416
 
417
  <!--
418
  ## Bias, Risks and Limitations
 
438
  | | anchor | positive | negative |
439
  |:--------|:----------------------------------------------------------------------------------|:-----------------------------------------------------------------------------------|:-----------------------------------------------------------------------------------|
440
  | type | string | string | string |
441
+ | details | <ul><li>min: 4 tokens</li><li>mean: 11.17 tokens</li><li>max: 59 tokens</li></ul> | <ul><li>min: 6 tokens</li><li>mean: 68.53 tokens</li><li>max: 128 tokens</li></ul> | <ul><li>min: 7 tokens</li><li>mean: 67.56 tokens</li><li>max: 128 tokens</li></ul> |
442
  * Samples:
443
  | anchor | positive | negative |
444
  |:----------------------------------------------------------------------|:-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|:---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
 
464
  | | anchor | positive | negative |
465
  |:--------|:----------------------------------------------------------------------------------|:-----------------------------------------------------------------------------------|:-----------------------------------------------------------------------------------|
466
  | type | string | string | string |
467
+ | details | <ul><li>min: 4 tokens</li><li>mean: 11.35 tokens</li><li>max: 64 tokens</li></ul> | <ul><li>min: 7 tokens</li><li>mean: 68.67 tokens</li><li>max: 128 tokens</li></ul> | <ul><li>min: 6 tokens</li><li>mean: 67.03 tokens</li><li>max: 128 tokens</li></ul> |
468
  * Samples:
469
  | anchor | positive | negative |
470
  |:----------------------------------------------------------------------------------------------------------|:--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|:--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
 
486
  - `eval_strategy`: steps
487
  - `per_device_train_batch_size`: 128
488
  - `per_device_eval_batch_size`: 128
489
+ - `learning_rate`: 4e-05
490
+ - `weight_decay`: 0.01
491
+ - `max_steps`: 703
492
  - `warmup_ratio`: 0.1
493
  - `fp16`: True
494
  - `dataloader_drop_last`: True
 
515
  - `gradient_accumulation_steps`: 1
516
  - `eval_accumulation_steps`: None
517
  - `torch_empty_cache_steps`: None
518
+ - `learning_rate`: 4e-05
519
+ - `weight_decay`: 0.01
520
  - `adam_beta1`: 0.9
521
  - `adam_beta2`: 0.999
522
  - `adam_epsilon`: 1e-08
523
  - `max_grad_norm`: 1.0
524
  - `num_train_epochs`: 3.0
525
+ - `max_steps`: 703
526
  - `lr_scheduler_type`: linear
527
  - `lr_scheduler_kwargs`: {}
528
  - `warmup_ratio`: 0.1
 
629
  ### Training Logs
630
  | Epoch | Step | Training Loss | Validation Loss | NanoMSMARCO_cosine_ndcg@10 | NanoNQ_cosine_ndcg@10 | NanoBEIR_mean_cosine_ndcg@10 |
631
  |:------:|:----:|:-------------:|:---------------:|:--------------------------:|:---------------------:|:----------------------------:|
632
+ | 0 | 0 | - | 2.5772 | 0.6530 | 0.6552 | 0.6541 |
633
+ | 0.2874 | 250 | 0.9649 | 0.7574 | 0.6170 | 0.5720 | 0.5945 |
634
+ | 0.5747 | 500 | 0.7456 | 0.7372 | 0.6274 | 0.5468 | 0.5871 |
 
 
 
 
 
 
 
 
 
 
 
635
 
636
 
637
  ### Framework Versions
config_sentence_transformers.json CHANGED
@@ -4,11 +4,11 @@
4
  "transformers": "4.57.3",
5
  "pytorch": "2.9.1+cu128"
6
  },
7
- "model_type": "SentenceTransformer",
8
  "prompts": {
9
  "query": "",
10
  "document": ""
11
  },
12
  "default_prompt_name": null,
13
- "similarity_fn_name": "cosine"
 
14
  }
 
4
  "transformers": "4.57.3",
5
  "pytorch": "2.9.1+cu128"
6
  },
 
7
  "prompts": {
8
  "query": "",
9
  "document": ""
10
  },
11
  "default_prompt_name": null,
12
+ "similarity_fn_name": "cosine",
13
+ "model_type": "SentenceTransformer"
14
  }
modules.json CHANGED
@@ -10,11 +10,5 @@
10
  "name": "1",
11
  "path": "1_Pooling",
12
  "type": "sentence_transformers.models.Pooling"
13
- },
14
- {
15
- "idx": 2,
16
- "name": "2",
17
- "path": "2_Normalize",
18
- "type": "sentence_transformers.models.Normalize"
19
  }
20
  ]
 
10
  "name": "1",
11
  "path": "1_Pooling",
12
  "type": "sentence_transformers.models.Pooling"
 
 
 
 
 
 
13
  }
14
  ]