radoslavralev committed on
Commit
5bebfbf
·
verified ·
1 Parent(s): cd05f4e

Add new SentenceTransformer model

Browse files
1_Pooling/config.json CHANGED
@@ -1,7 +1,7 @@
1
  {
2
- "word_embedding_dimension": 384,
3
- "pooling_mode_cls_token": false,
4
- "pooling_mode_mean_tokens": true,
5
  "pooling_mode_max_tokens": false,
6
  "pooling_mode_mean_sqrt_len_tokens": false,
7
  "pooling_mode_weightedmean_tokens": false,
 
1
  {
2
+ "word_embedding_dimension": 768,
3
+ "pooling_mode_cls_token": true,
4
+ "pooling_mode_mean_tokens": false,
5
  "pooling_mode_max_tokens": false,
6
  "pooling_mode_mean_sqrt_len_tokens": false,
7
  "pooling_mode_weightedmean_tokens": false,
README.md CHANGED
@@ -7,7 +7,7 @@ tags:
7
  - generated_from_trainer
8
  - dataset_size:90000
9
  - loss:MultipleNegativesRankingLoss
10
- base_model: sentence-transformers/all-MiniLM-L12-v2
11
  widget:
12
  - source_sentence: who is the publisher of the norton anthology american literature
13
  sentences:
@@ -154,7 +154,7 @@ metrics:
154
  - cosine_mrr@10
155
  - cosine_map@100
156
  model-index:
157
- - name: SentenceTransformer based on sentence-transformers/all-MiniLM-L12-v2
158
  results:
159
  - task:
160
  type: information-retrieval
@@ -164,49 +164,49 @@ model-index:
164
  type: NanoMSMARCO
165
  metrics:
166
  - type: cosine_accuracy@1
167
- value: 0.34
168
  name: Cosine Accuracy@1
169
  - type: cosine_accuracy@3
170
- value: 0.54
171
  name: Cosine Accuracy@3
172
  - type: cosine_accuracy@5
173
- value: 0.64
174
  name: Cosine Accuracy@5
175
  - type: cosine_accuracy@10
176
- value: 0.78
177
  name: Cosine Accuracy@10
178
  - type: cosine_precision@1
179
- value: 0.34
180
  name: Cosine Precision@1
181
  - type: cosine_precision@3
182
- value: 0.18
183
  name: Cosine Precision@3
184
  - type: cosine_precision@5
185
- value: 0.128
186
  name: Cosine Precision@5
187
  - type: cosine_precision@10
188
- value: 0.07800000000000001
189
  name: Cosine Precision@10
190
  - type: cosine_recall@1
191
- value: 0.34
192
  name: Cosine Recall@1
193
  - type: cosine_recall@3
194
- value: 0.54
195
  name: Cosine Recall@3
196
  - type: cosine_recall@5
197
- value: 0.64
198
  name: Cosine Recall@5
199
  - type: cosine_recall@10
200
- value: 0.78
201
  name: Cosine Recall@10
202
  - type: cosine_ndcg@10
203
- value: 0.5447080049645561
204
  name: Cosine Ndcg@10
205
  - type: cosine_mrr@10
206
- value: 0.47073809523809523
207
  name: Cosine Mrr@10
208
  - type: cosine_map@100
209
- value: 0.4806962957327628
210
  name: Cosine Map@100
211
  - task:
212
  type: information-retrieval
@@ -216,49 +216,49 @@ model-index:
216
  type: NanoNQ
217
  metrics:
218
  - type: cosine_accuracy@1
219
- value: 0.44
220
  name: Cosine Accuracy@1
221
  - type: cosine_accuracy@3
222
- value: 0.62
223
  name: Cosine Accuracy@3
224
  - type: cosine_accuracy@5
225
- value: 0.7
226
  name: Cosine Accuracy@5
227
  - type: cosine_accuracy@10
228
- value: 0.78
229
  name: Cosine Accuracy@10
230
  - type: cosine_precision@1
231
- value: 0.44
232
  name: Cosine Precision@1
233
  - type: cosine_precision@3
234
- value: 0.21333333333333332
235
  name: Cosine Precision@3
236
  - type: cosine_precision@5
237
- value: 0.14800000000000002
238
  name: Cosine Precision@5
239
  - type: cosine_precision@10
240
- value: 0.08199999999999999
241
  name: Cosine Precision@10
242
  - type: cosine_recall@1
243
- value: 0.43
244
  name: Cosine Recall@1
245
  - type: cosine_recall@3
246
- value: 0.61
247
  name: Cosine Recall@3
248
  - type: cosine_recall@5
249
- value: 0.67
250
  name: Cosine Recall@5
251
  - type: cosine_recall@10
252
- value: 0.74
253
  name: Cosine Recall@10
254
  - type: cosine_ndcg@10
255
- value: 0.5924173512360595
256
  name: Cosine Ndcg@10
257
  - type: cosine_mrr@10
258
- value: 0.5506349206349206
259
  name: Cosine Mrr@10
260
  - type: cosine_map@100
261
- value: 0.5491036387356644
262
  name: Cosine Map@100
263
  - task:
264
  type: nano-beir
@@ -268,63 +268,63 @@ model-index:
268
  type: NanoBEIR_mean
269
  metrics:
270
  - type: cosine_accuracy@1
271
- value: 0.39
272
  name: Cosine Accuracy@1
273
  - type: cosine_accuracy@3
274
- value: 0.5800000000000001
275
  name: Cosine Accuracy@3
276
  - type: cosine_accuracy@5
277
- value: 0.6699999999999999
278
  name: Cosine Accuracy@5
279
  - type: cosine_accuracy@10
280
- value: 0.78
281
  name: Cosine Accuracy@10
282
  - type: cosine_precision@1
283
- value: 0.39
284
  name: Cosine Precision@1
285
  - type: cosine_precision@3
286
- value: 0.19666666666666666
287
  name: Cosine Precision@3
288
  - type: cosine_precision@5
289
- value: 0.138
290
  name: Cosine Precision@5
291
  - type: cosine_precision@10
292
- value: 0.08
293
  name: Cosine Precision@10
294
  - type: cosine_recall@1
295
- value: 0.385
296
  name: Cosine Recall@1
297
  - type: cosine_recall@3
298
- value: 0.575
299
  name: Cosine Recall@3
300
  - type: cosine_recall@5
301
- value: 0.655
302
  name: Cosine Recall@5
303
  - type: cosine_recall@10
304
- value: 0.76
305
  name: Cosine Recall@10
306
  - type: cosine_ndcg@10
307
- value: 0.5685626781003078
308
  name: Cosine Ndcg@10
309
  - type: cosine_mrr@10
310
- value: 0.5106865079365079
311
  name: Cosine Mrr@10
312
  - type: cosine_map@100
313
- value: 0.5148999672342136
314
  name: Cosine Map@100
315
  ---
316
 
317
- # SentenceTransformer based on sentence-transformers/all-MiniLM-L12-v2
318
 
319
- This is a [sentence-transformers](https://www.SBERT.net) model finetuned from [sentence-transformers/all-MiniLM-L12-v2](https://huggingface.co/sentence-transformers/all-MiniLM-L12-v2). It maps sentences & paragraphs to a 384-dimensional dense vector space and can be used for semantic textual similarity, semantic search, paraphrase mining, text classification, clustering, and more.
320
 
321
  ## Model Details
322
 
323
  ### Model Description
324
  - **Model Type:** Sentence Transformer
325
- - **Base model:** [sentence-transformers/all-MiniLM-L12-v2](https://huggingface.co/sentence-transformers/all-MiniLM-L12-v2) <!-- at revision 936af83a2ecce5fe87a09109ff5cbcefe073173a -->
326
  - **Maximum Sequence Length:** 128 tokens
327
- - **Output Dimensionality:** 384 dimensions
328
  - **Similarity Function:** Cosine Similarity
329
  <!-- - **Training Dataset:** Unknown -->
330
  <!-- - **Language:** Unknown -->
@@ -340,9 +340,8 @@ This is a [sentence-transformers](https://www.SBERT.net) model finetuned from [s
340
 
341
  ```
342
  SentenceTransformer(
343
- (0): Transformer({'max_seq_length': 128, 'do_lower_case': False, 'architecture': 'BertModel'})
344
- (1): Pooling({'word_embedding_dimension': 384, 'pooling_mode_cls_token': False, 'pooling_mode_mean_tokens': True, 'pooling_mode_max_tokens': False, 'pooling_mode_mean_sqrt_len_tokens': False, 'pooling_mode_weightedmean_tokens': False, 'pooling_mode_lasttoken': False, 'include_prompt': True})
345
- (2): Normalize()
346
  )
347
  ```
348
 
@@ -370,14 +369,14 @@ sentences = [
370
  ]
371
  embeddings = model.encode(sentences)
372
  print(embeddings.shape)
373
- # [3, 384]
374
 
375
  # Get the similarity scores for the embeddings
376
  similarities = model.similarity(embeddings, embeddings)
377
  print(similarities)
378
- # tensor([[ 1.0000, 0.7187, -0.0053],
379
- # [ 0.7187, 1.0000, 0.0412],
380
- # [-0.0053, 0.0412, 1.0000]])
381
  ```
382
 
383
  <!--
@@ -415,21 +414,21 @@ You can finetune this model on your own dataset.
415
 
416
  | Metric | NanoMSMARCO | NanoNQ |
417
  |:--------------------|:------------|:-----------|
418
- | cosine_accuracy@1 | 0.34 | 0.44 |
419
- | cosine_accuracy@3 | 0.54 | 0.62 |
420
- | cosine_accuracy@5 | 0.64 | 0.7 |
421
- | cosine_accuracy@10 | 0.78 | 0.78 |
422
- | cosine_precision@1 | 0.34 | 0.44 |
423
- | cosine_precision@3 | 0.18 | 0.2133 |
424
- | cosine_precision@5 | 0.128 | 0.148 |
425
- | cosine_precision@10 | 0.078 | 0.082 |
426
- | cosine_recall@1 | 0.34 | 0.43 |
427
- | cosine_recall@3 | 0.54 | 0.61 |
428
- | cosine_recall@5 | 0.64 | 0.67 |
429
- | cosine_recall@10 | 0.78 | 0.74 |
430
- | **cosine_ndcg@10** | **0.5447** | **0.5924** |
431
- | cosine_mrr@10 | 0.4707 | 0.5506 |
432
- | cosine_map@100 | 0.4807 | 0.5491 |
433
 
434
  #### Nano BEIR
435
 
@@ -445,23 +444,23 @@ You can finetune this model on your own dataset.
445
  }
446
  ```
447
 
448
- | Metric | Value |
449
- |:--------------------|:-----------|
450
- | cosine_accuracy@1 | 0.39 |
451
- | cosine_accuracy@3 | 0.58 |
452
- | cosine_accuracy@5 | 0.67 |
453
- | cosine_accuracy@10 | 0.78 |
454
- | cosine_precision@1 | 0.39 |
455
- | cosine_precision@3 | 0.1967 |
456
- | cosine_precision@5 | 0.138 |
457
- | cosine_precision@10 | 0.08 |
458
- | cosine_recall@1 | 0.385 |
459
- | cosine_recall@3 | 0.575 |
460
- | cosine_recall@5 | 0.655 |
461
- | cosine_recall@10 | 0.76 |
462
- | **cosine_ndcg@10** | **0.5686** |
463
- | cosine_mrr@10 | 0.5107 |
464
- | cosine_map@100 | 0.5149 |
465
 
466
  <!--
467
  ## Bias, Risks and Limitations
@@ -484,10 +483,10 @@ You can finetune this model on your own dataset.
484
  * Size: 90,000 training samples
485
  * Columns: <code>anchor</code>, <code>positive</code>, and <code>negative</code>
486
  * Approximate statistics based on the first 1000 samples:
487
- | | anchor | positive | negative |
488
- |:--------|:----------------------------------------------------------------------------------|:------------------------------------------------------------------------------------|:-------------------------------------------------------------------------------------|
489
- | type | string | string | string |
490
- | details | <ul><li>min: 9 tokens</li><li>mean: 11.82 tokens</li><li>max: 27 tokens</li></ul> | <ul><li>min: 19 tokens</li><li>mean: 106.2 tokens</li><li>max: 128 tokens</li></ul> | <ul><li>min: 16 tokens</li><li>mean: 104.63 tokens</li><li>max: 128 tokens</li></ul> |
491
  * Samples:
492
  | anchor | positive | negative |
493
  |:----------------------------------------------------------------|:--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|:---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
@@ -513,7 +512,7 @@ You can finetune this model on your own dataset.
513
  | | anchor | positive | negative |
514
  |:--------|:----------------------------------------------------------------------------------|:-------------------------------------------------------------------------------------|:-------------------------------------------------------------------------------------|
515
  | type | string | string | string |
516
- | details | <ul><li>min: 9 tokens</li><li>mean: 11.76 tokens</li><li>max: 24 tokens</li></ul> | <ul><li>min: 15 tokens</li><li>mean: 105.95 tokens</li><li>max: 128 tokens</li></ul> | <ul><li>min: 11 tokens</li><li>mean: 105.69 tokens</li><li>max: 128 tokens</li></ul> |
517
  * Samples:
518
  | anchor | positive | negative |
519
  |:-------------------------------------------------------------------------------------------------|:---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|:----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
@@ -535,9 +534,9 @@ You can finetune this model on your own dataset.
535
  - `eval_strategy`: steps
536
  - `per_device_train_batch_size`: 128
537
  - `per_device_eval_batch_size`: 128
538
- - `learning_rate`: 8e-05
539
- - `weight_decay`: 0.005
540
- - `max_steps`: 1125
541
  - `warmup_ratio`: 0.1
542
  - `fp16`: True
543
  - `dataloader_drop_last`: True
@@ -564,14 +563,14 @@ You can finetune this model on your own dataset.
564
  - `gradient_accumulation_steps`: 1
565
  - `eval_accumulation_steps`: None
566
  - `torch_empty_cache_steps`: None
567
- - `learning_rate`: 8e-05
568
- - `weight_decay`: 0.005
569
  - `adam_beta1`: 0.9
570
  - `adam_beta2`: 0.999
571
  - `adam_epsilon`: 1e-08
572
  - `max_grad_norm`: 1.0
573
  - `num_train_epochs`: 3.0
574
- - `max_steps`: 1125
575
  - `lr_scheduler_type`: linear
576
  - `lr_scheduler_kwargs`: {}
577
  - `warmup_ratio`: 0.1
@@ -676,14 +675,13 @@ You can finetune this model on your own dataset.
676
  </details>
677
 
678
  ### Training Logs
679
- | Epoch | Step | Training Loss | Validation Loss | NanoMSMARCO_cosine_ndcg@10 | NanoNQ_cosine_ndcg@10 | NanoBEIR_mean_cosine_ndcg@10 |
680
- |:------:|:----:|:-------------:|:---------------:|:--------------------------:|:---------------------:|:----------------------------:|
681
- | 0 | 0 | - | 0.0731 | 0.5887 | 0.5786 | 0.5836 |
682
- | 0.3556 | 250 | 0.0821 | 0.0701 | 0.5325 | 0.5977 | 0.5651 |
683
- | 0.7112 | 500 | 0.0805 | 0.0640 | 0.5523 | 0.5631 | 0.5577 |
684
- | 1.0669 | 750 | 0.0712 | 0.0572 | 0.5369 | 0.5819 | 0.5594 |
685
- | 1.4225 | 1000 | 0.0371 | 0.0551 | 0.5447 | 0.5924 | 0.5686 |
686
 
 
687
 
688
  ### Framework Versions
689
  - Python: 3.10.18
 
7
  - generated_from_trainer
8
  - dataset_size:90000
9
  - loss:MultipleNegativesRankingLoss
10
+ base_model: Alibaba-NLP/gte-modernbert-base
11
  widget:
12
  - source_sentence: who is the publisher of the norton anthology american literature
13
  sentences:
 
154
  - cosine_mrr@10
155
  - cosine_map@100
156
  model-index:
157
+ - name: SentenceTransformer based on Alibaba-NLP/gte-modernbert-base
158
  results:
159
  - task:
160
  type: information-retrieval
 
164
  type: NanoMSMARCO
165
  metrics:
166
  - type: cosine_accuracy@1
167
+ value: 0.38
168
  name: Cosine Accuracy@1
169
  - type: cosine_accuracy@3
170
+ value: 0.68
171
  name: Cosine Accuracy@3
172
  - type: cosine_accuracy@5
173
+ value: 0.8
174
  name: Cosine Accuracy@5
175
  - type: cosine_accuracy@10
176
+ value: 0.86
177
  name: Cosine Accuracy@10
178
  - type: cosine_precision@1
179
+ value: 0.38
180
  name: Cosine Precision@1
181
  - type: cosine_precision@3
182
+ value: 0.22666666666666668
183
  name: Cosine Precision@3
184
  - type: cosine_precision@5
185
+ value: 0.16
186
  name: Cosine Precision@5
187
  - type: cosine_precision@10
188
+ value: 0.08599999999999998
189
  name: Cosine Precision@10
190
  - type: cosine_recall@1
191
+ value: 0.38
192
  name: Cosine Recall@1
193
  - type: cosine_recall@3
194
+ value: 0.68
195
  name: Cosine Recall@3
196
  - type: cosine_recall@5
197
+ value: 0.8
198
  name: Cosine Recall@5
199
  - type: cosine_recall@10
200
+ value: 0.86
201
  name: Cosine Recall@10
202
  - type: cosine_ndcg@10
203
+ value: 0.6232981077766904
204
  name: Cosine Ndcg@10
205
  - type: cosine_mrr@10
206
+ value: 0.5465555555555556
207
  name: Cosine Mrr@10
208
  - type: cosine_map@100
209
+ value: 0.5540526315789474
210
  name: Cosine Map@100
211
  - task:
212
  type: information-retrieval
 
216
  type: NanoNQ
217
  metrics:
218
  - type: cosine_accuracy@1
219
+ value: 0.64
220
  name: Cosine Accuracy@1
221
  - type: cosine_accuracy@3
222
+ value: 0.7
223
  name: Cosine Accuracy@3
224
  - type: cosine_accuracy@5
225
+ value: 0.78
226
  name: Cosine Accuracy@5
227
  - type: cosine_accuracy@10
228
+ value: 0.82
229
  name: Cosine Accuracy@10
230
  - type: cosine_precision@1
231
+ value: 0.64
232
  name: Cosine Precision@1
233
  - type: cosine_precision@3
234
+ value: 0.24
235
  name: Cosine Precision@3
236
  - type: cosine_precision@5
237
+ value: 0.16
238
  name: Cosine Precision@5
239
  - type: cosine_precision@10
240
+ value: 0.08800000000000001
241
  name: Cosine Precision@10
242
  - type: cosine_recall@1
243
+ value: 0.61
244
  name: Cosine Recall@1
245
  - type: cosine_recall@3
246
+ value: 0.66
247
  name: Cosine Recall@3
248
  - type: cosine_recall@5
249
+ value: 0.73
250
  name: Cosine Recall@5
251
  - type: cosine_recall@10
252
+ value: 0.78
253
  name: Cosine Recall@10
254
  - type: cosine_ndcg@10
255
+ value: 0.6987067579229547
256
  name: Cosine Ndcg@10
257
  - type: cosine_mrr@10
258
+ value: 0.69
259
  name: Cosine Mrr@10
260
  - type: cosine_map@100
261
+ value: 0.6733088641959746
262
  name: Cosine Map@100
263
  - task:
264
  type: nano-beir
 
268
  type: NanoBEIR_mean
269
  metrics:
270
  - type: cosine_accuracy@1
271
+ value: 0.51
272
  name: Cosine Accuracy@1
273
  - type: cosine_accuracy@3
274
+ value: 0.69
275
  name: Cosine Accuracy@3
276
  - type: cosine_accuracy@5
277
+ value: 0.79
278
  name: Cosine Accuracy@5
279
  - type: cosine_accuracy@10
280
+ value: 0.84
281
  name: Cosine Accuracy@10
282
  - type: cosine_precision@1
283
+ value: 0.51
284
  name: Cosine Precision@1
285
  - type: cosine_precision@3
286
+ value: 0.23333333333333334
287
  name: Cosine Precision@3
288
  - type: cosine_precision@5
289
+ value: 0.16
290
  name: Cosine Precision@5
291
  - type: cosine_precision@10
292
+ value: 0.087
293
  name: Cosine Precision@10
294
  - type: cosine_recall@1
295
+ value: 0.495
296
  name: Cosine Recall@1
297
  - type: cosine_recall@3
298
+ value: 0.67
299
  name: Cosine Recall@3
300
  - type: cosine_recall@5
301
+ value: 0.765
302
  name: Cosine Recall@5
303
  - type: cosine_recall@10
304
+ value: 0.8200000000000001
305
  name: Cosine Recall@10
306
  - type: cosine_ndcg@10
307
+ value: 0.6610024328498225
308
  name: Cosine Ndcg@10
309
  - type: cosine_mrr@10
310
+ value: 0.6182777777777777
311
  name: Cosine Mrr@10
312
  - type: cosine_map@100
313
+ value: 0.613680747887461
314
  name: Cosine Map@100
315
  ---
316
 
317
+ # SentenceTransformer based on Alibaba-NLP/gte-modernbert-base
318
 
319
+ This is a [sentence-transformers](https://www.SBERT.net) model finetuned from [Alibaba-NLP/gte-modernbert-base](https://huggingface.co/Alibaba-NLP/gte-modernbert-base). It maps sentences & paragraphs to a 768-dimensional dense vector space and can be used for semantic textual similarity, semantic search, paraphrase mining, text classification, clustering, and more.
320
 
321
  ## Model Details
322
 
323
  ### Model Description
324
  - **Model Type:** Sentence Transformer
325
+ - **Base model:** [Alibaba-NLP/gte-modernbert-base](https://huggingface.co/Alibaba-NLP/gte-modernbert-base) <!-- at revision e7f32e3c00f91d699e8c43b53106206bcc72bb22 -->
326
  - **Maximum Sequence Length:** 128 tokens
327
+ - **Output Dimensionality:** 768 dimensions
328
  - **Similarity Function:** Cosine Similarity
329
  <!-- - **Training Dataset:** Unknown -->
330
  <!-- - **Language:** Unknown -->
 
340
 
341
  ```
342
  SentenceTransformer(
343
+ (0): Transformer({'max_seq_length': 128, 'do_lower_case': False, 'architecture': 'ModernBertModel'})
344
+ (1): Pooling({'word_embedding_dimension': 768, 'pooling_mode_cls_token': True, 'pooling_mode_mean_tokens': False, 'pooling_mode_max_tokens': False, 'pooling_mode_mean_sqrt_len_tokens': False, 'pooling_mode_weightedmean_tokens': False, 'pooling_mode_lasttoken': False, 'include_prompt': True})
 
345
  )
346
  ```
347
 
 
369
  ]
370
  embeddings = model.encode(sentences)
371
  print(embeddings.shape)
372
+ # [3, 768]
373
 
374
  # Get the similarity scores for the embeddings
375
  similarities = model.similarity(embeddings, embeddings)
376
  print(similarities)
377
+ # tensor([[ 1.0001, 0.6919, -0.0133],
378
+ # [ 0.6919, 1.0000, -0.0985],
379
+ # [-0.0133, -0.0985, 1.0000]])
380
  ```
381
 
382
  <!--
 
414
 
415
  | Metric | NanoMSMARCO | NanoNQ |
416
  |:--------------------|:------------|:-----------|
417
+ | cosine_accuracy@1 | 0.38 | 0.64 |
418
+ | cosine_accuracy@3 | 0.68 | 0.7 |
419
+ | cosine_accuracy@5 | 0.8 | 0.78 |
420
+ | cosine_accuracy@10 | 0.86 | 0.82 |
421
+ | cosine_precision@1 | 0.38 | 0.64 |
422
+ | cosine_precision@3 | 0.2267 | 0.24 |
423
+ | cosine_precision@5 | 0.16 | 0.16 |
424
+ | cosine_precision@10 | 0.086 | 0.088 |
425
+ | cosine_recall@1 | 0.38 | 0.61 |
426
+ | cosine_recall@3 | 0.68 | 0.66 |
427
+ | cosine_recall@5 | 0.8 | 0.73 |
428
+ | cosine_recall@10 | 0.86 | 0.78 |
429
+ | **cosine_ndcg@10** | **0.6233** | **0.6987** |
430
+ | cosine_mrr@10 | 0.5466 | 0.69 |
431
+ | cosine_map@100 | 0.5541 | 0.6733 |
432
 
433
  #### Nano BEIR
434
 
 
444
  }
445
  ```
446
 
447
+ | Metric | Value |
448
+ |:--------------------|:----------|
449
+ | cosine_accuracy@1 | 0.51 |
450
+ | cosine_accuracy@3 | 0.69 |
451
+ | cosine_accuracy@5 | 0.79 |
452
+ | cosine_accuracy@10 | 0.84 |
453
+ | cosine_precision@1 | 0.51 |
454
+ | cosine_precision@3 | 0.2333 |
455
+ | cosine_precision@5 | 0.16 |
456
+ | cosine_precision@10 | 0.087 |
457
+ | cosine_recall@1 | 0.495 |
458
+ | cosine_recall@3 | 0.67 |
459
+ | cosine_recall@5 | 0.765 |
460
+ | cosine_recall@10 | 0.82 |
461
+ | **cosine_ndcg@10** | **0.661** |
462
+ | cosine_mrr@10 | 0.6183 |
463
+ | cosine_map@100 | 0.6137 |
464
 
465
  <!--
466
  ## Bias, Risks and Limitations
 
483
  * Size: 90,000 training samples
484
  * Columns: <code>anchor</code>, <code>positive</code>, and <code>negative</code>
485
  * Approximate statistics based on the first 1000 samples:
486
+ | | anchor | positive | negative |
487
+ |:--------|:-----------------------------------------------------------------------------------|:-------------------------------------------------------------------------------------|:-------------------------------------------------------------------------------------|
488
+ | type | string | string | string |
489
+ | details | <ul><li>min: 10 tokens</li><li>mean: 12.57 tokens</li><li>max: 28 tokens</li></ul> | <ul><li>min: 19 tokens</li><li>mean: 107.04 tokens</li><li>max: 128 tokens</li></ul> | <ul><li>min: 15 tokens</li><li>mean: 105.42 tokens</li><li>max: 128 tokens</li></ul> |
490
  * Samples:
491
  | anchor | positive | negative |
492
  |:----------------------------------------------------------------|:--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|:---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
 
512
  | | anchor | positive | negative |
513
  |:--------|:----------------------------------------------------------------------------------|:-------------------------------------------------------------------------------------|:-------------------------------------------------------------------------------------|
514
  | type | string | string | string |
515
+ | details | <ul><li>min: 9 tokens</li><li>mean: 12.46 tokens</li><li>max: 25 tokens</li></ul> | <ul><li>min: 16 tokens</li><li>mean: 106.89 tokens</li><li>max: 128 tokens</li></ul> | <ul><li>min: 11 tokens</li><li>mean: 106.57 tokens</li><li>max: 128 tokens</li></ul> |
516
  * Samples:
517
  | anchor | positive | negative |
518
  |:-------------------------------------------------------------------------------------------------|:---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|:----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
 
534
  - `eval_strategy`: steps
535
  - `per_device_train_batch_size`: 128
536
  - `per_device_eval_batch_size`: 128
537
+ - `learning_rate`: 4e-05
538
+ - `weight_decay`: 0.01
539
+ - `max_steps`: 500
540
  - `warmup_ratio`: 0.1
541
  - `fp16`: True
542
  - `dataloader_drop_last`: True
 
563
  - `gradient_accumulation_steps`: 1
564
  - `eval_accumulation_steps`: None
565
  - `torch_empty_cache_steps`: None
566
+ - `learning_rate`: 4e-05
567
+ - `weight_decay`: 0.01
568
  - `adam_beta1`: 0.9
569
  - `adam_beta2`: 0.999
570
  - `adam_epsilon`: 1e-08
571
  - `max_grad_norm`: 1.0
572
  - `num_train_epochs`: 3.0
573
+ - `max_steps`: 500
574
  - `lr_scheduler_type`: linear
575
  - `lr_scheduler_kwargs`: {}
576
  - `warmup_ratio`: 0.1
 
675
  </details>
676
 
677
  ### Training Logs
678
+ | Epoch | Step | Training Loss | Validation Loss | NanoMSMARCO_cosine_ndcg@10 | NanoNQ_cosine_ndcg@10 | NanoBEIR_mean_cosine_ndcg@10 |
679
+ |:----------:|:-------:|:-------------:|:---------------:|:--------------------------:|:---------------------:|:----------------------------:|
680
+ | 0 | 0 | - | 0.4265 | 0.6530 | 0.6552 | 0.6541 |
681
+ | 0.3556 | 250 | 0.0816 | 0.0565 | 0.6334 | 0.6822 | 0.6578 |
682
+ | **0.7112** | **500** | **0.0517** | **0.052** | **0.6233** | **0.6987** | **0.661** |
 
 
683
 
684
+ * The bold row denotes the saved checkpoint.
685
 
686
  ### Framework Versions
687
  - Python: 3.10.18
config_sentence_transformers.json CHANGED
@@ -4,11 +4,11 @@
4
  "transformers": "4.57.3",
5
  "pytorch": "2.9.1+cu128"
6
  },
7
- "model_type": "SentenceTransformer",
8
  "prompts": {
9
  "query": "",
10
  "document": ""
11
  },
12
  "default_prompt_name": null,
13
- "similarity_fn_name": "cosine"
 
14
  }
 
4
  "transformers": "4.57.3",
5
  "pytorch": "2.9.1+cu128"
6
  },
 
7
  "prompts": {
8
  "query": "",
9
  "document": ""
10
  },
11
  "default_prompt_name": null,
12
+ "similarity_fn_name": "cosine",
13
+ "model_type": "SentenceTransformer"
14
  }
modules.json CHANGED
@@ -10,11 +10,5 @@
10
  "name": "1",
11
  "path": "1_Pooling",
12
  "type": "sentence_transformers.models.Pooling"
13
- },
14
- {
15
- "idx": 2,
16
- "name": "2",
17
- "path": "2_Normalize",
18
- "type": "sentence_transformers.models.Normalize"
19
  }
20
  ]
 
10
  "name": "1",
11
  "path": "1_Pooling",
12
  "type": "sentence_transformers.models.Pooling"
 
 
 
 
 
 
13
  }
14
  ]