radoslavralev committed on
Commit
0dc1635
·
verified ·
1 Parent(s): 4facb1d

Add new SentenceTransformer model

Browse files
1_Pooling/config.json CHANGED
@@ -1,7 +1,7 @@
1
  {
2
- "word_embedding_dimension": 768,
3
- "pooling_mode_cls_token": true,
4
- "pooling_mode_mean_tokens": false,
5
  "pooling_mode_max_tokens": false,
6
  "pooling_mode_mean_sqrt_len_tokens": false,
7
  "pooling_mode_weightedmean_tokens": false,
 
1
  {
2
+ "word_embedding_dimension": 384,
3
+ "pooling_mode_cls_token": false,
4
+ "pooling_mode_mean_tokens": true,
5
  "pooling_mode_max_tokens": false,
6
  "pooling_mode_mean_sqrt_len_tokens": false,
7
  "pooling_mode_weightedmean_tokens": false,
README.md CHANGED
@@ -7,7 +7,7 @@ tags:
7
  - generated_from_trainer
8
  - dataset_size:90000
9
  - loss:MultipleNegativesRankingLoss
10
- base_model: Alibaba-NLP/gte-modernbert-base
11
  widget:
12
  - source_sentence: who is the publisher of the norton anthology american literature
13
  sentences:
@@ -154,7 +154,7 @@ metrics:
154
  - cosine_mrr@10
155
  - cosine_map@100
156
  model-index:
157
- - name: SentenceTransformer based on Alibaba-NLP/gte-modernbert-base
158
  results:
159
  - task:
160
  type: information-retrieval
@@ -167,46 +167,46 @@ model-index:
167
  value: 0.38
168
  name: Cosine Accuracy@1
169
  - type: cosine_accuracy@3
170
- value: 0.68
171
  name: Cosine Accuracy@3
172
  - type: cosine_accuracy@5
173
- value: 0.8
174
  name: Cosine Accuracy@5
175
  - type: cosine_accuracy@10
176
- value: 0.86
177
  name: Cosine Accuracy@10
178
  - type: cosine_precision@1
179
  value: 0.38
180
  name: Cosine Precision@1
181
  - type: cosine_precision@3
182
- value: 0.22666666666666668
183
  name: Cosine Precision@3
184
  - type: cosine_precision@5
185
- value: 0.16
186
  name: Cosine Precision@5
187
  - type: cosine_precision@10
188
- value: 0.08599999999999998
189
  name: Cosine Precision@10
190
  - type: cosine_recall@1
191
  value: 0.38
192
  name: Cosine Recall@1
193
  - type: cosine_recall@3
194
- value: 0.68
195
  name: Cosine Recall@3
196
  - type: cosine_recall@5
197
- value: 0.8
198
  name: Cosine Recall@5
199
  - type: cosine_recall@10
200
- value: 0.86
201
  name: Cosine Recall@10
202
  - type: cosine_ndcg@10
203
- value: 0.6232981077766904
204
  name: Cosine Ndcg@10
205
  - type: cosine_mrr@10
206
- value: 0.5465555555555556
207
  name: Cosine Mrr@10
208
  - type: cosine_map@100
209
- value: 0.5540526315789474
210
  name: Cosine Map@100
211
  - task:
212
  type: information-retrieval
@@ -216,49 +216,49 @@ model-index:
216
  type: NanoNQ
217
  metrics:
218
  - type: cosine_accuracy@1
219
- value: 0.64
220
  name: Cosine Accuracy@1
221
  - type: cosine_accuracy@3
222
- value: 0.7
223
  name: Cosine Accuracy@3
224
  - type: cosine_accuracy@5
225
- value: 0.78
226
  name: Cosine Accuracy@5
227
  - type: cosine_accuracy@10
228
- value: 0.82
229
  name: Cosine Accuracy@10
230
  - type: cosine_precision@1
231
- value: 0.64
232
  name: Cosine Precision@1
233
  - type: cosine_precision@3
234
- value: 0.24
235
  name: Cosine Precision@3
236
  - type: cosine_precision@5
237
- value: 0.16
238
  name: Cosine Precision@5
239
  - type: cosine_precision@10
240
- value: 0.08800000000000001
241
  name: Cosine Precision@10
242
  - type: cosine_recall@1
243
- value: 0.61
244
  name: Cosine Recall@1
245
  - type: cosine_recall@3
246
- value: 0.66
247
  name: Cosine Recall@3
248
  - type: cosine_recall@5
249
- value: 0.73
250
  name: Cosine Recall@5
251
  - type: cosine_recall@10
252
- value: 0.78
253
  name: Cosine Recall@10
254
  - type: cosine_ndcg@10
255
- value: 0.6987067579229547
256
  name: Cosine Ndcg@10
257
  - type: cosine_mrr@10
258
- value: 0.69
259
  name: Cosine Mrr@10
260
  - type: cosine_map@100
261
- value: 0.6733088641959746
262
  name: Cosine Map@100
263
  - task:
264
  type: nano-beir
@@ -268,63 +268,63 @@ model-index:
268
  type: NanoBEIR_mean
269
  metrics:
270
  - type: cosine_accuracy@1
271
- value: 0.51
272
  name: Cosine Accuracy@1
273
  - type: cosine_accuracy@3
274
- value: 0.69
275
  name: Cosine Accuracy@3
276
  - type: cosine_accuracy@5
277
- value: 0.79
278
  name: Cosine Accuracy@5
279
  - type: cosine_accuracy@10
280
- value: 0.84
281
  name: Cosine Accuracy@10
282
  - type: cosine_precision@1
283
- value: 0.51
284
  name: Cosine Precision@1
285
  - type: cosine_precision@3
286
- value: 0.23333333333333334
287
  name: Cosine Precision@3
288
  - type: cosine_precision@5
289
- value: 0.16
290
  name: Cosine Precision@5
291
  - type: cosine_precision@10
292
- value: 0.087
293
  name: Cosine Precision@10
294
  - type: cosine_recall@1
295
- value: 0.495
296
  name: Cosine Recall@1
297
  - type: cosine_recall@3
298
- value: 0.67
299
  name: Cosine Recall@3
300
  - type: cosine_recall@5
301
- value: 0.765
302
  name: Cosine Recall@5
303
  - type: cosine_recall@10
304
- value: 0.8200000000000001
305
  name: Cosine Recall@10
306
  - type: cosine_ndcg@10
307
- value: 0.6610024328498225
308
  name: Cosine Ndcg@10
309
  - type: cosine_mrr@10
310
- value: 0.6182777777777777
311
  name: Cosine Mrr@10
312
  - type: cosine_map@100
313
- value: 0.613680747887461
314
  name: Cosine Map@100
315
  ---
316
 
317
- # SentenceTransformer based on Alibaba-NLP/gte-modernbert-base
318
 
319
- This is a [sentence-transformers](https://www.SBERT.net) model finetuned from [Alibaba-NLP/gte-modernbert-base](https://huggingface.co/Alibaba-NLP/gte-modernbert-base). It maps sentences & paragraphs to a 768-dimensional dense vector space and can be used for semantic textual similarity, semantic search, paraphrase mining, text classification, clustering, and more.
320
 
321
  ## Model Details
322
 
323
  ### Model Description
324
  - **Model Type:** Sentence Transformer
325
- - **Base model:** [Alibaba-NLP/gte-modernbert-base](https://huggingface.co/Alibaba-NLP/gte-modernbert-base) <!-- at revision e7f32e3c00f91d699e8c43b53106206bcc72bb22 -->
326
  - **Maximum Sequence Length:** 128 tokens
327
- - **Output Dimensionality:** 768 dimensions
328
  - **Similarity Function:** Cosine Similarity
329
  <!-- - **Training Dataset:** Unknown -->
330
  <!-- - **Language:** Unknown -->
@@ -340,8 +340,9 @@ This is a [sentence-transformers](https://www.SBERT.net) model finetuned from [A
340
 
341
  ```
342
  SentenceTransformer(
343
- (0): Transformer({'max_seq_length': 128, 'do_lower_case': False, 'architecture': 'ModernBertModel'})
344
- (1): Pooling({'word_embedding_dimension': 768, 'pooling_mode_cls_token': True, 'pooling_mode_mean_tokens': False, 'pooling_mode_max_tokens': False, 'pooling_mode_mean_sqrt_len_tokens': False, 'pooling_mode_weightedmean_tokens': False, 'pooling_mode_lasttoken': False, 'include_prompt': True})
 
345
  )
346
  ```
347
 
@@ -369,14 +370,14 @@ sentences = [
369
  ]
370
  embeddings = model.encode(sentences)
371
  print(embeddings.shape)
372
- # [3, 768]
373
 
374
  # Get the similarity scores for the embeddings
375
  similarities = model.similarity(embeddings, embeddings)
376
  print(similarities)
377
- # tensor([[ 1.0001, 0.6919, -0.0133],
378
- # [ 0.6919, 1.0000, -0.0985],
379
- # [-0.0133, -0.0985, 1.0000]])
380
  ```
381
 
382
  <!--
@@ -414,21 +415,21 @@ You can finetune this model on your own dataset.
414
 
415
  | Metric | NanoMSMARCO | NanoNQ |
416
  |:--------------------|:------------|:-----------|
417
- | cosine_accuracy@1 | 0.38 | 0.64 |
418
- | cosine_accuracy@3 | 0.68 | 0.7 |
419
- | cosine_accuracy@5 | 0.8 | 0.78 |
420
- | cosine_accuracy@10 | 0.86 | 0.82 |
421
- | cosine_precision@1 | 0.38 | 0.64 |
422
- | cosine_precision@3 | 0.2267 | 0.24 |
423
- | cosine_precision@5 | 0.16 | 0.16 |
424
- | cosine_precision@10 | 0.086 | 0.088 |
425
- | cosine_recall@1 | 0.38 | 0.61 |
426
- | cosine_recall@3 | 0.68 | 0.66 |
427
- | cosine_recall@5 | 0.8 | 0.73 |
428
- | cosine_recall@10 | 0.86 | 0.78 |
429
- | **cosine_ndcg@10** | **0.6233** | **0.6987** |
430
- | cosine_mrr@10 | 0.5466 | 0.69 |
431
- | cosine_map@100 | 0.5541 | 0.6733 |
432
 
433
  #### Nano BEIR
434
 
@@ -444,23 +445,23 @@ You can finetune this model on your own dataset.
444
  }
445
  ```
446
 
447
- | Metric | Value |
448
- |:--------------------|:----------|
449
- | cosine_accuracy@1 | 0.51 |
450
- | cosine_accuracy@3 | 0.69 |
451
- | cosine_accuracy@5 | 0.79 |
452
- | cosine_accuracy@10 | 0.84 |
453
- | cosine_precision@1 | 0.51 |
454
- | cosine_precision@3 | 0.2333 |
455
- | cosine_precision@5 | 0.16 |
456
- | cosine_precision@10 | 0.087 |
457
- | cosine_recall@1 | 0.495 |
458
- | cosine_recall@3 | 0.67 |
459
- | cosine_recall@5 | 0.765 |
460
- | cosine_recall@10 | 0.82 |
461
- | **cosine_ndcg@10** | **0.661** |
462
- | cosine_mrr@10 | 0.6183 |
463
- | cosine_map@100 | 0.6137 |
464
 
465
  <!--
466
  ## Bias, Risks and Limitations
@@ -483,10 +484,10 @@ You can finetune this model on your own dataset.
483
  * Size: 90,000 training samples
484
  * Columns: <code>anchor</code>, <code>positive</code>, and <code>negative</code>
485
  * Approximate statistics based on the first 1000 samples:
486
- | | anchor | positive | negative |
487
- |:--------|:-----------------------------------------------------------------------------------|:-------------------------------------------------------------------------------------|:-------------------------------------------------------------------------------------|
488
- | type | string | string | string |
489
- | details | <ul><li>min: 10 tokens</li><li>mean: 12.57 tokens</li><li>max: 28 tokens</li></ul> | <ul><li>min: 19 tokens</li><li>mean: 107.04 tokens</li><li>max: 128 tokens</li></ul> | <ul><li>min: 15 tokens</li><li>mean: 105.42 tokens</li><li>max: 128 tokens</li></ul> |
490
  * Samples:
491
  | anchor | positive | negative |
492
  |:----------------------------------------------------------------|:--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|:---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
@@ -512,7 +513,7 @@ You can finetune this model on your own dataset.
512
  | | anchor | positive | negative |
513
  |:--------|:----------------------------------------------------------------------------------|:-------------------------------------------------------------------------------------|:-------------------------------------------------------------------------------------|
514
  | type | string | string | string |
515
- | details | <ul><li>min: 9 tokens</li><li>mean: 12.46 tokens</li><li>max: 25 tokens</li></ul> | <ul><li>min: 16 tokens</li><li>mean: 106.89 tokens</li><li>max: 128 tokens</li></ul> | <ul><li>min: 11 tokens</li><li>mean: 106.57 tokens</li><li>max: 128 tokens</li></ul> |
516
  * Samples:
517
  | anchor | positive | negative |
518
  |:-------------------------------------------------------------------------------------------------|:---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|:----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
@@ -534,9 +535,9 @@ You can finetune this model on your own dataset.
534
  - `eval_strategy`: steps
535
  - `per_device_train_batch_size`: 128
536
  - `per_device_eval_batch_size`: 128
537
- - `learning_rate`: 4e-05
538
- - `weight_decay`: 0.01
539
- - `max_steps`: 500
540
  - `warmup_ratio`: 0.1
541
  - `fp16`: True
542
  - `dataloader_drop_last`: True
@@ -563,14 +564,14 @@ You can finetune this model on your own dataset.
563
  - `gradient_accumulation_steps`: 1
564
  - `eval_accumulation_steps`: None
565
  - `torch_empty_cache_steps`: None
566
- - `learning_rate`: 4e-05
567
- - `weight_decay`: 0.01
568
  - `adam_beta1`: 0.9
569
  - `adam_beta2`: 0.999
570
  - `adam_epsilon`: 1e-08
571
  - `max_grad_norm`: 1.0
572
  - `num_train_epochs`: 3.0
573
- - `max_steps`: 500
574
  - `lr_scheduler_type`: linear
575
  - `lr_scheduler_kwargs`: {}
576
  - `warmup_ratio`: 0.1
@@ -675,13 +676,14 @@ You can finetune this model on your own dataset.
675
  </details>
676
 
677
  ### Training Logs
678
- | Epoch | Step | Training Loss | Validation Loss | NanoMSMARCO_cosine_ndcg@10 | NanoNQ_cosine_ndcg@10 | NanoBEIR_mean_cosine_ndcg@10 |
679
- |:----------:|:-------:|:-------------:|:---------------:|:--------------------------:|:---------------------:|:----------------------------:|
680
- | 0 | 0 | - | 0.4265 | 0.6530 | 0.6552 | 0.6541 |
681
- | 0.3556 | 250 | 0.0816 | 0.0565 | 0.6334 | 0.6822 | 0.6578 |
682
- | **0.7112** | **500** | **0.0517** | **0.052** | **0.6233** | **0.6987** | **0.661** |
 
 
683
 
684
- * The bold row denotes the saved checkpoint.
685
 
686
  ### Framework Versions
687
  - Python: 3.10.18
 
7
  - generated_from_trainer
8
  - dataset_size:90000
9
  - loss:MultipleNegativesRankingLoss
10
+ base_model: thenlper/gte-small
11
  widget:
12
  - source_sentence: who is the publisher of the norton anthology american literature
13
  sentences:
 
154
  - cosine_mrr@10
155
  - cosine_map@100
156
  model-index:
157
+ - name: SentenceTransformer based on thenlper/gte-small
158
  results:
159
  - task:
160
  type: information-retrieval
 
167
  value: 0.38
168
  name: Cosine Accuracy@1
169
  - type: cosine_accuracy@3
170
+ value: 0.58
171
  name: Cosine Accuracy@3
172
  - type: cosine_accuracy@5
173
+ value: 0.66
174
  name: Cosine Accuracy@5
175
  - type: cosine_accuracy@10
176
+ value: 0.8
177
  name: Cosine Accuracy@10
178
  - type: cosine_precision@1
179
  value: 0.38
180
  name: Cosine Precision@1
181
  - type: cosine_precision@3
182
+ value: 0.19333333333333336
183
  name: Cosine Precision@3
184
  - type: cosine_precision@5
185
+ value: 0.132
186
  name: Cosine Precision@5
187
  - type: cosine_precision@10
188
+ value: 0.08
189
  name: Cosine Precision@10
190
  - type: cosine_recall@1
191
  value: 0.38
192
  name: Cosine Recall@1
193
  - type: cosine_recall@3
194
+ value: 0.58
195
  name: Cosine Recall@3
196
  - type: cosine_recall@5
197
+ value: 0.66
198
  name: Cosine Recall@5
199
  - type: cosine_recall@10
200
+ value: 0.8
201
  name: Cosine Recall@10
202
  - type: cosine_ndcg@10
203
+ value: 0.5747352409361379
204
  name: Cosine Ndcg@10
205
  - type: cosine_mrr@10
206
+ value: 0.5051349206349205
207
  name: Cosine Mrr@10
208
  - type: cosine_map@100
209
+ value: 0.5163932476955072
210
  name: Cosine Map@100
211
  - task:
212
  type: information-retrieval
 
216
  type: NanoNQ
217
  metrics:
218
  - type: cosine_accuracy@1
219
+ value: 0.46
220
  name: Cosine Accuracy@1
221
  - type: cosine_accuracy@3
222
+ value: 0.64
223
  name: Cosine Accuracy@3
224
  - type: cosine_accuracy@5
225
+ value: 0.72
226
  name: Cosine Accuracy@5
227
  - type: cosine_accuracy@10
228
+ value: 0.74
229
  name: Cosine Accuracy@10
230
  - type: cosine_precision@1
231
+ value: 0.46
232
  name: Cosine Precision@1
233
  - type: cosine_precision@3
234
+ value: 0.21333333333333332
235
  name: Cosine Precision@3
236
  - type: cosine_precision@5
237
+ value: 0.15200000000000002
238
  name: Cosine Precision@5
239
  - type: cosine_precision@10
240
+ value: 0.08
241
  name: Cosine Precision@10
242
  - type: cosine_recall@1
243
+ value: 0.44
244
  name: Cosine Recall@1
245
  - type: cosine_recall@3
246
+ value: 0.6
247
  name: Cosine Recall@3
248
  - type: cosine_recall@5
249
+ value: 0.69
250
  name: Cosine Recall@5
251
  - type: cosine_recall@10
252
+ value: 0.72
253
  name: Cosine Recall@10
254
  - type: cosine_ndcg@10
255
+ value: 0.5958872018988118
256
  name: Cosine Ndcg@10
257
  - type: cosine_mrr@10
258
+ value: 0.5641904761904761
259
  name: Cosine Mrr@10
260
  - type: cosine_map@100
261
+ value: 0.5591780429569271
262
  name: Cosine Map@100
263
  - task:
264
  type: nano-beir
 
268
  type: NanoBEIR_mean
269
  metrics:
270
  - type: cosine_accuracy@1
271
+ value: 0.42000000000000004
272
  name: Cosine Accuracy@1
273
  - type: cosine_accuracy@3
274
+ value: 0.61
275
  name: Cosine Accuracy@3
276
  - type: cosine_accuracy@5
277
+ value: 0.69
278
  name: Cosine Accuracy@5
279
  - type: cosine_accuracy@10
280
+ value: 0.77
281
  name: Cosine Accuracy@10
282
  - type: cosine_precision@1
283
+ value: 0.42000000000000004
284
  name: Cosine Precision@1
285
  - type: cosine_precision@3
286
+ value: 0.20333333333333334
287
  name: Cosine Precision@3
288
  - type: cosine_precision@5
289
+ value: 0.14200000000000002
290
  name: Cosine Precision@5
291
  - type: cosine_precision@10
292
+ value: 0.08
293
  name: Cosine Precision@10
294
  - type: cosine_recall@1
295
+ value: 0.41000000000000003
296
  name: Cosine Recall@1
297
  - type: cosine_recall@3
298
+ value: 0.59
299
  name: Cosine Recall@3
300
  - type: cosine_recall@5
301
+ value: 0.675
302
  name: Cosine Recall@5
303
  - type: cosine_recall@10
304
+ value: 0.76
305
  name: Cosine Recall@10
306
  - type: cosine_ndcg@10
307
+ value: 0.5853112214174749
308
  name: Cosine Ndcg@10
309
  - type: cosine_mrr@10
310
+ value: 0.5346626984126983
311
  name: Cosine Mrr@10
312
  - type: cosine_map@100
313
+ value: 0.5377856453262171
314
  name: Cosine Map@100
315
  ---
316
 
317
+ # SentenceTransformer based on thenlper/gte-small
318
 
319
+ This is a [sentence-transformers](https://www.SBERT.net) model finetuned from [thenlper/gte-small](https://huggingface.co/thenlper/gte-small). It maps sentences & paragraphs to a 384-dimensional dense vector space and can be used for semantic textual similarity, semantic search, paraphrase mining, text classification, clustering, and more.
320
 
321
  ## Model Details
322
 
323
  ### Model Description
324
  - **Model Type:** Sentence Transformer
325
+ - **Base model:** [thenlper/gte-small](https://huggingface.co/thenlper/gte-small) <!-- at revision 17e1f347d17fe144873b1201da91788898c639cd -->
326
  - **Maximum Sequence Length:** 128 tokens
327
+ - **Output Dimensionality:** 384 dimensions
328
  - **Similarity Function:** Cosine Similarity
329
  <!-- - **Training Dataset:** Unknown -->
330
  <!-- - **Language:** Unknown -->
 
340
 
341
  ```
342
  SentenceTransformer(
343
+ (0): Transformer({'max_seq_length': 128, 'do_lower_case': False, 'architecture': 'BertModel'})
344
+ (1): Pooling({'word_embedding_dimension': 384, 'pooling_mode_cls_token': False, 'pooling_mode_mean_tokens': True, 'pooling_mode_max_tokens': False, 'pooling_mode_mean_sqrt_len_tokens': False, 'pooling_mode_weightedmean_tokens': False, 'pooling_mode_lasttoken': False, 'include_prompt': True})
345
+ (2): Normalize()
346
  )
347
  ```
348
 
 
370
  ]
371
  embeddings = model.encode(sentences)
372
  print(embeddings.shape)
373
+ # [3, 384]
374
 
375
  # Get the similarity scores for the embeddings
376
  similarities = model.similarity(embeddings, embeddings)
377
  print(similarities)
378
+ # tensor([[1.0000, 0.7343, 0.0079],
379
+ # [0.7343, 1.0000, 0.0383],
380
+ # [0.0079, 0.0383, 1.0000]])
381
  ```
382
 
383
  <!--
 
415
 
416
  | Metric | NanoMSMARCO | NanoNQ |
417
  |:--------------------|:------------|:-----------|
418
+ | cosine_accuracy@1 | 0.38 | 0.46 |
419
+ | cosine_accuracy@3 | 0.58 | 0.64 |
420
+ | cosine_accuracy@5 | 0.66 | 0.72 |
421
+ | cosine_accuracy@10 | 0.8 | 0.74 |
422
+ | cosine_precision@1 | 0.38 | 0.46 |
423
+ | cosine_precision@3 | 0.1933 | 0.2133 |
424
+ | cosine_precision@5 | 0.132 | 0.152 |
425
+ | cosine_precision@10 | 0.08 | 0.08 |
426
+ | cosine_recall@1 | 0.38 | 0.44 |
427
+ | cosine_recall@3 | 0.58 | 0.6 |
428
+ | cosine_recall@5 | 0.66 | 0.69 |
429
+ | cosine_recall@10 | 0.8 | 0.72 |
430
+ | **cosine_ndcg@10** | **0.5747** | **0.5959** |
431
+ | cosine_mrr@10 | 0.5051 | 0.5642 |
432
+ | cosine_map@100 | 0.5164 | 0.5592 |
433
 
434
  #### Nano BEIR
435
 
 
445
  }
446
  ```
447
 
448
+ | Metric | Value |
449
+ |:--------------------|:-----------|
450
+ | cosine_accuracy@1 | 0.42 |
451
+ | cosine_accuracy@3 | 0.61 |
452
+ | cosine_accuracy@5 | 0.69 |
453
+ | cosine_accuracy@10 | 0.77 |
454
+ | cosine_precision@1 | 0.42 |
455
+ | cosine_precision@3 | 0.2033 |
456
+ | cosine_precision@5 | 0.142 |
457
+ | cosine_precision@10 | 0.08 |
458
+ | cosine_recall@1 | 0.41 |
459
+ | cosine_recall@3 | 0.59 |
460
+ | cosine_recall@5 | 0.675 |
461
+ | cosine_recall@10 | 0.76 |
462
+ | **cosine_ndcg@10** | **0.5853** |
463
+ | cosine_mrr@10 | 0.5347 |
464
+ | cosine_map@100 | 0.5378 |
465
 
466
  <!--
467
  ## Bias, Risks and Limitations
 
484
  * Size: 90,000 training samples
485
  * Columns: <code>anchor</code>, <code>positive</code>, and <code>negative</code>
486
  * Approximate statistics based on the first 1000 samples:
487
+ | | anchor | positive | negative |
488
+ |:--------|:----------------------------------------------------------------------------------|:------------------------------------------------------------------------------------|:-------------------------------------------------------------------------------------|
489
+ | type | string | string | string |
490
+ | details | <ul><li>min: 9 tokens</li><li>mean: 11.82 tokens</li><li>max: 27 tokens</li></ul> | <ul><li>min: 19 tokens</li><li>mean: 106.2 tokens</li><li>max: 128 tokens</li></ul> | <ul><li>min: 16 tokens</li><li>mean: 104.63 tokens</li><li>max: 128 tokens</li></ul> |
491
  * Samples:
492
  | anchor | positive | negative |
493
  |:----------------------------------------------------------------|:--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|:---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
 
513
  | | anchor | positive | negative |
514
  |:--------|:----------------------------------------------------------------------------------|:-------------------------------------------------------------------------------------|:-------------------------------------------------------------------------------------|
515
  | type | string | string | string |
516
+ | details | <ul><li>min: 9 tokens</li><li>mean: 11.76 tokens</li><li>max: 24 tokens</li></ul> | <ul><li>min: 15 tokens</li><li>mean: 105.95 tokens</li><li>max: 128 tokens</li></ul> | <ul><li>min: 11 tokens</li><li>mean: 105.69 tokens</li><li>max: 128 tokens</li></ul> |
517
  * Samples:
518
  | anchor | positive | negative |
519
  |:-------------------------------------------------------------------------------------------------|:---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|:----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
 
535
  - `eval_strategy`: steps
536
  - `per_device_train_batch_size`: 128
537
  - `per_device_eval_batch_size`: 128
538
+ - `learning_rate`: 8e-05
539
+ - `weight_decay`: 0.005
540
+ - `max_steps`: 1125
541
  - `warmup_ratio`: 0.1
542
  - `fp16`: True
543
  - `dataloader_drop_last`: True
 
564
  - `gradient_accumulation_steps`: 1
565
  - `eval_accumulation_steps`: None
566
  - `torch_empty_cache_steps`: None
567
+ - `learning_rate`: 8e-05
568
+ - `weight_decay`: 0.005
569
  - `adam_beta1`: 0.9
570
  - `adam_beta2`: 0.999
571
  - `adam_epsilon`: 1e-08
572
  - `max_grad_norm`: 1.0
573
  - `num_train_epochs`: 3.0
574
+ - `max_steps`: 1125
575
  - `lr_scheduler_type`: linear
576
  - `lr_scheduler_kwargs`: {}
577
  - `warmup_ratio`: 0.1
 
676
  </details>
677
 
678
  ### Training Logs
679
+ | Epoch | Step | Training Loss | Validation Loss | NanoMSMARCO_cosine_ndcg@10 | NanoNQ_cosine_ndcg@10 | NanoBEIR_mean_cosine_ndcg@10 |
680
+ |:------:|:----:|:-------------:|:---------------:|:--------------------------:|:---------------------:|:----------------------------:|
681
+ | 0 | 0 | - | 2.1869 | 0.6259 | 0.6583 | 0.6421 |
682
+ | 0.3556 | 250 | 0.3907 | 0.0761 | 0.5880 | 0.6146 | 0.6013 |
683
+ | 0.7112 | 500 | 0.0814 | 0.0680 | 0.5666 | 0.6170 | 0.5918 |
684
+ | 1.0669 | 750 | 0.0714 | 0.0634 | 0.5580 | 0.5846 | 0.5713 |
685
+ | 1.4225 | 1000 | 0.0406 | 0.0614 | 0.5747 | 0.5959 | 0.5853 |
686
 
 
687
 
688
  ### Framework Versions
689
  - Python: 3.10.18
config_sentence_transformers.json CHANGED
@@ -1,4 +1,5 @@
1
  {
 
2
  "__version__": {
3
  "sentence_transformers": "5.2.0",
4
  "transformers": "4.57.3",
@@ -9,6 +10,5 @@
9
  "document": ""
10
  },
11
  "default_prompt_name": null,
12
- "similarity_fn_name": "cosine",
13
- "model_type": "SentenceTransformer"
14
  }
 
1
  {
2
+ "model_type": "SentenceTransformer",
3
  "__version__": {
4
  "sentence_transformers": "5.2.0",
5
  "transformers": "4.57.3",
 
10
  "document": ""
11
  },
12
  "default_prompt_name": null,
13
+ "similarity_fn_name": "cosine"
 
14
  }
modules.json CHANGED
@@ -10,5 +10,11 @@
10
  "name": "1",
11
  "path": "1_Pooling",
12
  "type": "sentence_transformers.models.Pooling"
 
 
 
 
 
 
13
  }
14
  ]
 
10
  "name": "1",
11
  "path": "1_Pooling",
12
  "type": "sentence_transformers.models.Pooling"
13
+ },
14
+ {
15
+ "idx": 2,
16
+ "name": "2",
17
+ "path": "2_Normalize",
18
+ "type": "sentence_transformers.models.Normalize"
19
  }
20
  ]