radoslavralev committed on
Commit
d070a41
·
verified ·
1 Parent(s): 8db701b

Add new SentenceTransformer model

Browse files
1_Pooling/config.json CHANGED
@@ -1,7 +1,7 @@
1
  {
2
- "word_embedding_dimension": 768,
3
- "pooling_mode_cls_token": true,
4
- "pooling_mode_mean_tokens": false,
5
  "pooling_mode_max_tokens": false,
6
  "pooling_mode_mean_sqrt_len_tokens": false,
7
  "pooling_mode_weightedmean_tokens": false,
 
1
  {
2
+ "word_embedding_dimension": 384,
3
+ "pooling_mode_cls_token": false,
4
+ "pooling_mode_mean_tokens": true,
5
  "pooling_mode_max_tokens": false,
6
  "pooling_mode_mean_sqrt_len_tokens": false,
7
  "pooling_mode_weightedmean_tokens": false,
README.md CHANGED
@@ -7,7 +7,7 @@ tags:
7
  - generated_from_trainer
8
  - dataset_size:90000
9
  - loss:MultipleNegativesRankingLoss
10
- base_model: Alibaba-NLP/gte-modernbert-base
11
  widget:
12
  - source_sentence: who is the publisher of the norton anthology american literature
13
  sentences:
@@ -154,7 +154,7 @@ metrics:
154
  - cosine_mrr@10
155
  - cosine_map@100
156
  model-index:
157
- - name: SentenceTransformer based on Alibaba-NLP/gte-modernbert-base
158
  results:
159
  - task:
160
  type: information-retrieval
@@ -164,49 +164,49 @@ model-index:
164
  type: NanoMSMARCO
165
  metrics:
166
  - type: cosine_accuracy@1
167
- value: 0.26
168
  name: Cosine Accuracy@1
169
  - type: cosine_accuracy@3
170
- value: 0.48
171
  name: Cosine Accuracy@3
172
  - type: cosine_accuracy@5
173
- value: 0.6
174
  name: Cosine Accuracy@5
175
  - type: cosine_accuracy@10
176
- value: 0.68
177
  name: Cosine Accuracy@10
178
  - type: cosine_precision@1
179
- value: 0.26
180
  name: Cosine Precision@1
181
  - type: cosine_precision@3
182
- value: 0.15999999999999998
183
  name: Cosine Precision@3
184
  - type: cosine_precision@5
185
- value: 0.12000000000000002
186
  name: Cosine Precision@5
187
  - type: cosine_precision@10
188
- value: 0.068
189
  name: Cosine Precision@10
190
  - type: cosine_recall@1
191
- value: 0.26
192
  name: Cosine Recall@1
193
  - type: cosine_recall@3
194
- value: 0.48
195
  name: Cosine Recall@3
196
  - type: cosine_recall@5
197
- value: 0.6
198
  name: Cosine Recall@5
199
  - type: cosine_recall@10
200
- value: 0.68
201
  name: Cosine Recall@10
202
  - type: cosine_ndcg@10
203
- value: 0.45896424557362947
204
  name: Cosine Ndcg@10
205
  - type: cosine_mrr@10
206
- value: 0.38885714285714285
207
  name: Cosine Mrr@10
208
  - type: cosine_map@100
209
- value: 0.39926372736834176
210
  name: Cosine Map@100
211
  - task:
212
  type: information-retrieval
@@ -216,49 +216,49 @@ model-index:
216
  type: NanoNQ
217
  metrics:
218
  - type: cosine_accuracy@1
219
- value: 0.36
220
  name: Cosine Accuracy@1
221
  - type: cosine_accuracy@3
222
- value: 0.58
223
  name: Cosine Accuracy@3
224
  - type: cosine_accuracy@5
225
- value: 0.64
226
  name: Cosine Accuracy@5
227
  - type: cosine_accuracy@10
228
- value: 0.8
229
  name: Cosine Accuracy@10
230
  - type: cosine_precision@1
231
- value: 0.36
232
  name: Cosine Precision@1
233
  - type: cosine_precision@3
234
- value: 0.19333333333333333
235
  name: Cosine Precision@3
236
  - type: cosine_precision@5
237
- value: 0.12800000000000003
238
  name: Cosine Precision@5
239
  - type: cosine_precision@10
240
- value: 0.08
241
  name: Cosine Precision@10
242
  - type: cosine_recall@1
243
- value: 0.36
244
  name: Cosine Recall@1
245
  - type: cosine_recall@3
246
- value: 0.57
247
  name: Cosine Recall@3
248
  - type: cosine_recall@5
249
- value: 0.62
250
  name: Cosine Recall@5
251
  - type: cosine_recall@10
252
- value: 0.75
253
  name: Cosine Recall@10
254
  - type: cosine_ndcg@10
255
- value: 0.5491170117720099
256
  name: Cosine Ndcg@10
257
  - type: cosine_mrr@10
258
- value: 0.49174603174603176
259
  name: Cosine Mrr@10
260
  - type: cosine_map@100
261
- value: 0.4918572150858902
262
  name: Cosine Map@100
263
  - task:
264
  type: nano-beir
@@ -268,63 +268,63 @@ model-index:
268
  type: NanoBEIR_mean
269
  metrics:
270
  - type: cosine_accuracy@1
271
- value: 0.31
272
  name: Cosine Accuracy@1
273
  - type: cosine_accuracy@3
274
- value: 0.53
275
  name: Cosine Accuracy@3
276
  - type: cosine_accuracy@5
277
- value: 0.62
278
  name: Cosine Accuracy@5
279
  - type: cosine_accuracy@10
280
- value: 0.74
281
  name: Cosine Accuracy@10
282
  - type: cosine_precision@1
283
- value: 0.31
284
  name: Cosine Precision@1
285
  - type: cosine_precision@3
286
- value: 0.17666666666666664
287
  name: Cosine Precision@3
288
  - type: cosine_precision@5
289
- value: 0.12400000000000003
290
  name: Cosine Precision@5
291
  - type: cosine_precision@10
292
- value: 0.07400000000000001
293
  name: Cosine Precision@10
294
  - type: cosine_recall@1
295
- value: 0.31
296
  name: Cosine Recall@1
297
  - type: cosine_recall@3
298
- value: 0.5249999999999999
299
  name: Cosine Recall@3
300
  - type: cosine_recall@5
301
- value: 0.61
302
  name: Cosine Recall@5
303
  - type: cosine_recall@10
304
- value: 0.7150000000000001
305
  name: Cosine Recall@10
306
  - type: cosine_ndcg@10
307
- value: 0.5040406286728196
308
  name: Cosine Ndcg@10
309
  - type: cosine_mrr@10
310
- value: 0.4403015873015873
311
  name: Cosine Mrr@10
312
  - type: cosine_map@100
313
- value: 0.44556047122711595
314
  name: Cosine Map@100
315
  ---
316
 
317
- # SentenceTransformer based on Alibaba-NLP/gte-modernbert-base
318
 
319
- This is a [sentence-transformers](https://www.SBERT.net) model finetuned from [Alibaba-NLP/gte-modernbert-base](https://huggingface.co/Alibaba-NLP/gte-modernbert-base). It maps sentences & paragraphs to a 768-dimensional dense vector space and can be used for semantic textual similarity, semantic search, paraphrase mining, text classification, clustering, and more.
320
 
321
  ## Model Details
322
 
323
  ### Model Description
324
  - **Model Type:** Sentence Transformer
325
- - **Base model:** [Alibaba-NLP/gte-modernbert-base](https://huggingface.co/Alibaba-NLP/gte-modernbert-base) <!-- at revision e7f32e3c00f91d699e8c43b53106206bcc72bb22 -->
326
  - **Maximum Sequence Length:** 128 tokens
327
- - **Output Dimensionality:** 768 dimensions
328
  - **Similarity Function:** Cosine Similarity
329
  <!-- - **Training Dataset:** Unknown -->
330
  <!-- - **Language:** Unknown -->
@@ -340,8 +340,9 @@ This is a [sentence-transformers](https://www.SBERT.net) model finetuned from [A
340
 
341
  ```
342
  SentenceTransformer(
343
- (0): Transformer({'max_seq_length': 128, 'do_lower_case': False, 'architecture': 'ModernBertModel'})
344
- (1): Pooling({'word_embedding_dimension': 768, 'pooling_mode_cls_token': True, 'pooling_mode_mean_tokens': False, 'pooling_mode_max_tokens': False, 'pooling_mode_mean_sqrt_len_tokens': False, 'pooling_mode_weightedmean_tokens': False, 'pooling_mode_lasttoken': False, 'include_prompt': True})
 
345
  )
346
  ```
347
 
@@ -369,14 +370,14 @@ sentences = [
369
  ]
370
  embeddings = model.encode(sentences)
371
  print(embeddings.shape)
372
- # [3, 768]
373
 
374
  # Get the similarity scores for the embeddings
375
  similarities = model.similarity(embeddings, embeddings)
376
  print(similarities)
377
- # tensor([[ 1.0000, 0.9877, -0.0962],
378
- # [ 0.9877, 1.0000, -0.0887],
379
- # [-0.0962, -0.0887, 1.0000]])
380
  ```
381
 
382
  <!--
@@ -414,21 +415,21 @@ You can finetune this model on your own dataset.
414
 
415
  | Metric | NanoMSMARCO | NanoNQ |
416
  |:--------------------|:------------|:-----------|
417
- | cosine_accuracy@1 | 0.26 | 0.36 |
418
- | cosine_accuracy@3 | 0.48 | 0.58 |
419
- | cosine_accuracy@5 | 0.6 | 0.64 |
420
- | cosine_accuracy@10 | 0.68 | 0.8 |
421
- | cosine_precision@1 | 0.26 | 0.36 |
422
- | cosine_precision@3 | 0.16 | 0.1933 |
423
- | cosine_precision@5 | 0.12 | 0.128 |
424
- | cosine_precision@10 | 0.068 | 0.08 |
425
- | cosine_recall@1 | 0.26 | 0.36 |
426
- | cosine_recall@3 | 0.48 | 0.57 |
427
- | cosine_recall@5 | 0.6 | 0.62 |
428
- | cosine_recall@10 | 0.68 | 0.75 |
429
- | **cosine_ndcg@10** | **0.459** | **0.5491** |
430
- | cosine_mrr@10 | 0.3889 | 0.4917 |
431
- | cosine_map@100 | 0.3993 | 0.4919 |
432
 
433
  #### Nano BEIR
434
 
@@ -444,23 +445,23 @@ You can finetune this model on your own dataset.
444
  }
445
  ```
446
 
447
- | Metric | Value |
448
- |:--------------------|:----------|
449
- | cosine_accuracy@1 | 0.31 |
450
- | cosine_accuracy@3 | 0.53 |
451
- | cosine_accuracy@5 | 0.62 |
452
- | cosine_accuracy@10 | 0.74 |
453
- | cosine_precision@1 | 0.31 |
454
- | cosine_precision@3 | 0.1767 |
455
- | cosine_precision@5 | 0.124 |
456
- | cosine_precision@10 | 0.074 |
457
- | cosine_recall@1 | 0.31 |
458
- | cosine_recall@3 | 0.525 |
459
- | cosine_recall@5 | 0.61 |
460
- | cosine_recall@10 | 0.715 |
461
- | **cosine_ndcg@10** | **0.504** |
462
- | cosine_mrr@10 | 0.4403 |
463
- | cosine_map@100 | 0.4456 |
464
 
465
  <!--
466
  ## Bias, Risks and Limitations
@@ -483,10 +484,10 @@ You can finetune this model on your own dataset.
483
  * Size: 90,000 training samples
484
  * Columns: <code>anchor</code>, <code>positive</code>, and <code>negative</code>
485
  * Approximate statistics based on the first 1000 samples:
486
- | | anchor | positive | negative |
487
- |:--------|:-----------------------------------------------------------------------------------|:-------------------------------------------------------------------------------------|:-------------------------------------------------------------------------------------|
488
- | type | string | string | string |
489
- | details | <ul><li>min: 10 tokens</li><li>mean: 12.57 tokens</li><li>max: 28 tokens</li></ul> | <ul><li>min: 19 tokens</li><li>mean: 107.04 tokens</li><li>max: 128 tokens</li></ul> | <ul><li>min: 15 tokens</li><li>mean: 105.42 tokens</li><li>max: 128 tokens</li></ul> |
490
  * Samples:
491
  | anchor | positive | negative |
492
  |:----------------------------------------------------------------|:--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|:---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
@@ -512,7 +513,7 @@ You can finetune this model on your own dataset.
512
  | | anchor | positive | negative |
513
  |:--------|:----------------------------------------------------------------------------------|:-------------------------------------------------------------------------------------|:-------------------------------------------------------------------------------------|
514
  | type | string | string | string |
515
- | details | <ul><li>min: 9 tokens</li><li>mean: 12.46 tokens</li><li>max: 25 tokens</li></ul> | <ul><li>min: 16 tokens</li><li>mean: 106.89 tokens</li><li>max: 128 tokens</li></ul> | <ul><li>min: 11 tokens</li><li>mean: 106.57 tokens</li><li>max: 128 tokens</li></ul> |
516
  * Samples:
517
  | anchor | positive | negative |
518
  |:-------------------------------------------------------------------------------------------------|:---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|:----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
@@ -534,9 +535,9 @@ You can finetune this model on your own dataset.
534
  - `eval_strategy`: steps
535
  - `per_device_train_batch_size`: 128
536
  - `per_device_eval_batch_size`: 128
537
- - `learning_rate`: 4e-05
538
- - `weight_decay`: 0.01
539
- - `max_steps`: 703
540
  - `warmup_ratio`: 0.1
541
  - `fp16`: True
542
  - `dataloader_drop_last`: True
@@ -563,14 +564,14 @@ You can finetune this model on your own dataset.
563
  - `gradient_accumulation_steps`: 1
564
  - `eval_accumulation_steps`: None
565
  - `torch_empty_cache_steps`: None
566
- - `learning_rate`: 4e-05
567
- - `weight_decay`: 0.01
568
  - `adam_beta1`: 0.9
569
  - `adam_beta2`: 0.999
570
  - `adam_epsilon`: 1e-08
571
  - `max_grad_norm`: 1.0
572
  - `num_train_epochs`: 3.0
573
- - `max_steps`: 703
574
  - `lr_scheduler_type`: linear
575
  - `lr_scheduler_kwargs`: {}
576
  - `warmup_ratio`: 0.1
@@ -677,9 +678,20 @@ You can finetune this model on your own dataset.
677
  ### Training Logs
678
  | Epoch | Step | Training Loss | Validation Loss | NanoMSMARCO_cosine_ndcg@10 | NanoNQ_cosine_ndcg@10 | NanoBEIR_mean_cosine_ndcg@10 |
679
  |:------:|:----:|:-------------:|:---------------:|:--------------------------:|:---------------------:|:----------------------------:|
680
- | 0 | 0 | - | 4.4513 | 0.6530 | 0.6552 | 0.6541 |
681
- | 0.3556 | 250 | 3.1939 | 2.9908 | 0.4651 | 0.5551 | 0.5101 |
682
- | 0.7112 | 500 | 2.9769 | 2.9599 | 0.4590 | 0.5491 | 0.5040 |
 
 
 
 
 
 
 
 
 
 
 
683
 
684
 
685
  ### Framework Versions
 
7
  - generated_from_trainer
8
  - dataset_size:90000
9
  - loss:MultipleNegativesRankingLoss
10
+ base_model: thenlper/gte-small
11
  widget:
12
  - source_sentence: who is the publisher of the norton anthology american literature
13
  sentences:
 
154
  - cosine_mrr@10
155
  - cosine_map@100
156
  model-index:
157
+ - name: SentenceTransformer based on thenlper/gte-small
158
  results:
159
  - task:
160
  type: information-retrieval
 
164
  type: NanoMSMARCO
165
  metrics:
166
  - type: cosine_accuracy@1
167
+ value: 0.12
168
  name: Cosine Accuracy@1
169
  - type: cosine_accuracy@3
170
+ value: 0.32
171
  name: Cosine Accuracy@3
172
  - type: cosine_accuracy@5
173
+ value: 0.48
174
  name: Cosine Accuracy@5
175
  - type: cosine_accuracy@10
176
+ value: 0.6
177
  name: Cosine Accuracy@10
178
  - type: cosine_precision@1
179
+ value: 0.12
180
  name: Cosine Precision@1
181
  - type: cosine_precision@3
182
+ value: 0.10666666666666666
183
  name: Cosine Precision@3
184
  - type: cosine_precision@5
185
+ value: 0.09600000000000002
186
  name: Cosine Precision@5
187
  - type: cosine_precision@10
188
+ value: 0.06
189
  name: Cosine Precision@10
190
  - type: cosine_recall@1
191
+ value: 0.12
192
  name: Cosine Recall@1
193
  - type: cosine_recall@3
194
+ value: 0.32
195
  name: Cosine Recall@3
196
  - type: cosine_recall@5
197
+ value: 0.48
198
  name: Cosine Recall@5
199
  - type: cosine_recall@10
200
+ value: 0.6
201
  name: Cosine Recall@10
202
  - type: cosine_ndcg@10
203
+ value: 0.3451699142127375
204
  name: Cosine Ndcg@10
205
  - type: cosine_mrr@10
206
+ value: 0.2649920634920635
207
  name: Cosine Mrr@10
208
  - type: cosine_map@100
209
+ value: 0.2748673342528789
210
  name: Cosine Map@100
211
  - task:
212
  type: information-retrieval
 
216
  type: NanoNQ
217
  metrics:
218
  - type: cosine_accuracy@1
219
+ value: 0.22
220
  name: Cosine Accuracy@1
221
  - type: cosine_accuracy@3
222
+ value: 0.44
223
  name: Cosine Accuracy@3
224
  - type: cosine_accuracy@5
225
+ value: 0.5
226
  name: Cosine Accuracy@5
227
  - type: cosine_accuracy@10
228
+ value: 0.56
229
  name: Cosine Accuracy@10
230
  - type: cosine_precision@1
231
+ value: 0.22
232
  name: Cosine Precision@1
233
  - type: cosine_precision@3
234
+ value: 0.14666666666666664
235
  name: Cosine Precision@3
236
  - type: cosine_precision@5
237
+ value: 0.1
238
  name: Cosine Precision@5
239
  - type: cosine_precision@10
240
+ value: 0.05800000000000001
241
  name: Cosine Precision@10
242
  - type: cosine_recall@1
243
+ value: 0.22
244
  name: Cosine Recall@1
245
  - type: cosine_recall@3
246
+ value: 0.43
247
  name: Cosine Recall@3
248
  - type: cosine_recall@5
249
+ value: 0.49
250
  name: Cosine Recall@5
251
  - type: cosine_recall@10
252
+ value: 0.54
253
  name: Cosine Recall@10
254
  - type: cosine_ndcg@10
255
+ value: 0.3853992171360362
256
  name: Cosine Ndcg@10
257
  - type: cosine_mrr@10
258
+ value: 0.3358888888888889
259
  name: Cosine Mrr@10
260
  - type: cosine_map@100
261
+ value: 0.3486523060866078
262
  name: Cosine Map@100
263
  - task:
264
  type: nano-beir
 
268
  type: NanoBEIR_mean
269
  metrics:
270
  - type: cosine_accuracy@1
271
+ value: 0.16999999999999998
272
  name: Cosine Accuracy@1
273
  - type: cosine_accuracy@3
274
+ value: 0.38
275
  name: Cosine Accuracy@3
276
  - type: cosine_accuracy@5
277
+ value: 0.49
278
  name: Cosine Accuracy@5
279
  - type: cosine_accuracy@10
280
+ value: 0.5800000000000001
281
  name: Cosine Accuracy@10
282
  - type: cosine_precision@1
283
+ value: 0.16999999999999998
284
  name: Cosine Precision@1
285
  - type: cosine_precision@3
286
+ value: 0.12666666666666665
287
  name: Cosine Precision@3
288
  - type: cosine_precision@5
289
+ value: 0.098
290
  name: Cosine Precision@5
291
  - type: cosine_precision@10
292
+ value: 0.059000000000000004
293
  name: Cosine Precision@10
294
  - type: cosine_recall@1
295
+ value: 0.16999999999999998
296
  name: Cosine Recall@1
297
  - type: cosine_recall@3
298
+ value: 0.375
299
  name: Cosine Recall@3
300
  - type: cosine_recall@5
301
+ value: 0.485
302
  name: Cosine Recall@5
303
  - type: cosine_recall@10
304
+ value: 0.5700000000000001
305
  name: Cosine Recall@10
306
  - type: cosine_ndcg@10
307
+ value: 0.36528456567438683
308
  name: Cosine Ndcg@10
309
  - type: cosine_mrr@10
310
+ value: 0.3004404761904762
311
  name: Cosine Mrr@10
312
  - type: cosine_map@100
313
+ value: 0.31175982016974335
314
  name: Cosine Map@100
315
  ---
316
 
317
+ # SentenceTransformer based on thenlper/gte-small
318
 
319
+ This is a [sentence-transformers](https://www.SBERT.net) model finetuned from [thenlper/gte-small](https://huggingface.co/thenlper/gte-small). It maps sentences & paragraphs to a 384-dimensional dense vector space and can be used for semantic textual similarity, semantic search, paraphrase mining, text classification, clustering, and more.
320
 
321
  ## Model Details
322
 
323
  ### Model Description
324
  - **Model Type:** Sentence Transformer
325
+ - **Base model:** [thenlper/gte-small](https://huggingface.co/thenlper/gte-small) <!-- at revision 17e1f347d17fe144873b1201da91788898c639cd -->
326
  - **Maximum Sequence Length:** 128 tokens
327
+ - **Output Dimensionality:** 384 dimensions
328
  - **Similarity Function:** Cosine Similarity
329
  <!-- - **Training Dataset:** Unknown -->
330
  <!-- - **Language:** Unknown -->
 
340
 
341
  ```
342
  SentenceTransformer(
343
+ (0): Transformer({'max_seq_length': 128, 'do_lower_case': False, 'architecture': 'BertModel'})
344
+ (1): Pooling({'word_embedding_dimension': 384, 'pooling_mode_cls_token': False, 'pooling_mode_mean_tokens': True, 'pooling_mode_max_tokens': False, 'pooling_mode_mean_sqrt_len_tokens': False, 'pooling_mode_weightedmean_tokens': False, 'pooling_mode_lasttoken': False, 'include_prompt': True})
345
+ (2): Normalize()
346
  )
347
  ```
348
 
 
370
  ]
371
  embeddings = model.encode(sentences)
372
  print(embeddings.shape)
373
+ # [3, 384]
374
 
375
  # Get the similarity scores for the embeddings
376
  similarities = model.similarity(embeddings, embeddings)
377
  print(similarities)
378
+ # tensor([[ 1.0000, 0.9977, -0.0309],
379
+ # [ 0.9977, 1.0000, -0.0296],
380
+ # [-0.0309, -0.0296, 1.0000]])
381
  ```
382
 
383
  <!--
 
415
 
416
  | Metric | NanoMSMARCO | NanoNQ |
417
  |:--------------------|:------------|:-----------|
418
+ | cosine_accuracy@1 | 0.12 | 0.22 |
419
+ | cosine_accuracy@3 | 0.32 | 0.44 |
420
+ | cosine_accuracy@5 | 0.48 | 0.5 |
421
+ | cosine_accuracy@10 | 0.6 | 0.56 |
422
+ | cosine_precision@1 | 0.12 | 0.22 |
423
+ | cosine_precision@3 | 0.1067 | 0.1467 |
424
+ | cosine_precision@5 | 0.096 | 0.1 |
425
+ | cosine_precision@10 | 0.06 | 0.058 |
426
+ | cosine_recall@1 | 0.12 | 0.22 |
427
+ | cosine_recall@3 | 0.32 | 0.43 |
428
+ | cosine_recall@5 | 0.48 | 0.49 |
429
+ | cosine_recall@10 | 0.6 | 0.54 |
430
+ | **cosine_ndcg@10** | **0.3452** | **0.3854** |
431
+ | cosine_mrr@10 | 0.265 | 0.3359 |
432
+ | cosine_map@100 | 0.2749 | 0.3487 |
433
 
434
  #### Nano BEIR
435
 
 
445
  }
446
  ```
447
 
448
+ | Metric | Value |
449
+ |:--------------------|:-----------|
450
+ | cosine_accuracy@1 | 0.17 |
451
+ | cosine_accuracy@3 | 0.38 |
452
+ | cosine_accuracy@5 | 0.49 |
453
+ | cosine_accuracy@10 | 0.58 |
454
+ | cosine_precision@1 | 0.17 |
455
+ | cosine_precision@3 | 0.1267 |
456
+ | cosine_precision@5 | 0.098 |
457
+ | cosine_precision@10 | 0.059 |
458
+ | cosine_recall@1 | 0.17 |
459
+ | cosine_recall@3 | 0.375 |
460
+ | cosine_recall@5 | 0.485 |
461
+ | cosine_recall@10 | 0.57 |
462
+ | **cosine_ndcg@10** | **0.3653** |
463
+ | cosine_mrr@10 | 0.3004 |
464
+ | cosine_map@100 | 0.3118 |
465
 
466
  <!--
467
  ## Bias, Risks and Limitations
 
484
  * Size: 90,000 training samples
485
  * Columns: <code>anchor</code>, <code>positive</code>, and <code>negative</code>
486
  * Approximate statistics based on the first 1000 samples:
487
+ | | anchor | positive | negative |
488
+ |:--------|:----------------------------------------------------------------------------------|:------------------------------------------------------------------------------------|:-------------------------------------------------------------------------------------|
489
+ | type | string | string | string |
490
+ | details | <ul><li>min: 9 tokens</li><li>mean: 11.82 tokens</li><li>max: 27 tokens</li></ul> | <ul><li>min: 19 tokens</li><li>mean: 106.2 tokens</li><li>max: 128 tokens</li></ul> | <ul><li>min: 16 tokens</li><li>mean: 104.63 tokens</li><li>max: 128 tokens</li></ul> |
491
  * Samples:
492
  | anchor | positive | negative |
493
  |:----------------------------------------------------------------|:--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|:---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
 
513
  | | anchor | positive | negative |
514
  |:--------|:----------------------------------------------------------------------------------|:-------------------------------------------------------------------------------------|:-------------------------------------------------------------------------------------|
515
  | type | string | string | string |
516
+ | details | <ul><li>min: 9 tokens</li><li>mean: 11.76 tokens</li><li>max: 24 tokens</li></ul> | <ul><li>min: 15 tokens</li><li>mean: 105.95 tokens</li><li>max: 128 tokens</li></ul> | <ul><li>min: 11 tokens</li><li>mean: 105.69 tokens</li><li>max: 128 tokens</li></ul> |
517
  * Samples:
518
  | anchor | positive | negative |
519
  |:-------------------------------------------------------------------------------------------------|:---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|:----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
 
535
  - `eval_strategy`: steps
536
  - `per_device_train_batch_size`: 128
537
  - `per_device_eval_batch_size`: 128
538
+ - `learning_rate`: 8e-05
539
+ - `weight_decay`: 0.005
540
+ - `max_steps`: 3375
541
  - `warmup_ratio`: 0.1
542
  - `fp16`: True
543
  - `dataloader_drop_last`: True
 
564
  - `gradient_accumulation_steps`: 1
565
  - `eval_accumulation_steps`: None
566
  - `torch_empty_cache_steps`: None
567
+ - `learning_rate`: 8e-05
568
+ - `weight_decay`: 0.005
569
  - `adam_beta1`: 0.9
570
  - `adam_beta2`: 0.999
571
  - `adam_epsilon`: 1e-08
572
  - `max_grad_norm`: 1.0
573
  - `num_train_epochs`: 3.0
574
+ - `max_steps`: 3375
575
  - `lr_scheduler_type`: linear
576
  - `lr_scheduler_kwargs`: {}
577
  - `warmup_ratio`: 0.1
 
678
  ### Training Logs
679
  | Epoch | Step | Training Loss | Validation Loss | NanoMSMARCO_cosine_ndcg@10 | NanoNQ_cosine_ndcg@10 | NanoBEIR_mean_cosine_ndcg@10 |
680
  |:------:|:----:|:-------------:|:---------------:|:--------------------------:|:---------------------:|:----------------------------:|
681
+ | 0 | 0 | - | 5.0014 | 0.6259 | 0.6583 | 0.6421 |
682
+ | 0.3556 | 250 | 3.7345 | 3.0513 | 0.4721 | 0.4567 | 0.4644 |
683
+ | 0.7112 | 500 | 3.1165 | 2.9938 | 0.4464 | 0.4306 | 0.4385 |
684
+ | 1.0669 | 750 | 3.055 | 2.9656 | 0.4028 | 0.4675 | 0.4351 |
685
+ | 1.4225 | 1000 | 3.0018 | 2.9558 | 0.3668 | 0.4309 | 0.3989 |
686
+ | 1.7781 | 1250 | 2.988 | 2.9463 | 0.4017 | 0.4426 | 0.4221 |
687
+ | 2.1337 | 1500 | 2.9625 | 2.9372 | 0.3571 | 0.4003 | 0.3787 |
688
+ | 2.4893 | 1750 | 2.9363 | 2.9311 | 0.3729 | 0.4068 | 0.3898 |
689
+ | 2.8450 | 2000 | 2.9287 | 2.9274 | 0.3728 | 0.3778 | 0.3753 |
690
+ | 3.2006 | 2250 | 2.907 | 2.9254 | 0.3770 | 0.3713 | 0.3742 |
691
+ | 3.5562 | 2500 | 2.8979 | 2.9242 | 0.3606 | 0.3884 | 0.3745 |
692
+ | 3.9118 | 2750 | 2.8931 | 2.9215 | 0.3446 | 0.3955 | 0.3700 |
693
+ | 4.2674 | 3000 | 2.883 | 2.9207 | 0.3511 | 0.3777 | 0.3644 |
694
+ | 4.6230 | 3250 | 2.8762 | 2.9201 | 0.3452 | 0.3854 | 0.3653 |
695
 
696
 
697
  ### Framework Versions
config_sentence_transformers.json CHANGED
@@ -1,4 +1,5 @@
1
  {
 
2
  "__version__": {
3
  "sentence_transformers": "5.2.0",
4
  "transformers": "4.57.3",
@@ -9,6 +10,5 @@
9
  "document": ""
10
  },
11
  "default_prompt_name": null,
12
- "similarity_fn_name": "cosine",
13
- "model_type": "SentenceTransformer"
14
  }
 
1
  {
2
+ "model_type": "SentenceTransformer",
3
  "__version__": {
4
  "sentence_transformers": "5.2.0",
5
  "transformers": "4.57.3",
 
10
  "document": ""
11
  },
12
  "default_prompt_name": null,
13
+ "similarity_fn_name": "cosine"
 
14
  }
modules.json CHANGED
@@ -10,5 +10,11 @@
10
  "name": "1",
11
  "path": "1_Pooling",
12
  "type": "sentence_transformers.models.Pooling"
 
 
 
 
 
 
13
  }
14
  ]
 
10
  "name": "1",
11
  "path": "1_Pooling",
12
  "type": "sentence_transformers.models.Pooling"
13
+ },
14
+ {
15
+ "idx": 2,
16
+ "name": "2",
17
+ "path": "2_Normalize",
18
+ "type": "sentence_transformers.models.Normalize"
19
  }
20
  ]