radoslavralev committed on
Commit
f60ac8c
·
verified ·
1 Parent(s): e33c413

Add new SentenceTransformer model

Browse files
1_Pooling/config.json CHANGED
@@ -1,7 +1,7 @@
1
  {
2
- "word_embedding_dimension": 768,
3
- "pooling_mode_cls_token": true,
4
- "pooling_mode_mean_tokens": false,
5
  "pooling_mode_max_tokens": false,
6
  "pooling_mode_mean_sqrt_len_tokens": false,
7
  "pooling_mode_weightedmean_tokens": false,
 
1
  {
2
+ "word_embedding_dimension": 384,
3
+ "pooling_mode_cls_token": false,
4
+ "pooling_mode_mean_tokens": true,
5
  "pooling_mode_max_tokens": false,
6
  "pooling_mode_mean_sqrt_len_tokens": false,
7
  "pooling_mode_weightedmean_tokens": false,
README.md CHANGED
@@ -7,7 +7,7 @@ tags:
7
  - generated_from_trainer
8
  - dataset_size:111470
9
  - loss:MultipleNegativesRankingLoss
10
- base_model: Alibaba-NLP/gte-modernbert-base
11
  widget:
12
  - source_sentence: when was the first elephant brought to america
13
  sentences:
@@ -132,7 +132,7 @@ metrics:
132
  - cosine_mrr@10
133
  - cosine_map@100
134
  model-index:
135
- - name: SentenceTransformer based on Alibaba-NLP/gte-modernbert-base
136
  results:
137
  - task:
138
  type: information-retrieval
@@ -142,49 +142,49 @@ model-index:
142
  type: NanoMSMARCO
143
  metrics:
144
  - type: cosine_accuracy@1
145
- value: 0.38
146
  name: Cosine Accuracy@1
147
  - type: cosine_accuracy@3
148
- value: 0.64
149
  name: Cosine Accuracy@3
150
  - type: cosine_accuracy@5
151
- value: 0.76
152
  name: Cosine Accuracy@5
153
  - type: cosine_accuracy@10
154
- value: 0.84
155
  name: Cosine Accuracy@10
156
  - type: cosine_precision@1
157
- value: 0.38
158
  name: Cosine Precision@1
159
  - type: cosine_precision@3
160
- value: 0.21333333333333335
161
  name: Cosine Precision@3
162
  - type: cosine_precision@5
163
- value: 0.15200000000000002
164
  name: Cosine Precision@5
165
  - type: cosine_precision@10
166
- value: 0.08399999999999999
167
  name: Cosine Precision@10
168
  - type: cosine_recall@1
169
- value: 0.38
170
  name: Cosine Recall@1
171
  - type: cosine_recall@3
172
- value: 0.64
173
  name: Cosine Recall@3
174
  - type: cosine_recall@5
175
- value: 0.76
176
  name: Cosine Recall@5
177
  - type: cosine_recall@10
178
- value: 0.84
179
  name: Cosine Recall@10
180
  - type: cosine_ndcg@10
181
- value: 0.6071739753451822
182
  name: Cosine Ndcg@10
183
  - type: cosine_mrr@10
184
- value: 0.5318571428571428
185
  name: Cosine Mrr@10
186
  - type: cosine_map@100
187
- value: 0.5383857612227555
188
  name: Cosine Map@100
189
  - task:
190
  type: information-retrieval
@@ -194,49 +194,49 @@ model-index:
194
  type: NanoNQ
195
  metrics:
196
  - type: cosine_accuracy@1
197
- value: 0.58
198
  name: Cosine Accuracy@1
199
  - type: cosine_accuracy@3
200
- value: 0.74
201
  name: Cosine Accuracy@3
202
  - type: cosine_accuracy@5
203
- value: 0.8
204
  name: Cosine Accuracy@5
205
  - type: cosine_accuracy@10
206
- value: 0.84
207
  name: Cosine Accuracy@10
208
  - type: cosine_precision@1
209
- value: 0.58
210
  name: Cosine Precision@1
211
  - type: cosine_precision@3
212
- value: 0.25333333333333335
213
  name: Cosine Precision@3
214
  - type: cosine_precision@5
215
- value: 0.16399999999999998
216
  name: Cosine Precision@5
217
  - type: cosine_precision@10
218
- value: 0.092
219
  name: Cosine Precision@10
220
  - type: cosine_recall@1
221
- value: 0.55
222
  name: Cosine Recall@1
223
  - type: cosine_recall@3
224
- value: 0.7
225
  name: Cosine Recall@3
226
  - type: cosine_recall@5
227
- value: 0.74
228
  name: Cosine Recall@5
229
  - type: cosine_recall@10
230
- value: 0.81
231
  name: Cosine Recall@10
232
  - type: cosine_ndcg@10
233
- value: 0.6914852313456867
234
  name: Cosine Ndcg@10
235
  - type: cosine_mrr@10
236
- value: 0.6720555555555556
237
  name: Cosine Mrr@10
238
  - type: cosine_map@100
239
- value: 0.6484054934853453
240
  name: Cosine Map@100
241
  - task:
242
  type: nano-beir
@@ -246,63 +246,63 @@ model-index:
246
  type: NanoBEIR_mean
247
  metrics:
248
  - type: cosine_accuracy@1
249
- value: 0.48
250
  name: Cosine Accuracy@1
251
  - type: cosine_accuracy@3
252
- value: 0.69
253
  name: Cosine Accuracy@3
254
  - type: cosine_accuracy@5
255
- value: 0.78
256
  name: Cosine Accuracy@5
257
  - type: cosine_accuracy@10
258
- value: 0.84
259
  name: Cosine Accuracy@10
260
  - type: cosine_precision@1
261
- value: 0.48
262
  name: Cosine Precision@1
263
  - type: cosine_precision@3
264
- value: 0.23333333333333334
265
  name: Cosine Precision@3
266
  - type: cosine_precision@5
267
- value: 0.158
268
  name: Cosine Precision@5
269
  - type: cosine_precision@10
270
- value: 0.088
271
  name: Cosine Precision@10
272
  - type: cosine_recall@1
273
- value: 0.465
274
  name: Cosine Recall@1
275
  - type: cosine_recall@3
276
- value: 0.6699999999999999
277
  name: Cosine Recall@3
278
  - type: cosine_recall@5
279
- value: 0.75
280
  name: Cosine Recall@5
281
  - type: cosine_recall@10
282
- value: 0.825
283
  name: Cosine Recall@10
284
  - type: cosine_ndcg@10
285
- value: 0.6493296033454345
286
  name: Cosine Ndcg@10
287
  - type: cosine_mrr@10
288
- value: 0.6019563492063492
289
  name: Cosine Mrr@10
290
  - type: cosine_map@100
291
- value: 0.5933956273540504
292
  name: Cosine Map@100
293
  ---
294
 
295
- # SentenceTransformer based on Alibaba-NLP/gte-modernbert-base
296
 
297
- This is a [sentence-transformers](https://www.SBERT.net) model finetuned from [Alibaba-NLP/gte-modernbert-base](https://huggingface.co/Alibaba-NLP/gte-modernbert-base). It maps sentences & paragraphs to a 768-dimensional dense vector space and can be used for semantic textual similarity, semantic search, paraphrase mining, text classification, clustering, and more.
298
 
299
  ## Model Details
300
 
301
  ### Model Description
302
  - **Model Type:** Sentence Transformer
303
- - **Base model:** [Alibaba-NLP/gte-modernbert-base](https://huggingface.co/Alibaba-NLP/gte-modernbert-base) <!-- at revision e7f32e3c00f91d699e8c43b53106206bcc72bb22 -->
304
  - **Maximum Sequence Length:** 128 tokens
305
- - **Output Dimensionality:** 768 dimensions
306
  - **Similarity Function:** Cosine Similarity
307
  <!-- - **Training Dataset:** Unknown -->
308
  <!-- - **Language:** Unknown -->
@@ -318,8 +318,9 @@ This is a [sentence-transformers](https://www.SBERT.net) model finetuned from [A
318
 
319
  ```
320
  SentenceTransformer(
321
- (0): Transformer({'max_seq_length': 128, 'do_lower_case': False, 'architecture': 'ModernBertModel'})
322
- (1): Pooling({'word_embedding_dimension': 768, 'pooling_mode_cls_token': True, 'pooling_mode_mean_tokens': False, 'pooling_mode_max_tokens': False, 'pooling_mode_mean_sqrt_len_tokens': False, 'pooling_mode_weightedmean_tokens': False, 'pooling_mode_lasttoken': False, 'include_prompt': True})
 
323
  )
324
  ```
325
 
@@ -347,14 +348,14 @@ sentences = [
347
  ]
348
  embeddings = model.encode(sentences)
349
  print(embeddings.shape)
350
- # [3, 768]
351
 
352
  # Get the similarity scores for the embeddings
353
  similarities = model.similarity(embeddings, embeddings)
354
  print(similarities)
355
- # tensor([[1.0000, 1.0000, 0.3030],
356
- # [1.0000, 1.0000, 0.3030],
357
- # [0.3030, 0.3030, 1.0001]])
358
  ```
359
 
360
  <!--
@@ -392,21 +393,21 @@ You can finetune this model on your own dataset.
392
 
393
  | Metric | NanoMSMARCO | NanoNQ |
394
  |:--------------------|:------------|:-----------|
395
- | cosine_accuracy@1 | 0.38 | 0.58 |
396
- | cosine_accuracy@3 | 0.64 | 0.74 |
397
- | cosine_accuracy@5 | 0.76 | 0.8 |
398
- | cosine_accuracy@10 | 0.84 | 0.84 |
399
- | cosine_precision@1 | 0.38 | 0.58 |
400
- | cosine_precision@3 | 0.2133 | 0.2533 |
401
- | cosine_precision@5 | 0.152 | 0.164 |
402
- | cosine_precision@10 | 0.084 | 0.092 |
403
- | cosine_recall@1 | 0.38 | 0.55 |
404
- | cosine_recall@3 | 0.64 | 0.7 |
405
- | cosine_recall@5 | 0.76 | 0.74 |
406
- | cosine_recall@10 | 0.84 | 0.81 |
407
- | **cosine_ndcg@10** | **0.6072** | **0.6915** |
408
- | cosine_mrr@10 | 0.5319 | 0.6721 |
409
- | cosine_map@100 | 0.5384 | 0.6484 |
410
 
411
  #### Nano BEIR
412
 
@@ -424,21 +425,21 @@ You can finetune this model on your own dataset.
424
 
425
  | Metric | Value |
426
  |:--------------------|:-----------|
427
- | cosine_accuracy@1 | 0.48 |
428
- | cosine_accuracy@3 | 0.69 |
429
- | cosine_accuracy@5 | 0.78 |
430
- | cosine_accuracy@10 | 0.84 |
431
- | cosine_precision@1 | 0.48 |
432
- | cosine_precision@3 | 0.2333 |
433
- | cosine_precision@5 | 0.158 |
434
- | cosine_precision@10 | 0.088 |
435
- | cosine_recall@1 | 0.465 |
436
- | cosine_recall@3 | 0.67 |
437
- | cosine_recall@5 | 0.75 |
438
- | cosine_recall@10 | 0.825 |
439
- | **cosine_ndcg@10** | **0.6493** |
440
- | cosine_mrr@10 | 0.602 |
441
- | cosine_map@100 | 0.5934 |
442
 
443
  <!--
444
  ## Bias, Risks and Limitations
@@ -464,7 +465,7 @@ You can finetune this model on your own dataset.
464
  | | anchor | positive | negative |
465
  |:--------|:----------------------------------------------------------------------------------|:-----------------------------------------------------------------------------------|:-----------------------------------------------------------------------------------|
466
  | type | string | string | string |
467
- | details | <ul><li>min: 6 tokens</li><li>mean: 13.83 tokens</li><li>max: 45 tokens</li></ul> | <ul><li>min: 6 tokens</li><li>mean: 91.42 tokens</li><li>max: 128 tokens</li></ul> | <ul><li>min: 7 tokens</li><li>mean: 90.36 tokens</li><li>max: 128 tokens</li></ul> |
468
  * Samples:
469
  | anchor | positive | negative |
470
  |:--------------------------------------------------------------------------------------------------------|:--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|:---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
@@ -490,7 +491,7 @@ You can finetune this model on your own dataset.
490
  | | anchor | positive | negative |
491
  |:--------|:----------------------------------------------------------------------------------|:-----------------------------------------------------------------------------------|:-----------------------------------------------------------------------------------|
492
  | type | string | string | string |
493
- | details | <ul><li>min: 6 tokens</li><li>mean: 13.69 tokens</li><li>max: 45 tokens</li></ul> | <ul><li>min: 6 tokens</li><li>mean: 90.17 tokens</li><li>max: 128 tokens</li></ul> | <ul><li>min: 6 tokens</li><li>mean: 89.67 tokens</li><li>max: 128 tokens</li></ul> |
494
  * Samples:
495
  | anchor | positive | negative |
496
  |:----------------------------------------------------------------------------------------------------------------------------------------|:-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|:------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
@@ -512,9 +513,9 @@ You can finetune this model on your own dataset.
512
  - `eval_strategy`: steps
513
  - `per_device_train_batch_size`: 128
514
  - `per_device_eval_batch_size`: 128
515
- - `learning_rate`: 4e-05
516
- - `weight_decay`: 0.01
517
- - `max_steps`: 500
518
  - `warmup_ratio`: 0.1
519
  - `fp16`: True
520
  - `dataloader_drop_last`: True
@@ -541,14 +542,14 @@ You can finetune this model on your own dataset.
541
  - `gradient_accumulation_steps`: 1
542
  - `eval_accumulation_steps`: None
543
  - `torch_empty_cache_steps`: None
544
- - `learning_rate`: 4e-05
545
- - `weight_decay`: 0.01
546
  - `adam_beta1`: 0.9
547
  - `adam_beta2`: 0.999
548
  - `adam_epsilon`: 1e-08
549
  - `max_grad_norm`: 1.0
550
  - `num_train_epochs`: 3.0
551
- - `max_steps`: 500
552
  - `lr_scheduler_type`: linear
553
  - `lr_scheduler_kwargs`: {}
554
  - `warmup_ratio`: 0.1
@@ -653,13 +654,14 @@ You can finetune this model on your own dataset.
653
  </details>
654
 
655
  ### Training Logs
656
- | Epoch | Step | Training Loss | Validation Loss | NanoMSMARCO_cosine_ndcg@10 | NanoNQ_cosine_ndcg@10 | NanoBEIR_mean_cosine_ndcg@10 |
657
- |:----------:|:-------:|:-------------:|:---------------:|:--------------------------:|:---------------------:|:----------------------------:|
658
- | 0 | 0 | - | 0.4148 | 0.6530 | 0.6552 | 0.6541 |
659
- | 0.2874 | 250 | 0.0742 | 0.0408 | 0.6103 | 0.6564 | 0.6333 |
660
- | **0.5747** | **500** | **0.0398** | **0.0376** | **0.6072** | **0.6915** | **0.6493** |
 
 
661
 
662
- * The bold row denotes the saved checkpoint.
663
 
664
  ### Framework Versions
665
  - Python: 3.10.18
 
7
  - generated_from_trainer
8
  - dataset_size:111470
9
  - loss:MultipleNegativesRankingLoss
10
+ base_model: thenlper/gte-small
11
  widget:
12
  - source_sentence: when was the first elephant brought to america
13
  sentences:
 
132
  - cosine_mrr@10
133
  - cosine_map@100
134
  model-index:
135
+ - name: SentenceTransformer based on thenlper/gte-small
136
  results:
137
  - task:
138
  type: information-retrieval
 
142
  type: NanoMSMARCO
143
  metrics:
144
  - type: cosine_accuracy@1
145
+ value: 0.34
146
  name: Cosine Accuracy@1
147
  - type: cosine_accuracy@3
148
+ value: 0.56
149
  name: Cosine Accuracy@3
150
  - type: cosine_accuracy@5
151
+ value: 0.64
152
  name: Cosine Accuracy@5
153
  - type: cosine_accuracy@10
154
+ value: 0.76
155
  name: Cosine Accuracy@10
156
  - type: cosine_precision@1
157
+ value: 0.34
158
  name: Cosine Precision@1
159
  - type: cosine_precision@3
160
+ value: 0.18666666666666668
161
  name: Cosine Precision@3
162
  - type: cosine_precision@5
163
+ value: 0.128
164
  name: Cosine Precision@5
165
  - type: cosine_precision@10
166
+ value: 0.07600000000000001
167
  name: Cosine Precision@10
168
  - type: cosine_recall@1
169
+ value: 0.34
170
  name: Cosine Recall@1
171
  - type: cosine_recall@3
172
+ value: 0.56
173
  name: Cosine Recall@3
174
  - type: cosine_recall@5
175
+ value: 0.64
176
  name: Cosine Recall@5
177
  - type: cosine_recall@10
178
+ value: 0.76
179
  name: Cosine Recall@10
180
  - type: cosine_ndcg@10
181
+ value: 0.5416219337167224
182
  name: Cosine Ndcg@10
183
  - type: cosine_mrr@10
184
+ value: 0.47319047619047616
185
  name: Cosine Mrr@10
186
  - type: cosine_map@100
187
+ value: 0.4857841065799604
188
  name: Cosine Map@100
189
  - task:
190
  type: information-retrieval
 
194
  type: NanoNQ
195
  metrics:
196
  - type: cosine_accuracy@1
197
+ value: 0.54
198
  name: Cosine Accuracy@1
199
  - type: cosine_accuracy@3
200
+ value: 0.7
201
  name: Cosine Accuracy@3
202
  - type: cosine_accuracy@5
203
+ value: 0.76
204
  name: Cosine Accuracy@5
205
  - type: cosine_accuracy@10
206
+ value: 0.8
207
  name: Cosine Accuracy@10
208
  - type: cosine_precision@1
209
+ value: 0.54
210
  name: Cosine Precision@1
211
  - type: cosine_precision@3
212
+ value: 0.24
213
  name: Cosine Precision@3
214
  - type: cosine_precision@5
215
+ value: 0.15600000000000003
216
  name: Cosine Precision@5
217
  - type: cosine_precision@10
218
+ value: 0.086
219
  name: Cosine Precision@10
220
  - type: cosine_recall@1
221
+ value: 0.52
222
  name: Cosine Recall@1
223
  - type: cosine_recall@3
224
+ value: 0.66
225
  name: Cosine Recall@3
226
  - type: cosine_recall@5
227
+ value: 0.71
228
  name: Cosine Recall@5
229
  - type: cosine_recall@10
230
+ value: 0.77
231
  name: Cosine Recall@10
232
  - type: cosine_ndcg@10
233
+ value: 0.6525146735767775
234
  name: Cosine Ndcg@10
235
  - type: cosine_mrr@10
236
+ value: 0.6275
237
  name: Cosine Mrr@10
238
  - type: cosine_map@100
239
+ value: 0.6140321846592789
240
  name: Cosine Map@100
241
  - task:
242
  type: nano-beir
 
246
  type: NanoBEIR_mean
247
  metrics:
248
  - type: cosine_accuracy@1
249
+ value: 0.44000000000000006
250
  name: Cosine Accuracy@1
251
  - type: cosine_accuracy@3
252
+ value: 0.63
253
  name: Cosine Accuracy@3
254
  - type: cosine_accuracy@5
255
+ value: 0.7
256
  name: Cosine Accuracy@5
257
  - type: cosine_accuracy@10
258
+ value: 0.78
259
  name: Cosine Accuracy@10
260
  - type: cosine_precision@1
261
+ value: 0.44000000000000006
262
  name: Cosine Precision@1
263
  - type: cosine_precision@3
264
+ value: 0.21333333333333332
265
  name: Cosine Precision@3
266
  - type: cosine_precision@5
267
+ value: 0.14200000000000002
268
  name: Cosine Precision@5
269
  - type: cosine_precision@10
270
+ value: 0.081
271
  name: Cosine Precision@10
272
  - type: cosine_recall@1
273
+ value: 0.43000000000000005
274
  name: Cosine Recall@1
275
  - type: cosine_recall@3
276
+ value: 0.6100000000000001
277
  name: Cosine Recall@3
278
  - type: cosine_recall@5
279
+ value: 0.675
280
  name: Cosine Recall@5
281
  - type: cosine_recall@10
282
+ value: 0.765
283
  name: Cosine Recall@10
284
  - type: cosine_ndcg@10
285
+ value: 0.5970683036467499
286
  name: Cosine Ndcg@10
287
  - type: cosine_mrr@10
288
+ value: 0.550345238095238
289
  name: Cosine Mrr@10
290
  - type: cosine_map@100
291
+ value: 0.5499081456196196
292
  name: Cosine Map@100
293
  ---
294
 
295
+ # SentenceTransformer based on thenlper/gte-small
296
 
297
+ This is a [sentence-transformers](https://www.SBERT.net) model finetuned from [thenlper/gte-small](https://huggingface.co/thenlper/gte-small). It maps sentences & paragraphs to a 384-dimensional dense vector space and can be used for semantic textual similarity, semantic search, paraphrase mining, text classification, clustering, and more.
298
 
299
  ## Model Details
300
 
301
  ### Model Description
302
  - **Model Type:** Sentence Transformer
303
+ - **Base model:** [thenlper/gte-small](https://huggingface.co/thenlper/gte-small) <!-- at revision 17e1f347d17fe144873b1201da91788898c639cd -->
304
  - **Maximum Sequence Length:** 128 tokens
305
+ - **Output Dimensionality:** 384 dimensions
306
  - **Similarity Function:** Cosine Similarity
307
  <!-- - **Training Dataset:** Unknown -->
308
  <!-- - **Language:** Unknown -->
 
318
 
319
  ```
320
  SentenceTransformer(
321
+ (0): Transformer({'max_seq_length': 128, 'do_lower_case': False, 'architecture': 'BertModel'})
322
+ (1): Pooling({'word_embedding_dimension': 384, 'pooling_mode_cls_token': False, 'pooling_mode_mean_tokens': True, 'pooling_mode_max_tokens': False, 'pooling_mode_mean_sqrt_len_tokens': False, 'pooling_mode_weightedmean_tokens': False, 'pooling_mode_lasttoken': False, 'include_prompt': True})
323
+ (2): Normalize()
324
  )
325
  ```
326
 
 
348
  ]
349
  embeddings = model.encode(sentences)
350
  print(embeddings.shape)
351
+ # [3, 384]
352
 
353
  # Get the similarity scores for the embeddings
354
  similarities = model.similarity(embeddings, embeddings)
355
  print(similarities)
356
+ # tensor([[1.0000, 1.0000, 0.8522],
357
+ # [1.0000, 1.0000, 0.8522],
358
+ # [0.8522, 0.8522, 1.0000]])
359
  ```
360
 
361
  <!--
 
393
 
394
  | Metric | NanoMSMARCO | NanoNQ |
395
  |:--------------------|:------------|:-----------|
396
+ | cosine_accuracy@1 | 0.34 | 0.54 |
397
+ | cosine_accuracy@3 | 0.56 | 0.7 |
398
+ | cosine_accuracy@5 | 0.64 | 0.76 |
399
+ | cosine_accuracy@10 | 0.76 | 0.8 |
400
+ | cosine_precision@1 | 0.34 | 0.54 |
401
+ | cosine_precision@3 | 0.1867 | 0.24 |
402
+ | cosine_precision@5 | 0.128 | 0.156 |
403
+ | cosine_precision@10 | 0.076 | 0.086 |
404
+ | cosine_recall@1 | 0.34 | 0.52 |
405
+ | cosine_recall@3 | 0.56 | 0.66 |
406
+ | cosine_recall@5 | 0.64 | 0.71 |
407
+ | cosine_recall@10 | 0.76 | 0.77 |
408
+ | **cosine_ndcg@10** | **0.5416** | **0.6525** |
409
+ | cosine_mrr@10 | 0.4732 | 0.6275 |
410
+ | cosine_map@100 | 0.4858 | 0.614 |
411
 
412
  #### Nano BEIR
413
 
 
425
 
426
  | Metric | Value |
427
  |:--------------------|:-----------|
428
+ | cosine_accuracy@1 | 0.44 |
429
+ | cosine_accuracy@3 | 0.63 |
430
+ | cosine_accuracy@5 | 0.7 |
431
+ | cosine_accuracy@10 | 0.78 |
432
+ | cosine_precision@1 | 0.44 |
433
+ | cosine_precision@3 | 0.2133 |
434
+ | cosine_precision@5 | 0.142 |
435
+ | cosine_precision@10 | 0.081 |
436
+ | cosine_recall@1 | 0.43 |
437
+ | cosine_recall@3 | 0.61 |
438
+ | cosine_recall@5 | 0.675 |
439
+ | cosine_recall@10 | 0.765 |
440
+ | **cosine_ndcg@10** | **0.5971** |
441
+ | cosine_mrr@10 | 0.5503 |
442
+ | cosine_map@100 | 0.5499 |
443
 
444
  <!--
445
  ## Bias, Risks and Limitations
 
465
  | | anchor | positive | negative |
466
  |:--------|:----------------------------------------------------------------------------------|:-----------------------------------------------------------------------------------|:-----------------------------------------------------------------------------------|
467
  | type | string | string | string |
468
+ | details | <ul><li>min: 6 tokens</li><li>mean: 13.22 tokens</li><li>max: 44 tokens</li></ul> | <ul><li>min: 6 tokens</li><li>mean: 90.67 tokens</li><li>max: 128 tokens</li></ul> | <ul><li>min: 7 tokens</li><li>mean: 89.65 tokens</li><li>max: 128 tokens</li></ul> |
469
  * Samples:
470
  | anchor | positive | negative |
471
  |:--------------------------------------------------------------------------------------------------------|:--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|:---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
 
491
  | | anchor | positive | negative |
492
  |:--------|:----------------------------------------------------------------------------------|:-----------------------------------------------------------------------------------|:-----------------------------------------------------------------------------------|
493
  | type | string | string | string |
494
+ | details | <ul><li>min: 6 tokens</li><li>mean: 13.03 tokens</li><li>max: 44 tokens</li></ul> | <ul><li>min: 6 tokens</li><li>mean: 89.36 tokens</li><li>max: 128 tokens</li></ul> | <ul><li>min: 4 tokens</li><li>mean: 88.87 tokens</li><li>max: 128 tokens</li></ul> |
495
  * Samples:
496
  | anchor | positive | negative |
497
  |:----------------------------------------------------------------------------------------------------------------------------------------|:-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|:------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
 
513
  - `eval_strategy`: steps
514
  - `per_device_train_batch_size`: 128
515
  - `per_device_eval_batch_size`: 128
516
+ - `learning_rate`: 8e-05
517
+ - `weight_decay`: 0.005
518
+ - `max_steps`: 1125
519
  - `warmup_ratio`: 0.1
520
  - `fp16`: True
521
  - `dataloader_drop_last`: True
 
542
  - `gradient_accumulation_steps`: 1
543
  - `eval_accumulation_steps`: None
544
  - `torch_empty_cache_steps`: None
545
+ - `learning_rate`: 8e-05
546
+ - `weight_decay`: 0.005
547
  - `adam_beta1`: 0.9
548
  - `adam_beta2`: 0.999
549
  - `adam_epsilon`: 1e-08
550
  - `max_grad_norm`: 1.0
551
  - `num_train_epochs`: 3.0
552
+ - `max_steps`: 1125
553
  - `lr_scheduler_type`: linear
554
  - `lr_scheduler_kwargs`: {}
555
  - `warmup_ratio`: 0.1
 
654
  </details>
655
 
656
  ### Training Logs
657
+ | Epoch | Step | Training Loss | Validation Loss | NanoMSMARCO_cosine_ndcg@10 | NanoNQ_cosine_ndcg@10 | NanoBEIR_mean_cosine_ndcg@10 |
658
+ |:------:|:----:|:-------------:|:---------------:|:--------------------------:|:---------------------:|:----------------------------:|
659
+ | 0 | 0 | - | 1.9462 | 0.6259 | 0.6583 | 0.6421 |
660
+ | 0.2874 | 250 | 0.3773 | 0.0669 | 0.5322 | 0.6570 | 0.5946 |
661
+ | 0.5747 | 500 | 0.0787 | 0.0564 | 0.5584 | 0.6307 | 0.5946 |
662
+ | 0.8621 | 750 | 0.0678 | 0.0495 | 0.5390 | 0.6447 | 0.5918 |
663
+ | 1.1494 | 1000 | 0.0517 | 0.0479 | 0.5416 | 0.6525 | 0.5971 |
664
 
 
665
 
666
  ### Framework Versions
667
  - Python: 3.10.18
config_sentence_transformers.json CHANGED
@@ -1,4 +1,5 @@
1
  {
 
2
  "__version__": {
3
  "sentence_transformers": "5.2.0",
4
  "transformers": "4.57.3",
@@ -9,6 +10,5 @@
9
  "document": ""
10
  },
11
  "default_prompt_name": null,
12
- "similarity_fn_name": "cosine",
13
- "model_type": "SentenceTransformer"
14
  }
 
1
  {
2
+ "model_type": "SentenceTransformer",
3
  "__version__": {
4
  "sentence_transformers": "5.2.0",
5
  "transformers": "4.57.3",
 
10
  "document": ""
11
  },
12
  "default_prompt_name": null,
13
+ "similarity_fn_name": "cosine"
 
14
  }
modules.json CHANGED
@@ -10,5 +10,11 @@
10
  "name": "1",
11
  "path": "1_Pooling",
12
  "type": "sentence_transformers.models.Pooling"
 
 
 
 
 
 
13
  }
14
  ]
 
10
  "name": "1",
11
  "path": "1_Pooling",
12
  "type": "sentence_transformers.models.Pooling"
13
+ },
14
+ {
15
+ "idx": 2,
16
+ "name": "2",
17
+ "path": "2_Normalize",
18
+ "type": "sentence_transformers.models.Normalize"
19
  }
20
  ]