radoslavralev committed on
Commit
dbf2dae
·
verified ·
1 Parent(s): 4e52484

Training in progress, step 12000

Browse files
1_Pooling/config.json CHANGED
@@ -1,7 +1,7 @@
1
  {
2
- "word_embedding_dimension": 384,
3
- "pooling_mode_cls_token": false,
4
- "pooling_mode_mean_tokens": true,
5
  "pooling_mode_max_tokens": false,
6
  "pooling_mode_mean_sqrt_len_tokens": false,
7
  "pooling_mode_weightedmean_tokens": false,
 
1
  {
2
+ "word_embedding_dimension": 512,
3
+ "pooling_mode_cls_token": true,
4
+ "pooling_mode_mean_tokens": false,
5
  "pooling_mode_max_tokens": false,
6
  "pooling_mode_mean_sqrt_len_tokens": false,
7
  "pooling_mode_weightedmean_tokens": false,
Information-Retrieval_evaluation_val_results.csv CHANGED
@@ -11,3 +11,4 @@ epoch,steps,cosine-Accuracy@1,cosine-Accuracy@3,cosine-Accuracy@5,cosine-Precisi
11
  -1,-1,0.83295,0.9071,0.9329,0.83295,0.83295,0.3023666666666666,0.9071,0.18658000000000005,0.9329,0.83295,0.872013749999996,0.8760916468253912,0.8970951855878305,0.8781372459990227
12
  -1,-1,0.83545,0.911175,0.9366,0.83545,0.83545,0.303725,0.911175,0.18732000000000001,0.9366,0.83545,0.8751591666666616,0.8790415476190412,0.8999318372974409,0.8810239994800558
13
  -1,-1,0.0,0.0,2.5e-05,0.0,0.0,0.0,0.0,5e-06,2.5e-05,0.0,5e-06,1.697420634920635e-05,4.0643645983386815e-05,5.219463554638405e-05
 
 
11
  -1,-1,0.83295,0.9071,0.9329,0.83295,0.83295,0.3023666666666666,0.9071,0.18658000000000005,0.9329,0.83295,0.872013749999996,0.8760916468253912,0.8970951855878305,0.8781372459990227
12
  -1,-1,0.83545,0.911175,0.9366,0.83545,0.83545,0.303725,0.911175,0.18732000000000001,0.9366,0.83545,0.8751591666666616,0.8790415476190412,0.8999318372974409,0.8810239994800558
13
  -1,-1,0.0,0.0,2.5e-05,0.0,0.0,0.0,0.0,5e-06,2.5e-05,0.0,5e-06,1.697420634920635e-05,4.0643645983386815e-05,5.219463554638405e-05
14
+ -1,-1,0.828275,0.90535,0.930675,0.828275,0.828275,0.3017833333333333,0.90535,0.186135,0.930675,0.828275,0.8685570833333288,0.8726829662698361,0.8940991092644636,0.8748315667834753
README.md CHANGED
@@ -5,123 +5,51 @@ tags:
5
  - feature-extraction
6
  - dense
7
  - generated_from_trainer
8
- - dataset_size:713743
9
  - loss:MultipleNegativesRankingLoss
10
- base_model: sentence-transformers/all-MiniLM-L6-v2
11
  widget:
12
- - source_sentence: 'Abraham Lincoln: Why is the Gettysburg Address so memorable?'
13
  sentences:
14
- - 'Abraham Lincoln: Why is the Gettysburg Address so memorable?'
15
- - What does the Gettysburg Address really mean?
16
- - What is eatalo.com?
17
- - source_sentence: Has the influence of Ancient Carthage in science, math, and society
18
- been underestimated?
19
  sentences:
20
- - How does one earn money online without an investment from home?
21
- - Has the influence of Ancient Carthage in science, math, and society been underestimated?
22
- - Has the influence of the Ancient Etruscans in science and math been underestimated?
23
- - source_sentence: Is there any app that shares charging to others like share it how
24
- we transfer files?
25
  sentences:
26
- - How do you think of Chinese claims that the present Private Arbitration is illegal,
27
- its verdict violates the UNCLOS and is illegal?
28
- - Is there any app that shares charging to others like share it how we transfer
29
- files?
30
- - Are there any platforms that provides end-to-end encryption for file transfer/
31
- sharing?
32
- - source_sentence: Why AAP’s MLA Dinesh Mohaniya has been arrested?
33
  sentences:
34
- - What are your views on the latest sex scandal by AAP MLA Sandeep Kumar?
35
- - What is a dc current? What are some examples?
36
- - Why AAP’s MLA Dinesh Mohaniya has been arrested?
37
- - source_sentence: What is the difference between economic growth and economic development?
38
  sentences:
39
- - How cold can the Gobi Desert get, and how do its average temperatures compare
40
- to the ones in the Simpson Desert?
41
- - the difference between economic growth and economic development is What?
42
- - What is the difference between economic growth and economic development?
43
  pipeline_tag: sentence-similarity
44
  library_name: sentence-transformers
45
- metrics:
46
- - cosine_accuracy@1
47
- - cosine_accuracy@3
48
- - cosine_accuracy@5
49
- - cosine_precision@1
50
- - cosine_precision@3
51
- - cosine_precision@5
52
- - cosine_recall@1
53
- - cosine_recall@3
54
- - cosine_recall@5
55
- - cosine_ndcg@10
56
- - cosine_mrr@1
57
- - cosine_mrr@5
58
- - cosine_mrr@10
59
- - cosine_map@100
60
- model-index:
61
- - name: SentenceTransformer based on sentence-transformers/all-MiniLM-L6-v2
62
- results:
63
- - task:
64
- type: information-retrieval
65
- name: Information Retrieval
66
- dataset:
67
- name: val
68
- type: val
69
- metrics:
70
- - type: cosine_accuracy@1
71
- value: 0.828275
72
- name: Cosine Accuracy@1
73
- - type: cosine_accuracy@3
74
- value: 0.90535
75
- name: Cosine Accuracy@3
76
- - type: cosine_accuracy@5
77
- value: 0.930675
78
- name: Cosine Accuracy@5
79
- - type: cosine_precision@1
80
- value: 0.828275
81
- name: Cosine Precision@1
82
- - type: cosine_precision@3
83
- value: 0.3017833333333333
84
- name: Cosine Precision@3
85
- - type: cosine_precision@5
86
- value: 0.186135
87
- name: Cosine Precision@5
88
- - type: cosine_recall@1
89
- value: 0.828275
90
- name: Cosine Recall@1
91
- - type: cosine_recall@3
92
- value: 0.90535
93
- name: Cosine Recall@3
94
- - type: cosine_recall@5
95
- value: 0.930675
96
- name: Cosine Recall@5
97
- - type: cosine_ndcg@10
98
- value: 0.8940991092644636
99
- name: Cosine Ndcg@10
100
- - type: cosine_mrr@1
101
- value: 0.828275
102
- name: Cosine Mrr@1
103
- - type: cosine_mrr@5
104
- value: 0.8685570833333288
105
- name: Cosine Mrr@5
106
- - type: cosine_mrr@10
107
- value: 0.8726829662698361
108
- name: Cosine Mrr@10
109
- - type: cosine_map@100
110
- value: 0.8748315667834753
111
- name: Cosine Map@100
112
  ---
113
 
114
- # SentenceTransformer based on sentence-transformers/all-MiniLM-L6-v2
115
 
116
- This is a [sentence-transformers](https://www.SBERT.net) model finetuned from [sentence-transformers/all-MiniLM-L6-v2](https://huggingface.co/sentence-transformers/all-MiniLM-L6-v2). It maps sentences & paragraphs to a 384-dimensional dense vector space and can be used for semantic textual similarity, semantic search, paraphrase mining, text classification, clustering, and more.
117
 
118
  ## Model Details
119
 
120
  ### Model Description
121
  - **Model Type:** Sentence Transformer
122
- - **Base model:** [sentence-transformers/all-MiniLM-L6-v2](https://huggingface.co/sentence-transformers/all-MiniLM-L6-v2) <!-- at revision c9745ed1d9f207416be6d2e6f8de32d1f16199bf -->
123
  - **Maximum Sequence Length:** 128 tokens
124
- - **Output Dimensionality:** 384 dimensions
125
  - **Similarity Function:** Cosine Similarity
126
  <!-- - **Training Dataset:** Unknown -->
127
  <!-- - **Language:** Unknown -->
@@ -138,8 +66,7 @@ This is a [sentence-transformers](https://www.SBERT.net) model finetuned from [s
138
  ```
139
  SentenceTransformer(
140
  (0): Transformer({'max_seq_length': 128, 'do_lower_case': False, 'architecture': 'BertModel'})
141
- (1): Pooling({'word_embedding_dimension': 384, 'pooling_mode_cls_token': False, 'pooling_mode_mean_tokens': True, 'pooling_mode_max_tokens': False, 'pooling_mode_mean_sqrt_len_tokens': False, 'pooling_mode_weightedmean_tokens': False, 'pooling_mode_lasttoken': False, 'include_prompt': True})
142
- (2): Normalize()
143
  )
144
  ```
145
 
@@ -158,23 +85,23 @@ Then you can load this model and run inference.
158
  from sentence_transformers import SentenceTransformer
159
 
160
  # Download from the 🤗 Hub
161
- model = SentenceTransformer("redis/model-b-structured")
162
  # Run inference
163
  sentences = [
164
- 'What is the difference between economic growth and economic development?',
165
- 'What is the difference between economic growth and economic development?',
166
- 'the difference between economic growth and economic development is What?',
167
  ]
168
  embeddings = model.encode(sentences)
169
  print(embeddings.shape)
170
- # [3, 384]
171
 
172
  # Get the similarity scores for the embeddings
173
  similarities = model.similarity(embeddings, embeddings)
174
  print(similarities)
175
- # tensor([[ 0.9999, 0.9999, -0.0738],
176
- # [ 0.9999, 0.9999, -0.0738],
177
- # [-0.0738, -0.0738, 1.0000]])
178
  ```
179
 
180
  <!--
@@ -201,32 +128,6 @@ You can finetune this model on your own dataset.
201
  *List how the model may foreseeably be misused and address what users ought not to do with the model.*
202
  -->
203
 
204
- ## Evaluation
205
-
206
- ### Metrics
207
-
208
- #### Information Retrieval
209
-
210
- * Dataset: `val`
211
- * Evaluated with [<code>InformationRetrievalEvaluator</code>](https://sbert.net/docs/package_reference/sentence_transformer/evaluation.html#sentence_transformers.evaluation.InformationRetrievalEvaluator)
212
-
213
- | Metric | Value |
214
- |:-------------------|:-----------|
215
- | cosine_accuracy@1 | 0.8283 |
216
- | cosine_accuracy@3 | 0.9053 |
217
- | cosine_accuracy@5 | 0.9307 |
218
- | cosine_precision@1 | 0.8283 |
219
- | cosine_precision@3 | 0.3018 |
220
- | cosine_precision@5 | 0.1861 |
221
- | cosine_recall@1 | 0.8283 |
222
- | cosine_recall@3 | 0.9053 |
223
- | cosine_recall@5 | 0.9307 |
224
- | **cosine_ndcg@10** | **0.8941** |
225
- | cosine_mrr@1 | 0.8283 |
226
- | cosine_mrr@5 | 0.8686 |
227
- | cosine_mrr@10 | 0.8727 |
228
- | cosine_map@100 | 0.8748 |
229
-
230
  <!--
231
  ## Bias, Risks and Limitations
232
 
@@ -245,49 +146,23 @@ You can finetune this model on your own dataset.
245
 
246
  #### Unnamed Dataset
247
 
248
- * Size: 713,743 training samples
249
- * Columns: <code>anchor</code>, <code>positive</code>, and <code>negative</code>
250
- * Approximate statistics based on the first 1000 samples:
251
- | | anchor | positive | negative |
252
- |:--------|:----------------------------------------------------------------------------------|:----------------------------------------------------------------------------------|:----------------------------------------------------------------------------------|
253
- | type | string | string | string |
254
- | details | <ul><li>min: 6 tokens</li><li>mean: 16.07 tokens</li><li>max: 53 tokens</li></ul> | <ul><li>min: 6 tokens</li><li>mean: 16.03 tokens</li><li>max: 53 tokens</li></ul> | <ul><li>min: 6 tokens</li><li>mean: 16.81 tokens</li><li>max: 58 tokens</li></ul> |
255
- * Samples:
256
- | anchor | positive | negative |
257
- |:-------------------------------------------------------------------------------|:--------------------------------------------------------------------------------------|:------------------------------------------------------------------------------------------------------------------|
258
- | <code>Which one is better Linux OS? Ubuntu or Mint?</code> | <code>Why do you use Linux Mint?</code> | <code>Which one is not better Linux OS ? Ubuntu or Mint ?</code> |
259
- | <code>What is flow?</code> | <code>What is flow?</code> | <code>What are flow lines?</code> |
260
- | <code>How is Trump planning to get Mexico to pay for his supposed wall?</code> | <code>How is it possible for Donald Trump to force Mexico to pay for the wall?</code> | <code>Why do we connect the positive terminal before the negative terminal to ground in a vehicle battery?</code> |
261
- * Loss: [<code>MultipleNegativesRankingLoss</code>](https://sbert.net/docs/package_reference/sentence_transformer/losses.html#multiplenegativesrankingloss) with these parameters:
262
- ```json
263
- {
264
- "scale": 7.0,
265
- "similarity_fct": "cos_sim",
266
- "gather_across_devices": false
267
- }
268
- ```
269
-
270
- ### Evaluation Dataset
271
-
272
- #### Unnamed Dataset
273
-
274
- * Size: 40,000 evaluation samples
275
- * Columns: <code>anchor</code>, <code>positive</code>, and <code>negative</code>
276
  * Approximate statistics based on the first 1000 samples:
277
- | | anchor | positive | negative |
278
  |:--------|:----------------------------------------------------------------------------------|:----------------------------------------------------------------------------------|:----------------------------------------------------------------------------------|
279
  | type | string | string | string |
280
- | details | <ul><li>min: 6 tokens</li><li>mean: 15.52 tokens</li><li>max: 74 tokens</li></ul> | <ul><li>min: 6 tokens</li><li>mean: 15.51 tokens</li><li>max: 74 tokens</li></ul> | <ul><li>min: 6 tokens</li><li>mean: 16.79 tokens</li><li>max: 69 tokens</li></ul> |
281
  * Samples:
282
- | anchor | positive | negative |
283
- |:-------------------------------------------------------------------------------------------------------------------------------------------------------------|:-------------------------------------------------------------------------------------------------------------------------------------------------------------|:--------------------------------------------------------------------------------------------------------------------------------------------------------------------|
284
- | <code>Why are all my questions on Quora marked needing improvement?</code> | <code>Why are all my questions immediately being marked as needing improvement?</code> | <code>For a post-graduate student in IIT, is it allowed to take an external scholarship as a top-up to his/her MHRD assistantship?</code> |
285
- | <code>Can blue butter fly needle with vaccum tube be reused? Is it HIV risk? . Heard the needle is too small to be reused . Had blood draw at clinic?</code> | <code>Can blue butter fly needle with vaccum tube be reused? Is it HIV risk? . Heard the needle is too small to be reused . Had blood draw at clinic?</code> | <code>Can blue butter fly needle with vaccum tube be reused not ? Is it HIV risk ? . Heard the needle is too small to be reused . Had blood draw at clinic ?</code> |
286
- | <code>Why do people still believe the world is flat?</code> | <code>Why are there still people who believe the world is flat?</code> | <code>I'm not able to buy Udemy course .it is not accepting mine and my friends debit card.my card can be used for Flipkart .how to purchase now?</code> |
287
  * Loss: [<code>MultipleNegativesRankingLoss</code>](https://sbert.net/docs/package_reference/sentence_transformer/losses.html#multiplenegativesrankingloss) with these parameters:
288
  ```json
289
  {
290
- "scale": 7.0,
291
  "similarity_fct": "cos_sim",
292
  "gather_across_devices": false
293
  }
@@ -296,49 +171,36 @@ You can finetune this model on your own dataset.
296
  ### Training Hyperparameters
297
  #### Non-Default Hyperparameters
298
 
299
- - `eval_strategy`: steps
300
- - `per_device_train_batch_size`: 256
301
- - `per_device_eval_batch_size`: 256
302
- - `learning_rate`: 2e-05
303
- - `weight_decay`: 0.0001
304
- - `max_steps`: 12000
305
- - `warmup_ratio`: 0.1
306
  - `fp16`: True
307
- - `dataloader_drop_last`: True
308
- - `dataloader_num_workers`: 1
309
- - `dataloader_prefetch_factor`: 1
310
- - `load_best_model_at_end`: True
311
- - `optim`: adamw_torch
312
- - `ddp_find_unused_parameters`: False
313
- - `push_to_hub`: True
314
- - `hub_model_id`: redis/model-b-structured
315
- - `eval_on_start`: True
316
 
317
  #### All Hyperparameters
318
  <details><summary>Click to expand</summary>
319
 
320
  - `overwrite_output_dir`: False
321
  - `do_predict`: False
322
- - `eval_strategy`: steps
323
  - `prediction_loss_only`: True
324
- - `per_device_train_batch_size`: 256
325
- - `per_device_eval_batch_size`: 256
326
  - `per_gpu_train_batch_size`: None
327
  - `per_gpu_eval_batch_size`: None
328
  - `gradient_accumulation_steps`: 1
329
  - `eval_accumulation_steps`: None
330
  - `torch_empty_cache_steps`: None
331
- - `learning_rate`: 2e-05
332
- - `weight_decay`: 0.0001
333
  - `adam_beta1`: 0.9
334
  - `adam_beta2`: 0.999
335
  - `adam_epsilon`: 1e-08
336
- - `max_grad_norm`: 1.0
337
- - `num_train_epochs`: 3.0
338
- - `max_steps`: 12000
339
  - `lr_scheduler_type`: linear
340
  - `lr_scheduler_kwargs`: {}
341
- - `warmup_ratio`: 0.1
342
  - `warmup_steps`: 0
343
  - `log_level`: passive
344
  - `log_level_replica`: warning
@@ -366,14 +228,14 @@ You can finetune this model on your own dataset.
366
  - `tpu_num_cores`: None
367
  - `tpu_metrics_debug`: False
368
  - `debug`: []
369
- - `dataloader_drop_last`: True
370
- - `dataloader_num_workers`: 1
371
- - `dataloader_prefetch_factor`: 1
372
  - `past_index`: -1
373
  - `disable_tqdm`: False
374
  - `remove_unused_columns`: True
375
  - `label_names`: None
376
- - `load_best_model_at_end`: True
377
  - `ignore_data_skip`: False
378
  - `fsdp`: []
379
  - `fsdp_min_num_params`: 0
@@ -383,23 +245,23 @@ You can finetune this model on your own dataset.
383
  - `parallelism_config`: None
384
  - `deepspeed`: None
385
  - `label_smoothing_factor`: 0.0
386
- - `optim`: adamw_torch
387
  - `optim_args`: None
388
  - `adafactor`: False
389
  - `group_by_length`: False
390
  - `length_column_name`: length
391
  - `project`: huggingface
392
  - `trackio_space_id`: trackio
393
- - `ddp_find_unused_parameters`: False
394
  - `ddp_bucket_cap_mb`: None
395
  - `ddp_broadcast_buffers`: False
396
  - `dataloader_pin_memory`: True
397
  - `dataloader_persistent_workers`: False
398
  - `skip_memory_metrics`: True
399
  - `use_legacy_prediction_loop`: False
400
- - `push_to_hub`: True
401
  - `resume_from_checkpoint`: None
402
- - `hub_model_id`: redis/model-b-structured
403
  - `hub_strategy`: every_save
404
  - `hub_private_repo`: None
405
  - `hub_always_push`: False
@@ -426,71 +288,31 @@ You can finetune this model on your own dataset.
426
  - `neftune_noise_alpha`: None
427
  - `optim_target_modules`: None
428
  - `batch_eval_metrics`: False
429
- - `eval_on_start`: True
430
  - `use_liger_kernel`: False
431
  - `liger_kernel_config`: None
432
  - `eval_use_gather_object`: False
433
  - `average_tokens_across_devices`: True
434
  - `prompts`: None
435
  - `batch_sampler`: batch_sampler
436
- - `multi_dataset_batch_sampler`: proportional
437
  - `router_mapping`: {}
438
  - `learning_rate_mapping`: {}
439
 
440
  </details>
441
 
442
  ### Training Logs
443
- | Epoch | Step | Training Loss | Validation Loss | val_cosine_ndcg@10 |
444
- |:------:|:-----:|:-------------:|:---------------:|:------------------:|
445
- | 0 | 0 | - | 1.0340 | 0.8556 |
446
- | 0.0897 | 250 | 1.1083 | 0.7666 | 0.8800 |
447
- | 0.1793 | 500 | 0.9078 | 0.6773 | 0.8870 |
448
- | 0.2690 | 750 | 0.8464 | 0.6531 | 0.8879 |
449
- | 0.3587 | 1000 | 0.8142 | 0.6386 | 0.8886 |
450
- | 0.4484 | 1250 | 0.7882 | 0.6274 | 0.8891 |
451
- | 0.5380 | 1500 | 0.769 | 0.6149 | 0.8896 |
452
- | 0.6277 | 1750 | 0.7567 | 0.6090 | 0.8909 |
453
- | 0.7174 | 2000 | 0.7444 | 0.6039 | 0.8906 |
454
- | 0.8070 | 2250 | 0.736 | 0.5974 | 0.8911 |
455
- | 0.8967 | 2500 | 0.7283 | 0.5959 | 0.8909 |
456
- | 0.9864 | 2750 | 0.723 | 0.5911 | 0.8913 |
457
- | 1.0760 | 3000 | 0.7136 | 0.5871 | 0.8915 |
458
- | 1.1657 | 3250 | 0.7073 | 0.5838 | 0.8912 |
459
- | 1.2554 | 3500 | 0.7023 | 0.5825 | 0.8915 |
460
- | 1.3451 | 3750 | 0.6988 | 0.5794 | 0.8920 |
461
- | 1.4347 | 4000 | 0.6956 | 0.5782 | 0.8920 |
462
- | 1.5244 | 4250 | 0.692 | 0.5758 | 0.8925 |
463
- | 1.6141 | 4500 | 0.6867 | 0.5739 | 0.8925 |
464
- | 1.7037 | 4750 | 0.6848 | 0.5734 | 0.8923 |
465
- | 1.7934 | 5000 | 0.6828 | 0.5709 | 0.8926 |
466
- | 1.8831 | 5250 | 0.6816 | 0.5702 | 0.8925 |
467
- | 1.9727 | 5500 | 0.6778 | 0.5681 | 0.8928 |
468
- | 2.0624 | 5750 | 0.6731 | 0.5669 | 0.8930 |
469
- | 2.1521 | 6000 | 0.6704 | 0.5661 | 0.8931 |
470
- | 2.2418 | 6250 | 0.6699 | 0.5653 | 0.8931 |
471
- | 2.3314 | 6500 | 0.6679 | 0.5640 | 0.8932 |
472
- | 2.4211 | 6750 | 0.6657 | 0.5627 | 0.8933 |
473
- | 2.5108 | 7000 | 0.6648 | 0.5624 | 0.8931 |
474
- | 2.6004 | 7250 | 0.6605 | 0.5608 | 0.8932 |
475
- | 2.6901 | 7500 | 0.6623 | 0.5609 | 0.8934 |
476
- | 2.7798 | 7750 | 0.6605 | 0.5592 | 0.8936 |
477
- | 2.8694 | 8000 | 0.6605 | 0.5586 | 0.8938 |
478
- | 2.9591 | 8250 | 0.6578 | 0.5576 | 0.8936 |
479
- | 3.0488 | 8500 | 0.6565 | 0.5572 | 0.8938 |
480
- | 3.1385 | 8750 | 0.6542 | 0.5566 | 0.8938 |
481
- | 3.2281 | 9000 | 0.6541 | 0.5556 | 0.8939 |
482
- | 3.3178 | 9250 | 0.6535 | 0.5555 | 0.8940 |
483
- | 3.4075 | 9500 | 0.653 | 0.5548 | 0.8941 |
484
- | 3.4971 | 9750 | 0.6531 | 0.5543 | 0.8941 |
485
- | 3.5868 | 10000 | 0.6498 | 0.5543 | 0.8940 |
486
- | 3.6765 | 10250 | 0.6491 | 0.5539 | 0.8940 |
487
- | 3.7661 | 10500 | 0.6492 | 0.5541 | 0.8940 |
488
- | 3.8558 | 10750 | 0.6504 | 0.5533 | 0.8940 |
489
- | 3.9455 | 11000 | 0.6505 | 0.5535 | 0.8943 |
490
- | 4.0352 | 11250 | 0.6489 | 0.5532 | 0.8942 |
491
- | 4.1248 | 11500 | 0.6459 | 0.5530 | 0.8943 |
492
- | 4.2145 | 11750 | 0.6469 | 0.5529 | 0.8941 |
493
- | 4.3042 | 12000 | 0.6483 | 0.5529 | 0.8941 |
494
 
495
 
496
  ### Framework Versions
 
5
  - feature-extraction
6
  - dense
7
  - generated_from_trainer
8
+ - dataset_size:100000
9
  - loss:MultipleNegativesRankingLoss
10
+ base_model: prajjwal1/bert-small
11
  widget:
12
+ - source_sentence: How do I calculate IQ?
13
  sentences:
14
+ - What is the easiest way to know my IQ?
15
+ - How do I calculate not IQ ?
16
+ - What are some creative and innovative business ideas with less investment in India?
17
+ - source_sentence: How can I learn martial arts in my home?
 
18
  sentences:
19
+ - How can I learn martial arts by myself?
20
+ - What are the advantages and disadvantages of investing in gold?
21
+ - Can people see that I have looked at their pictures on instagram if I am not following
22
+ them?
23
+ - source_sentence: When Enterprise picks you up do you have to take them back?
24
  sentences:
25
+ - Are there any software Training institute in Tuticorin?
26
+ - When Enterprise picks you up do you have to take them back?
27
+ - When Enterprise picks you up do them have to take youback?
28
+ - source_sentence: What are some non-capital goods?
 
 
 
29
  sentences:
30
+ - What are capital goods?
31
+ - How is the value of [math]\pi[/math] calculated?
32
+ - What are some non-capital goods?
33
+ - source_sentence: What is the QuickBooks technical support phone number in New York?
34
  sentences:
35
+ - What caused the Great Depression?
36
+ - Can I apply for PR in Canada?
37
+ - Which is the best QuickBooks Hosting Support Number in New York?
 
38
  pipeline_tag: sentence-similarity
39
  library_name: sentence-transformers
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
40
  ---
41
 
42
+ # SentenceTransformer based on prajjwal1/bert-small
43
 
44
+ This is a [sentence-transformers](https://www.SBERT.net) model finetuned from [prajjwal1/bert-small](https://huggingface.co/prajjwal1/bert-small). It maps sentences & paragraphs to a 512-dimensional dense vector space and can be used for semantic textual similarity, semantic search, paraphrase mining, text classification, clustering, and more.
45
 
46
  ## Model Details
47
 
48
  ### Model Description
49
  - **Model Type:** Sentence Transformer
50
+ - **Base model:** [prajjwal1/bert-small](https://huggingface.co/prajjwal1/bert-small) <!-- at revision 0ec5f86f27c1a77d704439db5e01c307ea11b9d4 -->
51
  - **Maximum Sequence Length:** 128 tokens
52
+ - **Output Dimensionality:** 512 dimensions
53
  - **Similarity Function:** Cosine Similarity
54
  <!-- - **Training Dataset:** Unknown -->
55
  <!-- - **Language:** Unknown -->
 
66
  ```
67
  SentenceTransformer(
68
  (0): Transformer({'max_seq_length': 128, 'do_lower_case': False, 'architecture': 'BertModel'})
69
+ (1): Pooling({'word_embedding_dimension': 512, 'pooling_mode_cls_token': True, 'pooling_mode_mean_tokens': False, 'pooling_mode_max_tokens': False, 'pooling_mode_mean_sqrt_len_tokens': False, 'pooling_mode_weightedmean_tokens': False, 'pooling_mode_lasttoken': False, 'include_prompt': True})
 
70
  )
71
  ```
72
 
 
85
  from sentence_transformers import SentenceTransformer
86
 
87
  # Download from the 🤗 Hub
88
+ model = SentenceTransformer("sentence_transformers_model_id")
89
  # Run inference
90
  sentences = [
91
+ 'What is the QuickBooks technical support phone number in New York?',
92
+ 'Which is the best QuickBooks Hosting Support Number in New York?',
93
+ 'Can I apply for PR in Canada?',
94
  ]
95
  embeddings = model.encode(sentences)
96
  print(embeddings.shape)
97
+ # [3, 512]
98
 
99
  # Get the similarity scores for the embeddings
100
  similarities = model.similarity(embeddings, embeddings)
101
  print(similarities)
102
+ # tensor([[1.0000, 0.8563, 0.0594],
103
+ # [0.8563, 1.0000, 0.1245],
104
+ # [0.0594, 0.1245, 1.0000]])
105
  ```
106
 
107
  <!--
 
128
  *List how the model may foreseeably be misused and address what users ought not to do with the model.*
129
  -->
130
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
131
  <!--
132
  ## Bias, Risks and Limitations
133
 
 
146
 
147
  #### Unnamed Dataset
148
 
149
+ * Size: 100,000 training samples
150
+ * Columns: <code>sentence_0</code>, <code>sentence_1</code>, and <code>sentence_2</code>
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
151
  * Approximate statistics based on the first 1000 samples:
152
+ | | sentence_0 | sentence_1 | sentence_2 |
153
  |:--------|:----------------------------------------------------------------------------------|:----------------------------------------------------------------------------------|:----------------------------------------------------------------------------------|
154
  | type | string | string | string |
155
+ | details | <ul><li>min: 6 tokens</li><li>mean: 15.79 tokens</li><li>max: 66 tokens</li></ul> | <ul><li>min: 6 tokens</li><li>mean: 15.68 tokens</li><li>max: 66 tokens</li></ul> | <ul><li>min: 7 tokens</li><li>mean: 16.37 tokens</li><li>max: 67 tokens</li></ul> |
156
  * Samples:
157
+ | sentence_0 | sentence_1 | sentence_2 |
158
+ |:-----------------------------------------------------------------|:-----------------------------------------------------------------|:----------------------------------------------------------------------------------|
159
+ | <code>Is masturbating bad for boys?</code> | <code>Is masturbating bad for boys?</code> | <code>How harmful or unhealthy is masturbation?</code> |
160
+ | <code>Does a train engine move in reverse?</code> | <code>Does a train engine move in reverse?</code> | <code>Time moves forward, not in reverse. Doesn't that make time a vector?</code> |
161
+ | <code>What is the most badass thing anyone has ever done?</code> | <code>What is the most badass thing anyone has ever done?</code> | <code>anyone is the most badass thing Whathas ever done?</code> |
162
  * Loss: [<code>MultipleNegativesRankingLoss</code>](https://sbert.net/docs/package_reference/sentence_transformer/losses.html#multiplenegativesrankingloss) with these parameters:
163
  ```json
164
  {
165
+ "scale": 20.0,
166
  "similarity_fct": "cos_sim",
167
  "gather_across_devices": false
168
  }
 
171
  ### Training Hyperparameters
172
  #### Non-Default Hyperparameters
173
 
174
+ - `per_device_train_batch_size`: 64
175
+ - `per_device_eval_batch_size`: 64
 
 
 
 
 
176
  - `fp16`: True
177
+ - `multi_dataset_batch_sampler`: round_robin
 
 
 
 
 
 
 
 
178
 
179
  #### All Hyperparameters
180
  <details><summary>Click to expand</summary>
181
 
182
  - `overwrite_output_dir`: False
183
  - `do_predict`: False
184
+ - `eval_strategy`: no
185
  - `prediction_loss_only`: True
186
+ - `per_device_train_batch_size`: 64
187
+ - `per_device_eval_batch_size`: 64
188
  - `per_gpu_train_batch_size`: None
189
  - `per_gpu_eval_batch_size`: None
190
  - `gradient_accumulation_steps`: 1
191
  - `eval_accumulation_steps`: None
192
  - `torch_empty_cache_steps`: None
193
+ - `learning_rate`: 5e-05
194
+ - `weight_decay`: 0.0
195
  - `adam_beta1`: 0.9
196
  - `adam_beta2`: 0.999
197
  - `adam_epsilon`: 1e-08
198
+ - `max_grad_norm`: 1
199
+ - `num_train_epochs`: 3
200
+ - `max_steps`: -1
201
  - `lr_scheduler_type`: linear
202
  - `lr_scheduler_kwargs`: {}
203
+ - `warmup_ratio`: 0.0
204
  - `warmup_steps`: 0
205
  - `log_level`: passive
206
  - `log_level_replica`: warning
 
228
  - `tpu_num_cores`: None
229
  - `tpu_metrics_debug`: False
230
  - `debug`: []
231
+ - `dataloader_drop_last`: False
232
+ - `dataloader_num_workers`: 0
233
+ - `dataloader_prefetch_factor`: None
234
  - `past_index`: -1
235
  - `disable_tqdm`: False
236
  - `remove_unused_columns`: True
237
  - `label_names`: None
238
+ - `load_best_model_at_end`: False
239
  - `ignore_data_skip`: False
240
  - `fsdp`: []
241
  - `fsdp_min_num_params`: 0
 
245
  - `parallelism_config`: None
246
  - `deepspeed`: None
247
  - `label_smoothing_factor`: 0.0
248
+ - `optim`: adamw_torch_fused
249
  - `optim_args`: None
250
  - `adafactor`: False
251
  - `group_by_length`: False
252
  - `length_column_name`: length
253
  - `project`: huggingface
254
  - `trackio_space_id`: trackio
255
+ - `ddp_find_unused_parameters`: None
256
  - `ddp_bucket_cap_mb`: None
257
  - `ddp_broadcast_buffers`: False
258
  - `dataloader_pin_memory`: True
259
  - `dataloader_persistent_workers`: False
260
  - `skip_memory_metrics`: True
261
  - `use_legacy_prediction_loop`: False
262
+ - `push_to_hub`: False
263
  - `resume_from_checkpoint`: None
264
+ - `hub_model_id`: None
265
  - `hub_strategy`: every_save
266
  - `hub_private_repo`: None
267
  - `hub_always_push`: False
 
288
  - `neftune_noise_alpha`: None
289
  - `optim_target_modules`: None
290
  - `batch_eval_metrics`: False
291
+ - `eval_on_start`: False
292
  - `use_liger_kernel`: False
293
  - `liger_kernel_config`: None
294
  - `eval_use_gather_object`: False
295
  - `average_tokens_across_devices`: True
296
  - `prompts`: None
297
  - `batch_sampler`: batch_sampler
298
+ - `multi_dataset_batch_sampler`: round_robin
299
  - `router_mapping`: {}
300
  - `learning_rate_mapping`: {}
301
 
302
  </details>
303
 
304
  ### Training Logs
305
+ | Epoch | Step | Training Loss |
306
+ |:------:|:----:|:-------------:|
307
+ | 0.3199 | 500 | 0.4294 |
308
+ | 0.6398 | 1000 | 0.1268 |
309
+ | 0.9597 | 1500 | 0.1 |
310
+ | 1.2796 | 2000 | 0.0792 |
311
+ | 1.5995 | 2500 | 0.0706 |
312
+ | 1.9194 | 3000 | 0.0687 |
313
+ | 2.2393 | 3500 | 0.0584 |
314
+ | 2.5592 | 4000 | 0.057 |
315
+ | 2.8791 | 4500 | 0.0581 |
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
316
 
317
 
318
  ### Framework Versions
config.json CHANGED
@@ -15,7 +15,7 @@
15
  "max_position_embeddings": 512,
16
  "model_type": "bert",
17
  "num_attention_heads": 12,
18
- "num_hidden_layers": 6,
19
  "pad_token_id": 0,
20
  "position_embedding_type": "absolute",
21
  "transformers_version": "4.57.3",
 
15
  "max_position_embeddings": 512,
16
  "model_type": "bert",
17
  "num_attention_heads": 12,
18
+ "num_hidden_layers": 12,
19
  "pad_token_id": 0,
20
  "position_embedding_type": "absolute",
21
  "transformers_version": "4.57.3",
config_sentence_transformers.json CHANGED
@@ -1,10 +1,10 @@
1
  {
 
2
  "__version__": {
3
  "sentence_transformers": "5.2.0",
4
  "transformers": "4.57.3",
5
  "pytorch": "2.9.1+cu128"
6
  },
7
- "model_type": "SentenceTransformer",
8
  "prompts": {
9
  "query": "",
10
  "document": ""
 
1
  {
2
+ "model_type": "SentenceTransformer",
3
  "__version__": {
4
  "sentence_transformers": "5.2.0",
5
  "transformers": "4.57.3",
6
  "pytorch": "2.9.1+cu128"
7
  },
 
8
  "prompts": {
9
  "query": "",
10
  "document": ""
eval/Information-Retrieval_evaluation_val_results.csv CHANGED
@@ -728,3 +728,52 @@ epoch,steps,cosine-Accuracy@1,cosine-Accuracy@3,cosine-Accuracy@5,cosine-Precisi
728
  4.124820659971306,11500,0.828425,0.905575,0.9308,0.828425,0.828425,0.3018583333333333,0.905575,0.18616000000000002,0.9308,0.828425,0.8687266666666623,0.872845128968249,0.8942524642669559,0.8749859334121904
729
  4.214490674318508,11750,0.828225,0.905425,0.930775,0.828225,0.828225,0.30180833333333323,0.905425,0.18615500000000001,0.930775,0.828225,0.8685866666666623,0.8726996329365029,0.8941203987290073,0.8748458978394003
730
  4.30416068866571,12000,0.828275,0.90535,0.930675,0.828275,0.828275,0.3017833333333333,0.90535,0.186135,0.930675,0.828275,0.8685570833333288,0.8726829662698361,0.8940991092644636,0.8748315667834753
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
728
  4.124820659971306,11500,0.828425,0.905575,0.9308,0.828425,0.828425,0.3018583333333333,0.905575,0.18616000000000002,0.9308,0.828425,0.8687266666666623,0.872845128968249,0.8942524642669559,0.8749859334121904
729
  4.214490674318508,11750,0.828225,0.905425,0.930775,0.828225,0.828225,0.30180833333333323,0.905425,0.18615500000000001,0.930775,0.828225,0.8685866666666623,0.8726996329365029,0.8941203987290073,0.8748458978394003
730
  4.30416068866571,12000,0.828275,0.90535,0.930675,0.828275,0.828275,0.3017833333333333,0.90535,0.186135,0.930675,0.828275,0.8685570833333288,0.8726829662698361,0.8940991092644636,0.8748315667834753
731
+ 0,0,0.7559,0.888025,0.91845,0.7559,0.7559,0.2960083333333333,0.888025,0.18369,0.91845,0.7559,0.8240824999999926,0.8282971428571374,0.8581357018409329,0.8307263143356874
732
+ 0.0896700143472023,250,0.819175,0.902425,0.92695,0.819175,0.819175,0.3008083333333333,0.902425,0.18539000000000005,0.92695,0.819175,0.8621699999999956,0.8662105952380916,0.8882600876290311,0.8683435652760845
733
+ 0.1793400286944046,500,0.823275,0.902625,0.92775,0.823275,0.823275,0.300875,0.902625,0.18555000000000002,0.92775,0.823275,0.8646274999999961,0.8685581249999954,0.8900006285169156,0.8707261561255878
734
+ 0.26901004304160686,750,0.824675,0.903275,0.92795,0.824675,0.824675,0.3010916666666666,0.903275,0.18559,0.92795,0.824675,0.8656208333333302,0.8696415972222192,0.8909776731345715,0.871797475220645
735
+ 0.3586800573888092,1000,0.82635,0.903225,0.927975,0.82635,0.82635,0.3010749999999999,0.903225,0.18559500000000004,0.927975,0.82635,0.8665316666666631,0.870622232142853,0.8918134970790795,0.8727642431197778
736
+ 0.4483500717360115,1250,0.827475,0.903125,0.9286,0.827475,0.827475,0.3010416666666666,0.903125,0.18572000000000002,0.9286,0.827475,0.8673091666666635,0.8713684623015829,0.8924977650440687,0.8735219322199599
737
+ 0.5380200860832137,1500,0.828475,0.90395,0.9285,0.828475,0.828475,0.3013166666666666,0.90395,0.18570000000000003,0.9285,0.828475,0.8678674999999959,0.8719565079365029,0.8929608929558921,0.8740916048377414
738
+ 0.6276901004304161,1750,0.829725,0.904775,0.9299,0.829725,0.829725,0.30159166666666665,0.904775,0.18598000000000003,0.9299,0.829725,0.8691195833333287,0.873115138888884,0.8940316330971427,0.8752482008227935
739
+ 0.7173601147776184,2000,0.8295,0.9045,0.929375,0.8295,0.8295,0.30149999999999993,0.9045,0.18587500000000004,0.929375,0.8295,0.8688637499999952,0.8729051091269786,0.8938138789049028,0.8750573383427221
740
+ 0.8070301291248206,2250,0.82975,0.90475,0.9294,0.82975,0.82975,0.30158333333333326,0.90475,0.18588000000000005,0.9294,0.82975,0.869067499999995,0.873225714285709,0.8942358670703882,0.875332734003183
741
+ 0.896700143472023,2500,0.830375,0.90485,0.92945,0.830375,0.830375,0.30161666666666664,0.90485,0.18589000000000003,0.92945,0.830375,0.869444999999995,0.8736060714285677,0.8945279434723612,0.8757291659831755
742
+ 0.9863701578192252,2750,0.830675,0.905625,0.929875,0.830675,0.830675,0.301875,0.905625,0.18597500000000003,0.929875,0.830675,0.869842499999996,0.8739150793650747,0.8947006908527372,0.876079047343074
743
+ 1.0760401721664274,3000,0.8307,0.905475,0.92995,0.8307,0.8307,0.30182499999999995,0.905475,0.18599000000000002,0.92995,0.8307,0.8697658333333297,0.8738787996031709,0.8947700117112292,0.8760166655105746
744
+ 1.16571018651363,3250,0.8308,0.9054,0.930325,0.8308,0.8308,0.3017999999999999,0.9054,0.18606500000000006,0.930325,0.8308,0.8699204166666626,0.8740043551587267,0.8948902977145758,0.8761365044052841
745
+ 1.2553802008608321,3500,0.831425,0.906775,0.931125,0.831425,0.831425,0.3022583333333333,0.906775,0.18622500000000003,0.931125,0.831425,0.8707058333333282,0.8746948511904716,0.895450161583831,0.8768561646163358
746
+ 1.3450502152080344,3750,0.83105,0.906175,0.9307,0.83105,0.83105,0.3020583333333333,0.906175,0.18614000000000003,0.9307,0.83105,0.8702908333333287,0.8743667757936456,0.8952432862579343,0.8765234686703379
747
+ 1.4347202295552366,4000,0.831475,0.906175,0.9308,0.831475,0.831475,0.3020583333333333,0.906175,0.18616000000000005,0.9308,0.831475,0.8706504166666622,0.8747398115079325,0.8955347599343484,0.8769057791664149
748
+ 1.524390243902439,4250,0.831425,0.906675,0.93125,0.831425,0.831425,0.3022249999999999,0.906675,0.18625,0.93125,0.831425,0.8707308333333289,0.8748166666666614,0.8957446074038042,0.8769376705266312
749
+ 1.6140602582496413,4500,0.8317,0.906425,0.93165,0.8317,0.8317,0.30214166666666664,0.906425,0.18633000000000002,0.93165,0.8317,0.8709862499999952,0.8750757837301526,0.8960523501392612,0.8771549478641858
750
+ 1.7037302725968435,4750,0.832125,0.90685,0.9318,0.832125,0.832125,0.3022833333333333,0.90685,0.18636000000000003,0.9318,0.832125,0.8712933333333281,0.8753743253968194,0.8963036408678163,0.8774542496863047
751
+ 1.793400286944046,5000,0.8317,0.907025,0.93155,0.8317,0.8317,0.3023416666666666,0.907025,0.18631000000000003,0.93155,0.8317,0.8710554166666618,0.8751295535714231,0.896011793678657,0.8772652204005897
752
+ 1.8830703012912482,5250,0.8322,0.906875,0.931775,0.8322,0.8322,0.3022916666666666,0.906875,0.18635500000000005,0.931775,0.8322,0.8712783333333273,0.8753279563492,0.8961943891359282,0.8774395938926657
753
+ 1.9727403156384504,5500,0.8321,0.90765,0.932325,0.8321,0.8321,0.30254999999999993,0.90765,0.18646500000000002,0.932325,0.8321,0.8716016666666615,0.875647906746025,0.8966148741261073,0.8777108818425712
754
+ 2.062410329985653,5750,0.832575,0.90755,0.9323,0.832575,0.832575,0.3025166666666666,0.90755,0.18646000000000004,0.9323,0.832575,0.871835833333329,0.8758839682539643,0.8967451123627076,0.8779855384554521
755
+ 2.152080344332855,6000,0.832225,0.907525,0.93225,0.832225,0.832225,0.3025083333333333,0.907525,0.18645000000000003,0.93225,0.832225,0.8716729166666622,0.8757157242063452,0.8966237714324731,0.8778246940219202
756
+ 2.2417503586800573,6250,0.83185,0.90695,0.93235,0.83185,0.83185,0.3023166666666666,0.90695,0.18647000000000002,0.93235,0.83185,0.871458749999996,0.8754885714285667,0.8964538970320948,0.8775979458722035
757
+ 2.33142037302726,6500,0.832325,0.907575,0.932625,0.832325,0.832325,0.30252499999999993,0.907575,0.18652500000000002,0.932625,0.832325,0.8718462499999958,0.8758440476190427,0.8967327346196875,0.8779598654158232
758
+ 2.421090387374462,6750,0.8322,0.9074,0.932175,0.8322,0.8322,0.30246666666666666,0.9074,0.18643500000000002,0.932175,0.8322,0.8716187499999962,0.8756916765872981,0.8966067985981595,0.8778117908396318
759
+ 2.5107604017216643,7000,0.8328,0.907625,0.93265,0.8328,0.8328,0.30254166666666665,0.907625,0.18653000000000003,0.93265,0.8328,0.8720787499999959,0.8761525496031712,0.8970799391467997,0.8782319610093746
760
+ 2.6004304160688667,7250,0.832175,0.907,0.9324,0.832175,0.832175,0.3023333333333333,0.907,0.18648,0.9324,0.832175,0.8715787499999962,0.8756604761904724,0.8966551195744414,0.8777637140420839
761
+ 2.6901004304160687,7500,0.83255,0.90765,0.9326,0.83255,0.83255,0.30254999999999993,0.90765,0.18652000000000002,0.9326,0.83255,0.8719145833333296,0.876068343253964,0.8971222150834783,0.8781335338497196
762
+ 2.779770444763271,7750,0.8326,0.907925,0.93235,0.8326,0.8326,0.3026416666666666,0.907925,0.18647000000000002,0.93235,0.8326,0.8720045833333293,0.8761904464285671,0.8972294549416494,0.8782446754867779
763
+ 2.869440459110473,8000,0.832725,0.907575,0.93315,0.832725,0.832725,0.30252499999999993,0.907575,0.18663000000000002,0.93315,0.832725,0.8722179166666633,0.8763341765872976,0.8974425489686708,0.8783581076036066
764
+ 2.9591104734576756,8250,0.832475,0.908,0.932975,0.832475,0.832475,0.3026666666666666,0.908,0.186595,0.932975,0.832475,0.87208208333333,0.8761892757936474,0.8972488761308134,0.8782603464745469
765
+ 3.048780487804878,8500,0.8325,0.907825,0.932825,0.8325,0.8325,0.3026083333333333,0.907825,0.186565,0.932825,0.8325,0.8719837499999971,0.8761642361111072,0.8973454134317264,0.8781780180811777
766
+ 3.1384505021520805,8750,0.832725,0.90785,0.932975,0.832725,0.832725,0.3026166666666666,0.90785,0.18659500000000004,0.932975,0.832725,0.8721274999999963,0.8762948611111069,0.8974376134092725,0.8783154074368186
767
+ 3.2281205164992826,9000,0.832875,0.907625,0.932825,0.832875,0.832875,0.3025416666666666,0.907625,0.18656500000000004,0.932825,0.832875,0.8721095833333297,0.8762971130952343,0.8974008967080689,0.8783406503607297
768
+ 3.317790530846485,9250,0.832375,0.907625,0.932675,0.832375,0.832375,0.3025416666666666,0.907625,0.186535,0.932675,0.832375,0.8718604166666639,0.8760936904761871,0.897281523587852,0.8781287932574847
769
+ 3.407460545193687,9500,0.83285,0.90775,0.932775,0.83285,0.83285,0.30258333333333326,0.90775,0.18655500000000003,0.932775,0.83285,0.8722141666666643,0.8764065178571403,0.8974750892280026,0.8784478080264162
770
+ 3.4971305595408895,9750,0.832625,0.907725,0.933325,0.832625,0.832625,0.3025749999999999,0.907725,0.18666500000000003,0.933325,0.832625,0.8722487499999978,0.8763502480158697,0.8974593666198514,0.8783923692454558
771
+ 3.586800573888092,10000,0.832475,0.907475,0.9326,0.832475,0.832475,0.30249166666666666,0.907475,0.18652000000000002,0.9326,0.832475,0.8719595833333303,0.8761949503968224,0.8973473473887147,0.8782349589333184
772
+ 3.6764705882352944,10250,0.8328,0.90785,0.933,0.8328,0.8328,0.3026166666666666,0.90785,0.18660000000000002,0.933,0.8328,0.8722083333333308,0.8763484722222196,0.8973901317601245,0.8784186785896316
773
+ 3.7661406025824964,10500,0.832725,0.907875,0.93305,0.832725,0.832725,0.302625,0.907875,0.18661000000000003,0.93305,0.832725,0.8722287499999977,0.8763971230158705,0.8975008077039119,0.8784433975715353
774
+ 3.855810616929699,10750,0.83275,0.9078,0.933,0.83275,0.83275,0.3025999999999999,0.9078,0.18660000000000004,0.933,0.83275,0.8722195833333306,0.8763931349206315,0.8974924415654918,0.8784418374594553
775
+ 3.945480631276901,11000,0.8329,0.908125,0.933125,0.8329,0.8329,0.30270833333333325,0.908125,0.18662500000000004,0.933125,0.8329,0.8724029166666641,0.8765584821428541,0.8976216004039689,0.8786078728601929
776
+ 4.035150645624103,11250,0.832975,0.907825,0.933125,0.832975,0.832975,0.3026083333333333,0.907825,0.186625,0.933125,0.832975,0.8723212499999978,0.8764675793650762,0.8975238216213755,0.8785236899251642
777
+ 4.124820659971306,11500,0.832975,0.907825,0.9333,0.832975,0.832975,0.3026083333333333,0.907825,0.18666000000000005,0.9333,0.832975,0.8723804166666645,0.8765045734126956,0.8975652123999085,0.8785589645807509
778
+ 4.214490674318508,11750,0.83315,0.90785,0.9332,0.83315,0.83315,0.3026166666666666,0.90785,0.18664000000000003,0.9332,0.83315,0.8724679166666641,0.8766142063492031,0.897652921263943,0.878664477670976
779
+ 4.30416068866571,12000,0.833175,0.90785,0.933075,0.833175,0.833175,0.3026166666666666,0.90785,0.186615,0.933075,0.833175,0.8724479166666644,0.876612886904759,0.8976448899066025,0.8786690345206932
final_metrics.json CHANGED
@@ -1,16 +1,16 @@
1
  {
2
- "val_cosine_accuracy@1": 0.0,
3
- "val_cosine_accuracy@3": 0.0,
4
- "val_cosine_accuracy@5": 2.5e-05,
5
- "val_cosine_precision@1": 0.0,
6
- "val_cosine_precision@3": 0.0,
7
- "val_cosine_precision@5": 5e-06,
8
- "val_cosine_recall@1": 0.0,
9
- "val_cosine_recall@3": 0.0,
10
- "val_cosine_recall@5": 2.5e-05,
11
- "val_cosine_ndcg@10": 4.0643645983386815e-05,
12
- "val_cosine_mrr@1": 0.0,
13
- "val_cosine_mrr@5": 5e-06,
14
- "val_cosine_mrr@10": 1.697420634920635e-05,
15
- "val_cosine_map@100": 5.219463554638405e-05
16
  }
 
1
  {
2
+ "val_cosine_accuracy@1": 0.828275,
3
+ "val_cosine_accuracy@3": 0.90535,
4
+ "val_cosine_accuracy@5": 0.930675,
5
+ "val_cosine_precision@1": 0.828275,
6
+ "val_cosine_precision@3": 0.3017833333333333,
7
+ "val_cosine_precision@5": 0.186135,
8
+ "val_cosine_recall@1": 0.828275,
9
+ "val_cosine_recall@3": 0.90535,
10
+ "val_cosine_recall@5": 0.930675,
11
+ "val_cosine_ndcg@10": 0.8940991092644636,
12
+ "val_cosine_mrr@1": 0.828275,
13
+ "val_cosine_mrr@5": 0.8685570833333288,
14
+ "val_cosine_mrr@10": 0.8726829662698361,
15
+ "val_cosine_map@100": 0.8748315667834753
16
  }
model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c16a8a0c066ce6670147940dcb6ed642b624b948c9a93d349a3f2c82687151c3
3
- size 90864192
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a1047f90ead99257dfe6d228a4901cfc9a8961a56b175574ee550bdf183c5337
3
+ size 133462128
modules.json CHANGED
@@ -10,11 +10,5 @@
10
  "name": "1",
11
  "path": "1_Pooling",
12
  "type": "sentence_transformers.models.Pooling"
13
- },
14
- {
15
- "idx": 2,
16
- "name": "2",
17
- "path": "2_Normalize",
18
- "type": "sentence_transformers.models.Normalize"
19
  }
20
  ]
 
10
  "name": "1",
11
  "path": "1_Pooling",
12
  "type": "sentence_transformers.models.Pooling"
 
 
 
 
 
 
13
  }
14
  ]
tokenizer_config.json CHANGED
@@ -48,7 +48,7 @@
48
  "extra_special_tokens": {},
49
  "mask_token": "[MASK]",
50
  "max_length": 128,
51
- "model_max_length": 256,
52
  "never_split": null,
53
  "pad_to_multiple_of": null,
54
  "pad_token": "[PAD]",
 
48
  "extra_special_tokens": {},
49
  "mask_token": "[MASK]",
50
  "max_length": 128,
51
+ "model_max_length": 128,
52
  "never_split": null,
53
  "pad_to_multiple_of": null,
54
  "pad_token": "[PAD]",