radoslavralev commited on
Commit
d816dc3
·
verified ·
1 Parent(s): a4867c1

Training in progress, step 2000

Browse files
Information-Retrieval_evaluation_val_results.csv CHANGED
@@ -4,3 +4,4 @@ epoch,steps,cosine-Accuracy@1,cosine-Accuracy@3,cosine-Accuracy@5,cosine-Precisi
4
  -1,-1,0.8281,0.9026,0.93105,0.8281,0.8281,0.3008666666666666,0.9026,0.18621000000000004,0.93105,0.8281,0.8677437499999962,0.8721381249999942,0.8942437004811851,0.874246358340888
5
  -1,-1,0.82925,0.903025,0.931175,0.82925,0.82925,0.3010083333333333,0.903025,0.186235,0.931175,0.82925,0.8687345833333282,0.8731489384920591,0.8950131360828151,0.8752091976044037
6
  -1,-1,0.7614,0.82615,0.850775,0.7614,0.7614,0.2753833333333333,0.82615,0.170155,0.850775,0.7614,0.7960862499999959,0.8003843253968239,0.8201550154419872,0.8038332983359062
 
 
4
  -1,-1,0.8281,0.9026,0.93105,0.8281,0.8281,0.3008666666666666,0.9026,0.18621000000000004,0.93105,0.8281,0.8677437499999962,0.8721381249999942,0.8942437004811851,0.874246358340888
5
  -1,-1,0.82925,0.903025,0.931175,0.82925,0.82925,0.3010083333333333,0.903025,0.186235,0.931175,0.82925,0.8687345833333282,0.8731489384920591,0.8950131360828151,0.8752091976044037
6
  -1,-1,0.7614,0.82615,0.850775,0.7614,0.7614,0.2753833333333333,0.82615,0.170155,0.850775,0.7614,0.7960862499999959,0.8003843253968239,0.8201550154419872,0.8038332983359062
7
+ -1,-1,0.7966,0.87425,0.900575,0.7966,0.7966,0.2914166666666666,0.87425,0.180115,0.900575,0.7966,0.8372962499999956,0.8416481150793601,0.8637140791780538,0.8444611118975183
README.md CHANGED
@@ -5,110 +5,38 @@ tags:
5
  - feature-extraction
6
  - dense
7
  - generated_from_trainer
8
- - dataset_size:713743
9
  - loss:MultipleNegativesRankingLoss
10
  base_model: prajjwal1/bert-small
11
  widget:
12
- - source_sentence: 'Abraham Lincoln: Why is the Gettysburg Address so memorable?'
13
  sentences:
14
- - 'Abraham Lincoln: Why is the Gettysburg Address so memorable?'
15
- - What does the Gettysburg Address really mean?
16
- - What is eatalo.com?
17
- - source_sentence: Has the influence of Ancient Carthage in science, math, and society
18
- been underestimated?
19
  sentences:
20
- - How does one earn money online without an investment from home?
21
- - Has the influence of Ancient Carthage in science, math, and society been underestimated?
22
- - Has the influence of the Ancient Etruscans in science and math been underestimated?
23
- - source_sentence: Is there any app that shares charging to others like share it how
24
- we transfer files?
25
  sentences:
26
- - How do you think of Chinese claims that the present Private Arbitration is illegal,
27
- its verdict violates the UNCLOS and is illegal?
28
- - Is there any app that shares charging to others like share it how we transfer
29
- files?
30
- - Are there any platforms that provides end-to-end encryption for file transfer/
31
- sharing?
32
- - source_sentence: Why AAP’s MLA Dinesh Mohaniya has been arrested?
33
  sentences:
34
- - What are your views on the latest sex scandal by AAP MLA Sandeep Kumar?
35
- - What is a dc current? What are some examples?
36
- - Why AAP’s MLA Dinesh Mohaniya has been arrested?
37
- - source_sentence: What is the difference between economic growth and economic development?
38
  sentences:
39
- - How cold can the Gobi Desert get, and how do its average temperatures compare
40
- to the ones in the Simpson Desert?
41
- - the difference between economic growth and economic development is What?
42
- - What is the difference between economic growth and economic development?
43
  pipeline_tag: sentence-similarity
44
  library_name: sentence-transformers
45
- metrics:
46
- - cosine_accuracy@1
47
- - cosine_accuracy@3
48
- - cosine_accuracy@5
49
- - cosine_precision@1
50
- - cosine_precision@3
51
- - cosine_precision@5
52
- - cosine_recall@1
53
- - cosine_recall@3
54
- - cosine_recall@5
55
- - cosine_ndcg@10
56
- - cosine_mrr@1
57
- - cosine_mrr@5
58
- - cosine_mrr@10
59
- - cosine_map@100
60
- model-index:
61
- - name: SentenceTransformer based on prajjwal1/bert-small
62
- results:
63
- - task:
64
- type: information-retrieval
65
- name: Information Retrieval
66
- dataset:
67
- name: val
68
- type: val
69
- metrics:
70
- - type: cosine_accuracy@1
71
- value: 0.7966
72
- name: Cosine Accuracy@1
73
- - type: cosine_accuracy@3
74
- value: 0.87425
75
- name: Cosine Accuracy@3
76
- - type: cosine_accuracy@5
77
- value: 0.900575
78
- name: Cosine Accuracy@5
79
- - type: cosine_precision@1
80
- value: 0.7966
81
- name: Cosine Precision@1
82
- - type: cosine_precision@3
83
- value: 0.2914166666666666
84
- name: Cosine Precision@3
85
- - type: cosine_precision@5
86
- value: 0.180115
87
- name: Cosine Precision@5
88
- - type: cosine_recall@1
89
- value: 0.7966
90
- name: Cosine Recall@1
91
- - type: cosine_recall@3
92
- value: 0.87425
93
- name: Cosine Recall@3
94
- - type: cosine_recall@5
95
- value: 0.900575
96
- name: Cosine Recall@5
97
- - type: cosine_ndcg@10
98
- value: 0.8637140791780538
99
- name: Cosine Ndcg@10
100
- - type: cosine_mrr@1
101
- value: 0.7966
102
- name: Cosine Mrr@1
103
- - type: cosine_mrr@5
104
- value: 0.8372962499999956
105
- name: Cosine Mrr@5
106
- - type: cosine_mrr@10
107
- value: 0.8416481150793601
108
- name: Cosine Mrr@10
109
- - type: cosine_map@100
110
- value: 0.8444611118975183
111
- name: Cosine Map@100
112
  ---
113
 
114
  # SentenceTransformer based on prajjwal1/bert-small
@@ -157,12 +85,12 @@ Then you can load this model and run inference.
157
  from sentence_transformers import SentenceTransformer
158
 
159
  # Download from the 🤗 Hub
160
- model = SentenceTransformer("redis/model-b-structured")
161
  # Run inference
162
  sentences = [
163
- 'What is the difference between economic growth and economic development?',
164
- 'What is the difference between economic growth and economic development?',
165
- 'the difference between economic growth and economic development is What?',
166
  ]
167
  embeddings = model.encode(sentences)
168
  print(embeddings.shape)
@@ -171,9 +99,9 @@ print(embeddings.shape)
171
  # Get the similarity scores for the embeddings
172
  similarities = model.similarity(embeddings, embeddings)
173
  print(similarities)
174
- # tensor([[1.0000, 1.0000, 0.0234],
175
- # [1.0000, 1.0000, 0.0234],
176
- # [0.0234, 0.0234, 0.9999]])
177
  ```
178
 
179
  <!--
@@ -200,32 +128,6 @@ You can finetune this model on your own dataset.
200
  *List how the model may foreseeably be misused and address what users ought not to do with the model.*
201
  -->
202
 
203
- ## Evaluation
204
-
205
- ### Metrics
206
-
207
- #### Information Retrieval
208
-
209
- * Dataset: `val`
210
- * Evaluated with [<code>InformationRetrievalEvaluator</code>](https://sbert.net/docs/package_reference/sentence_transformer/evaluation.html#sentence_transformers.evaluation.InformationRetrievalEvaluator)
211
-
212
- | Metric | Value |
213
- |:-------------------|:-----------|
214
- | cosine_accuracy@1 | 0.7966 |
215
- | cosine_accuracy@3 | 0.8742 |
216
- | cosine_accuracy@5 | 0.9006 |
217
- | cosine_precision@1 | 0.7966 |
218
- | cosine_precision@3 | 0.2914 |
219
- | cosine_precision@5 | 0.1801 |
220
- | cosine_recall@1 | 0.7966 |
221
- | cosine_recall@3 | 0.8742 |
222
- | cosine_recall@5 | 0.9006 |
223
- | **cosine_ndcg@10** | **0.8637** |
224
- | cosine_mrr@1 | 0.7966 |
225
- | cosine_mrr@5 | 0.8373 |
226
- | cosine_mrr@10 | 0.8416 |
227
- | cosine_map@100 | 0.8445 |
228
-
229
  <!--
230
  ## Bias, Risks and Limitations
231
 
@@ -244,49 +146,23 @@ You can finetune this model on your own dataset.
244
 
245
  #### Unnamed Dataset
246
 
247
- * Size: 713,743 training samples
248
- * Columns: <code>anchor</code>, <code>positive</code>, and <code>negative</code>
249
- * Approximate statistics based on the first 1000 samples:
250
- | | anchor | positive | negative |
251
- |:--------|:----------------------------------------------------------------------------------|:----------------------------------------------------------------------------------|:----------------------------------------------------------------------------------|
252
- | type | string | string | string |
253
- | details | <ul><li>min: 6 tokens</li><li>mean: 16.07 tokens</li><li>max: 53 tokens</li></ul> | <ul><li>min: 6 tokens</li><li>mean: 16.03 tokens</li><li>max: 53 tokens</li></ul> | <ul><li>min: 6 tokens</li><li>mean: 16.81 tokens</li><li>max: 58 tokens</li></ul> |
254
- * Samples:
255
- | anchor | positive | negative |
256
- |:-------------------------------------------------------------------------------|:--------------------------------------------------------------------------------------|:------------------------------------------------------------------------------------------------------------------|
257
- | <code>Which one is better Linux OS? Ubuntu or Mint?</code> | <code>Why do you use Linux Mint?</code> | <code>Which one is not better Linux OS ? Ubuntu or Mint ?</code> |
258
- | <code>What is flow?</code> | <code>What is flow?</code> | <code>What are flow lines?</code> |
259
- | <code>How is Trump planning to get Mexico to pay for his supposed wall?</code> | <code>How is it possible for Donald Trump to force Mexico to pay for the wall?</code> | <code>Why do we connect the positive terminal before the negative terminal to ground in a vehicle battery?</code> |
260
- * Loss: [<code>MultipleNegativesRankingLoss</code>](https://sbert.net/docs/package_reference/sentence_transformer/losses.html#multiplenegativesrankingloss) with these parameters:
261
- ```json
262
- {
263
- "scale": 5.0,
264
- "similarity_fct": "cos_sim",
265
- "gather_across_devices": false
266
- }
267
- ```
268
-
269
- ### Evaluation Dataset
270
-
271
- #### Unnamed Dataset
272
-
273
- * Size: 40,000 evaluation samples
274
- * Columns: <code>anchor</code>, <code>positive</code>, and <code>negative</code>
275
  * Approximate statistics based on the first 1000 samples:
276
- | | anchor | positive | negative |
277
  |:--------|:----------------------------------------------------------------------------------|:----------------------------------------------------------------------------------|:----------------------------------------------------------------------------------|
278
  | type | string | string | string |
279
- | details | <ul><li>min: 6 tokens</li><li>mean: 15.52 tokens</li><li>max: 74 tokens</li></ul> | <ul><li>min: 6 tokens</li><li>mean: 15.51 tokens</li><li>max: 74 tokens</li></ul> | <ul><li>min: 6 tokens</li><li>mean: 16.79 tokens</li><li>max: 69 tokens</li></ul> |
280
  * Samples:
281
- | anchor | positive | negative |
282
- |:-------------------------------------------------------------------------------------------------------------------------------------------------------------|:-------------------------------------------------------------------------------------------------------------------------------------------------------------|:--------------------------------------------------------------------------------------------------------------------------------------------------------------------|
283
- | <code>Why are all my questions on Quora marked needing improvement?</code> | <code>Why are all my questions immediately being marked as needing improvement?</code> | <code>For a post-graduate student in IIT, is it allowed to take an external scholarship as a top-up to his/her MHRD assistantship?</code> |
284
- | <code>Can blue butter fly needle with vaccum tube be reused? Is it HIV risk? . Heard the needle is too small to be reused . Had blood draw at clinic?</code> | <code>Can blue butter fly needle with vaccum tube be reused? Is it HIV risk? . Heard the needle is too small to be reused . Had blood draw at clinic?</code> | <code>Can blue butter fly needle with vaccum tube be reused not ? Is it HIV risk ? . Heard the needle is too small to be reused . Had blood draw at clinic ?</code> |
285
- | <code>Why do people still believe the world is flat?</code> | <code>Why are there still people who believe the world is flat?</code> | <code>I'm not able to buy Udemy course .it is not accepting mine and my friends debit card.my card can be used for Flipkart .how to purchase now?</code> |
286
  * Loss: [<code>MultipleNegativesRankingLoss</code>](https://sbert.net/docs/package_reference/sentence_transformer/losses.html#multiplenegativesrankingloss) with these parameters:
287
  ```json
288
  {
289
- "scale": 5.0,
290
  "similarity_fct": "cos_sim",
291
  "gather_across_devices": false
292
  }
@@ -295,49 +171,36 @@ You can finetune this model on your own dataset.
295
  ### Training Hyperparameters
296
  #### Non-Default Hyperparameters
297
 
298
- - `eval_strategy`: steps
299
- - `per_device_train_batch_size`: 1024
300
- - `per_device_eval_batch_size`: 1024
301
- - `learning_rate`: 2e-05
302
- - `weight_decay`: 0.001
303
- - `max_steps`: 5000
304
- - `warmup_ratio`: 0.1
305
  - `fp16`: True
306
- - `dataloader_drop_last`: True
307
- - `dataloader_num_workers`: 1
308
- - `dataloader_prefetch_factor`: 1
309
- - `load_best_model_at_end`: True
310
- - `optim`: adamw_torch
311
- - `ddp_find_unused_parameters`: False
312
- - `push_to_hub`: True
313
- - `hub_model_id`: redis/model-b-structured
314
- - `eval_on_start`: True
315
 
316
  #### All Hyperparameters
317
  <details><summary>Click to expand</summary>
318
 
319
  - `overwrite_output_dir`: False
320
  - `do_predict`: False
321
- - `eval_strategy`: steps
322
  - `prediction_loss_only`: True
323
- - `per_device_train_batch_size`: 1024
324
- - `per_device_eval_batch_size`: 1024
325
  - `per_gpu_train_batch_size`: None
326
  - `per_gpu_eval_batch_size`: None
327
  - `gradient_accumulation_steps`: 1
328
  - `eval_accumulation_steps`: None
329
  - `torch_empty_cache_steps`: None
330
- - `learning_rate`: 2e-05
331
- - `weight_decay`: 0.001
332
  - `adam_beta1`: 0.9
333
  - `adam_beta2`: 0.999
334
  - `adam_epsilon`: 1e-08
335
- - `max_grad_norm`: 1.0
336
- - `num_train_epochs`: 3.0
337
- - `max_steps`: 5000
338
  - `lr_scheduler_type`: linear
339
  - `lr_scheduler_kwargs`: {}
340
- - `warmup_ratio`: 0.1
341
  - `warmup_steps`: 0
342
  - `log_level`: passive
343
  - `log_level_replica`: warning
@@ -365,14 +228,14 @@ You can finetune this model on your own dataset.
365
  - `tpu_num_cores`: None
366
  - `tpu_metrics_debug`: False
367
  - `debug`: []
368
- - `dataloader_drop_last`: True
369
- - `dataloader_num_workers`: 1
370
- - `dataloader_prefetch_factor`: 1
371
  - `past_index`: -1
372
  - `disable_tqdm`: False
373
  - `remove_unused_columns`: True
374
  - `label_names`: None
375
- - `load_best_model_at_end`: True
376
  - `ignore_data_skip`: False
377
  - `fsdp`: []
378
  - `fsdp_min_num_params`: 0
@@ -382,23 +245,23 @@ You can finetune this model on your own dataset.
382
  - `parallelism_config`: None
383
  - `deepspeed`: None
384
  - `label_smoothing_factor`: 0.0
385
- - `optim`: adamw_torch
386
  - `optim_args`: None
387
  - `adafactor`: False
388
  - `group_by_length`: False
389
  - `length_column_name`: length
390
  - `project`: huggingface
391
  - `trackio_space_id`: trackio
392
- - `ddp_find_unused_parameters`: False
393
  - `ddp_bucket_cap_mb`: None
394
  - `ddp_broadcast_buffers`: False
395
  - `dataloader_pin_memory`: True
396
  - `dataloader_persistent_workers`: False
397
  - `skip_memory_metrics`: True
398
  - `use_legacy_prediction_loop`: False
399
- - `push_to_hub`: True
400
  - `resume_from_checkpoint`: None
401
- - `hub_model_id`: redis/model-b-structured
402
  - `hub_strategy`: every_save
403
  - `hub_private_repo`: None
404
  - `hub_always_push`: False
@@ -425,73 +288,31 @@ You can finetune this model on your own dataset.
425
  - `neftune_noise_alpha`: None
426
  - `optim_target_modules`: None
427
  - `batch_eval_metrics`: False
428
- - `eval_on_start`: True
429
  - `use_liger_kernel`: False
430
  - `liger_kernel_config`: None
431
  - `eval_use_gather_object`: False
432
  - `average_tokens_across_devices`: True
433
  - `prompts`: None
434
  - `batch_sampler`: batch_sampler
435
- - `multi_dataset_batch_sampler`: proportional
436
  - `router_mapping`: {}
437
  - `learning_rate_mapping`: {}
438
 
439
  </details>
440
 
441
  ### Training Logs
442
- | Epoch | Step | Training Loss | Validation Loss | val_cosine_ndcg@10 |
443
- |:------:|:----:|:-------------:|:---------------:|:------------------:|
444
- | 0 | 0 | - | 6.2303 | 0.7794 |
445
- | 0.1435 | 100 | 5.8893 | 3.9810 | 0.8165 |
446
- | 0.2869 | 200 | 4.3345 | 3.2957 | 0.8171 |
447
- | 0.4304 | 300 | 3.9405 | 3.2458 | 0.8235 |
448
- | 0.5739 | 400 | 3.7935 | 3.1902 | 0.8399 |
449
- | 0.7174 | 500 | 3.6851 | 3.1551 | 0.8412 |
450
- | 0.8608 | 600 | 3.6116 | 3.1324 | 0.8428 |
451
- | 1.0043 | 700 | 3.5622 | 3.1129 | 0.8439 |
452
- | 1.1478 | 800 | 3.5229 | 3.1004 | 0.8450 |
453
- | 1.2912 | 900 | 3.4948 | 3.0899 | 0.8453 |
454
- | 1.4347 | 1000 | 3.4705 | 3.0789 | 0.8459 |
455
- | 1.5782 | 1100 | 3.4509 | 3.0709 | 0.8466 |
456
- | 1.7217 | 1200 | 3.4351 | 3.0643 | 0.8472 |
457
- | 1.8651 | 1300 | 3.4173 | 3.0582 | 0.8479 |
458
- | 2.0086 | 1400 | 3.4042 | 3.0529 | 0.8485 |
459
- | 2.1521 | 1500 | 3.3912 | 3.0468 | 0.8492 |
460
- | 2.2956 | 1600 | 3.3817 | 3.0427 | 0.8496 |
461
- | 2.4390 | 1700 | 3.3717 | 3.0390 | 0.8501 |
462
- | 2.5825 | 1800 | 3.3607 | 3.0348 | 0.8506 |
463
- | 2.7260 | 1900 | 3.3545 | 3.0320 | 0.8508 |
464
- | 2.8694 | 2000 | 3.3474 | 3.0271 | 0.8513 |
465
- | 3.0129 | 2100 | 3.3405 | 3.0256 | 0.8518 |
466
- | 3.1564 | 2200 | 3.3314 | 3.0220 | 0.8524 |
467
- | 3.2999 | 2300 | 3.3278 | 3.0195 | 0.8528 |
468
- | 3.4433 | 2400 | 3.3205 | 3.0178 | 0.8530 |
469
- | 3.5868 | 2500 | 3.3155 | 3.0148 | 0.8539 |
470
- | 3.7303 | 2600 | 3.3107 | 3.0120 | 0.8556 |
471
- | 3.8737 | 2700 | 3.3033 | 3.0065 | 0.8574 |
472
- | 4.0172 | 2800 | 3.2945 | 2.9982 | 0.8584 |
473
- | 4.1607 | 2900 | 3.2842 | 2.9936 | 0.8590 |
474
- | 4.3042 | 3000 | 3.281 | 2.9905 | 0.8594 |
475
- | 4.4476 | 3100 | 3.2765 | 2.9880 | 0.8596 |
476
- | 4.5911 | 3200 | 3.2711 | 2.9864 | 0.8598 |
477
- | 4.7346 | 3300 | 3.2676 | 2.9844 | 0.8600 |
478
- | 4.8780 | 3400 | 3.2657 | 2.9835 | 0.8603 |
479
- | 5.0215 | 3500 | 3.2631 | 2.9820 | 0.8606 |
480
- | 5.1650 | 3600 | 3.2576 | 2.9804 | 0.8611 |
481
- | 5.3085 | 3700 | 3.2536 | 2.9761 | 0.8625 |
482
- | 5.4519 | 3800 | 3.251 | 2.9738 | 0.8629 |
483
- | 5.5954 | 3900 | 3.2472 | 2.9724 | 0.8632 |
484
- | 5.7389 | 4000 | 3.2448 | 2.9709 | 0.8632 |
485
- | 5.8824 | 4100 | 3.2439 | 2.9697 | 0.8634 |
486
- | 6.0258 | 4200 | 3.241 | 2.9688 | 0.8635 |
487
- | 6.1693 | 4300 | 3.2388 | 2.9677 | 0.8638 |
488
- | 6.3128 | 4400 | 3.238 | 2.9675 | 0.8636 |
489
- | 6.4562 | 4500 | 3.2365 | 2.9671 | 0.8637 |
490
- | 6.5997 | 4600 | 3.2341 | 2.9667 | 0.8638 |
491
- | 6.7432 | 4700 | 3.2334 | 2.9664 | 0.8637 |
492
- | 6.8867 | 4800 | 3.2335 | 2.9661 | 0.8637 |
493
- | 7.0301 | 4900 | 3.2341 | 2.9660 | 0.8637 |
494
- | 7.1736 | 5000 | 3.2314 | 2.9657 | 0.8637 |
495
 
496
 
497
  ### Framework Versions
 
5
  - feature-extraction
6
  - dense
7
  - generated_from_trainer
8
+ - dataset_size:100000
9
  - loss:MultipleNegativesRankingLoss
10
  base_model: prajjwal1/bert-small
11
  widget:
12
+ - source_sentence: How do I calculate IQ?
13
  sentences:
14
+ - What is the easiest way to know my IQ?
15
+ - How do I calculate not IQ ?
16
+ - What are some creative and innovative business ideas with less investment in India?
17
+ - source_sentence: How can I learn martial arts in my home?
 
18
  sentences:
19
+ - How can I learn martial arts by myself?
20
+ - What are the advantages and disadvantages of investing in gold?
21
+ - Can people see that I have looked at their pictures on instagram if I am not following
22
+ them?
23
+ - source_sentence: When Enterprise picks you up do you have to take them back?
24
  sentences:
25
+ - Are there any software Training institute in Tuticorin?
26
+ - When Enterprise picks you up do you have to take them back?
27
+ - When Enterprise picks you up do them have to take youback?
28
+ - source_sentence: What are some non-capital goods?
 
 
 
29
  sentences:
30
+ - What are capital goods?
31
+ - How is the value of [math]\pi[/math] calculated?
32
+ - What are some non-capital goods?
33
+ - source_sentence: What is the QuickBooks technical support phone number in New York?
34
  sentences:
35
+ - What caused the Great Depression?
36
+ - Can I apply for PR in Canada?
37
+ - Which is the best QuickBooks Hosting Support Number in New York?
 
38
  pipeline_tag: sentence-similarity
39
  library_name: sentence-transformers
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
40
  ---
41
 
42
  # SentenceTransformer based on prajjwal1/bert-small
 
85
  from sentence_transformers import SentenceTransformer
86
 
87
  # Download from the 🤗 Hub
88
+ model = SentenceTransformer("sentence_transformers_model_id")
89
  # Run inference
90
  sentences = [
91
+ 'What is the QuickBooks technical support phone number in New York?',
92
+ 'Which is the best QuickBooks Hosting Support Number in New York?',
93
+ 'Can I apply for PR in Canada?',
94
  ]
95
  embeddings = model.encode(sentences)
96
  print(embeddings.shape)
 
99
  # Get the similarity scores for the embeddings
100
  similarities = model.similarity(embeddings, embeddings)
101
  print(similarities)
102
+ # tensor([[1.0000, 0.8563, 0.0594],
103
+ # [0.8563, 1.0000, 0.1245],
104
+ # [0.0594, 0.1245, 1.0000]])
105
  ```
106
 
107
  <!--
 
128
  *List how the model may foreseeably be misused and address what users ought not to do with the model.*
129
  -->
130
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
131
  <!--
132
  ## Bias, Risks and Limitations
133
 
 
146
 
147
  #### Unnamed Dataset
148
 
149
+ * Size: 100,000 training samples
150
+ * Columns: <code>sentence_0</code>, <code>sentence_1</code>, and <code>sentence_2</code>
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
151
  * Approximate statistics based on the first 1000 samples:
152
+ | | sentence_0 | sentence_1 | sentence_2 |
153
  |:--------|:----------------------------------------------------------------------------------|:----------------------------------------------------------------------------------|:----------------------------------------------------------------------------------|
154
  | type | string | string | string |
155
+ | details | <ul><li>min: 6 tokens</li><li>mean: 15.79 tokens</li><li>max: 66 tokens</li></ul> | <ul><li>min: 6 tokens</li><li>mean: 15.68 tokens</li><li>max: 66 tokens</li></ul> | <ul><li>min: 7 tokens</li><li>mean: 16.37 tokens</li><li>max: 67 tokens</li></ul> |
156
  * Samples:
157
+ | sentence_0 | sentence_1 | sentence_2 |
158
+ |:-----------------------------------------------------------------|:-----------------------------------------------------------------|:----------------------------------------------------------------------------------|
159
+ | <code>Is masturbating bad for boys?</code> | <code>Is masturbating bad for boys?</code> | <code>How harmful or unhealthy is masturbation?</code> |
160
+ | <code>Does a train engine move in reverse?</code> | <code>Does a train engine move in reverse?</code> | <code>Time moves forward, not in reverse. Doesn't that make time a vector?</code> |
161
+ | <code>What is the most badass thing anyone has ever done?</code> | <code>What is the most badass thing anyone has ever done?</code> | <code>anyone is the most badass thing Whathas ever done?</code> |
162
  * Loss: [<code>MultipleNegativesRankingLoss</code>](https://sbert.net/docs/package_reference/sentence_transformer/losses.html#multiplenegativesrankingloss) with these parameters:
163
  ```json
164
  {
165
+ "scale": 20.0,
166
  "similarity_fct": "cos_sim",
167
  "gather_across_devices": false
168
  }
 
171
  ### Training Hyperparameters
172
  #### Non-Default Hyperparameters
173
 
174
+ - `per_device_train_batch_size`: 64
175
+ - `per_device_eval_batch_size`: 64
 
 
 
 
 
176
  - `fp16`: True
177
+ - `multi_dataset_batch_sampler`: round_robin
 
 
 
 
 
 
 
 
178
 
179
  #### All Hyperparameters
180
  <details><summary>Click to expand</summary>
181
 
182
  - `overwrite_output_dir`: False
183
  - `do_predict`: False
184
+ - `eval_strategy`: no
185
  - `prediction_loss_only`: True
186
+ - `per_device_train_batch_size`: 64
187
+ - `per_device_eval_batch_size`: 64
188
  - `per_gpu_train_batch_size`: None
189
  - `per_gpu_eval_batch_size`: None
190
  - `gradient_accumulation_steps`: 1
191
  - `eval_accumulation_steps`: None
192
  - `torch_empty_cache_steps`: None
193
+ - `learning_rate`: 5e-05
194
+ - `weight_decay`: 0.0
195
  - `adam_beta1`: 0.9
196
  - `adam_beta2`: 0.999
197
  - `adam_epsilon`: 1e-08
198
+ - `max_grad_norm`: 1
199
+ - `num_train_epochs`: 3
200
+ - `max_steps`: -1
201
  - `lr_scheduler_type`: linear
202
  - `lr_scheduler_kwargs`: {}
203
+ - `warmup_ratio`: 0.0
204
  - `warmup_steps`: 0
205
  - `log_level`: passive
206
  - `log_level_replica`: warning
 
228
  - `tpu_num_cores`: None
229
  - `tpu_metrics_debug`: False
230
  - `debug`: []
231
+ - `dataloader_drop_last`: False
232
+ - `dataloader_num_workers`: 0
233
+ - `dataloader_prefetch_factor`: None
234
  - `past_index`: -1
235
  - `disable_tqdm`: False
236
  - `remove_unused_columns`: True
237
  - `label_names`: None
238
+ - `load_best_model_at_end`: False
239
  - `ignore_data_skip`: False
240
  - `fsdp`: []
241
  - `fsdp_min_num_params`: 0
 
245
  - `parallelism_config`: None
246
  - `deepspeed`: None
247
  - `label_smoothing_factor`: 0.0
248
+ - `optim`: adamw_torch_fused
249
  - `optim_args`: None
250
  - `adafactor`: False
251
  - `group_by_length`: False
252
  - `length_column_name`: length
253
  - `project`: huggingface
254
  - `trackio_space_id`: trackio
255
+ - `ddp_find_unused_parameters`: None
256
  - `ddp_bucket_cap_mb`: None
257
  - `ddp_broadcast_buffers`: False
258
  - `dataloader_pin_memory`: True
259
  - `dataloader_persistent_workers`: False
260
  - `skip_memory_metrics`: True
261
  - `use_legacy_prediction_loop`: False
262
+ - `push_to_hub`: False
263
  - `resume_from_checkpoint`: None
264
+ - `hub_model_id`: None
265
  - `hub_strategy`: every_save
266
  - `hub_private_repo`: None
267
  - `hub_always_push`: False
 
288
  - `neftune_noise_alpha`: None
289
  - `optim_target_modules`: None
290
  - `batch_eval_metrics`: False
291
+ - `eval_on_start`: False
292
  - `use_liger_kernel`: False
293
  - `liger_kernel_config`: None
294
  - `eval_use_gather_object`: False
295
  - `average_tokens_across_devices`: True
296
  - `prompts`: None
297
  - `batch_sampler`: batch_sampler
298
+ - `multi_dataset_batch_sampler`: round_robin
299
  - `router_mapping`: {}
300
  - `learning_rate_mapping`: {}
301
 
302
  </details>
303
 
304
  ### Training Logs
305
+ | Epoch | Step | Training Loss |
306
+ |:------:|:----:|:-------------:|
307
+ | 0.3199 | 500 | 0.4294 |
308
+ | 0.6398 | 1000 | 0.1268 |
309
+ | 0.9597 | 1500 | 0.1 |
310
+ | 1.2796 | 2000 | 0.0792 |
311
+ | 1.5995 | 2500 | 0.0706 |
312
+ | 1.9194 | 3000 | 0.0687 |
313
+ | 2.2393 | 3500 | 0.0584 |
314
+ | 2.5592 | 4000 | 0.057 |
315
+ | 2.8791 | 4500 | 0.0581 |
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
316
 
317
 
318
  ### Framework Versions
eval/Information-Retrieval_evaluation_val_results.csv CHANGED
@@ -512,3 +512,24 @@ epoch,steps,cosine-Accuracy@1,cosine-Accuracy@3,cosine-Accuracy@5,cosine-Precisi
512
  6.886657101865136,4800,0.79665,0.8745,0.900525,0.79665,0.79665,0.2915,0.8745,0.18010500000000002,0.900525,0.79665,0.8373320833333286,0.8416871428571374,0.863729044462657,0.8445069828327856
513
  7.03012912482066,4900,0.79655,0.87425,0.9004,0.79655,0.79655,0.2914166666666666,0.87425,0.18008000000000002,0.9004,0.79655,0.8372299999999956,0.8416038690476145,0.8636646643385855,0.844421583046012
514
  7.173601147776184,5000,0.7966,0.87425,0.900575,0.7966,0.7966,0.2914166666666666,0.87425,0.180115,0.900575,0.7966,0.8372962499999956,0.8416481150793601,0.8637140791780538,0.8444611118975183
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
512
  6.886657101865136,4800,0.79665,0.8745,0.900525,0.79665,0.79665,0.2915,0.8745,0.18010500000000002,0.900525,0.79665,0.8373320833333286,0.8416871428571374,0.863729044462657,0.8445069828327856
513
  7.03012912482066,4900,0.79655,0.87425,0.9004,0.79655,0.79655,0.2914166666666666,0.87425,0.18008000000000002,0.9004,0.79655,0.8372299999999956,0.8416038690476145,0.8636646643385855,0.844421583046012
514
  7.173601147776184,5000,0.7966,0.87425,0.900575,0.7966,0.7966,0.2914166666666666,0.87425,0.180115,0.900575,0.7966,0.8372962499999956,0.8416481150793601,0.8637140791780538,0.8444611118975183
515
+ 0,0,0.7029,0.796025,0.8218,0.7029,0.7029,0.26534166666666664,0.796025,0.16436,0.8218,0.7029,0.751310833333329,0.7556036507936484,0.7794463470929031,0.7588789249204877
516
+ 0.14347202295552366,100,0.717,0.84145,0.8689,0.717,0.717,0.2804833333333333,0.84145,0.17378000000000002,0.8689,0.717,0.7807374999999929,0.7850385515872969,0.8133815772130083,0.7880657756004839
517
+ 0.28694404591104733,200,0.7095,0.8183,0.844225,0.7095,0.7095,0.27276666666666666,0.8183,0.168845,0.844225,0.7095,0.7656779166666611,0.769763134920631,0.7954360290067323,0.7728197363104299
518
+ 0.430416068866571,300,0.708475,0.81125,0.836575,0.708475,0.708475,0.27041666666666664,0.81125,0.16731500000000002,0.836575,0.708475,0.7615387499999949,0.7655175198412657,0.790111712678878,0.768748409930974
519
+ 0.5738880918220947,400,0.708375,0.808625,0.834575,0.708375,0.708375,0.2695416666666666,0.808625,0.166915,0.834575,0.708375,0.760417499999995,0.764189960317456,0.788286238687547,0.7675263762171609
520
+ 0.7173601147776184,500,0.709675,0.808125,0.83285,0.709675,0.709675,0.269375,0.808125,0.16656999999999997,0.83285,0.709675,0.7605204166666616,0.7645700595238065,0.788585498953581,0.7679165884205366
521
+ 0.860832137733142,600,0.711,0.808575,0.833525,0.711,0.711,0.26952499999999996,0.808575,0.16670500000000002,0.833525,0.711,0.7614741666666615,0.7654778571428551,0.7893595531498633,0.7688425453494171
522
+ 1.0043041606886658,700,0.712975,0.80835,0.83325,0.712975,0.712975,0.26944999999999997,0.80835,0.16665,0.83325,0.712975,0.762359583333328,0.7665036706349182,0.7902748705292052,0.7698428889418915
523
+ 1.1477761836441893,800,0.715325,0.8086,0.8339,0.715325,0.715325,0.2695333333333333,0.8086,0.16678,0.8339,0.715325,0.7637633333333285,0.7679174206349183,0.7914743095975493,0.7712684708256158
524
+ 1.291248206599713,900,0.717125,0.809525,0.834425,0.717125,0.717125,0.26984166666666665,0.809525,0.16688499999999998,0.834425,0.717125,0.765000416666662,0.7691483630952358,0.792521966543574,0.7724750035642856
525
+ 1.4347202295552366,1000,0.7202,0.810175,0.8354,0.7202,0.7202,0.2700583333333333,0.810175,0.16708,0.8354,0.7202,0.7669316666666613,0.7709732936507905,0.793979995472388,0.7743154167736304
526
+ 1.5781922525107603,1100,0.723025,0.810625,0.8357,0.723025,0.723025,0.27020833333333333,0.810625,0.16714,0.8357,0.723025,0.7686441666666616,0.7727187698412665,0.7953972937973883,0.7760878987495129
527
+ 1.721664275466284,1200,0.725625,0.8111,0.836425,0.725625,0.725625,0.27036666666666664,0.8111,0.167285,0.836425,0.725625,0.7702162499999953,0.7742493849206322,0.796630347352746,0.777611475763193
528
+ 1.8651362984218078,1300,0.728275,0.812575,0.837425,0.728275,0.728275,0.27085833333333337,0.812575,0.16748500000000002,0.837425,0.728275,0.7721183333333277,0.7761844345238061,0.7983938166094018,0.7795115097947791
529
+ 2.0086083213773316,1400,0.731825,0.814075,0.838425,0.731825,0.731825,0.27135833333333337,0.814075,0.167685,0.838425,0.731825,0.7745562499999946,0.7785773115079319,0.800378123753988,0.7819358620660539
530
+ 2.152080344332855,1500,0.736,0.8153,0.839425,0.736,0.736,0.27176666666666666,0.8153,0.167885,0.839425,0.736,0.7772470833333275,0.7812455158730118,0.8025644479318247,0.7846263981160897
531
+ 2.2955523672883786,1600,0.73945,0.816475,0.840475,0.73945,0.73945,0.2721583333333333,0.816475,0.168095,0.840475,0.73945,0.7794904166666612,0.7835206249999958,0.8045946386038447,0.7868534347008024
532
+ 2.4390243902439024,1700,0.742875,0.817575,0.8412,0.742875,0.742875,0.27252499999999996,0.817575,0.16824,0.8412,0.742875,0.781825833333328,0.7859333035714243,0.8066779192564868,0.789242473994736
533
+ 2.582496413199426,1800,0.74505,0.818425,0.841625,0.74505,0.74505,0.2728083333333334,0.818425,0.168325,0.841625,0.74505,0.7832987499999948,0.7874403472222192,0.8079818137688616,0.790751407389254
534
+ 2.72596843615495,1900,0.74615,0.81875,0.842175,0.74615,0.74615,0.27291666666666664,0.81875,0.168435,0.842175,0.74615,0.784008333333328,0.7880891666666628,0.8085014133195643,0.7914131097624945
535
+ 2.869440459110473,2000,0.7467,0.81875,0.842275,0.7467,0.7467,0.27291666666666664,0.81875,0.16845500000000002,0.842275,0.7467,0.784354583333328,0.7884659325396792,0.8088581445720447,0.7917670616349511
final_metrics.json CHANGED
@@ -1,16 +1,16 @@
1
  {
2
- "val_cosine_accuracy@1": 0.7614,
3
- "val_cosine_accuracy@3": 0.82615,
4
- "val_cosine_accuracy@5": 0.850775,
5
- "val_cosine_precision@1": 0.7614,
6
- "val_cosine_precision@3": 0.2753833333333333,
7
- "val_cosine_precision@5": 0.170155,
8
- "val_cosine_recall@1": 0.7614,
9
- "val_cosine_recall@3": 0.82615,
10
- "val_cosine_recall@5": 0.850775,
11
- "val_cosine_ndcg@10": 0.8201550154419872,
12
- "val_cosine_mrr@1": 0.7614,
13
- "val_cosine_mrr@5": 0.7960862499999959,
14
- "val_cosine_mrr@10": 0.8003843253968239,
15
- "val_cosine_map@100": 0.8038332983359062
16
  }
 
1
  {
2
+ "val_cosine_accuracy@1": 0.7966,
3
+ "val_cosine_accuracy@3": 0.87425,
4
+ "val_cosine_accuracy@5": 0.900575,
5
+ "val_cosine_precision@1": 0.7966,
6
+ "val_cosine_precision@3": 0.2914166666666666,
7
+ "val_cosine_precision@5": 0.180115,
8
+ "val_cosine_recall@1": 0.7966,
9
+ "val_cosine_recall@3": 0.87425,
10
+ "val_cosine_recall@5": 0.900575,
11
+ "val_cosine_ndcg@10": 0.8637140791780538,
12
+ "val_cosine_mrr@1": 0.7966,
13
+ "val_cosine_mrr@5": 0.8372962499999956,
14
+ "val_cosine_mrr@10": 0.8416481150793601,
15
+ "val_cosine_map@100": 0.8444611118975183
16
  }
model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:5db1674c675ac3fecdc903e40be0f70444de31052681e25137b3a8818fa28d06
3
  size 114011616
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:586b42ce1f6d0ffeb16af5e7af005e44c0d889f2fde7117eaa7651cf0314b342
3
  size 114011616
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:9d4c139a71ec4d4b0dfbafa31c4950656cd8f9c5e2ab0d688a8a2c2fdc8272c3
3
  size 6161
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e2feedcd06746c7b018202fdf853c68e3b62e0d4abb502a6073ae87307445cab
3
  size 6161