Training in progress, step 2000

Browse files

Files changed (6) hide show

Information-Retrieval_evaluation_val_results.csv +1 -0
README.md +72 -251
eval/Information-Retrieval_evaluation_val_results.csv +21 -0
final_metrics.json +14 -14
model.safetensors +1 -1
training_args.bin +1 -1

Information-Retrieval_evaluation_val_results.csv CHANGED Viewed

@@ -4,3 +4,4 @@ epoch,steps,cosine-Accuracy@1,cosine-Accuracy@3,cosine-Accuracy@5,cosine-Precisi
 -1,-1,0.8281,0.9026,0.93105,0.8281,0.8281,0.3008666666666666,0.9026,0.18621000000000004,0.93105,0.8281,0.8677437499999962,0.8721381249999942,0.8942437004811851,0.874246358340888
 -1,-1,0.82925,0.903025,0.931175,0.82925,0.82925,0.3010083333333333,0.903025,0.186235,0.931175,0.82925,0.8687345833333282,0.8731489384920591,0.8950131360828151,0.8752091976044037
 -1,-1,0.7614,0.82615,0.850775,0.7614,0.7614,0.2753833333333333,0.82615,0.170155,0.850775,0.7614,0.7960862499999959,0.8003843253968239,0.8201550154419872,0.8038332983359062

 -1,-1,0.8281,0.9026,0.93105,0.8281,0.8281,0.3008666666666666,0.9026,0.18621000000000004,0.93105,0.8281,0.8677437499999962,0.8721381249999942,0.8942437004811851,0.874246358340888
 -1,-1,0.82925,0.903025,0.931175,0.82925,0.82925,0.3010083333333333,0.903025,0.186235,0.931175,0.82925,0.8687345833333282,0.8731489384920591,0.8950131360828151,0.8752091976044037
 -1,-1,0.7614,0.82615,0.850775,0.7614,0.7614,0.2753833333333333,0.82615,0.170155,0.850775,0.7614,0.7960862499999959,0.8003843253968239,0.8201550154419872,0.8038332983359062
+-1,-1,0.7966,0.87425,0.900575,0.7966,0.7966,0.2914166666666666,0.87425,0.180115,0.900575,0.7966,0.8372962499999956,0.8416481150793601,0.8637140791780538,0.8444611118975183

README.md CHANGED Viewed

@@ -5,110 +5,38 @@ tags:
 - feature-extraction
 - dense
 - generated_from_trainer
-- dataset_size:713743
 - loss:MultipleNegativesRankingLoss
 base_model: prajjwal1/bert-small
 widget:
-- source_sentence: 'Abraham Lincoln: Why is the Gettysburg Address so memorable?'
   sentences:
-  - 'Abraham Lincoln: Why is the Gettysburg Address so memorable?'
-  - What does the Gettysburg Address really mean?
-  - What is eatalo.com?
-- source_sentence: Has the influence of Ancient Carthage in science, math, and society
-    been underestimated?
   sentences:
-  - How does one earn money online without an investment from home?
-  - Has the influence of Ancient Carthage in science, math, and society been underestimated?
-  - Has the influence of the Ancient Etruscans in science and math been underestimated?
-- source_sentence: Is there any app that shares charging to others like share it how
-    we transfer files?
   sentences:
-  - How do you think of Chinese claims that the present Private Arbitration is illegal,
-    its verdict violates the UNCLOS and is illegal?
-  - Is there any app that shares charging to others like share it how we transfer
-    files?
-  - Are there any platforms that provides end-to-end encryption for file transfer/
-    sharing?
-- source_sentence: Why AAP’s MLA Dinesh Mohaniya has been arrested?
   sentences:
-  - What are your views on the latest sex scandal by AAP MLA Sandeep Kumar?
-  - What is a dc current? What are some examples?
-  - Why AAP’s MLA Dinesh Mohaniya has been arrested?
-- source_sentence: What is the difference between economic growth and economic development?
   sentences:
-  - How cold can the Gobi Desert get, and how do its average temperatures compare
-    to the ones in the Simpson Desert?
-  - the difference between economic growth and economic development is What?
-  - What is the difference between economic growth and economic development?
 pipeline_tag: sentence-similarity
 library_name: sentence-transformers
-metrics:
-- cosine_accuracy@1
-- cosine_accuracy@3
-- cosine_accuracy@5
-- cosine_precision@1
-- cosine_precision@3
-- cosine_precision@5
-- cosine_recall@1
-- cosine_recall@3
-- cosine_recall@5
-- cosine_ndcg@10
-- cosine_mrr@1
-- cosine_mrr@5
-- cosine_mrr@10
-- cosine_map@100
-model-index:
-- name: SentenceTransformer based on prajjwal1/bert-small
-  results:
-  - task:
-      type: information-retrieval
-      name: Information Retrieval
-    dataset:
-      name: val
-      type: val
-    metrics:
-    - type: cosine_accuracy@1
-      value: 0.7966
-      name: Cosine Accuracy@1
-    - type: cosine_accuracy@3
-      value: 0.87425
-      name: Cosine Accuracy@3
-    - type: cosine_accuracy@5
-      value: 0.900575
-      name: Cosine Accuracy@5
-    - type: cosine_precision@1
-      value: 0.7966
-      name: Cosine Precision@1
-    - type: cosine_precision@3
-      value: 0.2914166666666666
-      name: Cosine Precision@3
-    - type: cosine_precision@5
-      value: 0.180115
-      name: Cosine Precision@5
-    - type: cosine_recall@1
-      value: 0.7966
-      name: Cosine Recall@1
-    - type: cosine_recall@3
-      value: 0.87425
-      name: Cosine Recall@3
-    - type: cosine_recall@5
-      value: 0.900575
-      name: Cosine Recall@5
-    - type: cosine_ndcg@10
-      value: 0.8637140791780538
-      name: Cosine Ndcg@10
-    - type: cosine_mrr@1
-      value: 0.7966
-      name: Cosine Mrr@1
-    - type: cosine_mrr@5
-      value: 0.8372962499999956
-      name: Cosine Mrr@5
-    - type: cosine_mrr@10
-      value: 0.8416481150793601
-      name: Cosine Mrr@10
-    - type: cosine_map@100
-      value: 0.8444611118975183
-      name: Cosine Map@100
 ---
 # SentenceTransformer based on prajjwal1/bert-small
@@ -157,12 +85,12 @@ Then you can load this model and run inference.
 from sentence_transformers import SentenceTransformer
 # Download from the 🤗 Hub
-model = SentenceTransformer("redis/model-b-structured")
 # Run inference
 sentences = [
-    'What is the difference between economic growth and economic development?',
-    'What is the difference between economic growth and economic development?',
-    'the difference between economic growth and economic development is What?',
 ]
 embeddings = model.encode(sentences)
 print(embeddings.shape)
@@ -171,9 +99,9 @@ print(embeddings.shape)
 # Get the similarity scores for the embeddings
 similarities = model.similarity(embeddings, embeddings)
 print(similarities)
-# tensor([[1.0000, 1.0000, 0.0234],
-#         [1.0000, 1.0000, 0.0234],
-#         [0.0234, 0.0234, 0.9999]])
 ```
 <!--
@@ -200,32 +128,6 @@ You can finetune this model on your own dataset.
 *List how the model may foreseeably be misused and address what users ought not to do with the model.*
 -->
-## Evaluation
-### Metrics
-#### Information Retrieval
-* Dataset: `val`
-* Evaluated with [<code>InformationRetrievalEvaluator</code>](https://sbert.net/docs/package_reference/sentence_transformer/evaluation.html#sentence_transformers.evaluation.InformationRetrievalEvaluator)
-| Metric             | Value      |
-|:-------------------|:-----------|
-| cosine_accuracy@1  | 0.7966     |
-| cosine_accuracy@3  | 0.8742     |
-| cosine_accuracy@5  | 0.9006     |
-| cosine_precision@1 | 0.7966     |
-| cosine_precision@3 | 0.2914     |
-| cosine_precision@5 | 0.1801     |
-| cosine_recall@1    | 0.7966     |
-| cosine_recall@3    | 0.8742     |
-| cosine_recall@5    | 0.9006     |
-| **cosine_ndcg@10** | **0.8637** |
-| cosine_mrr@1       | 0.7966     |
-| cosine_mrr@5       | 0.8373     |
-| cosine_mrr@10      | 0.8416     |
-| cosine_map@100     | 0.8445     |
 <!--
 ## Bias, Risks and Limitations
@@ -244,49 +146,23 @@ You can finetune this model on your own dataset.
 #### Unnamed Dataset
-* Size: 713,743 training samples
-* Columns: <code>anchor</code>, <code>positive</code>, and <code>negative</code>
-* Approximate statistics based on the first 1000 samples:
-  |         | anchor                                                                            | positive                                                                          | negative                                                                          |
-  |:--------|:----------------------------------------------------------------------------------|:----------------------------------------------------------------------------------|:----------------------------------------------------------------------------------|
-  | type    | string                                                                            | string                                                                            | string                                                                            |
-  | details | <ul><li>min: 6 tokens</li><li>mean: 16.07 tokens</li><li>max: 53 tokens</li></ul> | <ul><li>min: 6 tokens</li><li>mean: 16.03 tokens</li><li>max: 53 tokens</li></ul> | <ul><li>min: 6 tokens</li><li>mean: 16.81 tokens</li><li>max: 58 tokens</li></ul> |
-* Samples:
-  | anchor                                                                         | positive                                                                              | negative                                                                                                          |
-  |:-------------------------------------------------------------------------------|:--------------------------------------------------------------------------------------|:------------------------------------------------------------------------------------------------------------------|
-  | <code>Which one is better Linux OS? Ubuntu or Mint?</code>                     | <code>Why do you use Linux Mint?</code>                                               | <code>Which one is not better Linux OS ? Ubuntu or Mint ?</code>                                                  |
-  | <code>What is flow?</code>                                                     | <code>What is flow?</code>                                                            | <code>What are flow lines?</code>                                                                                 |
-  | <code>How is Trump planning to get Mexico to pay for his supposed wall?</code> | <code>How is it possible for Donald Trump to force Mexico to pay for the wall?</code> | <code>Why do we connect the positive terminal before the negative terminal to ground in a vehicle battery?</code> |
-* Loss: [<code>MultipleNegativesRankingLoss</code>](https://sbert.net/docs/package_reference/sentence_transformer/losses.html#multiplenegativesrankingloss) with these parameters:
-  ```json
-  {
-      "scale": 5.0,
-      "similarity_fct": "cos_sim",
-      "gather_across_devices": false
-  }
-  ```
-### Evaluation Dataset
-#### Unnamed Dataset
-* Size: 40,000 evaluation samples
-* Columns: <code>anchor</code>, <code>positive</code>, and <code>negative</code>
 * Approximate statistics based on the first 1000 samples:
-  |         | anchor                                                                            | positive                                                                          | negative                                                                          |
   |:--------|:----------------------------------------------------------------------------------|:----------------------------------------------------------------------------------|:----------------------------------------------------------------------------------|
   | type    | string                                                                            | string                                                                            | string                                                                            |
-  | details | <ul><li>min: 6 tokens</li><li>mean: 15.52 tokens</li><li>max: 74 tokens</li></ul> | <ul><li>min: 6 tokens</li><li>mean: 15.51 tokens</li><li>max: 74 tokens</li></ul> | <ul><li>min: 6 tokens</li><li>mean: 16.79 tokens</li><li>max: 69 tokens</li></ul> |
 * Samples:
-  | anchor                                                                                                                                                       | positive                                                                                                                                                     | negative                                                                                                                                                            |
-  |:-------------------------------------------------------------------------------------------------------------------------------------------------------------|:-------------------------------------------------------------------------------------------------------------------------------------------------------------|:--------------------------------------------------------------------------------------------------------------------------------------------------------------------|
-  | <code>Why are all my questions on Quora marked needing improvement?</code>                                                                                   | <code>Why are all my questions immediately being marked as needing improvement?</code>                                                                       | <code>For a post-graduate student in IIT, is it allowed to take an external scholarship as a top-up to his/her MHRD assistantship?</code>                           |
-  | <code>Can blue butter fly needle with vaccum tube be reused? Is it HIV risk? . Heard the needle is too small to be reused . Had blood draw at clinic?</code> | <code>Can blue butter fly needle with vaccum tube be reused? Is it HIV risk? . Heard the needle is too small to be reused . Had blood draw at clinic?</code> | <code>Can blue butter fly needle with vaccum tube be reused not ? Is it HIV risk ? . Heard the needle is too small to be reused . Had blood draw at clinic ?</code> |
-  | <code>Why do people still believe the world is flat?</code>                                                                                                  | <code>Why are there still people who believe the world is flat?</code>                                                                                       | <code>I'm not able to buy Udemy course .it is not accepting mine and my friends debit card.my card can be used for Flipkart .how to purchase now?</code>            |
 * Loss: [<code>MultipleNegativesRankingLoss</code>](https://sbert.net/docs/package_reference/sentence_transformer/losses.html#multiplenegativesrankingloss) with these parameters:
   ```json
   {
-      "scale": 5.0,
       "similarity_fct": "cos_sim",
       "gather_across_devices": false
   }
@@ -295,49 +171,36 @@ You can finetune this model on your own dataset.
 ### Training Hyperparameters
 #### Non-Default Hyperparameters
-- `eval_strategy`: steps
-- `per_device_train_batch_size`: 1024
-- `per_device_eval_batch_size`: 1024
-- `learning_rate`: 2e-05
-- `weight_decay`: 0.001
-- `max_steps`: 5000
-- `warmup_ratio`: 0.1
 - `fp16`: True
-- `dataloader_drop_last`: True
-- `dataloader_num_workers`: 1
-- `dataloader_prefetch_factor`: 1
-- `load_best_model_at_end`: True
-- `optim`: adamw_torch
-- `ddp_find_unused_parameters`: False
-- `push_to_hub`: True
-- `hub_model_id`: redis/model-b-structured
-- `eval_on_start`: True
 #### All Hyperparameters
 <details><summary>Click to expand</summary>
 - `overwrite_output_dir`: False
 - `do_predict`: False
-- `eval_strategy`: steps
 - `prediction_loss_only`: True
-- `per_device_train_batch_size`: 1024
-- `per_device_eval_batch_size`: 1024
 - `per_gpu_train_batch_size`: None
 - `per_gpu_eval_batch_size`: None
 - `gradient_accumulation_steps`: 1
 - `eval_accumulation_steps`: None
 - `torch_empty_cache_steps`: None
-- `learning_rate`: 2e-05
-- `weight_decay`: 0.001
 - `adam_beta1`: 0.9
 - `adam_beta2`: 0.999
 - `adam_epsilon`: 1e-08
-- `max_grad_norm`: 1.0
-- `num_train_epochs`: 3.0
-- `max_steps`: 5000
 - `lr_scheduler_type`: linear
 - `lr_scheduler_kwargs`: {}
-- `warmup_ratio`: 0.1
 - `warmup_steps`: 0
 - `log_level`: passive
 - `log_level_replica`: warning
@@ -365,14 +228,14 @@ You can finetune this model on your own dataset.
 - `tpu_num_cores`: None
 - `tpu_metrics_debug`: False
 - `debug`: []
-- `dataloader_drop_last`: True
-- `dataloader_num_workers`: 1
-- `dataloader_prefetch_factor`: 1
 - `past_index`: -1
 - `disable_tqdm`: False
 - `remove_unused_columns`: True
 - `label_names`: None
-- `load_best_model_at_end`: True
 - `ignore_data_skip`: False
 - `fsdp`: []
 - `fsdp_min_num_params`: 0
@@ -382,23 +245,23 @@ You can finetune this model on your own dataset.
 - `parallelism_config`: None
 - `deepspeed`: None
 - `label_smoothing_factor`: 0.0
-- `optim`: adamw_torch
 - `optim_args`: None
 - `adafactor`: False
 - `group_by_length`: False
 - `length_column_name`: length
 - `project`: huggingface
 - `trackio_space_id`: trackio
-- `ddp_find_unused_parameters`: False
 - `ddp_bucket_cap_mb`: None
 - `ddp_broadcast_buffers`: False
 - `dataloader_pin_memory`: True
 - `dataloader_persistent_workers`: False
 - `skip_memory_metrics`: True
 - `use_legacy_prediction_loop`: False
-- `push_to_hub`: True
 - `resume_from_checkpoint`: None
-- `hub_model_id`: redis/model-b-structured
 - `hub_strategy`: every_save
 - `hub_private_repo`: None
 - `hub_always_push`: False
@@ -425,73 +288,31 @@ You can finetune this model on your own dataset.
 - `neftune_noise_alpha`: None
 - `optim_target_modules`: None
 - `batch_eval_metrics`: False
-- `eval_on_start`: True
 - `use_liger_kernel`: False
 - `liger_kernel_config`: None
 - `eval_use_gather_object`: False
 - `average_tokens_across_devices`: True
 - `prompts`: None
 - `batch_sampler`: batch_sampler
-- `multi_dataset_batch_sampler`: proportional
 - `router_mapping`: {}
 - `learning_rate_mapping`: {}
 </details>
 ### Training Logs
-| Epoch  | Step | Training Loss | Validation Loss | val_cosine_ndcg@10 |
-|:------:|:----:|:-------------:|:---------------:|:------------------:|
-| 0      | 0    | -             | 6.2303          | 0.7794             |
-| 0.1435 | 100  | 5.8893        | 3.9810          | 0.8165             |
-| 0.2869 | 200  | 4.3345        | 3.2957          | 0.8171             |
-| 0.4304 | 300  | 3.9405        | 3.2458          | 0.8235             |
-| 0.5739 | 400  | 3.7935        | 3.1902          | 0.8399             |
-| 0.7174 | 500  | 3.6851        | 3.1551          | 0.8412             |
-| 0.8608 | 600  | 3.6116        | 3.1324          | 0.8428             |
-| 1.0043 | 700  | 3.5622        | 3.1129          | 0.8439             |
-| 1.1478 | 800  | 3.5229        | 3.1004          | 0.8450             |
-| 1.2912 | 900  | 3.4948        | 3.0899          | 0.8453             |
-| 1.4347 | 1000 | 3.4705        | 3.0789          | 0.8459             |
-| 1.5782 | 1100 | 3.4509        | 3.0709          | 0.8466             |
-| 1.7217 | 1200 | 3.4351        | 3.0643          | 0.8472             |
-| 1.8651 | 1300 | 3.4173        | 3.0582          | 0.8479             |
-| 2.0086 | 1400 | 3.4042        | 3.0529          | 0.8485             |
-| 2.1521 | 1500 | 3.3912        | 3.0468          | 0.8492             |
-| 2.2956 | 1600 | 3.3817        | 3.0427          | 0.8496             |
-| 2.4390 | 1700 | 3.3717        | 3.0390          | 0.8501             |
-| 2.5825 | 1800 | 3.3607        | 3.0348          | 0.8506             |
-| 2.7260 | 1900 | 3.3545        | 3.0320          | 0.8508             |
-| 2.8694 | 2000 | 3.3474        | 3.0271          | 0.8513             |
-| 3.0129 | 2100 | 3.3405        | 3.0256          | 0.8518             |
-| 3.1564 | 2200 | 3.3314        | 3.0220          | 0.8524             |
-| 3.2999 | 2300 | 3.3278        | 3.0195          | 0.8528             |
-| 3.4433 | 2400 | 3.3205        | 3.0178          | 0.8530             |
-| 3.5868 | 2500 | 3.3155        | 3.0148          | 0.8539             |
-| 3.7303 | 2600 | 3.3107        | 3.0120          | 0.8556             |
-| 3.8737 | 2700 | 3.3033        | 3.0065          | 0.8574             |
-| 4.0172 | 2800 | 3.2945        | 2.9982          | 0.8584             |
-| 4.1607 | 2900 | 3.2842        | 2.9936          | 0.8590             |
-| 4.3042 | 3000 | 3.281         | 2.9905          | 0.8594             |
-| 4.4476 | 3100 | 3.2765        | 2.9880          | 0.8596             |
-| 4.5911 | 3200 | 3.2711        | 2.9864          | 0.8598             |
-| 4.7346 | 3300 | 3.2676        | 2.9844          | 0.8600             |
-| 4.8780 | 3400 | 3.2657        | 2.9835          | 0.8603             |
-| 5.0215 | 3500 | 3.2631        | 2.9820          | 0.8606             |
-| 5.1650 | 3600 | 3.2576        | 2.9804          | 0.8611             |
-| 5.3085 | 3700 | 3.2536        | 2.9761          | 0.8625             |
-| 5.4519 | 3800 | 3.251         | 2.9738          | 0.8629             |
-| 5.5954 | 3900 | 3.2472        | 2.9724          | 0.8632             |
-| 5.7389 | 4000 | 3.2448        | 2.9709          | 0.8632             |
-| 5.8824 | 4100 | 3.2439        | 2.9697          | 0.8634             |
-| 6.0258 | 4200 | 3.241         | 2.9688          | 0.8635             |
-| 6.1693 | 4300 | 3.2388        | 2.9677          | 0.8638             |
-| 6.3128 | 4400 | 3.238         | 2.9675          | 0.8636             |
-| 6.4562 | 4500 | 3.2365        | 2.9671          | 0.8637             |
-| 6.5997 | 4600 | 3.2341        | 2.9667          | 0.8638             |
-| 6.7432 | 4700 | 3.2334        | 2.9664          | 0.8637             |
-| 6.8867 | 4800 | 3.2335        | 2.9661          | 0.8637             |
-| 7.0301 | 4900 | 3.2341        | 2.9660          | 0.8637             |
-| 7.1736 | 5000 | 3.2314        | 2.9657          | 0.8637             |
 ### Framework Versions

 - feature-extraction
 - dense
 - generated_from_trainer
+- dataset_size:100000
 - loss:MultipleNegativesRankingLoss
 base_model: prajjwal1/bert-small
 widget:
+- source_sentence: How do I calculate IQ?
   sentences:
+  - What is the easiest way to know my IQ?
+  - How do I calculate not IQ ?
+  - What are some creative and innovative business ideas with less investment in India?
+- source_sentence: How can I learn martial arts in my home?
   sentences:
+  - How can I learn martial arts by myself?
+  - What are the advantages and disadvantages of investing in gold?
+  - Can people see that I have looked at their pictures on instagram if I am not following
+    them?
+- source_sentence: When Enterprise picks you up do you have to take them back?
   sentences:
+  - Are there any software Training institute in Tuticorin?
+  - When Enterprise picks you up do you have to take them back?
+  - When Enterprise picks you up do them have to take youback?
+- source_sentence: What are some non-capital goods?
   sentences:
+  - What are capital goods?
+  - How is the value of [math]\pi[/math] calculated?
+  - What are some non-capital goods?
+- source_sentence: What is the QuickBooks technical support phone number in New York?
   sentences:
+  - What caused the Great Depression?
+  - Can I apply for PR in Canada?
+  - Which is the best QuickBooks Hosting Support Number in New York?
 pipeline_tag: sentence-similarity
 library_name: sentence-transformers
 ---
 # SentenceTransformer based on prajjwal1/bert-small
 from sentence_transformers import SentenceTransformer
 # Download from the 🤗 Hub
+model = SentenceTransformer("sentence_transformers_model_id")
 # Run inference
 sentences = [
+    'What is the QuickBooks technical support phone number in New York?',
+    'Which is the best QuickBooks Hosting Support Number in New York?',
+    'Can I apply for PR in Canada?',
 ]
 embeddings = model.encode(sentences)
 print(embeddings.shape)
 # Get the similarity scores for the embeddings
 similarities = model.similarity(embeddings, embeddings)
 print(similarities)
+# tensor([[1.0000, 0.8563, 0.0594],
+#         [0.8563, 1.0000, 0.1245],
+#         [0.0594, 0.1245, 1.0000]])
 ```
 <!--
 *List how the model may foreseeably be misused and address what users ought not to do with the model.*
 -->
 <!--
 ## Bias, Risks and Limitations
 #### Unnamed Dataset
+* Size: 100,000 training samples
+* Columns: <code>sentence_0</code>, <code>sentence_1</code>, and <code>sentence_2</code>
 * Approximate statistics based on the first 1000 samples:
+  |         | sentence_0                                                                        | sentence_1                                                                        | sentence_2                                                                        |
   |:--------|:----------------------------------------------------------------------------------|:----------------------------------------------------------------------------------|:----------------------------------------------------------------------------------|
   | type    | string                                                                            | string                                                                            | string                                                                            |
+  | details | <ul><li>min: 6 tokens</li><li>mean: 15.79 tokens</li><li>max: 66 tokens</li></ul> | <ul><li>min: 6 tokens</li><li>mean: 15.68 tokens</li><li>max: 66 tokens</li></ul> | <ul><li>min: 7 tokens</li><li>mean: 16.37 tokens</li><li>max: 67 tokens</li></ul> |
 * Samples:
+  | sentence_0                                                       | sentence_1                                                       | sentence_2                                                                        |
+  |:-----------------------------------------------------------------|:-----------------------------------------------------------------|:----------------------------------------------------------------------------------|
+  | <code>Is masturbating bad for boys?</code>                       | <code>Is masturbating bad for boys?</code>                       | <code>How harmful or unhealthy is masturbation?</code>                            |
+  | <code>Does a train engine move in reverse?</code>                | <code>Does a train engine move in reverse?</code>                | <code>Time moves forward, not in reverse. Doesn't that make time a vector?</code> |
+  | <code>What is the most badass thing anyone has ever done?</code> | <code>What is the most badass thing anyone has ever done?</code> | <code>anyone is the most badass thing Whathas ever done?</code>                   |
 * Loss: [<code>MultipleNegativesRankingLoss</code>](https://sbert.net/docs/package_reference/sentence_transformer/losses.html#multiplenegativesrankingloss) with these parameters:
   ```json
   {
+      "scale": 20.0,
       "similarity_fct": "cos_sim",
       "gather_across_devices": false
   }
 ### Training Hyperparameters
 #### Non-Default Hyperparameters
+- `per_device_train_batch_size`: 64
+- `per_device_eval_batch_size`: 64
 - `fp16`: True
+- `multi_dataset_batch_sampler`: round_robin
 #### All Hyperparameters
 <details><summary>Click to expand</summary>
 - `overwrite_output_dir`: False
 - `do_predict`: False
+- `eval_strategy`: no
 - `prediction_loss_only`: True
+- `per_device_train_batch_size`: 64
+- `per_device_eval_batch_size`: 64
 - `per_gpu_train_batch_size`: None
 - `per_gpu_eval_batch_size`: None
 - `gradient_accumulation_steps`: 1
 - `eval_accumulation_steps`: None
 - `torch_empty_cache_steps`: None
+- `learning_rate`: 5e-05
+- `weight_decay`: 0.0
 - `adam_beta1`: 0.9
 - `adam_beta2`: 0.999
 - `adam_epsilon`: 1e-08
+- `max_grad_norm`: 1
+- `num_train_epochs`: 3
+- `max_steps`: -1
 - `lr_scheduler_type`: linear
 - `lr_scheduler_kwargs`: {}
+- `warmup_ratio`: 0.0
 - `warmup_steps`: 0
 - `log_level`: passive
 - `log_level_replica`: warning
 - `tpu_num_cores`: None
 - `tpu_metrics_debug`: False
 - `debug`: []
+- `dataloader_drop_last`: False
+- `dataloader_num_workers`: 0
+- `dataloader_prefetch_factor`: None
 - `past_index`: -1
 - `disable_tqdm`: False
 - `remove_unused_columns`: True
 - `label_names`: None
+- `load_best_model_at_end`: False
 - `ignore_data_skip`: False
 - `fsdp`: []
 - `fsdp_min_num_params`: 0
 - `parallelism_config`: None
 - `deepspeed`: None
 - `label_smoothing_factor`: 0.0
+- `optim`: adamw_torch_fused
 - `optim_args`: None
 - `adafactor`: False
 - `group_by_length`: False
 - `length_column_name`: length
 - `project`: huggingface
 - `trackio_space_id`: trackio
+- `ddp_find_unused_parameters`: None
 - `ddp_bucket_cap_mb`: None
 - `ddp_broadcast_buffers`: False
 - `dataloader_pin_memory`: True
 - `dataloader_persistent_workers`: False
 - `skip_memory_metrics`: True
 - `use_legacy_prediction_loop`: False
+- `push_to_hub`: False
 - `resume_from_checkpoint`: None
+- `hub_model_id`: None
 - `hub_strategy`: every_save
 - `hub_private_repo`: None
 - `hub_always_push`: False
 - `neftune_noise_alpha`: None
 - `optim_target_modules`: None
 - `batch_eval_metrics`: False
+- `eval_on_start`: False
 - `use_liger_kernel`: False
 - `liger_kernel_config`: None
 - `eval_use_gather_object`: False
 - `average_tokens_across_devices`: True
 - `prompts`: None
 - `batch_sampler`: batch_sampler
+- `multi_dataset_batch_sampler`: round_robin
 - `router_mapping`: {}
 - `learning_rate_mapping`: {}
 </details>
 ### Training Logs
+| Epoch  | Step | Training Loss |
+|:------:|:----:|:-------------:|
+| 0.3199 | 500  | 0.4294        |
+| 0.6398 | 1000 | 0.1268        |
+| 0.9597 | 1500 | 0.1           |
+| 1.2796 | 2000 | 0.0792        |
+| 1.5995 | 2500 | 0.0706        |
+| 1.9194 | 3000 | 0.0687        |
+| 2.2393 | 3500 | 0.0584        |
+| 2.5592 | 4000 | 0.057         |
+| 2.8791 | 4500 | 0.0581        |
 ### Framework Versions

eval/Information-Retrieval_evaluation_val_results.csv CHANGED Viewed

@@ -512,3 +512,24 @@ epoch,steps,cosine-Accuracy@1,cosine-Accuracy@3,cosine-Accuracy@5,cosine-Precisi
 6.886657101865136,4800,0.79665,0.8745,0.900525,0.79665,0.79665,0.2915,0.8745,0.18010500000000002,0.900525,0.79665,0.8373320833333286,0.8416871428571374,0.863729044462657,0.8445069828327856
 7.03012912482066,4900,0.79655,0.87425,0.9004,0.79655,0.79655,0.2914166666666666,0.87425,0.18008000000000002,0.9004,0.79655,0.8372299999999956,0.8416038690476145,0.8636646643385855,0.844421583046012
 7.173601147776184,5000,0.7966,0.87425,0.900575,0.7966,0.7966,0.2914166666666666,0.87425,0.180115,0.900575,0.7966,0.8372962499999956,0.8416481150793601,0.8637140791780538,0.8444611118975183

 6.886657101865136,4800,0.79665,0.8745,0.900525,0.79665,0.79665,0.2915,0.8745,0.18010500000000002,0.900525,0.79665,0.8373320833333286,0.8416871428571374,0.863729044462657,0.8445069828327856
 7.03012912482066,4900,0.79655,0.87425,0.9004,0.79655,0.79655,0.2914166666666666,0.87425,0.18008000000000002,0.9004,0.79655,0.8372299999999956,0.8416038690476145,0.8636646643385855,0.844421583046012
 7.173601147776184,5000,0.7966,0.87425,0.900575,0.7966,0.7966,0.2914166666666666,0.87425,0.180115,0.900575,0.7966,0.8372962499999956,0.8416481150793601,0.8637140791780538,0.8444611118975183
+0,0,0.7029,0.796025,0.8218,0.7029,0.7029,0.26534166666666664,0.796025,0.16436,0.8218,0.7029,0.751310833333329,0.7556036507936484,0.7794463470929031,0.7588789249204877
+0.14347202295552366,100,0.717,0.84145,0.8689,0.717,0.717,0.2804833333333333,0.84145,0.17378000000000002,0.8689,0.717,0.7807374999999929,0.7850385515872969,0.8133815772130083,0.7880657756004839
+0.28694404591104733,200,0.7095,0.8183,0.844225,0.7095,0.7095,0.27276666666666666,0.8183,0.168845,0.844225,0.7095,0.7656779166666611,0.769763134920631,0.7954360290067323,0.7728197363104299
+0.430416068866571,300,0.708475,0.81125,0.836575,0.708475,0.708475,0.27041666666666664,0.81125,0.16731500000000002,0.836575,0.708475,0.7615387499999949,0.7655175198412657,0.790111712678878,0.768748409930974
+0.5738880918220947,400,0.708375,0.808625,0.834575,0.708375,0.708375,0.2695416666666666,0.808625,0.166915,0.834575,0.708375,0.760417499999995,0.764189960317456,0.788286238687547,0.7675263762171609
+0.7173601147776184,500,0.709675,0.808125,0.83285,0.709675,0.709675,0.269375,0.808125,0.16656999999999997,0.83285,0.709675,0.7605204166666616,0.7645700595238065,0.788585498953581,0.7679165884205366
+0.860832137733142,600,0.711,0.808575,0.833525,0.711,0.711,0.26952499999999996,0.808575,0.16670500000000002,0.833525,0.711,0.7614741666666615,0.7654778571428551,0.7893595531498633,0.7688425453494171
+1.0043041606886658,700,0.712975,0.80835,0.83325,0.712975,0.712975,0.26944999999999997,0.80835,0.16665,0.83325,0.712975,0.762359583333328,0.7665036706349182,0.7902748705292052,0.7698428889418915
+1.1477761836441893,800,0.715325,0.8086,0.8339,0.715325,0.715325,0.2695333333333333,0.8086,0.16678,0.8339,0.715325,0.7637633333333285,0.7679174206349183,0.7914743095975493,0.7712684708256158
+1.291248206599713,900,0.717125,0.809525,0.834425,0.717125,0.717125,0.26984166666666665,0.809525,0.16688499999999998,0.834425,0.717125,0.765000416666662,0.7691483630952358,0.792521966543574,0.7724750035642856
+1.4347202295552366,1000,0.7202,0.810175,0.8354,0.7202,0.7202,0.2700583333333333,0.810175,0.16708,0.8354,0.7202,0.7669316666666613,0.7709732936507905,0.793979995472388,0.7743154167736304
+1.5781922525107603,1100,0.723025,0.810625,0.8357,0.723025,0.723025,0.27020833333333333,0.810625,0.16714,0.8357,0.723025,0.7686441666666616,0.7727187698412665,0.7953972937973883,0.7760878987495129
+1.721664275466284,1200,0.725625,0.8111,0.836425,0.725625,0.725625,0.27036666666666664,0.8111,0.167285,0.836425,0.725625,0.7702162499999953,0.7742493849206322,0.796630347352746,0.777611475763193
+1.8651362984218078,1300,0.728275,0.812575,0.837425,0.728275,0.728275,0.27085833333333337,0.812575,0.16748500000000002,0.837425,0.728275,0.7721183333333277,0.7761844345238061,0.7983938166094018,0.7795115097947791
+2.0086083213773316,1400,0.731825,0.814075,0.838425,0.731825,0.731825,0.27135833333333337,0.814075,0.167685,0.838425,0.731825,0.7745562499999946,0.7785773115079319,0.800378123753988,0.7819358620660539
+2.152080344332855,1500,0.736,0.8153,0.839425,0.736,0.736,0.27176666666666666,0.8153,0.167885,0.839425,0.736,0.7772470833333275,0.7812455158730118,0.8025644479318247,0.7846263981160897
+2.2955523672883786,1600,0.73945,0.816475,0.840475,0.73945,0.73945,0.2721583333333333,0.816475,0.168095,0.840475,0.73945,0.7794904166666612,0.7835206249999958,0.8045946386038447,0.7868534347008024
+2.4390243902439024,1700,0.742875,0.817575,0.8412,0.742875,0.742875,0.27252499999999996,0.817575,0.16824,0.8412,0.742875,0.781825833333328,0.7859333035714243,0.8066779192564868,0.789242473994736
+2.582496413199426,1800,0.74505,0.818425,0.841625,0.74505,0.74505,0.2728083333333334,0.818425,0.168325,0.841625,0.74505,0.7832987499999948,0.7874403472222192,0.8079818137688616,0.790751407389254
+2.72596843615495,1900,0.74615,0.81875,0.842175,0.74615,0.74615,0.27291666666666664,0.81875,0.168435,0.842175,0.74615,0.784008333333328,0.7880891666666628,0.8085014133195643,0.7914131097624945
+2.869440459110473,2000,0.7467,0.81875,0.842275,0.7467,0.7467,0.27291666666666664,0.81875,0.16845500000000002,0.842275,0.7467,0.784354583333328,0.7884659325396792,0.8088581445720447,0.7917670616349511

final_metrics.json CHANGED Viewed

@@ -1,16 +1,16 @@
 {
-  "val_cosine_accuracy@1": 0.7614,
-  "val_cosine_accuracy@3": 0.82615,
-  "val_cosine_accuracy@5": 0.850775,
-  "val_cosine_precision@1": 0.7614,
-  "val_cosine_precision@3": 0.2753833333333333,
-  "val_cosine_precision@5": 0.170155,
-  "val_cosine_recall@1": 0.7614,
-  "val_cosine_recall@3": 0.82615,
-  "val_cosine_recall@5": 0.850775,
-  "val_cosine_ndcg@10": 0.8201550154419872,
-  "val_cosine_mrr@1": 0.7614,
-  "val_cosine_mrr@5": 0.7960862499999959,
-  "val_cosine_mrr@10": 0.8003843253968239,
-  "val_cosine_map@100": 0.8038332983359062
 }

 {
+  "val_cosine_accuracy@1": 0.7966,
+  "val_cosine_accuracy@3": 0.87425,
+  "val_cosine_accuracy@5": 0.900575,
+  "val_cosine_precision@1": 0.7966,
+  "val_cosine_precision@3": 0.2914166666666666,
+  "val_cosine_precision@5": 0.180115,
+  "val_cosine_recall@1": 0.7966,
+  "val_cosine_recall@3": 0.87425,
+  "val_cosine_recall@5": 0.900575,
+  "val_cosine_ndcg@10": 0.8637140791780538,
+  "val_cosine_mrr@1": 0.7966,
+  "val_cosine_mrr@5": 0.8372962499999956,
+  "val_cosine_mrr@10": 0.8416481150793601,
+  "val_cosine_map@100": 0.8444611118975183
 }

model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:5db1674c675ac3fecdc903e40be0f70444de31052681e25137b3a8818fa28d06
 size 114011616

 version https://git-lfs.github.com/spec/v1
+oid sha256:586b42ce1f6d0ffeb16af5e7af005e44c0d889f2fde7117eaa7651cf0314b342
 size 114011616

training_args.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:9d4c139a71ec4d4b0dfbafa31c4950656cd8f9c5e2ab0d688a8a2c2fdc8272c3
 size 6161

 version https://git-lfs.github.com/spec/v1
+oid sha256:e2feedcd06746c7b018202fdf853c68e3b62e0d4abb502a6073ae87307445cab
 size 6161