Add new SparseEncoder model
Browse files- 1_SpladePooling/config.json +5 -0
- README.md +509 -0
- config.json +24 -0
- config_sentence_transformers.json +14 -0
- model.safetensors +3 -0
- modules.json +14 -0
- sentence_bert_config.json +4 -0
- special_tokens_map.json +7 -0
- tokenizer.json +0 -0
- tokenizer_config.json +58 -0
- vocab.txt +0 -0
1_SpladePooling/config.json
ADDED
|
@@ -0,0 +1,5 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"pooling_strategy": "max",
|
| 3 |
+
"activation_function": "relu",
|
| 4 |
+
"word_embedding_dimension": 30522
|
| 5 |
+
}
|
README.md
ADDED
|
@@ -0,0 +1,509 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
---
|
| 2 |
+
language:
|
| 3 |
+
- en
|
| 4 |
+
license: mit
|
| 5 |
+
tags:
|
| 6 |
+
- sentence-transformers
|
| 7 |
+
- sparse-encoder
|
| 8 |
+
- sparse
|
| 9 |
+
- splade
|
| 10 |
+
- generated_from_trainer
|
| 11 |
+
- dataset_size:100000
|
| 12 |
+
- loss:SpladeLoss
|
| 13 |
+
- loss:SparseMultipleNegativesRankingLoss
|
| 14 |
+
- loss:FlopsLoss
|
| 15 |
+
base_model: prajjwal1/bert-tiny
|
| 16 |
+
widget:
|
| 17 |
+
- text: 'The cause of simple liver cysts isn''t known, but they may be the result
|
| 18 |
+
of a malformation present at birth. Rarely, liver cysts may indicate a serious,
|
| 19 |
+
underlying condition such as: 1 Polycystic liver disease, an inherited disorder. Echinococcus
|
| 20 |
+
infection, a parasitic infection.'
|
| 21 |
+
- text: 'Foods that have roughly the same number of calories from fats, calories,
|
| 22 |
+
and protein will be found closer to the center of the pyramid. Read more about
|
| 23 |
+
the Caloric Ratio Pyramid. Caloric Ratio Pyramid for Apples, raw, with skin [Includes
|
| 24 |
+
USDA commodity food A343] Nutrition Facts & Calories. - In Internet Explorer 7
|
| 25 |
+
you will need to adjust the default Shrink To Fit setting. Note: Printing via
|
| 26 |
+
Mac Firefox is currently not supported. - In Internet Explorer 7 you will need
|
| 27 |
+
to adjust the default Shrink To Fit setting.'
|
| 28 |
+
- text: average age virginity loss
|
| 29 |
+
- text: verb (used with object), subsidized, subsidizing. 1. to furnish or aid with
|
| 30 |
+
a subsidy. 2. to purchase the assistance of by the payment of a subsidy. 3. to
|
| 31 |
+
secure the cooperation of by bribery; buy over. Also, especially British, subsidise.
|
| 32 |
+
- text: benefits of health care act
|
| 33 |
+
pipeline_tag: feature-extraction
|
| 34 |
+
library_name: sentence-transformers
|
| 35 |
+
metrics:
|
| 36 |
+
- dot_accuracy@1
|
| 37 |
+
- dot_accuracy@3
|
| 38 |
+
- dot_accuracy@5
|
| 39 |
+
- dot_accuracy@10
|
| 40 |
+
- dot_precision@1
|
| 41 |
+
- dot_precision@3
|
| 42 |
+
- dot_precision@5
|
| 43 |
+
- dot_precision@10
|
| 44 |
+
- dot_recall@1
|
| 45 |
+
- dot_recall@3
|
| 46 |
+
- dot_recall@5
|
| 47 |
+
- dot_recall@10
|
| 48 |
+
- dot_ndcg@10
|
| 49 |
+
- dot_mrr@10
|
| 50 |
+
- dot_map@100
|
| 51 |
+
- query_active_dims
|
| 52 |
+
- query_sparsity_ratio
|
| 53 |
+
- corpus_active_dims
|
| 54 |
+
- corpus_sparsity_ratio
|
| 55 |
+
model-index:
|
| 56 |
+
- name: SPLADE-BERT-Tiny
|
| 57 |
+
results:
|
| 58 |
+
- task:
|
| 59 |
+
type: sparse-information-retrieval
|
| 60 |
+
name: Sparse Information Retrieval
|
| 61 |
+
dataset:
|
| 62 |
+
name: Unknown
|
| 63 |
+
type: unknown
|
| 64 |
+
metrics:
|
| 65 |
+
- type: dot_accuracy@1
|
| 66 |
+
value: 0.7258
|
| 67 |
+
name: Dot Accuracy@1
|
| 68 |
+
- type: dot_accuracy@3
|
| 69 |
+
value: 0.8584
|
| 70 |
+
name: Dot Accuracy@3
|
| 71 |
+
- type: dot_accuracy@5
|
| 72 |
+
value: 0.8986
|
| 73 |
+
name: Dot Accuracy@5
|
| 74 |
+
- type: dot_accuracy@10
|
| 75 |
+
value: 0.9372
|
| 76 |
+
name: Dot Accuracy@10
|
| 77 |
+
- type: dot_precision@1
|
| 78 |
+
value: 0.7258
|
| 79 |
+
name: Dot Precision@1
|
| 80 |
+
- type: dot_precision@3
|
| 81 |
+
value: 0.2861333333333333
|
| 82 |
+
name: Dot Precision@3
|
| 83 |
+
- type: dot_precision@5
|
| 84 |
+
value: 0.17972
|
| 85 |
+
name: Dot Precision@5
|
| 86 |
+
- type: dot_precision@10
|
| 87 |
+
value: 0.09371999999999998
|
| 88 |
+
name: Dot Precision@10
|
| 89 |
+
- type: dot_recall@1
|
| 90 |
+
value: 0.7258
|
| 91 |
+
name: Dot Recall@1
|
| 92 |
+
- type: dot_recall@3
|
| 93 |
+
value: 0.8584
|
| 94 |
+
name: Dot Recall@3
|
| 95 |
+
- type: dot_recall@5
|
| 96 |
+
value: 0.8986
|
| 97 |
+
name: Dot Recall@5
|
| 98 |
+
- type: dot_recall@10
|
| 99 |
+
value: 0.9372
|
| 100 |
+
name: Dot Recall@10
|
| 101 |
+
- type: dot_ndcg@10
|
| 102 |
+
value: 0.8335744020505143
|
| 103 |
+
name: Dot Ndcg@10
|
| 104 |
+
- type: dot_mrr@10
|
| 105 |
+
value: 0.8001331746031765
|
| 106 |
+
name: Dot Mrr@10
|
| 107 |
+
- type: dot_map@100
|
| 108 |
+
value: 0.8025817896431575
|
| 109 |
+
name: Dot Map@100
|
| 110 |
+
- type: query_active_dims
|
| 111 |
+
value: 34.04159927368164
|
| 112 |
+
name: Query Active Dims
|
| 113 |
+
- type: query_sparsity_ratio
|
| 114 |
+
value: 0.9988846864794678
|
| 115 |
+
name: Query Sparsity Ratio
|
| 116 |
+
- type: corpus_active_dims
|
| 117 |
+
value: 192.54740061035156
|
| 118 |
+
name: Corpus Active Dims
|
| 119 |
+
- type: corpus_sparsity_ratio
|
| 120 |
+
value: 0.9936915208501949
|
| 121 |
+
name: Corpus Sparsity Ratio
|
| 122 |
+
---
|
| 123 |
+
|
| 124 |
+
# SPLADE-BERT-Tiny
|
| 125 |
+
|
| 126 |
+
This is a [SPLADE Sparse Encoder](https://www.sbert.net/docs/sparse_encoder/usage/usage.html) model finetuned from [prajjwal1/bert-tiny](https://huggingface.co/prajjwal1/bert-tiny) using the [sentence-transformers](https://www.SBERT.net) library. It maps sentences & paragraphs to a 30522-dimensional sparse vector space and can be used for semantic search and sparse retrieval.
|
| 127 |
+
## Model Details
|
| 128 |
+
|
| 129 |
+
### Model Description
|
| 130 |
+
- **Model Type:** SPLADE Sparse Encoder
|
| 131 |
+
- **Base model:** [prajjwal1/bert-tiny](https://huggingface.co/prajjwal1/bert-tiny) <!-- at revision 6f75de8b60a9f8a2fdf7b69cbd86d9e64bcb3837 -->
|
| 132 |
+
- **Maximum Sequence Length:** 512 tokens
|
| 133 |
+
- **Output Dimensionality:** 30522 dimensions
|
| 134 |
+
- **Similarity Function:** Dot Product
|
| 135 |
+
<!-- - **Training Dataset:** Unknown -->
|
| 136 |
+
- **Language:** en
|
| 137 |
+
- **License:** mit
|
| 138 |
+
|
| 139 |
+
### Model Sources
|
| 140 |
+
|
| 141 |
+
- **Documentation:** [Sentence Transformers Documentation](https://sbert.net)
|
| 142 |
+
- **Documentation:** [Sparse Encoder Documentation](https://www.sbert.net/docs/sparse_encoder/usage/usage.html)
|
| 143 |
+
- **Repository:** [Sentence Transformers on GitHub](https://github.com/UKPLab/sentence-transformers)
|
| 144 |
+
- **Hugging Face:** [Sparse Encoders on Hugging Face](https://huggingface.co/models?library=sentence-transformers&other=sparse-encoder)
|
| 145 |
+
|
| 146 |
+
### Full Model Architecture
|
| 147 |
+
|
| 148 |
+
```
|
| 149 |
+
SparseEncoder(
|
| 150 |
+
(0): MLMTransformer({'max_seq_length': 512, 'do_lower_case': False, 'architecture': 'BertForMaskedLM'})
|
| 151 |
+
(1): SpladePooling({'pooling_strategy': 'max', 'activation_function': 'relu', 'word_embedding_dimension': 30522})
|
| 152 |
+
)
|
| 153 |
+
```
|
| 154 |
+
|
| 155 |
+
## Usage
|
| 156 |
+
|
| 157 |
+
### Direct Usage (Sentence Transformers)
|
| 158 |
+
|
| 159 |
+
First install the Sentence Transformers library:
|
| 160 |
+
|
| 161 |
+
```bash
|
| 162 |
+
pip install -U sentence-transformers
|
| 163 |
+
```
|
| 164 |
+
|
| 165 |
+
Then you can load this model and run inference.
|
| 166 |
+
```python
|
| 167 |
+
from sentence_transformers import SparseEncoder
|
| 168 |
+
|
| 169 |
+
# Download from the 🤗 Hub
|
| 170 |
+
model = SparseEncoder("rasyosef/SPLADE-BERT-Tiny")
|
| 171 |
+
# Run inference
|
| 172 |
+
queries = [
|
| 173 |
+
"benefits of health care act",
|
| 174 |
+
]
|
| 175 |
+
documents = [
|
| 176 |
+
'Every health insurance plan sold in the Marketplace will offer 10 essential health benefits. These essential health benefits include at least the following items and services: 1 Outpatient care—the kind you get without being admitted to a hospital. Trips to the emergency room.',
|
| 177 |
+
'Essential Health Benefits. A set of 10 categories of services health insurance plans must cover under the Affordable Care Act. These include doctors’ services, inpatient and outpatient hospital care, prescription drug coverage, pregnancy and childbirth, mental health services, and more. Some plans cover more services.',
|
| 178 |
+
'Find Baker Hughes in Minot, ND 58701-5870 on Yellowbook. Get contact details or leave a review about this business.',
|
| 179 |
+
]
|
| 180 |
+
query_embeddings = model.encode_query(queries)
|
| 181 |
+
document_embeddings = model.encode_document(documents)
|
| 182 |
+
print(query_embeddings.shape, document_embeddings.shape)
|
| 183 |
+
# [1, 30522] [3, 30522]
|
| 184 |
+
|
| 185 |
+
# Get the similarity scores for the embeddings
|
| 186 |
+
similarities = model.similarity(query_embeddings, document_embeddings)
|
| 187 |
+
print(similarities)
|
| 188 |
+
# tensor([[13.6736, 18.2270, 0.0508]])
|
| 189 |
+
```
|
| 190 |
+
|
| 191 |
+
<!--
|
| 192 |
+
### Direct Usage (Transformers)
|
| 193 |
+
|
| 194 |
+
<details><summary>Click to see the direct usage in Transformers</summary>
|
| 195 |
+
|
| 196 |
+
</details>
|
| 197 |
+
-->
|
| 198 |
+
|
| 199 |
+
<!--
|
| 200 |
+
### Downstream Usage (Sentence Transformers)
|
| 201 |
+
|
| 202 |
+
You can finetune this model on your own dataset.
|
| 203 |
+
|
| 204 |
+
<details><summary>Click to expand</summary>
|
| 205 |
+
|
| 206 |
+
</details>
|
| 207 |
+
-->
|
| 208 |
+
|
| 209 |
+
<!--
|
| 210 |
+
### Out-of-Scope Use
|
| 211 |
+
|
| 212 |
+
*List how the model may foreseeably be misused and address what users ought not to do with the model.*
|
| 213 |
+
-->
|
| 214 |
+
|
| 215 |
+
## Evaluation
|
| 216 |
+
|
| 217 |
+
### Metrics
|
| 218 |
+
|
| 219 |
+
#### Sparse Information Retrieval
|
| 220 |
+
|
| 221 |
+
* Evaluated with [<code>SparseInformationRetrievalEvaluator</code>](https://sbert.net/docs/package_reference/sparse_encoder/evaluation.html#sentence_transformers.sparse_encoder.evaluation.SparseInformationRetrievalEvaluator)
|
| 222 |
+
|
| 223 |
+
| Metric | Value |
|
| 224 |
+
|:----------------------|:-----------|
|
| 225 |
+
| dot_accuracy@1 | 0.7258 |
|
| 226 |
+
| dot_accuracy@3 | 0.8584 |
|
| 227 |
+
| dot_accuracy@5 | 0.8986 |
|
| 228 |
+
| dot_accuracy@10 | 0.9372 |
|
| 229 |
+
| dot_precision@1 | 0.7258 |
|
| 230 |
+
| dot_precision@3 | 0.2861 |
|
| 231 |
+
| dot_precision@5 | 0.1797 |
|
| 232 |
+
| dot_precision@10 | 0.0937 |
|
| 233 |
+
| dot_recall@1 | 0.7258 |
|
| 234 |
+
| dot_recall@3 | 0.8584 |
|
| 235 |
+
| dot_recall@5 | 0.8986 |
|
| 236 |
+
| dot_recall@10 | 0.9372 |
|
| 237 |
+
| **dot_ndcg@10** | **0.8336** |
|
| 238 |
+
| dot_mrr@10 | 0.8001 |
|
| 239 |
+
| dot_map@100 | 0.8026 |
|
| 240 |
+
| query_active_dims | 34.0416 |
|
| 241 |
+
| query_sparsity_ratio | 0.9989 |
|
| 242 |
+
| corpus_active_dims | 192.5474 |
|
| 243 |
+
| corpus_sparsity_ratio | 0.9937 |
|
| 244 |
+
|
| 245 |
+
<!--
|
| 246 |
+
## Bias, Risks and Limitations
|
| 247 |
+
|
| 248 |
+
*What are the known or foreseeable issues stemming from this model? You could also flag here known failure cases or weaknesses of the model.*
|
| 249 |
+
-->
|
| 250 |
+
|
| 251 |
+
<!--
|
| 252 |
+
### Recommendations
|
| 253 |
+
|
| 254 |
+
*What are recommendations with respect to the foreseeable issues? For example, filtering explicit content.*
|
| 255 |
+
-->
|
| 256 |
+
|
| 257 |
+
## Training Details
|
| 258 |
+
|
| 259 |
+
### Training Dataset
|
| 260 |
+
|
| 261 |
+
#### Unnamed Dataset
|
| 262 |
+
|
| 263 |
+
* Size: 100,000 training samples
|
| 264 |
+
* Columns: <code>query</code>, <code>positive</code>, and <code>negative</code>
|
| 265 |
+
* Approximate statistics based on the first 1000 samples:
|
| 266 |
+
| | query | positive | negative |
|
| 267 |
+
|:--------|:---------------------------------------------------------------------------------|:------------------------------------------------------------------------------------|:------------------------------------------------------------------------------------|
|
| 268 |
+
| type | string | string | string |
|
| 269 |
+
| details | <ul><li>min: 4 tokens</li><li>mean: 8.07 tokens</li><li>max: 21 tokens</li></ul> | <ul><li>min: 21 tokens</li><li>mean: 81.21 tokens</li><li>max: 236 tokens</li></ul> | <ul><li>min: 20 tokens</li><li>mean: 79.54 tokens</li><li>max: 187 tokens</li></ul> |
|
| 270 |
+
* Samples:
|
| 271 |
+
| query | positive | negative |
|
| 272 |
+
|:----------------------------------------------------|:-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|:----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
|
| 273 |
+
| <code>can a spouse make you move out</code> | <code>If you and your spouse purchased a home together or the house was bought during the course of the marriage, you likely cannot make a spouse move out during divorce.However, if your spouse is engaging in abusive behavior towards you or towards your children, then you could seek a protective order or an injunction.his would be the case if you purchased the home before marriage; if the home is in your name only and if your spouse has made no contributions to the home during the time you were husband and wife.</code> | <code>Many courts will not award a final restraining order unless there’s been actual physical violence. If you get a temporary restraining order because your spouse threatened violence, it may not be enough to keep him out of the home permanently, pending your divorce.tep 1. Talk it out. Your spouse might not want to be in the house with you any more than you want to live there with him, but he may have some viable concerns about moving out. Try to come up with a plan to address those concerns.</code> |
|
| 274 |
+
| <code>can i get pregnant right before period</code> | <code>simran47. It is difficult to get pregnant right before your period, since ovulation takes place around 2 weeks before the next period. This is true among women who have regular menstrual cycles and ovulation can be predicted to occur around mid-cycle. However, some women have irregular cycles. If your ovulation takes place later than expected (mid-cycle), say on the 24th day of the cycle instead of around 14th day, and fertilization and implantation take place, then pregnancy can occur right before your expected period.</code> | <code>The question about whether you can get pregnant on your period or not is probably the most common of the three situations noted above. The answer is yes and no. The probability is that you would not get pregnant having sex during your period.</code> |
|
| 275 |
+
| <code>average training mileage rates</code> | <code>Beginning on Jan. 1, 2014, the standard mileage rates for the use of a car (also vans, pickups or panel trucks) will be: 1 56 cents per mile for business miles driven. 23.5 cents per mile driven for medical or moving purposes.</code> | <code>Schneider Increases Mileage Rates. Trucker to Trucker is the major online resource for sourcing trucks for sale and for selling all trucking equipment, including rigs and semi trailers. Schneider National is a name known to all truckers and operators and has been in business for over three-quarters of a century.</code> |
|
| 276 |
+
* Loss: [<code>SpladeLoss</code>](https://sbert.net/docs/package_reference/sparse_encoder/losses.html#spladeloss) with these parameters:
|
| 277 |
+
```json
|
| 278 |
+
{
|
| 279 |
+
"loss": "SparseMultipleNegativesRankingLoss(scale=1.0, similarity_fct='dot_score')",
|
| 280 |
+
"document_regularizer_weight": 0.001,
|
| 281 |
+
"query_regularizer_weight": 0.002
|
| 282 |
+
}
|
| 283 |
+
```
|
| 284 |
+
|
| 285 |
+
### Training Hyperparameters
|
| 286 |
+
#### Non-Default Hyperparameters
|
| 287 |
+
|
| 288 |
+
- `eval_strategy`: epoch
|
| 289 |
+
- `per_device_train_batch_size`: 32
|
| 290 |
+
- `per_device_eval_batch_size`: 32
|
| 291 |
+
- `learning_rate`: 4e-05
|
| 292 |
+
- `num_train_epochs`: 4
|
| 293 |
+
- `lr_scheduler_type`: cosine
|
| 294 |
+
- `warmup_ratio`: 0.025
|
| 295 |
+
- `fp16`: True
|
| 296 |
+
- `optim`: adamw_torch_fused
|
| 297 |
+
- `batch_sampler`: no_duplicates
|
| 298 |
+
|
| 299 |
+
#### All Hyperparameters
|
| 300 |
+
<details><summary>Click to expand</summary>
|
| 301 |
+
|
| 302 |
+
- `overwrite_output_dir`: False
|
| 303 |
+
- `do_predict`: False
|
| 304 |
+
- `eval_strategy`: epoch
|
| 305 |
+
- `prediction_loss_only`: True
|
| 306 |
+
- `per_device_train_batch_size`: 32
|
| 307 |
+
- `per_device_eval_batch_size`: 32
|
| 308 |
+
- `per_gpu_train_batch_size`: None
|
| 309 |
+
- `per_gpu_eval_batch_size`: None
|
| 310 |
+
- `gradient_accumulation_steps`: 1
|
| 311 |
+
- `eval_accumulation_steps`: None
|
| 312 |
+
- `torch_empty_cache_steps`: None
|
| 313 |
+
- `learning_rate`: 4e-05
|
| 314 |
+
- `weight_decay`: 0.0
|
| 315 |
+
- `adam_beta1`: 0.9
|
| 316 |
+
- `adam_beta2`: 0.999
|
| 317 |
+
- `adam_epsilon`: 1e-08
|
| 318 |
+
- `max_grad_norm`: 1.0
|
| 319 |
+
- `num_train_epochs`: 4
|
| 320 |
+
- `max_steps`: -1
|
| 321 |
+
- `lr_scheduler_type`: cosine
|
| 322 |
+
- `lr_scheduler_kwargs`: {}
|
| 323 |
+
- `warmup_ratio`: 0.025
|
| 324 |
+
- `warmup_steps`: 0
|
| 325 |
+
- `log_level`: passive
|
| 326 |
+
- `log_level_replica`: warning
|
| 327 |
+
- `log_on_each_node`: True
|
| 328 |
+
- `logging_nan_inf_filter`: True
|
| 329 |
+
- `save_safetensors`: True
|
| 330 |
+
- `save_on_each_node`: False
|
| 331 |
+
- `save_only_model`: False
|
| 332 |
+
- `restore_callback_states_from_checkpoint`: False
|
| 333 |
+
- `no_cuda`: False
|
| 334 |
+
- `use_cpu`: False
|
| 335 |
+
- `use_mps_device`: False
|
| 336 |
+
- `seed`: 42
|
| 337 |
+
- `data_seed`: None
|
| 338 |
+
- `jit_mode_eval`: False
|
| 339 |
+
- `use_ipex`: False
|
| 340 |
+
- `bf16`: False
|
| 341 |
+
- `fp16`: True
|
| 342 |
+
- `fp16_opt_level`: O1
|
| 343 |
+
- `half_precision_backend`: auto
|
| 344 |
+
- `bf16_full_eval`: False
|
| 345 |
+
- `fp16_full_eval`: False
|
| 346 |
+
- `tf32`: None
|
| 347 |
+
- `local_rank`: 0
|
| 348 |
+
- `ddp_backend`: None
|
| 349 |
+
- `tpu_num_cores`: None
|
| 350 |
+
- `tpu_metrics_debug`: False
|
| 351 |
+
- `debug`: []
|
| 352 |
+
- `dataloader_drop_last`: False
|
| 353 |
+
- `dataloader_num_workers`: 0
|
| 354 |
+
- `dataloader_prefetch_factor`: None
|
| 355 |
+
- `past_index`: -1
|
| 356 |
+
- `disable_tqdm`: False
|
| 357 |
+
- `remove_unused_columns`: True
|
| 358 |
+
- `label_names`: None
|
| 359 |
+
- `load_best_model_at_end`: False
|
| 360 |
+
- `ignore_data_skip`: False
|
| 361 |
+
- `fsdp`: []
|
| 362 |
+
- `fsdp_min_num_params`: 0
|
| 363 |
+
- `fsdp_config`: {'min_num_params': 0, 'xla': False, 'xla_fsdp_v2': False, 'xla_fsdp_grad_ckpt': False}
|
| 364 |
+
- `fsdp_transformer_layer_cls_to_wrap`: None
|
| 365 |
+
- `accelerator_config`: {'split_batches': False, 'dispatch_batches': None, 'even_batches': True, 'use_seedable_sampler': True, 'non_blocking': False, 'gradient_accumulation_kwargs': None}
|
| 366 |
+
- `deepspeed`: None
|
| 367 |
+
- `label_smoothing_factor`: 0.0
|
| 368 |
+
- `optim`: adamw_torch_fused
|
| 369 |
+
- `optim_args`: None
|
| 370 |
+
- `adafactor`: False
|
| 371 |
+
- `group_by_length`: False
|
| 372 |
+
- `length_column_name`: length
|
| 373 |
+
- `ddp_find_unused_parameters`: None
|
| 374 |
+
- `ddp_bucket_cap_mb`: None
|
| 375 |
+
- `ddp_broadcast_buffers`: False
|
| 376 |
+
- `dataloader_pin_memory`: True
|
| 377 |
+
- `dataloader_persistent_workers`: False
|
| 378 |
+
- `skip_memory_metrics`: True
|
| 379 |
+
- `use_legacy_prediction_loop`: False
|
| 380 |
+
- `push_to_hub`: False
|
| 381 |
+
- `resume_from_checkpoint`: None
|
| 382 |
+
- `hub_model_id`: None
|
| 383 |
+
- `hub_strategy`: every_save
|
| 384 |
+
- `hub_private_repo`: None
|
| 385 |
+
- `hub_always_push`: False
|
| 386 |
+
- `hub_revision`: None
|
| 387 |
+
- `gradient_checkpointing`: False
|
| 388 |
+
- `gradient_checkpointing_kwargs`: None
|
| 389 |
+
- `include_inputs_for_metrics`: False
|
| 390 |
+
- `include_for_metrics`: []
|
| 391 |
+
- `eval_do_concat_batches`: True
|
| 392 |
+
- `fp16_backend`: auto
|
| 393 |
+
- `push_to_hub_model_id`: None
|
| 394 |
+
- `push_to_hub_organization`: None
|
| 395 |
+
- `mp_parameters`:
|
| 396 |
+
- `auto_find_batch_size`: False
|
| 397 |
+
- `full_determinism`: False
|
| 398 |
+
- `torchdynamo`: None
|
| 399 |
+
- `ray_scope`: last
|
| 400 |
+
- `ddp_timeout`: 1800
|
| 401 |
+
- `torch_compile`: False
|
| 402 |
+
- `torch_compile_backend`: None
|
| 403 |
+
- `torch_compile_mode`: None
|
| 404 |
+
- `include_tokens_per_second`: False
|
| 405 |
+
- `include_num_input_tokens_seen`: False
|
| 406 |
+
- `neftune_noise_alpha`: None
|
| 407 |
+
- `optim_target_modules`: None
|
| 408 |
+
- `batch_eval_metrics`: False
|
| 409 |
+
- `eval_on_start`: False
|
| 410 |
+
- `use_liger_kernel`: False
|
| 411 |
+
- `liger_kernel_config`: None
|
| 412 |
+
- `eval_use_gather_object`: False
|
| 413 |
+
- `average_tokens_across_devices`: False
|
| 414 |
+
- `prompts`: None
|
| 415 |
+
- `batch_sampler`: no_duplicates
|
| 416 |
+
- `multi_dataset_batch_sampler`: proportional
|
| 417 |
+
- `router_mapping`: {}
|
| 418 |
+
- `learning_rate_mapping`: {}
|
| 419 |
+
|
| 420 |
+
</details>
|
| 421 |
+
|
| 422 |
+
### Training Logs
|
| 423 |
+
| Epoch | Step | Training Loss | dot_ndcg@10 |
|
| 424 |
+
|:-----:|:-----:|:-------------:|:-----------:|
|
| 425 |
+
| 1.0 | 3125 | 22.6481 | 0.7960 |
|
| 426 |
+
| 2.0 | 6250 | 0.6743 | 0.8296 |
|
| 427 |
+
| 3.0 | 9375 | 0.5636 | 0.8333 |
|
| 428 |
+
| 4.0 | 12500 | 0.5257 | 0.8336 |
|
| 429 |
+
| -1 | -1 | - | 0.8336 |
|
| 430 |
+
|
| 431 |
+
|
| 432 |
+
### Framework Versions
|
| 433 |
+
- Python: 3.11.13
|
| 434 |
+
- Sentence Transformers: 5.0.0
|
| 435 |
+
- Transformers: 4.53.0
|
| 436 |
+
- PyTorch: 2.6.0+cu124
|
| 437 |
+
- Accelerate: 1.8.1
|
| 438 |
+
- Datasets: 3.6.0
|
| 439 |
+
- Tokenizers: 0.21.2
|
| 440 |
+
|
| 441 |
+
## Citation
|
| 442 |
+
|
| 443 |
+
### BibTeX
|
| 444 |
+
|
| 445 |
+
#### Sentence Transformers
|
| 446 |
+
```bibtex
|
| 447 |
+
@inproceedings{reimers-2019-sentence-bert,
|
| 448 |
+
title = "Sentence-BERT: Sentence Embeddings using Siamese BERT-Networks",
|
| 449 |
+
author = "Reimers, Nils and Gurevych, Iryna",
|
| 450 |
+
booktitle = "Proceedings of the 2019 Conference on Empirical Methods in Natural Language Processing",
|
| 451 |
+
month = "11",
|
| 452 |
+
year = "2019",
|
| 453 |
+
publisher = "Association for Computational Linguistics",
|
| 454 |
+
url = "https://arxiv.org/abs/1908.10084",
|
| 455 |
+
}
|
| 456 |
+
```
|
| 457 |
+
|
| 458 |
+
#### SpladeLoss
|
| 459 |
+
```bibtex
|
| 460 |
+
@misc{formal2022distillationhardnegativesampling,
|
| 461 |
+
title={From Distillation to Hard Negative Sampling: Making Sparse Neural IR Models More Effective},
|
| 462 |
+
author={Thibault Formal and Carlos Lassance and Benjamin Piwowarski and Stéphane Clinchant},
|
| 463 |
+
year={2022},
|
| 464 |
+
eprint={2205.04733},
|
| 465 |
+
archivePrefix={arXiv},
|
| 466 |
+
primaryClass={cs.IR},
|
| 467 |
+
url={https://arxiv.org/abs/2205.04733},
|
| 468 |
+
}
|
| 469 |
+
```
|
| 470 |
+
|
| 471 |
+
#### SparseMultipleNegativesRankingLoss
|
| 472 |
+
```bibtex
|
| 473 |
+
@misc{henderson2017efficient,
|
| 474 |
+
title={Efficient Natural Language Response Suggestion for Smart Reply},
|
| 475 |
+
author={Matthew Henderson and Rami Al-Rfou and Brian Strope and Yun-hsuan Sung and Laszlo Lukacs and Ruiqi Guo and Sanjiv Kumar and Balint Miklos and Ray Kurzweil},
|
| 476 |
+
year={2017},
|
| 477 |
+
eprint={1705.00652},
|
| 478 |
+
archivePrefix={arXiv},
|
| 479 |
+
primaryClass={cs.CL}
|
| 480 |
+
}
|
| 481 |
+
```
|
| 482 |
+
|
| 483 |
+
#### FlopsLoss
|
| 484 |
+
```bibtex
|
| 485 |
+
@article{paria2020minimizing,
|
| 486 |
+
title={Minimizing flops to learn efficient sparse representations},
|
| 487 |
+
author={Paria, Biswajit and Yeh, Chih-Kuan and Yen, Ian EH and Xu, Ning and Ravikumar, Pradeep and P{\'o}czos, Barnab{\'a}s},
|
| 488 |
+
journal={arXiv preprint arXiv:2004.05665},
|
| 489 |
+
year={2020}
|
| 490 |
+
}
|
| 491 |
+
```
|
| 492 |
+
|
| 493 |
+
<!--
|
| 494 |
+
## Glossary
|
| 495 |
+
|
| 496 |
+
*Clearly define terms in order to be accessible across audiences.*
|
| 497 |
+
-->
|
| 498 |
+
|
| 499 |
+
<!--
|
| 500 |
+
## Model Card Authors
|
| 501 |
+
|
| 502 |
+
*Lists the people who create the model card, providing recognition and accountability for the detailed work that goes into its construction.*
|
| 503 |
+
-->
|
| 504 |
+
|
| 505 |
+
<!--
|
| 506 |
+
## Model Card Contact
|
| 507 |
+
|
| 508 |
+
*Provides a way for people who have updates to the Model Card, suggestions, or questions, to contact the Model Card authors.*
|
| 509 |
+
-->
|
config.json
ADDED
|
@@ -0,0 +1,24 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"architectures": [
|
| 3 |
+
"BertForMaskedLM"
|
| 4 |
+
],
|
| 5 |
+
"attention_probs_dropout_prob": 0.1,
|
| 6 |
+
"classifier_dropout": null,
|
| 7 |
+
"hidden_act": "gelu",
|
| 8 |
+
"hidden_dropout_prob": 0.1,
|
| 9 |
+
"hidden_size": 128,
|
| 10 |
+
"initializer_range": 0.02,
|
| 11 |
+
"intermediate_size": 512,
|
| 12 |
+
"layer_norm_eps": 1e-12,
|
| 13 |
+
"max_position_embeddings": 512,
|
| 14 |
+
"model_type": "bert",
|
| 15 |
+
"num_attention_heads": 2,
|
| 16 |
+
"num_hidden_layers": 2,
|
| 17 |
+
"pad_token_id": 0,
|
| 18 |
+
"position_embedding_type": "absolute",
|
| 19 |
+
"torch_dtype": "float32",
|
| 20 |
+
"transformers_version": "4.53.0",
|
| 21 |
+
"type_vocab_size": 2,
|
| 22 |
+
"use_cache": true,
|
| 23 |
+
"vocab_size": 30522
|
| 24 |
+
}
|
config_sentence_transformers.json
ADDED
|
@@ -0,0 +1,14 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"model_type": "SparseEncoder",
|
| 3 |
+
"__version__": {
|
| 4 |
+
"sentence_transformers": "5.0.0",
|
| 5 |
+
"transformers": "4.53.0",
|
| 6 |
+
"pytorch": "2.6.0+cu124"
|
| 7 |
+
},
|
| 8 |
+
"prompts": {
|
| 9 |
+
"query": "",
|
| 10 |
+
"document": ""
|
| 11 |
+
},
|
| 12 |
+
"default_prompt_name": null,
|
| 13 |
+
"similarity_fn_name": "dot"
|
| 14 |
+
}
|
model.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:90fd50282273fc3ba0af07f264845f68fc9d5e0c23813961674d30b94db33f12
|
| 3 |
+
size 17671560
|
modules.json
ADDED
|
@@ -0,0 +1,14 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
[
|
| 2 |
+
{
|
| 3 |
+
"idx": 0,
|
| 4 |
+
"name": "0",
|
| 5 |
+
"path": "",
|
| 6 |
+
"type": "sentence_transformers.sparse_encoder.models.MLMTransformer"
|
| 7 |
+
},
|
| 8 |
+
{
|
| 9 |
+
"idx": 1,
|
| 10 |
+
"name": "1",
|
| 11 |
+
"path": "1_SpladePooling",
|
| 12 |
+
"type": "sentence_transformers.sparse_encoder.models.SpladePooling"
|
| 13 |
+
}
|
| 14 |
+
]
|
sentence_bert_config.json
ADDED
|
@@ -0,0 +1,4 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"max_seq_length": 512,
|
| 3 |
+
"do_lower_case": false
|
| 4 |
+
}
|
special_tokens_map.json
ADDED
|
@@ -0,0 +1,7 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"cls_token": "[CLS]",
|
| 3 |
+
"mask_token": "[MASK]",
|
| 4 |
+
"pad_token": "[PAD]",
|
| 5 |
+
"sep_token": "[SEP]",
|
| 6 |
+
"unk_token": "[UNK]"
|
| 7 |
+
}
|
tokenizer.json
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
tokenizer_config.json
ADDED
|
@@ -0,0 +1,58 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"added_tokens_decoder": {
|
| 3 |
+
"0": {
|
| 4 |
+
"content": "[PAD]",
|
| 5 |
+
"lstrip": false,
|
| 6 |
+
"normalized": false,
|
| 7 |
+
"rstrip": false,
|
| 8 |
+
"single_word": false,
|
| 9 |
+
"special": true
|
| 10 |
+
},
|
| 11 |
+
"100": {
|
| 12 |
+
"content": "[UNK]",
|
| 13 |
+
"lstrip": false,
|
| 14 |
+
"normalized": false,
|
| 15 |
+
"rstrip": false,
|
| 16 |
+
"single_word": false,
|
| 17 |
+
"special": true
|
| 18 |
+
},
|
| 19 |
+
"101": {
|
| 20 |
+
"content": "[CLS]",
|
| 21 |
+
"lstrip": false,
|
| 22 |
+
"normalized": false,
|
| 23 |
+
"rstrip": false,
|
| 24 |
+
"single_word": false,
|
| 25 |
+
"special": true
|
| 26 |
+
},
|
| 27 |
+
"102": {
|
| 28 |
+
"content": "[SEP]",
|
| 29 |
+
"lstrip": false,
|
| 30 |
+
"normalized": false,
|
| 31 |
+
"rstrip": false,
|
| 32 |
+
"single_word": false,
|
| 33 |
+
"special": true
|
| 34 |
+
},
|
| 35 |
+
"103": {
|
| 36 |
+
"content": "[MASK]",
|
| 37 |
+
"lstrip": false,
|
| 38 |
+
"normalized": false,
|
| 39 |
+
"rstrip": false,
|
| 40 |
+
"single_word": false,
|
| 41 |
+
"special": true
|
| 42 |
+
}
|
| 43 |
+
},
|
| 44 |
+
"clean_up_tokenization_spaces": true,
|
| 45 |
+
"cls_token": "[CLS]",
|
| 46 |
+
"do_basic_tokenize": true,
|
| 47 |
+
"do_lower_case": true,
|
| 48 |
+
"extra_special_tokens": {},
|
| 49 |
+
"mask_token": "[MASK]",
|
| 50 |
+
"model_max_length": 1000000000000000019884624838656,
|
| 51 |
+
"never_split": null,
|
| 52 |
+
"pad_token": "[PAD]",
|
| 53 |
+
"sep_token": "[SEP]",
|
| 54 |
+
"strip_accents": null,
|
| 55 |
+
"tokenize_chinese_chars": true,
|
| 56 |
+
"tokenizer_class": "BertTokenizer",
|
| 57 |
+
"unk_token": "[UNK]"
|
| 58 |
+
}
|
vocab.txt
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|