Add new SentenceTransformer model
Browse files- 1_Pooling/config.json +10 -0
- README.md +378 -0
- config.json +25 -0
- config_sentence_transformers.json +14 -0
- model.safetensors +3 -0
- modules.json +20 -0
- sentence_bert_config.json +4 -0
- special_tokens_map.json +37 -0
- tokenizer.json +0 -0
- tokenizer_config.json +65 -0
- vocab.txt +0 -0
1_Pooling/config.json
ADDED
|
@@ -0,0 +1,10 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"word_embedding_dimension": 384,
|
| 3 |
+
"pooling_mode_cls_token": false,
|
| 4 |
+
"pooling_mode_mean_tokens": true,
|
| 5 |
+
"pooling_mode_max_tokens": false,
|
| 6 |
+
"pooling_mode_mean_sqrt_len_tokens": false,
|
| 7 |
+
"pooling_mode_weightedmean_tokens": false,
|
| 8 |
+
"pooling_mode_lasttoken": false,
|
| 9 |
+
"include_prompt": true
|
| 10 |
+
}
|
README.md
ADDED
|
@@ -0,0 +1,378 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
---
|
| 2 |
+
tags:
|
| 3 |
+
- sentence-transformers
|
| 4 |
+
- sentence-similarity
|
| 5 |
+
- feature-extraction
|
| 6 |
+
- dense
|
| 7 |
+
- generated_from_trainer
|
| 8 |
+
- dataset_size:309
|
| 9 |
+
- loss:MultipleNegativesRankingLoss
|
| 10 |
+
base_model: sentence-transformers/all-MiniLM-L6-v2
|
| 11 |
+
widget:
|
| 12 |
+
- source_sentence: Find the element that handles identifies the underlying asset when
|
| 13 |
+
it is an exchange-traded fund.
|
| 14 |
+
sentences:
|
| 15 |
+
- '[exchangeTradedFund]: Identifies the underlying asset when it is an exchange-traded
|
| 16 |
+
fund.'
|
| 17 |
+
- '[mutualFund]: Identifies the class of unit issued by a fund.'
|
| 18 |
+
- '[lcIssuanceFeePayment]: No description available'
|
| 19 |
+
- source_sentence: Find the element that handles specifies the return payments of
|
| 20 |
+
a commodity return swap.
|
| 21 |
+
sentences:
|
| 22 |
+
- '[consentRefused]: No description available'
|
| 23 |
+
- '[commodityReturnLeg]: Specifies the return payments of a commodity return swap.
|
| 24 |
+
There can be one or two return legs. In simple return swaps there is a return
|
| 25 |
+
leg and an interest (a.k.a. ''fee'') leg. In the case of a outperformance swap
|
| 26 |
+
there are two return legs: the return performance of two commodity underlyers
|
| 27 |
+
are swapped. In the case of a fully-funded return swap there is no financing component
|
| 28 |
+
and, therefore, only a single return leg is specified.'
|
| 29 |
+
- '[loanTrade]: No description available'
|
| 30 |
+
- source_sentence: The fpml tag for the parameters for defining the exercise period
|
| 31 |
+
for a european style option together with any rules governing the notional amount
|
| 32 |
+
of the underlying which can be exercised on any given exercise date and any associated
|
| 33 |
+
exercise fees.
|
| 34 |
+
sentences:
|
| 35 |
+
- '[priceSourceDisruption]: If present indicates that the event is considered to
|
| 36 |
+
have occurred if it is impossible to obtain information about the Spot Rate for
|
| 37 |
+
a Valuation Date from the price source specified in the Settlement Rate Option
|
| 38 |
+
that hass been agreed by the parties.'
|
| 39 |
+
- '[europeanExercise]: The parameters for defining the exercise period for a European
|
| 40 |
+
style option together with any rules governing the notional amount of the underlying
|
| 41 |
+
which can be exercised on any given exercise date and any associated exercise
|
| 42 |
+
fees.'
|
| 43 |
+
- '[nonDeliverableSubstitute]: If present indicates that the obligation to pay the
|
| 44 |
+
In-the-Money amount of foreign currency is replaced with an obligation to pay
|
| 45 |
+
an equivalent amount in another currency.'
|
| 46 |
+
- source_sentence: The fpml tag for global element representing a repo.
|
| 47 |
+
sentences:
|
| 48 |
+
- '[facilityPrepaymentFeePayment]: No description available'
|
| 49 |
+
- '[product]: An abstract element used as a place holder for the substituting product
|
| 50 |
+
elements.'
|
| 51 |
+
- '[repo]: Global element representing a Repo.'
|
| 52 |
+
- source_sentence: Can you give me the fpml tag for fxcurvevaluation?
|
| 53 |
+
sentences:
|
| 54 |
+
- '[fxCurveValuation]: No description available'
|
| 55 |
+
- '[loanLegalActionStatement]: No description available'
|
| 56 |
+
- '[loanAllocationSettlementDateAvailability]: No description available'
|
| 57 |
+
pipeline_tag: sentence-similarity
|
| 58 |
+
library_name: sentence-transformers
|
| 59 |
+
---
|
| 60 |
+
|
| 61 |
+
# SentenceTransformer based on sentence-transformers/all-MiniLM-L6-v2
|
| 62 |
+
|
| 63 |
+
This is a [sentence-transformers](https://www.SBERT.net) model finetuned from [sentence-transformers/all-MiniLM-L6-v2](https://huggingface.co/sentence-transformers/all-MiniLM-L6-v2). It maps sentences & paragraphs to a 384-dimensional dense vector space and can be used for semantic textual similarity, semantic search, paraphrase mining, text classification, clustering, and more.
|
| 64 |
+
|
| 65 |
+
## Model Details
|
| 66 |
+
|
| 67 |
+
### Model Description
|
| 68 |
+
- **Model Type:** Sentence Transformer
|
| 69 |
+
- **Base model:** [sentence-transformers/all-MiniLM-L6-v2](https://huggingface.co/sentence-transformers/all-MiniLM-L6-v2) <!-- at revision c9745ed1d9f207416be6d2e6f8de32d1f16199bf -->
|
| 70 |
+
- **Maximum Sequence Length:** 256 tokens
|
| 71 |
+
- **Output Dimensionality:** 384 dimensions
|
| 72 |
+
- **Similarity Function:** Cosine Similarity
|
| 73 |
+
<!-- - **Training Dataset:** Unknown -->
|
| 74 |
+
<!-- - **Language:** Unknown -->
|
| 75 |
+
<!-- - **License:** Unknown -->
|
| 76 |
+
|
| 77 |
+
### Model Sources
|
| 78 |
+
|
| 79 |
+
- **Documentation:** [Sentence Transformers Documentation](https://sbert.net)
|
| 80 |
+
- **Repository:** [Sentence Transformers on GitHub](https://github.com/huggingface/sentence-transformers)
|
| 81 |
+
- **Hugging Face:** [Sentence Transformers on Hugging Face](https://huggingface.co/models?library=sentence-transformers)
|
| 82 |
+
|
| 83 |
+
### Full Model Architecture
|
| 84 |
+
|
| 85 |
+
```
|
| 86 |
+
SentenceTransformer(
|
| 87 |
+
(0): Transformer({'max_seq_length': 256, 'do_lower_case': False, 'architecture': 'BertModel'})
|
| 88 |
+
(1): Pooling({'word_embedding_dimension': 384, 'pooling_mode_cls_token': False, 'pooling_mode_mean_tokens': True, 'pooling_mode_max_tokens': False, 'pooling_mode_mean_sqrt_len_tokens': False, 'pooling_mode_weightedmean_tokens': False, 'pooling_mode_lasttoken': False, 'include_prompt': True})
|
| 89 |
+
(2): Normalize()
|
| 90 |
+
)
|
| 91 |
+
```
|
| 92 |
+
|
| 93 |
+
## Usage
|
| 94 |
+
|
| 95 |
+
### Direct Usage (Sentence Transformers)
|
| 96 |
+
|
| 97 |
+
First install the Sentence Transformers library:
|
| 98 |
+
|
| 99 |
+
```bash
|
| 100 |
+
pip install -U sentence-transformers
|
| 101 |
+
```
|
| 102 |
+
|
| 103 |
+
Then you can load this model and run inference.
|
| 104 |
+
```python
|
| 105 |
+
from sentence_transformers import SentenceTransformer
|
| 106 |
+
|
| 107 |
+
# Download from the 🤗 Hub
|
| 108 |
+
model = SentenceTransformer("thelocalhost/fpml-semantic-model")
|
| 109 |
+
# Run inference
|
| 110 |
+
sentences = [
|
| 111 |
+
'Can you give me the fpml tag for fxcurvevaluation?',
|
| 112 |
+
'[fxCurveValuation]: No description available',
|
| 113 |
+
'[loanLegalActionStatement]: No description available',
|
| 114 |
+
]
|
| 115 |
+
embeddings = model.encode(sentences)
|
| 116 |
+
print(embeddings.shape)
|
| 117 |
+
# (3, 384)
|
| 118 |
+
|
| 119 |
+
# Get the similarity scores for the embeddings
|
| 120 |
+
similarities = model.similarity(embeddings, embeddings)
|
| 121 |
+
print(similarities)
|
| 122 |
+
# tensor([[1.0000, 0.6213, 0.2119],
|
| 123 |
+
# [0.6213, 1.0000, 0.5905],
|
| 124 |
+
# [0.2119, 0.5905, 1.0000]])
|
| 125 |
+
```
|
| 126 |
+
|
| 127 |
+
<!--
|
| 128 |
+
### Direct Usage (Transformers)
|
| 129 |
+
|
| 130 |
+
<details><summary>Click to see the direct usage in Transformers</summary>
|
| 131 |
+
|
| 132 |
+
</details>
|
| 133 |
+
-->
|
| 134 |
+
|
| 135 |
+
<!--
|
| 136 |
+
### Downstream Usage (Sentence Transformers)
|
| 137 |
+
|
| 138 |
+
You can finetune this model on your own dataset.
|
| 139 |
+
|
| 140 |
+
<details><summary>Click to expand</summary>
|
| 141 |
+
|
| 142 |
+
</details>
|
| 143 |
+
-->
|
| 144 |
+
|
| 145 |
+
<!--
|
| 146 |
+
### Out-of-Scope Use
|
| 147 |
+
|
| 148 |
+
*List how the model may foreseeably be misused and address what users ought not to do with the model.*
|
| 149 |
+
-->
|
| 150 |
+
|
| 151 |
+
<!--
|
| 152 |
+
## Bias, Risks and Limitations
|
| 153 |
+
|
| 154 |
+
*What are the known or foreseeable issues stemming from this model? You could also flag here known failure cases or weaknesses of the model.*
|
| 155 |
+
-->
|
| 156 |
+
|
| 157 |
+
<!--
|
| 158 |
+
### Recommendations
|
| 159 |
+
|
| 160 |
+
*What are recommendations with respect to the foreseeable issues? For example, filtering explicit content.*
|
| 161 |
+
-->
|
| 162 |
+
|
| 163 |
+
## Training Details
|
| 164 |
+
|
| 165 |
+
### Training Dataset
|
| 166 |
+
|
| 167 |
+
#### Unnamed Dataset
|
| 168 |
+
|
| 169 |
+
* Size: 309 training samples
|
| 170 |
+
* Columns: <code>sentence_0</code> and <code>sentence_1</code>
|
| 171 |
+
* Approximate statistics based on the first 309 samples:
|
| 172 |
+
| | sentence_0 | sentence_1 |
|
| 173 |
+
|:--------|:----------------------------------------------------------------------------------|:-----------------------------------------------------------------------------------|
|
| 174 |
+
| type | string | string |
|
| 175 |
+
| details | <ul><li>min: 9 tokens</li><li>mean: 16.67 tokens</li><li>max: 58 tokens</li></ul> | <ul><li>min: 9 tokens</li><li>mean: 20.44 tokens</li><li>max: 177 tokens</li></ul> |
|
| 176 |
+
* Samples:
|
| 177 |
+
| sentence_0 | sentence_1 |
|
| 178 |
+
|:----------------------------------------------------------------------------------------------------|:--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
|
| 179 |
+
| <code>Can you give me the fpml tag for loanpartyprofilestatement?</code> | <code>[loanPartyProfileStatement]: No description available</code> |
|
| 180 |
+
| <code>What is the structure of valuationpostponement?</code> | <code>[valuationPostponement]: Indicates that the Valuation Date for the tranaction shall be deemed to be the first Business Day following the day on which the applicable Disruption Event ceases to exist, unless the events continues to exists for more than a maximum number of days.</code> |
|
| 181 |
+
| <code>I need the fpml tag related to allocationacknowledgement and no description available.</code> | <code>[allocationAcknowledgement]: No description available</code> |
|
| 182 |
+
* Loss: [<code>MultipleNegativesRankingLoss</code>](https://sbert.net/docs/package_reference/sentence_transformer/losses.html#multiplenegativesrankingloss) with these parameters:
|
| 183 |
+
```json
|
| 184 |
+
{
|
| 185 |
+
"scale": 20.0,
|
| 186 |
+
"similarity_fct": "cos_sim",
|
| 187 |
+
"gather_across_devices": false
|
| 188 |
+
}
|
| 189 |
+
```
|
| 190 |
+
|
| 191 |
+
### Training Hyperparameters
|
| 192 |
+
#### Non-Default Hyperparameters
|
| 193 |
+
|
| 194 |
+
- `per_device_train_batch_size`: 16
|
| 195 |
+
- `per_device_eval_batch_size`: 16
|
| 196 |
+
- `fp16`: True
|
| 197 |
+
- `multi_dataset_batch_sampler`: round_robin
|
| 198 |
+
|
| 199 |
+
#### All Hyperparameters
|
| 200 |
+
<details><summary>Click to expand</summary>
|
| 201 |
+
|
| 202 |
+
- `overwrite_output_dir`: False
|
| 203 |
+
- `do_predict`: False
|
| 204 |
+
- `eval_strategy`: no
|
| 205 |
+
- `prediction_loss_only`: True
|
| 206 |
+
- `per_device_train_batch_size`: 16
|
| 207 |
+
- `per_device_eval_batch_size`: 16
|
| 208 |
+
- `per_gpu_train_batch_size`: None
|
| 209 |
+
- `per_gpu_eval_batch_size`: None
|
| 210 |
+
- `gradient_accumulation_steps`: 1
|
| 211 |
+
- `eval_accumulation_steps`: None
|
| 212 |
+
- `torch_empty_cache_steps`: None
|
| 213 |
+
- `learning_rate`: 5e-05
|
| 214 |
+
- `weight_decay`: 0.0
|
| 215 |
+
- `adam_beta1`: 0.9
|
| 216 |
+
- `adam_beta2`: 0.999
|
| 217 |
+
- `adam_epsilon`: 1e-08
|
| 218 |
+
- `max_grad_norm`: 1
|
| 219 |
+
- `num_train_epochs`: 3
|
| 220 |
+
- `max_steps`: -1
|
| 221 |
+
- `lr_scheduler_type`: linear
|
| 222 |
+
- `lr_scheduler_kwargs`: {}
|
| 223 |
+
- `warmup_ratio`: 0.0
|
| 224 |
+
- `warmup_steps`: 0
|
| 225 |
+
- `log_level`: passive
|
| 226 |
+
- `log_level_replica`: warning
|
| 227 |
+
- `log_on_each_node`: True
|
| 228 |
+
- `logging_nan_inf_filter`: True
|
| 229 |
+
- `save_safetensors`: True
|
| 230 |
+
- `save_on_each_node`: False
|
| 231 |
+
- `save_only_model`: False
|
| 232 |
+
- `restore_callback_states_from_checkpoint`: False
|
| 233 |
+
- `no_cuda`: False
|
| 234 |
+
- `use_cpu`: False
|
| 235 |
+
- `use_mps_device`: False
|
| 236 |
+
- `seed`: 42
|
| 237 |
+
- `data_seed`: None
|
| 238 |
+
- `jit_mode_eval`: False
|
| 239 |
+
- `bf16`: False
|
| 240 |
+
- `fp16`: True
|
| 241 |
+
- `fp16_opt_level`: O1
|
| 242 |
+
- `half_precision_backend`: auto
|
| 243 |
+
- `bf16_full_eval`: False
|
| 244 |
+
- `fp16_full_eval`: False
|
| 245 |
+
- `tf32`: None
|
| 246 |
+
- `local_rank`: 0
|
| 247 |
+
- `ddp_backend`: None
|
| 248 |
+
- `tpu_num_cores`: None
|
| 249 |
+
- `tpu_metrics_debug`: False
|
| 250 |
+
- `debug`: []
|
| 251 |
+
- `dataloader_drop_last`: False
|
| 252 |
+
- `dataloader_num_workers`: 0
|
| 253 |
+
- `dataloader_prefetch_factor`: None
|
| 254 |
+
- `past_index`: -1
|
| 255 |
+
- `disable_tqdm`: False
|
| 256 |
+
- `remove_unused_columns`: True
|
| 257 |
+
- `label_names`: None
|
| 258 |
+
- `load_best_model_at_end`: False
|
| 259 |
+
- `ignore_data_skip`: False
|
| 260 |
+
- `fsdp`: []
|
| 261 |
+
- `fsdp_min_num_params`: 0
|
| 262 |
+
- `fsdp_config`: {'min_num_params': 0, 'xla': False, 'xla_fsdp_v2': False, 'xla_fsdp_grad_ckpt': False}
|
| 263 |
+
- `fsdp_transformer_layer_cls_to_wrap`: None
|
| 264 |
+
- `accelerator_config`: {'split_batches': False, 'dispatch_batches': None, 'even_batches': True, 'use_seedable_sampler': True, 'non_blocking': False, 'gradient_accumulation_kwargs': None}
|
| 265 |
+
- `parallelism_config`: None
|
| 266 |
+
- `deepspeed`: None
|
| 267 |
+
- `label_smoothing_factor`: 0.0
|
| 268 |
+
- `optim`: adamw_torch_fused
|
| 269 |
+
- `optim_args`: None
|
| 270 |
+
- `adafactor`: False
|
| 271 |
+
- `group_by_length`: False
|
| 272 |
+
- `length_column_name`: length
|
| 273 |
+
- `project`: huggingface
|
| 274 |
+
- `trackio_space_id`: trackio
|
| 275 |
+
- `ddp_find_unused_parameters`: None
|
| 276 |
+
- `ddp_bucket_cap_mb`: None
|
| 277 |
+
- `ddp_broadcast_buffers`: False
|
| 278 |
+
- `dataloader_pin_memory`: True
|
| 279 |
+
- `dataloader_persistent_workers`: False
|
| 280 |
+
- `skip_memory_metrics`: True
|
| 281 |
+
- `use_legacy_prediction_loop`: False
|
| 282 |
+
- `push_to_hub`: False
|
| 283 |
+
- `resume_from_checkpoint`: None
|
| 284 |
+
- `hub_model_id`: None
|
| 285 |
+
- `hub_strategy`: every_save
|
| 286 |
+
- `hub_private_repo`: None
|
| 287 |
+
- `hub_always_push`: False
|
| 288 |
+
- `hub_revision`: None
|
| 289 |
+
- `gradient_checkpointing`: False
|
| 290 |
+
- `gradient_checkpointing_kwargs`: None
|
| 291 |
+
- `include_inputs_for_metrics`: False
|
| 292 |
+
- `include_for_metrics`: []
|
| 293 |
+
- `eval_do_concat_batches`: True
|
| 294 |
+
- `fp16_backend`: auto
|
| 295 |
+
- `push_to_hub_model_id`: None
|
| 296 |
+
- `push_to_hub_organization`: None
|
| 297 |
+
- `mp_parameters`:
|
| 298 |
+
- `auto_find_batch_size`: False
|
| 299 |
+
- `full_determinism`: False
|
| 300 |
+
- `torchdynamo`: None
|
| 301 |
+
- `ray_scope`: last
|
| 302 |
+
- `ddp_timeout`: 1800
|
| 303 |
+
- `torch_compile`: False
|
| 304 |
+
- `torch_compile_backend`: None
|
| 305 |
+
- `torch_compile_mode`: None
|
| 306 |
+
- `include_tokens_per_second`: False
|
| 307 |
+
- `include_num_input_tokens_seen`: no
|
| 308 |
+
- `neftune_noise_alpha`: None
|
| 309 |
+
- `optim_target_modules`: None
|
| 310 |
+
- `batch_eval_metrics`: False
|
| 311 |
+
- `eval_on_start`: False
|
| 312 |
+
- `use_liger_kernel`: False
|
| 313 |
+
- `liger_kernel_config`: None
|
| 314 |
+
- `eval_use_gather_object`: False
|
| 315 |
+
- `average_tokens_across_devices`: True
|
| 316 |
+
- `prompts`: None
|
| 317 |
+
- `batch_sampler`: batch_sampler
|
| 318 |
+
- `multi_dataset_batch_sampler`: round_robin
|
| 319 |
+
- `router_mapping`: {}
|
| 320 |
+
- `learning_rate_mapping`: {}
|
| 321 |
+
|
| 322 |
+
</details>
|
| 323 |
+
|
| 324 |
+
### Framework Versions
|
| 325 |
+
- Python: 3.13.5
|
| 326 |
+
- Sentence Transformers: 5.1.2
|
| 327 |
+
- Transformers: 4.57.1
|
| 328 |
+
- PyTorch: 2.9.0+cpu
|
| 329 |
+
- Accelerate: 1.11.0
|
| 330 |
+
- Datasets: 4.4.1
|
| 331 |
+
- Tokenizers: 0.22.1
|
| 332 |
+
|
| 333 |
+
## Citation
|
| 334 |
+
|
| 335 |
+
### BibTeX
|
| 336 |
+
|
| 337 |
+
#### Sentence Transformers
|
| 338 |
+
```bibtex
|
| 339 |
+
@inproceedings{reimers-2019-sentence-bert,
|
| 340 |
+
title = "Sentence-BERT: Sentence Embeddings using Siamese BERT-Networks",
|
| 341 |
+
author = "Reimers, Nils and Gurevych, Iryna",
|
| 342 |
+
booktitle = "Proceedings of the 2019 Conference on Empirical Methods in Natural Language Processing",
|
| 343 |
+
month = "11",
|
| 344 |
+
year = "2019",
|
| 345 |
+
publisher = "Association for Computational Linguistics",
|
| 346 |
+
url = "https://arxiv.org/abs/1908.10084",
|
| 347 |
+
}
|
| 348 |
+
```
|
| 349 |
+
|
| 350 |
+
#### MultipleNegativesRankingLoss
|
| 351 |
+
```bibtex
|
| 352 |
+
@misc{henderson2017efficient,
|
| 353 |
+
title={Efficient Natural Language Response Suggestion for Smart Reply},
|
| 354 |
+
author={Matthew Henderson and Rami Al-Rfou and Brian Strope and Yun-hsuan Sung and Laszlo Lukacs and Ruiqi Guo and Sanjiv Kumar and Balint Miklos and Ray Kurzweil},
|
| 355 |
+
year={2017},
|
| 356 |
+
eprint={1705.00652},
|
| 357 |
+
archivePrefix={arXiv},
|
| 358 |
+
primaryClass={cs.CL}
|
| 359 |
+
}
|
| 360 |
+
```
|
| 361 |
+
|
| 362 |
+
<!--
|
| 363 |
+
## Glossary
|
| 364 |
+
|
| 365 |
+
*Clearly define terms in order to be accessible across audiences.*
|
| 366 |
+
-->
|
| 367 |
+
|
| 368 |
+
<!--
|
| 369 |
+
## Model Card Authors
|
| 370 |
+
|
| 371 |
+
*Lists the people who create the model card, providing recognition and accountability for the detailed work that goes into its construction.*
|
| 372 |
+
-->
|
| 373 |
+
|
| 374 |
+
<!--
|
| 375 |
+
## Model Card Contact
|
| 376 |
+
|
| 377 |
+
*Provides a way for people who have updates to the Model Card, suggestions, or questions, to contact the Model Card authors.*
|
| 378 |
+
-->
|
config.json
ADDED
|
@@ -0,0 +1,25 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"architectures": [
|
| 3 |
+
"BertModel"
|
| 4 |
+
],
|
| 5 |
+
"attention_probs_dropout_prob": 0.1,
|
| 6 |
+
"classifier_dropout": null,
|
| 7 |
+
"dtype": "float32",
|
| 8 |
+
"gradient_checkpointing": false,
|
| 9 |
+
"hidden_act": "gelu",
|
| 10 |
+
"hidden_dropout_prob": 0.1,
|
| 11 |
+
"hidden_size": 384,
|
| 12 |
+
"initializer_range": 0.02,
|
| 13 |
+
"intermediate_size": 1536,
|
| 14 |
+
"layer_norm_eps": 1e-12,
|
| 15 |
+
"max_position_embeddings": 512,
|
| 16 |
+
"model_type": "bert",
|
| 17 |
+
"num_attention_heads": 12,
|
| 18 |
+
"num_hidden_layers": 6,
|
| 19 |
+
"pad_token_id": 0,
|
| 20 |
+
"position_embedding_type": "absolute",
|
| 21 |
+
"transformers_version": "4.57.1",
|
| 22 |
+
"type_vocab_size": 2,
|
| 23 |
+
"use_cache": true,
|
| 24 |
+
"vocab_size": 30522
|
| 25 |
+
}
|
config_sentence_transformers.json
ADDED
|
@@ -0,0 +1,14 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"__version__": {
|
| 3 |
+
"sentence_transformers": "5.1.2",
|
| 4 |
+
"transformers": "4.57.1",
|
| 5 |
+
"pytorch": "2.9.0+cpu"
|
| 6 |
+
},
|
| 7 |
+
"model_type": "SentenceTransformer",
|
| 8 |
+
"prompts": {
|
| 9 |
+
"query": "",
|
| 10 |
+
"document": ""
|
| 11 |
+
},
|
| 12 |
+
"default_prompt_name": null,
|
| 13 |
+
"similarity_fn_name": "cosine"
|
| 14 |
+
}
|
model.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:f42ed0bb7817c6021108011d72a87240873c341406db5d3a6b726e243293e47f
|
| 3 |
+
size 90864192
|
modules.json
ADDED
|
@@ -0,0 +1,20 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
[
|
| 2 |
+
{
|
| 3 |
+
"idx": 0,
|
| 4 |
+
"name": "0",
|
| 5 |
+
"path": "",
|
| 6 |
+
"type": "sentence_transformers.models.Transformer"
|
| 7 |
+
},
|
| 8 |
+
{
|
| 9 |
+
"idx": 1,
|
| 10 |
+
"name": "1",
|
| 11 |
+
"path": "1_Pooling",
|
| 12 |
+
"type": "sentence_transformers.models.Pooling"
|
| 13 |
+
},
|
| 14 |
+
{
|
| 15 |
+
"idx": 2,
|
| 16 |
+
"name": "2",
|
| 17 |
+
"path": "2_Normalize",
|
| 18 |
+
"type": "sentence_transformers.models.Normalize"
|
| 19 |
+
}
|
| 20 |
+
]
|
sentence_bert_config.json
ADDED
|
@@ -0,0 +1,4 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"max_seq_length": 256,
|
| 3 |
+
"do_lower_case": false
|
| 4 |
+
}
|
special_tokens_map.json
ADDED
|
@@ -0,0 +1,37 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"cls_token": {
|
| 3 |
+
"content": "[CLS]",
|
| 4 |
+
"lstrip": false,
|
| 5 |
+
"normalized": false,
|
| 6 |
+
"rstrip": false,
|
| 7 |
+
"single_word": false
|
| 8 |
+
},
|
| 9 |
+
"mask_token": {
|
| 10 |
+
"content": "[MASK]",
|
| 11 |
+
"lstrip": false,
|
| 12 |
+
"normalized": false,
|
| 13 |
+
"rstrip": false,
|
| 14 |
+
"single_word": false
|
| 15 |
+
},
|
| 16 |
+
"pad_token": {
|
| 17 |
+
"content": "[PAD]",
|
| 18 |
+
"lstrip": false,
|
| 19 |
+
"normalized": false,
|
| 20 |
+
"rstrip": false,
|
| 21 |
+
"single_word": false
|
| 22 |
+
},
|
| 23 |
+
"sep_token": {
|
| 24 |
+
"content": "[SEP]",
|
| 25 |
+
"lstrip": false,
|
| 26 |
+
"normalized": false,
|
| 27 |
+
"rstrip": false,
|
| 28 |
+
"single_word": false
|
| 29 |
+
},
|
| 30 |
+
"unk_token": {
|
| 31 |
+
"content": "[UNK]",
|
| 32 |
+
"lstrip": false,
|
| 33 |
+
"normalized": false,
|
| 34 |
+
"rstrip": false,
|
| 35 |
+
"single_word": false
|
| 36 |
+
}
|
| 37 |
+
}
|
tokenizer.json
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
tokenizer_config.json
ADDED
|
@@ -0,0 +1,65 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"added_tokens_decoder": {
|
| 3 |
+
"0": {
|
| 4 |
+
"content": "[PAD]",
|
| 5 |
+
"lstrip": false,
|
| 6 |
+
"normalized": false,
|
| 7 |
+
"rstrip": false,
|
| 8 |
+
"single_word": false,
|
| 9 |
+
"special": true
|
| 10 |
+
},
|
| 11 |
+
"100": {
|
| 12 |
+
"content": "[UNK]",
|
| 13 |
+
"lstrip": false,
|
| 14 |
+
"normalized": false,
|
| 15 |
+
"rstrip": false,
|
| 16 |
+
"single_word": false,
|
| 17 |
+
"special": true
|
| 18 |
+
},
|
| 19 |
+
"101": {
|
| 20 |
+
"content": "[CLS]",
|
| 21 |
+
"lstrip": false,
|
| 22 |
+
"normalized": false,
|
| 23 |
+
"rstrip": false,
|
| 24 |
+
"single_word": false,
|
| 25 |
+
"special": true
|
| 26 |
+
},
|
| 27 |
+
"102": {
|
| 28 |
+
"content": "[SEP]",
|
| 29 |
+
"lstrip": false,
|
| 30 |
+
"normalized": false,
|
| 31 |
+
"rstrip": false,
|
| 32 |
+
"single_word": false,
|
| 33 |
+
"special": true
|
| 34 |
+
},
|
| 35 |
+
"103": {
|
| 36 |
+
"content": "[MASK]",
|
| 37 |
+
"lstrip": false,
|
| 38 |
+
"normalized": false,
|
| 39 |
+
"rstrip": false,
|
| 40 |
+
"single_word": false,
|
| 41 |
+
"special": true
|
| 42 |
+
}
|
| 43 |
+
},
|
| 44 |
+
"clean_up_tokenization_spaces": false,
|
| 45 |
+
"cls_token": "[CLS]",
|
| 46 |
+
"do_basic_tokenize": true,
|
| 47 |
+
"do_lower_case": true,
|
| 48 |
+
"extra_special_tokens": {},
|
| 49 |
+
"mask_token": "[MASK]",
|
| 50 |
+
"max_length": 128,
|
| 51 |
+
"model_max_length": 256,
|
| 52 |
+
"never_split": null,
|
| 53 |
+
"pad_to_multiple_of": null,
|
| 54 |
+
"pad_token": "[PAD]",
|
| 55 |
+
"pad_token_type_id": 0,
|
| 56 |
+
"padding_side": "right",
|
| 57 |
+
"sep_token": "[SEP]",
|
| 58 |
+
"stride": 0,
|
| 59 |
+
"strip_accents": null,
|
| 60 |
+
"tokenize_chinese_chars": true,
|
| 61 |
+
"tokenizer_class": "BertTokenizer",
|
| 62 |
+
"truncation_side": "right",
|
| 63 |
+
"truncation_strategy": "longest_first",
|
| 64 |
+
"unk_token": "[UNK]"
|
| 65 |
+
}
|
vocab.txt
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|