Upload 46 files
Browse files
- HPEControlMapper/.gitattributes +35 -0
- HPEControlMapper/README.md +3 -0
- HPEControlMapper/models/mpnet-base-control-triplet/checkpoint-600/1_Pooling/config.json +10 -0
- HPEControlMapper/models/mpnet-base-control-triplet/checkpoint-600/README.md +429 -0
- HPEControlMapper/models/mpnet-base-control-triplet/checkpoint-600/config.json +23 -0
- HPEControlMapper/models/mpnet-base-control-triplet/checkpoint-600/config_sentence_transformers.json +10 -0
- HPEControlMapper/models/mpnet-base-control-triplet/checkpoint-600/model.safetensors +3 -0
- HPEControlMapper/models/mpnet-base-control-triplet/checkpoint-600/modules.json +14 -0
- HPEControlMapper/models/mpnet-base-control-triplet/checkpoint-600/optimizer.pt +3 -0
- HPEControlMapper/models/mpnet-base-control-triplet/checkpoint-600/rng_state.pth +3 -0
- HPEControlMapper/models/mpnet-base-control-triplet/checkpoint-600/scheduler.pt +3 -0
- HPEControlMapper/models/mpnet-base-control-triplet/checkpoint-600/sentence_bert_config.json +4 -0
- HPEControlMapper/models/mpnet-base-control-triplet/checkpoint-600/special_tokens_map.json +51 -0
- HPEControlMapper/models/mpnet-base-control-triplet/checkpoint-600/tokenizer.json +0 -0
- HPEControlMapper/models/mpnet-base-control-triplet/checkpoint-600/tokenizer_config.json +66 -0
- HPEControlMapper/models/mpnet-base-control-triplet/checkpoint-600/trainer_state.json +130 -0
- HPEControlMapper/models/mpnet-base-control-triplet/checkpoint-600/training_args.bin +3 -0
- HPEControlMapper/models/mpnet-base-control-triplet/checkpoint-600/vocab.txt +0 -0
- HPEControlMapper/models/mpnet-base-control-triplet/checkpoint-625/1_Pooling/config.json +10 -0
- HPEControlMapper/models/mpnet-base-control-triplet/checkpoint-625/README.md +429 -0
- HPEControlMapper/models/mpnet-base-control-triplet/checkpoint-625/config.json +23 -0
- HPEControlMapper/models/mpnet-base-control-triplet/checkpoint-625/config_sentence_transformers.json +10 -0
- HPEControlMapper/models/mpnet-base-control-triplet/checkpoint-625/model.safetensors +3 -0
- HPEControlMapper/models/mpnet-base-control-triplet/checkpoint-625/modules.json +14 -0
- HPEControlMapper/models/mpnet-base-control-triplet/checkpoint-625/optimizer.pt +3 -0
- HPEControlMapper/models/mpnet-base-control-triplet/checkpoint-625/rng_state.pth +3 -0
- HPEControlMapper/models/mpnet-base-control-triplet/checkpoint-625/scheduler.pt +3 -0
- HPEControlMapper/models/mpnet-base-control-triplet/checkpoint-625/sentence_bert_config.json +4 -0
- HPEControlMapper/models/mpnet-base-control-triplet/checkpoint-625/special_tokens_map.json +51 -0
- HPEControlMapper/models/mpnet-base-control-triplet/checkpoint-625/tokenizer.json +0 -0
- HPEControlMapper/models/mpnet-base-control-triplet/checkpoint-625/tokenizer_config.json +66 -0
- HPEControlMapper/models/mpnet-base-control-triplet/checkpoint-625/trainer_state.json +130 -0
- HPEControlMapper/models/mpnet-base-control-triplet/checkpoint-625/training_args.bin +3 -0
- HPEControlMapper/models/mpnet-base-control-triplet/checkpoint-625/vocab.txt +0 -0
- HPEControlMapper/models/mpnet-base-control-triplet/eval/triplet_evaluation_NIST-control-dev_results.csv +147 -0
- HPEControlMapper/models/mpnet-base-control-triplet/final/1_Pooling/config.json +10 -0
- HPEControlMapper/models/mpnet-base-control-triplet/final/README.md +440 -0
- HPEControlMapper/models/mpnet-base-control-triplet/final/config.json +23 -0
- HPEControlMapper/models/mpnet-base-control-triplet/final/config_sentence_transformers.json +10 -0
- HPEControlMapper/models/mpnet-base-control-triplet/final/model.safetensors +3 -0
- HPEControlMapper/models/mpnet-base-control-triplet/final/modules.json +14 -0
- HPEControlMapper/models/mpnet-base-control-triplet/final/sentence_bert_config.json +4 -0
- HPEControlMapper/models/mpnet-base-control-triplet/final/special_tokens_map.json +51 -0
- HPEControlMapper/models/mpnet-base-control-triplet/final/tokenizer.json +0 -0
- HPEControlMapper/models/mpnet-base-control-triplet/final/tokenizer_config.json +66 -0
- HPEControlMapper/models/mpnet-base-control-triplet/final/vocab.txt +0 -0
HPEControlMapper/.gitattributes
ADDED
|
@@ -0,0 +1,35 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
*.7z filter=lfs diff=lfs merge=lfs -text
|
| 2 |
+
*.arrow filter=lfs diff=lfs merge=lfs -text
|
| 3 |
+
*.bin filter=lfs diff=lfs merge=lfs -text
|
| 4 |
+
*.bz2 filter=lfs diff=lfs merge=lfs -text
|
| 5 |
+
*.ckpt filter=lfs diff=lfs merge=lfs -text
|
| 6 |
+
*.ftz filter=lfs diff=lfs merge=lfs -text
|
| 7 |
+
*.gz filter=lfs diff=lfs merge=lfs -text
|
| 8 |
+
*.h5 filter=lfs diff=lfs merge=lfs -text
|
| 9 |
+
*.joblib filter=lfs diff=lfs merge=lfs -text
|
| 10 |
+
*.lfs.* filter=lfs diff=lfs merge=lfs -text
|
| 11 |
+
*.mlmodel filter=lfs diff=lfs merge=lfs -text
|
| 12 |
+
*.model filter=lfs diff=lfs merge=lfs -text
|
| 13 |
+
*.msgpack filter=lfs diff=lfs merge=lfs -text
|
| 14 |
+
*.npy filter=lfs diff=lfs merge=lfs -text
|
| 15 |
+
*.npz filter=lfs diff=lfs merge=lfs -text
|
| 16 |
+
*.onnx filter=lfs diff=lfs merge=lfs -text
|
| 17 |
+
*.ot filter=lfs diff=lfs merge=lfs -text
|
| 18 |
+
*.parquet filter=lfs diff=lfs merge=lfs -text
|
| 19 |
+
*.pb filter=lfs diff=lfs merge=lfs -text
|
| 20 |
+
*.pickle filter=lfs diff=lfs merge=lfs -text
|
| 21 |
+
*.pkl filter=lfs diff=lfs merge=lfs -text
|
| 22 |
+
*.pt filter=lfs diff=lfs merge=lfs -text
|
| 23 |
+
*.pth filter=lfs diff=lfs merge=lfs -text
|
| 24 |
+
*.rar filter=lfs diff=lfs merge=lfs -text
|
| 25 |
+
*.safetensors filter=lfs diff=lfs merge=lfs -text
|
| 26 |
+
saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
| 27 |
+
*.tar.* filter=lfs diff=lfs merge=lfs -text
|
| 28 |
+
*.tar filter=lfs diff=lfs merge=lfs -text
|
| 29 |
+
*.tflite filter=lfs diff=lfs merge=lfs -text
|
| 30 |
+
*.tgz filter=lfs diff=lfs merge=lfs -text
|
| 31 |
+
*.wasm filter=lfs diff=lfs merge=lfs -text
|
| 32 |
+
*.xz filter=lfs diff=lfs merge=lfs -text
|
| 33 |
+
*.zip filter=lfs diff=lfs merge=lfs -text
|
| 34 |
+
*.zst filter=lfs diff=lfs merge=lfs -text
|
| 35 |
+
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
HPEControlMapper/README.md
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
---
|
| 2 |
+
license: cc
|
| 3 |
+
---
|
HPEControlMapper/models/mpnet-base-control-triplet/checkpoint-600/1_Pooling/config.json
ADDED
|
@@ -0,0 +1,10 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"word_embedding_dimension": 768,
|
| 3 |
+
"pooling_mode_cls_token": false,
|
| 4 |
+
"pooling_mode_mean_tokens": true,
|
| 5 |
+
"pooling_mode_max_tokens": false,
|
| 6 |
+
"pooling_mode_mean_sqrt_len_tokens": false,
|
| 7 |
+
"pooling_mode_weightedmean_tokens": false,
|
| 8 |
+
"pooling_mode_lasttoken": false,
|
| 9 |
+
"include_prompt": true
|
| 10 |
+
}
|
HPEControlMapper/models/mpnet-base-control-triplet/checkpoint-600/README.md
ADDED
|
@@ -0,0 +1,429 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
---
|
| 2 |
+
language:
|
| 3 |
+
- en
|
| 4 |
+
license: apache-2.0
|
| 5 |
+
tags:
|
| 6 |
+
- sentence-transformers
|
| 7 |
+
- sentence-similarity
|
| 8 |
+
- feature-extraction
|
| 9 |
+
- generated_from_trainer
|
| 10 |
+
- dataset_size:10000
|
| 11 |
+
- loss:TripletLoss
|
| 12 |
+
base_model: microsoft/mpnet-base
|
| 13 |
+
widget:
|
| 14 |
+
- source_sentence: Use hardware-based methods where available to guarantee role-based
|
| 15 |
+
access control cannot be bypassed.
|
| 16 |
+
sentences:
|
| 17 |
+
- Related control that reinforces stability and assurance in brute force login prevention
|
| 18 |
+
use cases.
|
| 19 |
+
- Audit session replay defense_b11_8 as part of continuous security assessment processes.
|
| 20 |
+
- Core functionality needed to enforce effective role-based access control mechanisms.
|
| 21 |
+
- source_sentence: Provide full-feature access to security enhancements in NVIDIA
|
| 22 |
+
GPU firmware.
|
| 23 |
+
sentences:
|
| 24 |
+
- Implement secure communication channels between host and GPU.
|
| 25 |
+
- A little boy blows bubbles outdoors.
|
| 26 |
+
- Use HTTPS inspection to detect man-in-the-middle attack attempts.
|
| 27 |
+
- source_sentence: Validate source authenticity by requiring signed code in all components.
|
| 28 |
+
sentences:
|
| 29 |
+
- Firewalls are activated by default and preloaded with security policies.
|
| 30 |
+
- Enforce cryptographic validation on third-party software inputs.
|
| 31 |
+
- Display productivity summaries on a weekly dashboard.
|
| 32 |
+
- source_sentence: Mandate organization-wide adherence to policies enforcing siem
|
| 33 |
+
integration using centrally managed tools.
|
| 34 |
+
sentences:
|
| 35 |
+
- Review policies around anonymous proxy blocking_b11_50 to avoid unexpected vectors
|
| 36 |
+
of compromise.
|
| 37 |
+
- Implement key management systems that use secure encryption algorithms.
|
| 38 |
+
- This measure directly supports secure handling within siem integration implementations.
|
| 39 |
+
- source_sentence: Mandate organization-wide adherence to policies enforcing virtual
|
| 40 |
+
machine isolation using centrally managed tools.
|
| 41 |
+
sentences:
|
| 42 |
+
- Adult males stand in front of a brick wall near something made of metal.
|
| 43 |
+
- Monitor for issues related to redundant login blocking_b12_1 as part of extended
|
| 44 |
+
security hygiene.
|
| 45 |
+
- A widely recommended control paired with proper virtual machine isolation implementations.
|
| 46 |
+
pipeline_tag: sentence-similarity
|
| 47 |
+
library_name: sentence-transformers
|
| 48 |
+
metrics:
|
| 49 |
+
- cosine_accuracy
|
| 50 |
+
model-index:
|
| 51 |
+
- name: MPNet base trained on NIST Controls
|
| 52 |
+
results:
|
| 53 |
+
- task:
|
| 54 |
+
type: triplet
|
| 55 |
+
name: Triplet
|
| 56 |
+
dataset:
|
| 57 |
+
name: NIST control dev
|
| 58 |
+
type: NIST-control-dev
|
| 59 |
+
metrics:
|
| 60 |
+
- type: cosine_accuracy
|
| 61 |
+
value: 0.7048740386962891
|
| 62 |
+
name: Cosine Accuracy
|
| 63 |
+
---
|
| 64 |
+
|
| 65 |
+
# MPNet base trained on NIST Controls
|
| 66 |
+
|
| 67 |
+
This is a [sentence-transformers](https://www.SBERT.net) model finetuned from [microsoft/mpnet-base](https://huggingface.co/microsoft/mpnet-base) on a CSV dataset of 10,000 (anchor, positive, negative) triplets. It maps sentences & paragraphs to a 768-dimensional dense vector space and can be used for semantic textual similarity, semantic search, paraphrase mining, text classification, clustering, and more.
|
| 68 |
+
|
| 69 |
+
## Model Details
|
| 70 |
+
|
| 71 |
+
### Model Description
|
| 72 |
+
- **Model Type:** Sentence Transformer
|
| 73 |
+
- **Base model:** [microsoft/mpnet-base](https://huggingface.co/microsoft/mpnet-base) <!-- at revision 6996ce1e91bd2a9c7d7f61daec37463394f73f09 -->
|
| 74 |
+
- **Maximum Sequence Length:** 512 tokens
|
| 75 |
+
- **Output Dimensionality:** 768 dimensions
|
| 76 |
+
- **Similarity Function:** Cosine Similarity
|
| 77 |
+
- **Training Dataset:**
|
| 78 |
+
- csv
|
| 79 |
+
- **Language:** en
|
| 80 |
+
- **License:** apache-2.0
|
| 81 |
+
|
| 82 |
+
### Model Sources
|
| 83 |
+
|
| 84 |
+
- **Documentation:** [Sentence Transformers Documentation](https://sbert.net)
|
| 85 |
+
- **Repository:** [Sentence Transformers on GitHub](https://github.com/UKPLab/sentence-transformers)
|
| 86 |
+
- **Hugging Face:** [Sentence Transformers on Hugging Face](https://huggingface.co/models?library=sentence-transformers)
|
| 87 |
+
|
| 88 |
+
### Full Model Architecture
|
| 89 |
+
|
| 90 |
+
```
|
| 91 |
+
SentenceTransformer(
|
| 92 |
+
(0): Transformer({'max_seq_length': 512, 'do_lower_case': False}) with Transformer model: MPNetModel
|
| 93 |
+
(1): Pooling({'word_embedding_dimension': 768, 'pooling_mode_cls_token': False, 'pooling_mode_mean_tokens': True, 'pooling_mode_max_tokens': False, 'pooling_mode_mean_sqrt_len_tokens': False, 'pooling_mode_weightedmean_tokens': False, 'pooling_mode_lasttoken': False, 'include_prompt': True})
|
| 94 |
+
)
|
| 95 |
+
```
|
| 96 |
+
|
| 97 |
+
## Usage
|
| 98 |
+
|
| 99 |
+
### Direct Usage (Sentence Transformers)
|
| 100 |
+
|
| 101 |
+
First install the Sentence Transformers library:
|
| 102 |
+
|
| 103 |
+
```bash
|
| 104 |
+
pip install -U sentence-transformers
|
| 105 |
+
```
|
| 106 |
+
|
| 107 |
+
Then you can load this model and run inference.
|
| 108 |
+
```python
|
| 109 |
+
from sentence_transformers import SentenceTransformer
|
| 110 |
+
|
| 111 |
+
# Download from the 🤗 Hub
|
| 112 |
+
model = SentenceTransformer("sentence_transformers_model_id")
|
| 113 |
+
# Run inference
|
| 114 |
+
sentences = [
|
| 115 |
+
'Mandate organization-wide adherence to policies enforcing virtual machine isolation using centrally managed tools.',
|
| 116 |
+
'A widely recommended control paired with proper virtual machine isolation implementations.',
|
| 117 |
+
'Monitor for issues related to redundant login blocking_b12_1 as part of extended security hygiene.',
|
| 118 |
+
]
|
| 119 |
+
embeddings = model.encode(sentences)
|
| 120 |
+
print(embeddings.shape)
|
| 121 |
+
# [3, 768]
|
| 122 |
+
|
| 123 |
+
# Get the similarity scores for the embeddings
|
| 124 |
+
similarities = model.similarity(embeddings, embeddings)
|
| 125 |
+
print(similarities.shape)
|
| 126 |
+
# [3, 3]
|
| 127 |
+
```
|
| 128 |
+
|
| 129 |
+
<!--
|
| 130 |
+
### Direct Usage (Transformers)
|
| 131 |
+
|
| 132 |
+
<details><summary>Click to see the direct usage in Transformers</summary>
|
| 133 |
+
|
| 134 |
+
</details>
|
| 135 |
+
-->
|
| 136 |
+
|
| 137 |
+
<!--
|
| 138 |
+
### Downstream Usage (Sentence Transformers)
|
| 139 |
+
|
| 140 |
+
You can finetune this model on your own dataset.
|
| 141 |
+
|
| 142 |
+
<details><summary>Click to expand</summary>
|
| 143 |
+
|
| 144 |
+
</details>
|
| 145 |
+
-->
|
| 146 |
+
|
| 147 |
+
<!--
|
| 148 |
+
### Out-of-Scope Use
|
| 149 |
+
|
| 150 |
+
*List how the model may foreseeably be misused and address what users ought not to do with the model.*
|
| 151 |
+
-->
|
| 152 |
+
|
| 153 |
+
## Evaluation
|
| 154 |
+
|
| 155 |
+
### Metrics
|
| 156 |
+
|
| 157 |
+
#### Triplet
|
| 158 |
+
|
| 159 |
+
* Dataset: `NIST-control-dev`
|
| 160 |
+
* Evaluated with [<code>TripletEvaluator</code>](https://sbert.net/docs/package_reference/sentence_transformer/evaluation.html#sentence_transformers.evaluation.TripletEvaluator)
|
| 161 |
+
|
| 162 |
+
| Metric | Value |
|
| 163 |
+
|:--------------------|:-----------|
|
| 164 |
+
| **cosine_accuracy** | **0.7049** |
|
| 165 |
+
|
| 166 |
+
<!--
|
| 167 |
+
## Bias, Risks and Limitations
|
| 168 |
+
|
| 169 |
+
*What are the known or foreseeable issues stemming from this model? You could also flag here known failure cases or weaknesses of the model.*
|
| 170 |
+
-->
|
| 171 |
+
|
| 172 |
+
<!--
|
| 173 |
+
### Recommendations
|
| 174 |
+
|
| 175 |
+
*What are recommendations with respect to the foreseeable issues? For example, filtering explicit content.*
|
| 176 |
+
-->
|
| 177 |
+
|
| 178 |
+
## Training Details
|
| 179 |
+
|
| 180 |
+
### Training Dataset
|
| 181 |
+
|
| 182 |
+
#### csv
|
| 183 |
+
|
| 184 |
+
* Dataset: csv
|
| 185 |
+
* Size: 10,000 training samples
|
| 186 |
+
* Columns: <code>anchor</code>, <code>positive</code>, and <code>negative</code>
|
| 187 |
+
* Approximate statistics based on the first 1000 samples:
|
| 188 |
+
| | anchor | positive | negative |
|
| 189 |
+
|:--------|:------------------------------------------------------------------------------------|:------------------------------------------------------------------------------------|:----------------------------------------------------------------------------------|
|
| 190 |
+
| type | string | string | string |
|
| 191 |
+
| details | <ul><li>min: 10 tokens</li><li>mean: 21.39 tokens</li><li>max: 280 tokens</li></ul> | <ul><li>min: 10 tokens</li><li>mean: 17.96 tokens</li><li>max: 171 tokens</li></ul> | <ul><li>min: 9 tokens</li><li>mean: 20.27 tokens</li><li>max: 86 tokens</li></ul> |
|
| 192 |
+
* Samples:
|
| 193 |
+
| anchor | positive | negative |
|
| 194 |
+
|:---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|:----------------------------------------------------------------------------------------------------------------------------------|:------------------------------------------------------------------------------------------|
|
| 195 |
+
| <code>Audit Logs: A secure record of security-sensitive activity required for the audit trail. The audit log is a record of an activity by a system a user or an application. Audit logs are required for the audit trail. Examples of audit events are listed in the requirements section below. It is important that these events are in the audit log as part of HPE's commitment to CISA's Secure by Design Pledge.</code> | <code>Security Event Correlation (v3): Systems that correlate audit logs with security events to detect anomalies.</code> | <code>Remote Access Protection (v3): Controls to secure remote access to systems.</code> |
|
| 196 |
+
| <code>One to One Credential Mapping: This requirement involves getting the specific user login information correct in the audit log down to the specific user and not just listing the role or process ID value.</code> | <code>User Identity Verification (v3): Mechanism to verify user identities before granting access to resources.</code> | <code>Incident Response Testing (v3): Regular testing of incident response plans.</code> |
|
| 197 |
+
| <code>Reproducible Builds: Be able to recreate bit-by-bit identical copies of signed code. Ensure that signed code really came from the source code developed by HPE and possibly reviewed by a third-party.</code> | <code>Build Integrity Checking (v3): Verification process to ensure that the build environment has not been tampered with.</code> | <code>Backup and Recovery Planning (v3): Ensure data is backed up and recoverable.</code> |
|
| 198 |
+
* Loss: [<code>TripletLoss</code>](https://sbert.net/docs/package_reference/sentence_transformer/losses.html#tripletloss) with these parameters:
|
| 199 |
+
```json
|
| 200 |
+
{
|
| 201 |
+
"distance_metric": "TripletDistanceMetric.EUCLIDEAN",
|
| 202 |
+
"triplet_margin": 5
|
| 203 |
+
}
|
| 204 |
+
```
|
| 205 |
+
|
| 206 |
+
### Evaluation Dataset
|
| 207 |
+
|
| 208 |
+
#### csv
|
| 209 |
+
|
| 210 |
+
* Dataset: csv
|
| 211 |
+
* Size: 6,709 evaluation samples
|
| 212 |
+
* Columns: <code>anchor</code>, <code>positive</code>, and <code>negative</code>
|
| 213 |
+
* Approximate statistics based on the first 1000 samples:
|
| 214 |
+
| | anchor | positive | negative |
|
| 215 |
+
|:--------|:----------------------------------------------------------------------------------|:----------------------------------------------------------------------------------|:----------------------------------------------------------------------------------|
|
| 216 |
+
| type | string | string | string |
|
| 217 |
+
| details | <ul><li>min: 8 tokens</li><li>mean: 18.08 tokens</li><li>max: 54 tokens</li></ul> | <ul><li>min: 4 tokens</li><li>mean: 13.64 tokens</li><li>max: 33 tokens</li></ul> | <ul><li>min: 5 tokens</li><li>mean: 18.23 tokens</li><li>max: 34 tokens</li></ul> |
|
| 218 |
+
* Samples:
|
| 219 |
+
| anchor | positive | negative |
|
| 220 |
+
|:----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|:--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|:--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
|
| 221 |
+
| <code>Only allow approved and secured components to run in production environments validating them with cryptographic checks and requiring authenticated configuration changes.</code> | <code>Deploy only approved applications in production environments. Use secure startup methods to validate software integrity. Require authentication for all system configuration changes.</code> | <code>Issue smart cards to employees as physical access tokens. Maintain a contact list for emergency communications. Label system components for asset tracking.</code> |
|
| 222 |
+
| <code>Create system architectures that include failover tamper detection and rollback protections to ensure operations continue even under cyberattack.</code> | <code>Design systems to continue operating under defined attack scenarios. Include tamper detection and automatic rollback capabilities. Ensure recovery processes are tested for effectiveness.</code> | <code>Protect paper-based confidential files in locked cabinets. Post warning signs at restricted data center doors. Track employee security briefings with completion certificates.</code> |
|
| 223 |
+
| <code>Structure networks and applications into isolated segments and apply integrity checking to ensure only trusted software is executed in operational environments.</code> | <code>Use network segmentation and security zones in system design. Validate system behavior using integrity checks. Require signed software for deployment into production.</code> | <code>Distribute weather radios for disaster preparedness in remote facilities. Track badge issuance and disable lost credentials. Train users to avoid reusing passwords across services.</code> |
|
| 224 |
+
* Loss: [<code>TripletLoss</code>](https://sbert.net/docs/package_reference/sentence_transformer/losses.html#tripletloss) with these parameters:
|
| 225 |
+
```json
|
| 226 |
+
{
|
| 227 |
+
"distance_metric": "TripletDistanceMetric.EUCLIDEAN",
|
| 228 |
+
"triplet_margin": 5
|
| 229 |
+
}
|
| 230 |
+
```
|
| 231 |
+
|
| 232 |
+
### Training Hyperparameters
|
| 233 |
+
#### Non-Default Hyperparameters
|
| 234 |
+
|
| 235 |
+
- `eval_strategy`: steps
|
| 236 |
+
- `per_device_train_batch_size`: 16
|
| 237 |
+
- `per_device_eval_batch_size`: 16
|
| 238 |
+
- `learning_rate`: 2e-05
|
| 239 |
+
- `num_train_epochs`: 1
|
| 240 |
+
- `warmup_ratio`: 0.1
|
| 241 |
+
- `fp16`: True
|
| 242 |
+
- `batch_sampler`: no_duplicates
|
| 243 |
+
|
| 244 |
+
#### All Hyperparameters
|
| 245 |
+
<details><summary>Click to expand</summary>
|
| 246 |
+
|
| 247 |
+
- `overwrite_output_dir`: False
|
| 248 |
+
- `do_predict`: False
|
| 249 |
+
- `eval_strategy`: steps
|
| 250 |
+
- `prediction_loss_only`: True
|
| 251 |
+
- `per_device_train_batch_size`: 16
|
| 252 |
+
- `per_device_eval_batch_size`: 16
|
| 253 |
+
- `per_gpu_train_batch_size`: None
|
| 254 |
+
- `per_gpu_eval_batch_size`: None
|
| 255 |
+
- `gradient_accumulation_steps`: 1
|
| 256 |
+
- `eval_accumulation_steps`: None
|
| 257 |
+
- `torch_empty_cache_steps`: None
|
| 258 |
+
- `learning_rate`: 2e-05
|
| 259 |
+
- `weight_decay`: 0.0
|
| 260 |
+
- `adam_beta1`: 0.9
|
| 261 |
+
- `adam_beta2`: 0.999
|
| 262 |
+
- `adam_epsilon`: 1e-08
|
| 263 |
+
- `max_grad_norm`: 1.0
|
| 264 |
+
- `num_train_epochs`: 1
|
| 265 |
+
- `max_steps`: -1
|
| 266 |
+
- `lr_scheduler_type`: linear
|
| 267 |
+
- `lr_scheduler_kwargs`: {}
|
| 268 |
+
- `warmup_ratio`: 0.1
|
| 269 |
+
- `warmup_steps`: 0
|
| 270 |
+
- `log_level`: passive
|
| 271 |
+
- `log_level_replica`: warning
|
| 272 |
+
- `log_on_each_node`: True
|
| 273 |
+
- `logging_nan_inf_filter`: True
|
| 274 |
+
- `save_safetensors`: True
|
| 275 |
+
- `save_on_each_node`: False
|
| 276 |
+
- `save_only_model`: False
|
| 277 |
+
- `restore_callback_states_from_checkpoint`: False
|
| 278 |
+
- `no_cuda`: False
|
| 279 |
+
- `use_cpu`: False
|
| 280 |
+
- `use_mps_device`: False
|
| 281 |
+
- `seed`: 42
|
| 282 |
+
- `data_seed`: None
|
| 283 |
+
- `jit_mode_eval`: False
|
| 284 |
+
- `use_ipex`: False
|
| 285 |
+
- `bf16`: False
|
| 286 |
+
- `fp16`: True
|
| 287 |
+
- `fp16_opt_level`: O1
|
| 288 |
+
- `half_precision_backend`: auto
|
| 289 |
+
- `bf16_full_eval`: False
|
| 290 |
+
- `fp16_full_eval`: False
|
| 291 |
+
- `tf32`: None
|
| 292 |
+
- `local_rank`: 0
|
| 293 |
+
- `ddp_backend`: None
|
| 294 |
+
- `tpu_num_cores`: None
|
| 295 |
+
- `tpu_metrics_debug`: False
|
| 296 |
+
- `debug`: []
|
| 297 |
+
- `dataloader_drop_last`: False
|
| 298 |
+
- `dataloader_num_workers`: 0
|
| 299 |
+
- `dataloader_prefetch_factor`: None
|
| 300 |
+
- `past_index`: -1
|
| 301 |
+
- `disable_tqdm`: False
|
| 302 |
+
- `remove_unused_columns`: True
|
| 303 |
+
- `label_names`: None
|
| 304 |
+
- `load_best_model_at_end`: False
|
| 305 |
+
- `ignore_data_skip`: False
|
| 306 |
+
- `fsdp`: []
|
| 307 |
+
- `fsdp_min_num_params`: 0
|
| 308 |
+
- `fsdp_config`: {'min_num_params': 0, 'xla': False, 'xla_fsdp_v2': False, 'xla_fsdp_grad_ckpt': False}
|
| 309 |
+
- `fsdp_transformer_layer_cls_to_wrap`: None
|
| 310 |
+
- `accelerator_config`: {'split_batches': False, 'dispatch_batches': None, 'even_batches': True, 'use_seedable_sampler': True, 'non_blocking': False, 'gradient_accumulation_kwargs': None}
|
| 311 |
+
- `deepspeed`: None
|
| 312 |
+
- `label_smoothing_factor`: 0.0
|
| 313 |
+
- `optim`: adamw_torch
|
| 314 |
+
- `optim_args`: None
|
| 315 |
+
- `adafactor`: False
|
| 316 |
+
- `group_by_length`: False
|
| 317 |
+
- `length_column_name`: length
|
| 318 |
+
- `ddp_find_unused_parameters`: None
|
| 319 |
+
- `ddp_bucket_cap_mb`: None
|
| 320 |
+
- `ddp_broadcast_buffers`: False
|
| 321 |
+
- `dataloader_pin_memory`: True
|
| 322 |
+
- `dataloader_persistent_workers`: False
|
| 323 |
+
- `skip_memory_metrics`: True
|
| 324 |
+
- `use_legacy_prediction_loop`: False
|
| 325 |
+
- `push_to_hub`: False
|
| 326 |
+
- `resume_from_checkpoint`: None
|
| 327 |
+
- `hub_model_id`: None
|
| 328 |
+
- `hub_strategy`: every_save
|
| 329 |
+
- `hub_private_repo`: None
|
| 330 |
+
- `hub_always_push`: False
|
| 331 |
+
- `gradient_checkpointing`: False
|
| 332 |
+
- `gradient_checkpointing_kwargs`: None
|
| 333 |
+
- `include_inputs_for_metrics`: False
|
| 334 |
+
- `include_for_metrics`: []
|
| 335 |
+
- `eval_do_concat_batches`: True
|
| 336 |
+
- `fp16_backend`: auto
|
| 337 |
+
- `push_to_hub_model_id`: None
|
| 338 |
+
- `push_to_hub_organization`: None
|
| 339 |
+
- `mp_parameters`:
|
| 340 |
+
- `auto_find_batch_size`: False
|
| 341 |
+
- `full_determinism`: False
|
| 342 |
+
- `torchdynamo`: None
|
| 343 |
+
- `ray_scope`: last
|
| 344 |
+
- `ddp_timeout`: 1800
|
| 345 |
+
- `torch_compile`: False
|
| 346 |
+
- `torch_compile_backend`: None
|
| 347 |
+
- `torch_compile_mode`: None
|
| 348 |
+
- `include_tokens_per_second`: False
|
| 349 |
+
- `include_num_input_tokens_seen`: False
|
| 350 |
+
- `neftune_noise_alpha`: None
|
| 351 |
+
- `optim_target_modules`: None
|
| 352 |
+
- `batch_eval_metrics`: False
|
| 353 |
+
- `eval_on_start`: False
|
| 354 |
+
- `use_liger_kernel`: False
|
| 355 |
+
- `eval_use_gather_object`: False
|
| 356 |
+
- `average_tokens_across_devices`: False
|
| 357 |
+
- `prompts`: None
|
| 358 |
+
- `batch_sampler`: no_duplicates
|
| 359 |
+
- `multi_dataset_batch_sampler`: proportional
|
| 360 |
+
|
| 361 |
+
</details>
|
| 362 |
+
|
| 363 |
+
### Training Logs
|
| 364 |
+
| Epoch | Step | Training Loss | Validation Loss | NIST-control-dev_cosine_accuracy |
|
| 365 |
+
|:-----:|:----:|:-------------:|:---------------:|:--------------------------------:|
|
| 366 |
+
| -1 | -1 | - | - | 0.6563 |
|
| 367 |
+
| 0.16 | 100 | 2.6751 | 4.0892 | 0.6661 |
|
| 368 |
+
| 0.32 | 200 | 0.9272 | 3.8595 | 0.7026 |
|
| 369 |
+
| 0.48 | 300 | 0.5711 | 3.8835 | 0.6897 |
|
| 370 |
+
| 0.64 | 400 | 0.3905 | 3.7548 | 0.7071 |
|
| 371 |
+
| 0.8 | 500 | 0.043 | 3.8021 | 0.7035 |
|
| 372 |
+
| 0.96 | 600 | 0.0407 | 3.8115 | 0.7049 |
|
| 373 |
+
|
| 374 |
+
|
| 375 |
+
### Framework Versions
|
| 376 |
+
- Python: 3.13.5
|
| 377 |
+
- Sentence Transformers: 4.1.0
|
| 378 |
+
- Transformers: 4.52.4
|
| 379 |
+
- PyTorch: 2.7.1+cpu
|
| 380 |
+
- Accelerate: 1.8.1
|
| 381 |
+
- Datasets: 2.15.0
|
| 382 |
+
- Tokenizers: 0.21.2
|
| 383 |
+
|
| 384 |
+
## Citation
|
| 385 |
+
|
| 386 |
+
### BibTeX
|
| 387 |
+
|
| 388 |
+
#### Sentence Transformers
|
| 389 |
+
```bibtex
|
| 390 |
+
@inproceedings{reimers-2019-sentence-bert,
|
| 391 |
+
title = "Sentence-BERT: Sentence Embeddings using Siamese BERT-Networks",
|
| 392 |
+
author = "Reimers, Nils and Gurevych, Iryna",
|
| 393 |
+
booktitle = "Proceedings of the 2019 Conference on Empirical Methods in Natural Language Processing",
|
| 394 |
+
month = "11",
|
| 395 |
+
year = "2019",
|
| 396 |
+
publisher = "Association for Computational Linguistics",
|
| 397 |
+
url = "https://arxiv.org/abs/1908.10084",
|
| 398 |
+
}
|
| 399 |
+
```
|
| 400 |
+
|
| 401 |
+
#### TripletLoss
|
| 402 |
+
```bibtex
|
| 403 |
+
@misc{hermans2017defense,
|
| 404 |
+
title={In Defense of the Triplet Loss for Person Re-Identification},
|
| 405 |
+
author={Alexander Hermans and Lucas Beyer and Bastian Leibe},
|
| 406 |
+
year={2017},
|
| 407 |
+
eprint={1703.07737},
|
| 408 |
+
archivePrefix={arXiv},
|
| 409 |
+
primaryClass={cs.CV}
|
| 410 |
+
}
|
| 411 |
+
```
|
| 412 |
+
|
| 413 |
+
<!--
|
| 414 |
+
## Glossary
|
| 415 |
+
|
| 416 |
+
*Clearly define terms in order to be accessible across audiences.*
|
| 417 |
+
-->
|
| 418 |
+
|
| 419 |
+
<!--
|
| 420 |
+
## Model Card Authors
|
| 421 |
+
|
| 422 |
+
*Lists the people who create the model card, providing recognition and accountability for the detailed work that goes into its construction.*
|
| 423 |
+
-->
|
| 424 |
+
|
| 425 |
+
<!--
|
| 426 |
+
## Model Card Contact
|
| 427 |
+
|
| 428 |
+
*Provides a way for people who have updates to the Model Card, suggestions, or questions, to contact the Model Card authors.*
|
| 429 |
+
-->
|
HPEControlMapper/models/mpnet-base-control-triplet/checkpoint-600/config.json
ADDED
|
@@ -0,0 +1,23 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"architectures": [
|
| 3 |
+
"MPNetModel"
|
| 4 |
+
],
|
| 5 |
+
"attention_probs_dropout_prob": 0.1,
|
| 6 |
+
"bos_token_id": 0,
|
| 7 |
+
"eos_token_id": 2,
|
| 8 |
+
"hidden_act": "gelu",
|
| 9 |
+
"hidden_dropout_prob": 0.1,
|
| 10 |
+
"hidden_size": 768,
|
| 11 |
+
"initializer_range": 0.02,
|
| 12 |
+
"intermediate_size": 3072,
|
| 13 |
+
"layer_norm_eps": 1e-05,
|
| 14 |
+
"max_position_embeddings": 514,
|
| 15 |
+
"model_type": "mpnet",
|
| 16 |
+
"num_attention_heads": 12,
|
| 17 |
+
"num_hidden_layers": 12,
|
| 18 |
+
"pad_token_id": 1,
|
| 19 |
+
"relative_attention_num_buckets": 32,
|
| 20 |
+
"torch_dtype": "float32",
|
| 21 |
+
"transformers_version": "4.52.4",
|
| 22 |
+
"vocab_size": 30527
|
| 23 |
+
}
|
HPEControlMapper/models/mpnet-base-control-triplet/checkpoint-600/config_sentence_transformers.json
ADDED
|
@@ -0,0 +1,10 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"__version__": {
|
| 3 |
+
"sentence_transformers": "4.1.0",
|
| 4 |
+
"transformers": "4.52.4",
|
| 5 |
+
"pytorch": "2.7.1+cpu"
|
| 6 |
+
},
|
| 7 |
+
"prompts": {},
|
| 8 |
+
"default_prompt_name": null,
|
| 9 |
+
"similarity_fn_name": "cosine"
|
| 10 |
+
}
|
HPEControlMapper/models/mpnet-base-control-triplet/checkpoint-600/model.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:3c190b5a8af0d51ac697a62f9ae4834b7c6b8b00876476501673aaaaa111a02f
|
| 3 |
+
size 437967672
|
HPEControlMapper/models/mpnet-base-control-triplet/checkpoint-600/modules.json
ADDED
|
@@ -0,0 +1,14 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
[
|
| 2 |
+
{
|
| 3 |
+
"idx": 0,
|
| 4 |
+
"name": "0",
|
| 5 |
+
"path": "",
|
| 6 |
+
"type": "sentence_transformers.models.Transformer"
|
| 7 |
+
},
|
| 8 |
+
{
|
| 9 |
+
"idx": 1,
|
| 10 |
+
"name": "1",
|
| 11 |
+
"path": "1_Pooling",
|
| 12 |
+
"type": "sentence_transformers.models.Pooling"
|
| 13 |
+
}
|
| 14 |
+
]
|
HPEControlMapper/models/mpnet-base-control-triplet/checkpoint-600/optimizer.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:84b536e9eb9d20d9d59bcafc8078232b367fdd6e856e1f6734d81e0343382532
|
| 3 |
+
size 871326731
|
HPEControlMapper/models/mpnet-base-control-triplet/checkpoint-600/rng_state.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:383a43729efdff2755ac4825ffc7f76cd2d24d8ce9768c400d43dacdf55c10cc
|
| 3 |
+
size 14391
|
HPEControlMapper/models/mpnet-base-control-triplet/checkpoint-600/scheduler.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:f572bc45a40df191b140a5180f678f5e39a38ae45a98002ce6d28ddc5f2fc0d0
|
| 3 |
+
size 1465
|
HPEControlMapper/models/mpnet-base-control-triplet/checkpoint-600/sentence_bert_config.json
ADDED
|
@@ -0,0 +1,4 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"max_seq_length": 512,
|
| 3 |
+
"do_lower_case": false
|
| 4 |
+
}
|
HPEControlMapper/models/mpnet-base-control-triplet/checkpoint-600/special_tokens_map.json
ADDED
|
@@ -0,0 +1,51 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"bos_token": {
|
| 3 |
+
"content": "<s>",
|
| 4 |
+
"lstrip": false,
|
| 5 |
+
"normalized": false,
|
| 6 |
+
"rstrip": false,
|
| 7 |
+
"single_word": false
|
| 8 |
+
},
|
| 9 |
+
"cls_token": {
|
| 10 |
+
"content": "<s>",
|
| 11 |
+
"lstrip": false,
|
| 12 |
+
"normalized": true,
|
| 13 |
+
"rstrip": false,
|
| 14 |
+
"single_word": false
|
| 15 |
+
},
|
| 16 |
+
"eos_token": {
|
| 17 |
+
"content": "</s>",
|
| 18 |
+
"lstrip": false,
|
| 19 |
+
"normalized": false,
|
| 20 |
+
"rstrip": false,
|
| 21 |
+
"single_word": false
|
| 22 |
+
},
|
| 23 |
+
"mask_token": {
|
| 24 |
+
"content": "<mask>",
|
| 25 |
+
"lstrip": true,
|
| 26 |
+
"normalized": false,
|
| 27 |
+
"rstrip": false,
|
| 28 |
+
"single_word": false
|
| 29 |
+
},
|
| 30 |
+
"pad_token": {
|
| 31 |
+
"content": "<pad>",
|
| 32 |
+
"lstrip": false,
|
| 33 |
+
"normalized": false,
|
| 34 |
+
"rstrip": false,
|
| 35 |
+
"single_word": false
|
| 36 |
+
},
|
| 37 |
+
"sep_token": {
|
| 38 |
+
"content": "</s>",
|
| 39 |
+
"lstrip": false,
|
| 40 |
+
"normalized": true,
|
| 41 |
+
"rstrip": false,
|
| 42 |
+
"single_word": false
|
| 43 |
+
},
|
| 44 |
+
"unk_token": {
|
| 45 |
+
"content": "[UNK]",
|
| 46 |
+
"lstrip": false,
|
| 47 |
+
"normalized": false,
|
| 48 |
+
"rstrip": false,
|
| 49 |
+
"single_word": false
|
| 50 |
+
}
|
| 51 |
+
}
|
HPEControlMapper/models/mpnet-base-control-triplet/checkpoint-600/tokenizer.json
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
HPEControlMapper/models/mpnet-base-control-triplet/checkpoint-600/tokenizer_config.json
ADDED
|
@@ -0,0 +1,66 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"added_tokens_decoder": {
|
| 3 |
+
"0": {
|
| 4 |
+
"content": "<s>",
|
| 5 |
+
"lstrip": false,
|
| 6 |
+
"normalized": false,
|
| 7 |
+
"rstrip": false,
|
| 8 |
+
"single_word": false,
|
| 9 |
+
"special": true
|
| 10 |
+
},
|
| 11 |
+
"1": {
|
| 12 |
+
"content": "<pad>",
|
| 13 |
+
"lstrip": false,
|
| 14 |
+
"normalized": false,
|
| 15 |
+
"rstrip": false,
|
| 16 |
+
"single_word": false,
|
| 17 |
+
"special": true
|
| 18 |
+
},
|
| 19 |
+
"2": {
|
| 20 |
+
"content": "</s>",
|
| 21 |
+
"lstrip": false,
|
| 22 |
+
"normalized": false,
|
| 23 |
+
"rstrip": false,
|
| 24 |
+
"single_word": false,
|
| 25 |
+
"special": true
|
| 26 |
+
},
|
| 27 |
+
"3": {
|
| 28 |
+
"content": "<unk>",
|
| 29 |
+
"lstrip": false,
|
| 30 |
+
"normalized": true,
|
| 31 |
+
"rstrip": false,
|
| 32 |
+
"single_word": false,
|
| 33 |
+
"special": true
|
| 34 |
+
},
|
| 35 |
+
"104": {
|
| 36 |
+
"content": "[UNK]",
|
| 37 |
+
"lstrip": false,
|
| 38 |
+
"normalized": false,
|
| 39 |
+
"rstrip": false,
|
| 40 |
+
"single_word": false,
|
| 41 |
+
"special": true
|
| 42 |
+
},
|
| 43 |
+
"30526": {
|
| 44 |
+
"content": "<mask>",
|
| 45 |
+
"lstrip": true,
|
| 46 |
+
"normalized": false,
|
| 47 |
+
"rstrip": false,
|
| 48 |
+
"single_word": false,
|
| 49 |
+
"special": true
|
| 50 |
+
}
|
| 51 |
+
},
|
| 52 |
+
"bos_token": "<s>",
|
| 53 |
+
"clean_up_tokenization_spaces": false,
|
| 54 |
+
"cls_token": "<s>",
|
| 55 |
+
"do_lower_case": true,
|
| 56 |
+
"eos_token": "</s>",
|
| 57 |
+
"extra_special_tokens": {},
|
| 58 |
+
"mask_token": "<mask>",
|
| 59 |
+
"model_max_length": 512,
|
| 60 |
+
"pad_token": "<pad>",
|
| 61 |
+
"sep_token": "</s>",
|
| 62 |
+
"strip_accents": null,
|
| 63 |
+
"tokenize_chinese_chars": true,
|
| 64 |
+
"tokenizer_class": "MPNetTokenizer",
|
| 65 |
+
"unk_token": "[UNK]"
|
| 66 |
+
}
|
HPEControlMapper/models/mpnet-base-control-triplet/checkpoint-600/trainer_state.json
ADDED
|
@@ -0,0 +1,130 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"best_global_step": null,
|
| 3 |
+
"best_metric": null,
|
| 4 |
+
"best_model_checkpoint": null,
|
| 5 |
+
"epoch": 0.96,
|
| 6 |
+
"eval_steps": 100,
|
| 7 |
+
"global_step": 600,
|
| 8 |
+
"is_hyper_param_search": false,
|
| 9 |
+
"is_local_process_zero": true,
|
| 10 |
+
"is_world_process_zero": true,
|
| 11 |
+
"log_history": [
|
| 12 |
+
{
|
| 13 |
+
"epoch": 0.16,
|
| 14 |
+
"grad_norm": 32.675262451171875,
|
| 15 |
+
"learning_rate": 1.8718861209964415e-05,
|
| 16 |
+
"loss": 2.6751,
|
| 17 |
+
"step": 100
|
| 18 |
+
},
|
| 19 |
+
{
|
| 20 |
+
"epoch": 0.16,
|
| 21 |
+
"eval_NIST-control-dev_cosine_accuracy": 0.6661201119422913,
|
| 22 |
+
"eval_loss": 4.089230537414551,
|
| 23 |
+
"eval_runtime": 601.3322,
|
| 24 |
+
"eval_samples_per_second": 11.157,
|
| 25 |
+
"eval_steps_per_second": 0.698,
|
| 26 |
+
"step": 100
|
| 27 |
+
},
|
| 28 |
+
{
|
| 29 |
+
"epoch": 0.32,
|
| 30 |
+
"grad_norm": 12.523146629333496,
|
| 31 |
+
"learning_rate": 1.516014234875445e-05,
|
| 32 |
+
"loss": 0.9272,
|
| 33 |
+
"step": 200
|
| 34 |
+
},
|
| 35 |
+
{
|
| 36 |
+
"epoch": 0.32,
|
| 37 |
+
"eval_NIST-control-dev_cosine_accuracy": 0.7026382684707642,
|
| 38 |
+
"eval_loss": 3.859454870223999,
|
| 39 |
+
"eval_runtime": 604.4408,
|
| 40 |
+
"eval_samples_per_second": 11.1,
|
| 41 |
+
"eval_steps_per_second": 0.695,
|
| 42 |
+
"step": 200
|
| 43 |
+
},
|
| 44 |
+
{
|
| 45 |
+
"epoch": 0.48,
|
| 46 |
+
"grad_norm": 14.002488136291504,
|
| 47 |
+
"learning_rate": 1.1601423487544485e-05,
|
| 48 |
+
"loss": 0.5711,
|
| 49 |
+
"step": 300
|
| 50 |
+
},
|
| 51 |
+
{
|
| 52 |
+
"epoch": 0.48,
|
| 53 |
+
"eval_NIST-control-dev_cosine_accuracy": 0.6896705627441406,
|
| 54 |
+
"eval_loss": 3.883481502532959,
|
| 55 |
+
"eval_runtime": 608.1647,
|
| 56 |
+
"eval_samples_per_second": 11.032,
|
| 57 |
+
"eval_steps_per_second": 0.691,
|
| 58 |
+
"step": 300
|
| 59 |
+
},
|
| 60 |
+
{
|
| 61 |
+
"epoch": 0.64,
|
| 62 |
+
"grad_norm": 28.6463565826416,
|
| 63 |
+
"learning_rate": 8.04270462633452e-06,
|
| 64 |
+
"loss": 0.3905,
|
| 65 |
+
"step": 400
|
| 66 |
+
},
|
| 67 |
+
{
|
| 68 |
+
"epoch": 0.64,
|
| 69 |
+
"eval_NIST-control-dev_cosine_accuracy": 0.7071098685264587,
|
| 70 |
+
"eval_loss": 3.7548305988311768,
|
| 71 |
+
"eval_runtime": 606.0474,
|
| 72 |
+
"eval_samples_per_second": 11.07,
|
| 73 |
+
"eval_steps_per_second": 0.693,
|
| 74 |
+
"step": 400
|
| 75 |
+
},
|
| 76 |
+
{
|
| 77 |
+
"epoch": 0.8,
|
| 78 |
+
"grad_norm": 0.0,
|
| 79 |
+
"learning_rate": 4.483985765124556e-06,
|
| 80 |
+
"loss": 0.043,
|
| 81 |
+
"step": 500
|
| 82 |
+
},
|
| 83 |
+
{
|
| 84 |
+
"epoch": 0.8,
|
| 85 |
+
"eval_NIST-control-dev_cosine_accuracy": 0.7035325765609741,
|
| 86 |
+
"eval_loss": 3.8021018505096436,
|
| 87 |
+
"eval_runtime": 600.763,
|
| 88 |
+
"eval_samples_per_second": 11.167,
|
| 89 |
+
"eval_steps_per_second": 0.699,
|
| 90 |
+
"step": 500
|
| 91 |
+
},
|
| 92 |
+
{
|
| 93 |
+
"epoch": 0.96,
|
| 94 |
+
"grad_norm": 0.0,
|
| 95 |
+
"learning_rate": 9.252669039145908e-07,
|
| 96 |
+
"loss": 0.0407,
|
| 97 |
+
"step": 600
|
| 98 |
+
},
|
| 99 |
+
{
|
| 100 |
+
"epoch": 0.96,
|
| 101 |
+
"eval_NIST-control-dev_cosine_accuracy": 0.7048740386962891,
|
| 102 |
+
"eval_loss": 3.811488628387451,
|
| 103 |
+
"eval_runtime": 599.8706,
|
| 104 |
+
"eval_samples_per_second": 11.184,
|
| 105 |
+
"eval_steps_per_second": 0.7,
|
| 106 |
+
"step": 600
|
| 107 |
+
}
|
| 108 |
+
],
|
| 109 |
+
"logging_steps": 100,
|
| 110 |
+
"max_steps": 625,
|
| 111 |
+
"num_input_tokens_seen": 0,
|
| 112 |
+
"num_train_epochs": 1,
|
| 113 |
+
"save_steps": 100,
|
| 114 |
+
"stateful_callbacks": {
|
| 115 |
+
"TrainerControl": {
|
| 116 |
+
"args": {
|
| 117 |
+
"should_epoch_stop": false,
|
| 118 |
+
"should_evaluate": false,
|
| 119 |
+
"should_log": false,
|
| 120 |
+
"should_save": true,
|
| 121 |
+
"should_training_stop": false
|
| 122 |
+
},
|
| 123 |
+
"attributes": {}
|
| 124 |
+
}
|
| 125 |
+
},
|
| 126 |
+
"total_flos": 0.0,
|
| 127 |
+
"train_batch_size": 16,
|
| 128 |
+
"trial_name": null,
|
| 129 |
+
"trial_params": null
|
| 130 |
+
}
|
HPEControlMapper/models/mpnet-base-control-triplet/checkpoint-600/training_args.bin
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:833ef880b3501bd0c81b578a2dc1a700c13add6a501fc8fb0cf7ea0843c2483a
|
| 3 |
+
size 5969
|
HPEControlMapper/models/mpnet-base-control-triplet/checkpoint-600/vocab.txt
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
HPEControlMapper/models/mpnet-base-control-triplet/checkpoint-625/1_Pooling/config.json
ADDED
|
@@ -0,0 +1,10 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"word_embedding_dimension": 768,
|
| 3 |
+
"pooling_mode_cls_token": false,
|
| 4 |
+
"pooling_mode_mean_tokens": true,
|
| 5 |
+
"pooling_mode_max_tokens": false,
|
| 6 |
+
"pooling_mode_mean_sqrt_len_tokens": false,
|
| 7 |
+
"pooling_mode_weightedmean_tokens": false,
|
| 8 |
+
"pooling_mode_lasttoken": false,
|
| 9 |
+
"include_prompt": true
|
| 10 |
+
}
|
HPEControlMapper/models/mpnet-base-control-triplet/checkpoint-625/README.md
ADDED
|
@@ -0,0 +1,429 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
---
|
| 2 |
+
language:
|
| 3 |
+
- en
|
| 4 |
+
license: apache-2.0
|
| 5 |
+
tags:
|
| 6 |
+
- sentence-transformers
|
| 7 |
+
- sentence-similarity
|
| 8 |
+
- feature-extraction
|
| 9 |
+
- generated_from_trainer
|
| 10 |
+
- dataset_size:10000
|
| 11 |
+
- loss:TripletLoss
|
| 12 |
+
base_model: microsoft/mpnet-base
|
| 13 |
+
widget:
|
| 14 |
+
- source_sentence: Use hardware-based methods where available to guarantee role-based
|
| 15 |
+
access control cannot be bypassed.
|
| 16 |
+
sentences:
|
| 17 |
+
- Related control that reinforces stability and assurance in brute force login prevention
|
| 18 |
+
use cases.
|
| 19 |
+
- Audit session replay defense_b11_8 as part of continuous security assessment processes.
|
| 20 |
+
- Core functionality needed to enforce effective role-based access control mechanisms.
|
| 21 |
+
- source_sentence: Provide full-feature access to security enhancements in NVIDIA
|
| 22 |
+
GPU firmware.
|
| 23 |
+
sentences:
|
| 24 |
+
- Implement secure communication channels between host and GPU.
|
| 25 |
+
- A little boy blows bubbles outdoors.
|
| 26 |
+
- Use HTTPS inspection to detect man-in-the-middle attack attempts.
|
| 27 |
+
- source_sentence: Validate source authenticity by requiring signed code in all components.
|
| 28 |
+
sentences:
|
| 29 |
+
- Firewalls are activated by default and preloaded with security policies.
|
| 30 |
+
- Enforce cryptographic validation on third-party software inputs.
|
| 31 |
+
- Display productivity summaries on a weekly dashboard.
|
| 32 |
+
- source_sentence: Mandate organization-wide adherence to policies enforcing siem
|
| 33 |
+
integration using centrally managed tools.
|
| 34 |
+
sentences:
|
| 35 |
+
- Review policies around anonymous proxy blocking_b11_50 to avoid unexpected vectors
|
| 36 |
+
of compromise.
|
| 37 |
+
- Implement key management systems that use secure encryption algorithms.
|
| 38 |
+
- This measure directly supports secure handling within siem integration implementations.
|
| 39 |
+
- source_sentence: Mandate organization-wide adherence to policies enforcing virtual
|
| 40 |
+
machine isolation using centrally managed tools.
|
| 41 |
+
sentences:
|
| 42 |
+
- Adult males stand in front of a brick wall near something made of metal.
|
| 43 |
+
- Monitor for issues related to redundant login blocking_b12_1 as part of extended
|
| 44 |
+
security hygiene.
|
| 45 |
+
- A widely recommended control paired with proper virtual machine isolation implementations.
|
| 46 |
+
pipeline_tag: sentence-similarity
|
| 47 |
+
library_name: sentence-transformers
|
| 48 |
+
metrics:
|
| 49 |
+
- cosine_accuracy
|
| 50 |
+
model-index:
|
| 51 |
+
- name: MPNet base trained on NIST Controls
|
| 52 |
+
results:
|
| 53 |
+
- task:
|
| 54 |
+
type: triplet
|
| 55 |
+
name: Triplet
|
| 56 |
+
dataset:
|
| 57 |
+
name: NIST control dev
|
| 58 |
+
type: NIST-control-dev
|
| 59 |
+
metrics:
|
| 60 |
+
- type: cosine_accuracy
|
| 61 |
+
value: 0.7048740386962891
|
| 62 |
+
name: Cosine Accuracy
|
| 63 |
+
---
|
| 64 |
+
|
| 65 |
+
# MPNet base trained on NIST Controls
|
| 66 |
+
|
| 67 |
+
This is a [sentence-transformers](https://www.SBERT.net) model fine-tuned from [microsoft/mpnet-base](https://huggingface.co/microsoft/mpnet-base) on the csv dataset. It maps sentences and paragraphs to a 768-dimensional dense vector space and can be used for semantic textual similarity, semantic search, paraphrase mining, text classification, clustering, and more.
|
| 68 |
+
|
| 69 |
+
## Model Details
|
| 70 |
+
|
| 71 |
+
### Model Description
|
| 72 |
+
- **Model Type:** Sentence Transformer
|
| 73 |
+
- **Base model:** [microsoft/mpnet-base](https://huggingface.co/microsoft/mpnet-base) <!-- at revision 6996ce1e91bd2a9c7d7f61daec37463394f73f09 -->
|
| 74 |
+
- **Maximum Sequence Length:** 512 tokens
|
| 75 |
+
- **Output Dimensionality:** 768 dimensions
|
| 76 |
+
- **Similarity Function:** Cosine Similarity
|
| 77 |
+
- **Training Dataset:**
|
| 78 |
+
- csv
|
| 79 |
+
- **Language:** en
|
| 80 |
+
- **License:** apache-2.0
|
| 81 |
+
|
| 82 |
+
### Model Sources
|
| 83 |
+
|
| 84 |
+
- **Documentation:** [Sentence Transformers Documentation](https://sbert.net)
|
| 85 |
+
- **Repository:** [Sentence Transformers on GitHub](https://github.com/UKPLab/sentence-transformers)
|
| 86 |
+
- **Hugging Face:** [Sentence Transformers on Hugging Face](https://huggingface.co/models?library=sentence-transformers)
|
| 87 |
+
|
| 88 |
+
### Full Model Architecture
|
| 89 |
+
|
| 90 |
+
```
|
| 91 |
+
SentenceTransformer(
|
| 92 |
+
(0): Transformer({'max_seq_length': 512, 'do_lower_case': False}) with Transformer model: MPNetModel
|
| 93 |
+
(1): Pooling({'word_embedding_dimension': 768, 'pooling_mode_cls_token': False, 'pooling_mode_mean_tokens': True, 'pooling_mode_max_tokens': False, 'pooling_mode_mean_sqrt_len_tokens': False, 'pooling_mode_weightedmean_tokens': False, 'pooling_mode_lasttoken': False, 'include_prompt': True})
|
| 94 |
+
)
|
| 95 |
+
```
|
| 96 |
+
|
| 97 |
+
## Usage
|
| 98 |
+
|
| 99 |
+
### Direct Usage (Sentence Transformers)
|
| 100 |
+
|
| 101 |
+
First install the Sentence Transformers library:
|
| 102 |
+
|
| 103 |
+
```bash
|
| 104 |
+
pip install -U sentence-transformers
|
| 105 |
+
```
|
| 106 |
+
|
| 107 |
+
Then you can load this model and run inference.
|
| 108 |
+
```python
|
| 109 |
+
from sentence_transformers import SentenceTransformer
|
| 110 |
+
|
| 111 |
+
# Download from the 🤗 Hub
|
| 112 |
+
model = SentenceTransformer("sentence_transformers_model_id")
|
| 113 |
+
# Run inference
|
| 114 |
+
sentences = [
|
| 115 |
+
'Mandate organization-wide adherence to policies enforcing virtual machine isolation using centrally managed tools.',
|
| 116 |
+
'A widely recommended control paired with proper virtual machine isolation implementations.',
|
| 117 |
+
'Monitor for issues related to redundant login blocking_b12_1 as part of extended security hygiene.',
|
| 118 |
+
]
|
| 119 |
+
embeddings = model.encode(sentences)
|
| 120 |
+
print(embeddings.shape)
|
| 121 |
+
# [3, 768]
|
| 122 |
+
|
| 123 |
+
# Get the similarity scores for the embeddings
|
| 124 |
+
similarities = model.similarity(embeddings, embeddings)
|
| 125 |
+
print(similarities.shape)
|
| 126 |
+
# [3, 3]
|
| 127 |
+
```
|
| 128 |
+
|
| 129 |
+
<!--
|
| 130 |
+
### Direct Usage (Transformers)
|
| 131 |
+
|
| 132 |
+
<details><summary>Click to see the direct usage in Transformers</summary>
|
| 133 |
+
|
| 134 |
+
</details>
|
| 135 |
+
-->
|
| 136 |
+
|
| 137 |
+
<!--
|
| 138 |
+
### Downstream Usage (Sentence Transformers)
|
| 139 |
+
|
| 140 |
+
You can fine-tune this model on your own dataset.
|
| 141 |
+
|
| 142 |
+
<details><summary>Click to expand</summary>
|
| 143 |
+
|
| 144 |
+
</details>
|
| 145 |
+
-->
|
| 146 |
+
|
| 147 |
+
<!--
|
| 148 |
+
### Out-of-Scope Use
|
| 149 |
+
|
| 150 |
+
*List how the model may foreseeably be misused and address what users ought not to do with the model.*
|
| 151 |
+
-->
|
| 152 |
+
|
| 153 |
+
## Evaluation
|
| 154 |
+
|
| 155 |
+
### Metrics
|
| 156 |
+
|
| 157 |
+
#### Triplet
|
| 158 |
+
|
| 159 |
+
* Dataset: `NIST-control-dev`
|
| 160 |
+
* Evaluated with [<code>TripletEvaluator</code>](https://sbert.net/docs/package_reference/sentence_transformer/evaluation.html#sentence_transformers.evaluation.TripletEvaluator)
|
| 161 |
+
|
| 162 |
+
| Metric | Value |
|
| 163 |
+
|:--------------------|:-----------|
|
| 164 |
+
| **cosine_accuracy** | **0.7049** |
|
| 165 |
+
|
| 166 |
+
<!--
|
| 167 |
+
## Bias, Risks and Limitations
|
| 168 |
+
|
| 169 |
+
*What are the known or foreseeable issues stemming from this model? You could also flag here known failure cases or weaknesses of the model.*
|
| 170 |
+
-->
|
| 171 |
+
|
| 172 |
+
<!--
|
| 173 |
+
### Recommendations
|
| 174 |
+
|
| 175 |
+
*What are recommendations with respect to the foreseeable issues? For example, filtering explicit content.*
|
| 176 |
+
-->
|
| 177 |
+
|
| 178 |
+
## Training Details
|
| 179 |
+
|
| 180 |
+
### Training Dataset
|
| 181 |
+
|
| 182 |
+
#### csv
|
| 183 |
+
|
| 184 |
+
* Dataset: csv
|
| 185 |
+
* Size: 10,000 training samples
|
| 186 |
+
* Columns: <code>anchor</code>, <code>positive</code>, and <code>negative</code>
|
| 187 |
+
* Approximate statistics based on the first 1000 samples:
|
| 188 |
+
| | anchor | positive | negative |
|
| 189 |
+
|:--------|:------------------------------------------------------------------------------------|:------------------------------------------------------------------------------------|:----------------------------------------------------------------------------------|
|
| 190 |
+
| type | string | string | string |
|
| 191 |
+
| details | <ul><li>min: 10 tokens</li><li>mean: 21.39 tokens</li><li>max: 280 tokens</li></ul> | <ul><li>min: 10 tokens</li><li>mean: 17.96 tokens</li><li>max: 171 tokens</li></ul> | <ul><li>min: 9 tokens</li><li>mean: 20.27 tokens</li><li>max: 86 tokens</li></ul> |
|
| 192 |
+
* Samples:
|
| 193 |
+
| anchor | positive | negative |
|
| 194 |
+
|:---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|:----------------------------------------------------------------------------------------------------------------------------------|:------------------------------------------------------------------------------------------|
|
| 195 |
+
| <code>Audit Logs: A secure record of security-sensitive activity required for the audit trail. The audit log is a record of an activity by a system a user or an application. Audit logs are required for the audit trail. Examples of audit events are listed in the requirements section below. It is important that these events are in the audit log as part of HPE's commitment to CISA's Secure by Design Pledge.</code> | <code>Security Event Correlation (v3): Systems that correlate audit logs with security events to detect anomalies.</code> | <code>Remote Access Protection (v3): Controls to secure remote access to systems.</code> |
|
| 196 |
+
| <code>One to One Credential Mapping: This requirement involves getting the specific user login information correct in the audit log down to the specific user and not just listing the role or process ID value.</code> | <code>User Identity Verification (v3): Mechanism to verify user identities before granting access to resources.</code> | <code>Incident Response Testing (v3): Regular testing of incident response plans.</code> |
|
| 197 |
+
| <code>Reproducible Builds: Be able to recreate bit-by-bit identical copies of signed code. Ensure that signed code really came from the source code developed by HPE and possibly reviewed by a third-party.</code> | <code>Build Integrity Checking (v3): Verification process to ensure that the build environment has not been tampered with.</code> | <code>Backup and Recovery Planning (v3): Ensure data is backed up and recoverable.</code> |
|
| 198 |
+
* Loss: [<code>TripletLoss</code>](https://sbert.net/docs/package_reference/sentence_transformer/losses.html#tripletloss) with these parameters:
|
| 199 |
+
```json
|
| 200 |
+
{
|
| 201 |
+
"distance_metric": "TripletDistanceMetric.EUCLIDEAN",
|
| 202 |
+
"triplet_margin": 5
|
| 203 |
+
}
|
| 204 |
+
```
|
| 205 |
+
|
| 206 |
+
### Evaluation Dataset
|
| 207 |
+
|
| 208 |
+
#### csv
|
| 209 |
+
|
| 210 |
+
* Dataset: csv
|
| 211 |
+
* Size: 6,709 evaluation samples
|
| 212 |
+
* Columns: <code>anchor</code>, <code>positive</code>, and <code>negative</code>
|
| 213 |
+
* Approximate statistics based on the first 1000 samples:
|
| 214 |
+
| | anchor | positive | negative |
|
| 215 |
+
|:--------|:----------------------------------------------------------------------------------|:----------------------------------------------------------------------------------|:----------------------------------------------------------------------------------|
|
| 216 |
+
| type | string | string | string |
|
| 217 |
+
| details | <ul><li>min: 8 tokens</li><li>mean: 18.08 tokens</li><li>max: 54 tokens</li></ul> | <ul><li>min: 4 tokens</li><li>mean: 13.64 tokens</li><li>max: 33 tokens</li></ul> | <ul><li>min: 5 tokens</li><li>mean: 18.23 tokens</li><li>max: 34 tokens</li></ul> |
|
| 218 |
+
* Samples:
|
| 219 |
+
| anchor | positive | negative |
|
| 220 |
+
|:----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|:--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|:--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
|
| 221 |
+
| <code>Only allow approved and secured components to run in production environments validating them with cryptographic checks and requiring authenticated configuration changes.</code> | <code>Deploy only approved applications in production environments. Use secure startup methods to validate software integrity. Require authentication for all system configuration changes.</code> | <code>Issue smart cards to employees as physical access tokens. Maintain a contact list for emergency communications. Label system components for asset tracking.</code> |
|
| 222 |
+
| <code>Create system architectures that include failover tamper detection and rollback protections to ensure operations continue even under cyberattack.</code> | <code>Design systems to continue operating under defined attack scenarios. Include tamper detection and automatic rollback capabilities. Ensure recovery processes are tested for effectiveness.</code> | <code>Protect paper-based confidential files in locked cabinets. Post warning signs at restricted data center doors. Track employee security briefings with completion certificates.</code> |
|
| 223 |
+
| <code>Structure networks and applications into isolated segments and apply integrity checking to ensure only trusted software is executed in operational environments.</code> | <code>Use network segmentation and security zones in system design. Validate system behavior using integrity checks. Require signed software for deployment into production.</code> | <code>Distribute weather radios for disaster preparedness in remote facilities. Track badge issuance and disable lost credentials. Train users to avoid reusing passwords across services.</code> |
|
| 224 |
+
* Loss: [<code>TripletLoss</code>](https://sbert.net/docs/package_reference/sentence_transformer/losses.html#tripletloss) with these parameters:
|
| 225 |
+
```json
|
| 226 |
+
{
|
| 227 |
+
"distance_metric": "TripletDistanceMetric.EUCLIDEAN",
|
| 228 |
+
"triplet_margin": 5
|
| 229 |
+
}
|
| 230 |
+
```
|
| 231 |
+
|
| 232 |
+
### Training Hyperparameters
|
| 233 |
+
#### Non-Default Hyperparameters
|
| 234 |
+
|
| 235 |
+
- `eval_strategy`: steps
|
| 236 |
+
- `per_device_train_batch_size`: 16
|
| 237 |
+
- `per_device_eval_batch_size`: 16
|
| 238 |
+
- `learning_rate`: 2e-05
|
| 239 |
+
- `num_train_epochs`: 1
|
| 240 |
+
- `warmup_ratio`: 0.1
|
| 241 |
+
- `fp16`: True
|
| 242 |
+
- `batch_sampler`: no_duplicates
|
| 243 |
+
|
| 244 |
+
#### All Hyperparameters
|
| 245 |
+
<details><summary>Click to expand</summary>
|
| 246 |
+
|
| 247 |
+
- `overwrite_output_dir`: False
|
| 248 |
+
- `do_predict`: False
|
| 249 |
+
- `eval_strategy`: steps
|
| 250 |
+
- `prediction_loss_only`: True
|
| 251 |
+
- `per_device_train_batch_size`: 16
|
| 252 |
+
- `per_device_eval_batch_size`: 16
|
| 253 |
+
- `per_gpu_train_batch_size`: None
|
| 254 |
+
- `per_gpu_eval_batch_size`: None
|
| 255 |
+
- `gradient_accumulation_steps`: 1
|
| 256 |
+
- `eval_accumulation_steps`: None
|
| 257 |
+
- `torch_empty_cache_steps`: None
|
| 258 |
+
- `learning_rate`: 2e-05
|
| 259 |
+
- `weight_decay`: 0.0
|
| 260 |
+
- `adam_beta1`: 0.9
|
| 261 |
+
- `adam_beta2`: 0.999
|
| 262 |
+
- `adam_epsilon`: 1e-08
|
| 263 |
+
- `max_grad_norm`: 1.0
|
| 264 |
+
- `num_train_epochs`: 1
|
| 265 |
+
- `max_steps`: -1
|
| 266 |
+
- `lr_scheduler_type`: linear
|
| 267 |
+
- `lr_scheduler_kwargs`: {}
|
| 268 |
+
- `warmup_ratio`: 0.1
|
| 269 |
+
- `warmup_steps`: 0
|
| 270 |
+
- `log_level`: passive
|
| 271 |
+
- `log_level_replica`: warning
|
| 272 |
+
- `log_on_each_node`: True
|
| 273 |
+
- `logging_nan_inf_filter`: True
|
| 274 |
+
- `save_safetensors`: True
|
| 275 |
+
- `save_on_each_node`: False
|
| 276 |
+
- `save_only_model`: False
|
| 277 |
+
- `restore_callback_states_from_checkpoint`: False
|
| 278 |
+
- `no_cuda`: False
|
| 279 |
+
- `use_cpu`: False
|
| 280 |
+
- `use_mps_device`: False
|
| 281 |
+
- `seed`: 42
|
| 282 |
+
- `data_seed`: None
|
| 283 |
+
- `jit_mode_eval`: False
|
| 284 |
+
- `use_ipex`: False
|
| 285 |
+
- `bf16`: False
|
| 286 |
+
- `fp16`: True
|
| 287 |
+
- `fp16_opt_level`: O1
|
| 288 |
+
- `half_precision_backend`: auto
|
| 289 |
+
- `bf16_full_eval`: False
|
| 290 |
+
- `fp16_full_eval`: False
|
| 291 |
+
- `tf32`: None
|
| 292 |
+
- `local_rank`: 0
|
| 293 |
+
- `ddp_backend`: None
|
| 294 |
+
- `tpu_num_cores`: None
|
| 295 |
+
- `tpu_metrics_debug`: False
|
| 296 |
+
- `debug`: []
|
| 297 |
+
- `dataloader_drop_last`: False
|
| 298 |
+
- `dataloader_num_workers`: 0
|
| 299 |
+
- `dataloader_prefetch_factor`: None
|
| 300 |
+
- `past_index`: -1
|
| 301 |
+
- `disable_tqdm`: False
|
| 302 |
+
- `remove_unused_columns`: True
|
| 303 |
+
- `label_names`: None
|
| 304 |
+
- `load_best_model_at_end`: False
|
| 305 |
+
- `ignore_data_skip`: False
|
| 306 |
+
- `fsdp`: []
|
| 307 |
+
- `fsdp_min_num_params`: 0
|
| 308 |
+
- `fsdp_config`: {'min_num_params': 0, 'xla': False, 'xla_fsdp_v2': False, 'xla_fsdp_grad_ckpt': False}
|
| 309 |
+
- `fsdp_transformer_layer_cls_to_wrap`: None
|
| 310 |
+
- `accelerator_config`: {'split_batches': False, 'dispatch_batches': None, 'even_batches': True, 'use_seedable_sampler': True, 'non_blocking': False, 'gradient_accumulation_kwargs': None}
|
| 311 |
+
- `deepspeed`: None
|
| 312 |
+
- `label_smoothing_factor`: 0.0
|
| 313 |
+
- `optim`: adamw_torch
|
| 314 |
+
- `optim_args`: None
|
| 315 |
+
- `adafactor`: False
|
| 316 |
+
- `group_by_length`: False
|
| 317 |
+
- `length_column_name`: length
|
| 318 |
+
- `ddp_find_unused_parameters`: None
|
| 319 |
+
- `ddp_bucket_cap_mb`: None
|
| 320 |
+
- `ddp_broadcast_buffers`: False
|
| 321 |
+
- `dataloader_pin_memory`: True
|
| 322 |
+
- `dataloader_persistent_workers`: False
|
| 323 |
+
- `skip_memory_metrics`: True
|
| 324 |
+
- `use_legacy_prediction_loop`: False
|
| 325 |
+
- `push_to_hub`: False
|
| 326 |
+
- `resume_from_checkpoint`: None
|
| 327 |
+
- `hub_model_id`: None
|
| 328 |
+
- `hub_strategy`: every_save
|
| 329 |
+
- `hub_private_repo`: None
|
| 330 |
+
- `hub_always_push`: False
|
| 331 |
+
- `gradient_checkpointing`: False
|
| 332 |
+
- `gradient_checkpointing_kwargs`: None
|
| 333 |
+
- `include_inputs_for_metrics`: False
|
| 334 |
+
- `include_for_metrics`: []
|
| 335 |
+
- `eval_do_concat_batches`: True
|
| 336 |
+
- `fp16_backend`: auto
|
| 337 |
+
- `push_to_hub_model_id`: None
|
| 338 |
+
- `push_to_hub_organization`: None
|
| 339 |
+
- `mp_parameters`:
|
| 340 |
+
- `auto_find_batch_size`: False
|
| 341 |
+
- `full_determinism`: False
|
| 342 |
+
- `torchdynamo`: None
|
| 343 |
+
- `ray_scope`: last
|
| 344 |
+
- `ddp_timeout`: 1800
|
| 345 |
+
- `torch_compile`: False
|
| 346 |
+
- `torch_compile_backend`: None
|
| 347 |
+
- `torch_compile_mode`: None
|
| 348 |
+
- `include_tokens_per_second`: False
|
| 349 |
+
- `include_num_input_tokens_seen`: False
|
| 350 |
+
- `neftune_noise_alpha`: None
|
| 351 |
+
- `optim_target_modules`: None
|
| 352 |
+
- `batch_eval_metrics`: False
|
| 353 |
+
- `eval_on_start`: False
|
| 354 |
+
- `use_liger_kernel`: False
|
| 355 |
+
- `eval_use_gather_object`: False
|
| 356 |
+
- `average_tokens_across_devices`: False
|
| 357 |
+
- `prompts`: None
|
| 358 |
+
- `batch_sampler`: no_duplicates
|
| 359 |
+
- `multi_dataset_batch_sampler`: proportional
|
| 360 |
+
|
| 361 |
+
</details>
|
| 362 |
+
|
| 363 |
+
### Training Logs
|
| 364 |
+
| Epoch | Step | Training Loss | Validation Loss | NIST-control-dev_cosine_accuracy |
|
| 365 |
+
|:-----:|:----:|:-------------:|:---------------:|:--------------------------------:|
|
| 366 |
+
| -1 | -1 | - | - | 0.6563 |
|
| 367 |
+
| 0.16 | 100 | 2.6751 | 4.0892 | 0.6661 |
|
| 368 |
+
| 0.32 | 200 | 0.9272 | 3.8595 | 0.7026 |
|
| 369 |
+
| 0.48 | 300 | 0.5711 | 3.8835 | 0.6897 |
|
| 370 |
+
| 0.64 | 400 | 0.3905 | 3.7548 | 0.7071 |
|
| 371 |
+
| 0.8 | 500 | 0.043 | 3.8021 | 0.7035 |
|
| 372 |
+
| 0.96 | 600 | 0.0407 | 3.8115 | 0.7049 |
|
| 373 |
+
|
| 374 |
+
|
| 375 |
+
### Framework Versions
|
| 376 |
+
- Python: 3.13.5
|
| 377 |
+
- Sentence Transformers: 4.1.0
|
| 378 |
+
- Transformers: 4.52.4
|
| 379 |
+
- PyTorch: 2.7.1+cpu
|
| 380 |
+
- Accelerate: 1.8.1
|
| 381 |
+
- Datasets: 2.15.0
|
| 382 |
+
- Tokenizers: 0.21.2
|
| 383 |
+
|
| 384 |
+
## Citation
|
| 385 |
+
|
| 386 |
+
### BibTeX
|
| 387 |
+
|
| 388 |
+
#### Sentence Transformers
|
| 389 |
+
```bibtex
|
| 390 |
+
@inproceedings{reimers-2019-sentence-bert,
|
| 391 |
+
title = "Sentence-BERT: Sentence Embeddings using Siamese BERT-Networks",
|
| 392 |
+
author = "Reimers, Nils and Gurevych, Iryna",
|
| 393 |
+
booktitle = "Proceedings of the 2019 Conference on Empirical Methods in Natural Language Processing",
|
| 394 |
+
month = "11",
|
| 395 |
+
year = "2019",
|
| 396 |
+
publisher = "Association for Computational Linguistics",
|
| 397 |
+
url = "https://arxiv.org/abs/1908.10084",
|
| 398 |
+
}
|
| 399 |
+
```
|
| 400 |
+
|
| 401 |
+
#### TripletLoss
|
| 402 |
+
```bibtex
|
| 403 |
+
@misc{hermans2017defense,
|
| 404 |
+
title={In Defense of the Triplet Loss for Person Re-Identification},
|
| 405 |
+
author={Alexander Hermans and Lucas Beyer and Bastian Leibe},
|
| 406 |
+
year={2017},
|
| 407 |
+
eprint={1703.07737},
|
| 408 |
+
archivePrefix={arXiv},
|
| 409 |
+
primaryClass={cs.CV}
|
| 410 |
+
}
|
| 411 |
+
```
|
| 412 |
+
|
| 413 |
+
<!--
|
| 414 |
+
## Glossary
|
| 415 |
+
|
| 416 |
+
*Clearly define terms in order to be accessible across audiences.*
|
| 417 |
+
-->
|
| 418 |
+
|
| 419 |
+
<!--
|
| 420 |
+
## Model Card Authors
|
| 421 |
+
|
| 422 |
+
*Lists the people who create the model card, providing recognition and accountability for the detailed work that goes into its construction.*
|
| 423 |
+
-->
|
| 424 |
+
|
| 425 |
+
<!--
|
| 426 |
+
## Model Card Contact
|
| 427 |
+
|
| 428 |
+
*Provides a way for people who have updates to the Model Card, suggestions, or questions, to contact the Model Card authors.*
|
| 429 |
+
-->
|
HPEControlMapper/models/mpnet-base-control-triplet/checkpoint-625/config.json
ADDED
|
@@ -0,0 +1,23 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"architectures": [
|
| 3 |
+
"MPNetModel"
|
| 4 |
+
],
|
| 5 |
+
"attention_probs_dropout_prob": 0.1,
|
| 6 |
+
"bos_token_id": 0,
|
| 7 |
+
"eos_token_id": 2,
|
| 8 |
+
"hidden_act": "gelu",
|
| 9 |
+
"hidden_dropout_prob": 0.1,
|
| 10 |
+
"hidden_size": 768,
|
| 11 |
+
"initializer_range": 0.02,
|
| 12 |
+
"intermediate_size": 3072,
|
| 13 |
+
"layer_norm_eps": 1e-05,
|
| 14 |
+
"max_position_embeddings": 514,
|
| 15 |
+
"model_type": "mpnet",
|
| 16 |
+
"num_attention_heads": 12,
|
| 17 |
+
"num_hidden_layers": 12,
|
| 18 |
+
"pad_token_id": 1,
|
| 19 |
+
"relative_attention_num_buckets": 32,
|
| 20 |
+
"torch_dtype": "float32",
|
| 21 |
+
"transformers_version": "4.52.4",
|
| 22 |
+
"vocab_size": 30527
|
| 23 |
+
}
|
HPEControlMapper/models/mpnet-base-control-triplet/checkpoint-625/config_sentence_transformers.json
ADDED
|
@@ -0,0 +1,10 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"__version__": {
|
| 3 |
+
"sentence_transformers": "4.1.0",
|
| 4 |
+
"transformers": "4.52.4",
|
| 5 |
+
"pytorch": "2.7.1+cpu"
|
| 6 |
+
},
|
| 7 |
+
"prompts": {},
|
| 8 |
+
"default_prompt_name": null,
|
| 9 |
+
"similarity_fn_name": "cosine"
|
| 10 |
+
}
|
HPEControlMapper/models/mpnet-base-control-triplet/checkpoint-625/model.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:023ce0c063565c38d46015c3e97cbd919a210aeac663deb5384d5802e0e6a244
|
| 3 |
+
size 437967672
|
HPEControlMapper/models/mpnet-base-control-triplet/checkpoint-625/modules.json
ADDED
|
@@ -0,0 +1,14 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
[
|
| 2 |
+
{
|
| 3 |
+
"idx": 0,
|
| 4 |
+
"name": "0",
|
| 5 |
+
"path": "",
|
| 6 |
+
"type": "sentence_transformers.models.Transformer"
|
| 7 |
+
},
|
| 8 |
+
{
|
| 9 |
+
"idx": 1,
|
| 10 |
+
"name": "1",
|
| 11 |
+
"path": "1_Pooling",
|
| 12 |
+
"type": "sentence_transformers.models.Pooling"
|
| 13 |
+
}
|
| 14 |
+
]
|
HPEControlMapper/models/mpnet-base-control-triplet/checkpoint-625/optimizer.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:33161b4623ae16ae0c17475cad5a8660d3a663c8c86f6a17526188411838e79e
|
| 3 |
+
size 871326731
|
HPEControlMapper/models/mpnet-base-control-triplet/checkpoint-625/rng_state.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:ff9bcba5a601f138be343f6fc7417a4e3f69d6281c51811c943bae8537eabfe1
|
| 3 |
+
size 14391
|
HPEControlMapper/models/mpnet-base-control-triplet/checkpoint-625/scheduler.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:e6122107d2ed33617eb1e954ca24bf223862331c275f054134ef9e1a48454372
|
| 3 |
+
size 1465
|
HPEControlMapper/models/mpnet-base-control-triplet/checkpoint-625/sentence_bert_config.json
ADDED
|
@@ -0,0 +1,4 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"max_seq_length": 512,
|
| 3 |
+
"do_lower_case": false
|
| 4 |
+
}
|
HPEControlMapper/models/mpnet-base-control-triplet/checkpoint-625/special_tokens_map.json
ADDED
|
@@ -0,0 +1,51 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"bos_token": {
|
| 3 |
+
"content": "<s>",
|
| 4 |
+
"lstrip": false,
|
| 5 |
+
"normalized": false,
|
| 6 |
+
"rstrip": false,
|
| 7 |
+
"single_word": false
|
| 8 |
+
},
|
| 9 |
+
"cls_token": {
|
| 10 |
+
"content": "<s>",
|
| 11 |
+
"lstrip": false,
|
| 12 |
+
"normalized": true,
|
| 13 |
+
"rstrip": false,
|
| 14 |
+
"single_word": false
|
| 15 |
+
},
|
| 16 |
+
"eos_token": {
|
| 17 |
+
"content": "</s>",
|
| 18 |
+
"lstrip": false,
|
| 19 |
+
"normalized": false,
|
| 20 |
+
"rstrip": false,
|
| 21 |
+
"single_word": false
|
| 22 |
+
},
|
| 23 |
+
"mask_token": {
|
| 24 |
+
"content": "<mask>",
|
| 25 |
+
"lstrip": true,
|
| 26 |
+
"normalized": false,
|
| 27 |
+
"rstrip": false,
|
| 28 |
+
"single_word": false
|
| 29 |
+
},
|
| 30 |
+
"pad_token": {
|
| 31 |
+
"content": "<pad>",
|
| 32 |
+
"lstrip": false,
|
| 33 |
+
"normalized": false,
|
| 34 |
+
"rstrip": false,
|
| 35 |
+
"single_word": false
|
| 36 |
+
},
|
| 37 |
+
"sep_token": {
|
| 38 |
+
"content": "</s>",
|
| 39 |
+
"lstrip": false,
|
| 40 |
+
"normalized": true,
|
| 41 |
+
"rstrip": false,
|
| 42 |
+
"single_word": false
|
| 43 |
+
},
|
| 44 |
+
"unk_token": {
|
| 45 |
+
"content": "[UNK]",
|
| 46 |
+
"lstrip": false,
|
| 47 |
+
"normalized": false,
|
| 48 |
+
"rstrip": false,
|
| 49 |
+
"single_word": false
|
| 50 |
+
}
|
| 51 |
+
}
|
HPEControlMapper/models/mpnet-base-control-triplet/checkpoint-625/tokenizer.json
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
HPEControlMapper/models/mpnet-base-control-triplet/checkpoint-625/tokenizer_config.json
ADDED
|
@@ -0,0 +1,66 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"added_tokens_decoder": {
|
| 3 |
+
"0": {
|
| 4 |
+
"content": "<s>",
|
| 5 |
+
"lstrip": false,
|
| 6 |
+
"normalized": false,
|
| 7 |
+
"rstrip": false,
|
| 8 |
+
"single_word": false,
|
| 9 |
+
"special": true
|
| 10 |
+
},
|
| 11 |
+
"1": {
|
| 12 |
+
"content": "<pad>",
|
| 13 |
+
"lstrip": false,
|
| 14 |
+
"normalized": false,
|
| 15 |
+
"rstrip": false,
|
| 16 |
+
"single_word": false,
|
| 17 |
+
"special": true
|
| 18 |
+
},
|
| 19 |
+
"2": {
|
| 20 |
+
"content": "</s>",
|
| 21 |
+
"lstrip": false,
|
| 22 |
+
"normalized": false,
|
| 23 |
+
"rstrip": false,
|
| 24 |
+
"single_word": false,
|
| 25 |
+
"special": true
|
| 26 |
+
},
|
| 27 |
+
"3": {
|
| 28 |
+
"content": "<unk>",
|
| 29 |
+
"lstrip": false,
|
| 30 |
+
"normalized": true,
|
| 31 |
+
"rstrip": false,
|
| 32 |
+
"single_word": false,
|
| 33 |
+
"special": true
|
| 34 |
+
},
|
| 35 |
+
"104": {
|
| 36 |
+
"content": "[UNK]",
|
| 37 |
+
"lstrip": false,
|
| 38 |
+
"normalized": false,
|
| 39 |
+
"rstrip": false,
|
| 40 |
+
"single_word": false,
|
| 41 |
+
"special": true
|
| 42 |
+
},
|
| 43 |
+
"30526": {
|
| 44 |
+
"content": "<mask>",
|
| 45 |
+
"lstrip": true,
|
| 46 |
+
"normalized": false,
|
| 47 |
+
"rstrip": false,
|
| 48 |
+
"single_word": false,
|
| 49 |
+
"special": true
|
| 50 |
+
}
|
| 51 |
+
},
|
| 52 |
+
"bos_token": "<s>",
|
| 53 |
+
"clean_up_tokenization_spaces": false,
|
| 54 |
+
"cls_token": "<s>",
|
| 55 |
+
"do_lower_case": true,
|
| 56 |
+
"eos_token": "</s>",
|
| 57 |
+
"extra_special_tokens": {},
|
| 58 |
+
"mask_token": "<mask>",
|
| 59 |
+
"model_max_length": 512,
|
| 60 |
+
"pad_token": "<pad>",
|
| 61 |
+
"sep_token": "</s>",
|
| 62 |
+
"strip_accents": null,
|
| 63 |
+
"tokenize_chinese_chars": true,
|
| 64 |
+
"tokenizer_class": "MPNetTokenizer",
|
| 65 |
+
"unk_token": "[UNK]"
|
| 66 |
+
}
|
HPEControlMapper/models/mpnet-base-control-triplet/checkpoint-625/trainer_state.json
ADDED
|
@@ -0,0 +1,130 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"best_global_step": null,
|
| 3 |
+
"best_metric": null,
|
| 4 |
+
"best_model_checkpoint": null,
|
| 5 |
+
"epoch": 1.0,
|
| 6 |
+
"eval_steps": 100,
|
| 7 |
+
"global_step": 625,
|
| 8 |
+
"is_hyper_param_search": false,
|
| 9 |
+
"is_local_process_zero": true,
|
| 10 |
+
"is_world_process_zero": true,
|
| 11 |
+
"log_history": [
|
| 12 |
+
{
|
| 13 |
+
"epoch": 0.16,
|
| 14 |
+
"grad_norm": 32.675262451171875,
|
| 15 |
+
"learning_rate": 1.8718861209964415e-05,
|
| 16 |
+
"loss": 2.6751,
|
| 17 |
+
"step": 100
|
| 18 |
+
},
|
| 19 |
+
{
|
| 20 |
+
"epoch": 0.16,
|
| 21 |
+
"eval_NIST-control-dev_cosine_accuracy": 0.6661201119422913,
|
| 22 |
+
"eval_loss": 4.089230537414551,
|
| 23 |
+
"eval_runtime": 601.3322,
|
| 24 |
+
"eval_samples_per_second": 11.157,
|
| 25 |
+
"eval_steps_per_second": 0.698,
|
| 26 |
+
"step": 100
|
| 27 |
+
},
|
| 28 |
+
{
|
| 29 |
+
"epoch": 0.32,
|
| 30 |
+
"grad_norm": 12.523146629333496,
|
| 31 |
+
"learning_rate": 1.516014234875445e-05,
|
| 32 |
+
"loss": 0.9272,
|
| 33 |
+
"step": 200
|
| 34 |
+
},
|
| 35 |
+
{
|
| 36 |
+
"epoch": 0.32,
|
| 37 |
+
"eval_NIST-control-dev_cosine_accuracy": 0.7026382684707642,
|
| 38 |
+
"eval_loss": 3.859454870223999,
|
| 39 |
+
"eval_runtime": 604.4408,
|
| 40 |
+
"eval_samples_per_second": 11.1,
|
| 41 |
+
"eval_steps_per_second": 0.695,
|
| 42 |
+
"step": 200
|
| 43 |
+
},
|
| 44 |
+
{
|
| 45 |
+
"epoch": 0.48,
|
| 46 |
+
"grad_norm": 14.002488136291504,
|
| 47 |
+
"learning_rate": 1.1601423487544485e-05,
|
| 48 |
+
"loss": 0.5711,
|
| 49 |
+
"step": 300
|
| 50 |
+
},
|
| 51 |
+
{
|
| 52 |
+
"epoch": 0.48,
|
| 53 |
+
"eval_NIST-control-dev_cosine_accuracy": 0.6896705627441406,
|
| 54 |
+
"eval_loss": 3.883481502532959,
|
| 55 |
+
"eval_runtime": 608.1647,
|
| 56 |
+
"eval_samples_per_second": 11.032,
|
| 57 |
+
"eval_steps_per_second": 0.691,
|
| 58 |
+
"step": 300
|
| 59 |
+
},
|
| 60 |
+
{
|
| 61 |
+
"epoch": 0.64,
|
| 62 |
+
"grad_norm": 28.6463565826416,
|
| 63 |
+
"learning_rate": 8.04270462633452e-06,
|
| 64 |
+
"loss": 0.3905,
|
| 65 |
+
"step": 400
|
| 66 |
+
},
|
| 67 |
+
{
|
| 68 |
+
"epoch": 0.64,
|
| 69 |
+
"eval_NIST-control-dev_cosine_accuracy": 0.7071098685264587,
|
| 70 |
+
"eval_loss": 3.7548305988311768,
|
| 71 |
+
"eval_runtime": 606.0474,
|
| 72 |
+
"eval_samples_per_second": 11.07,
|
| 73 |
+
"eval_steps_per_second": 0.693,
|
| 74 |
+
"step": 400
|
| 75 |
+
},
|
| 76 |
+
{
|
| 77 |
+
"epoch": 0.8,
|
| 78 |
+
"grad_norm": 0.0,
|
| 79 |
+
"learning_rate": 4.483985765124556e-06,
|
| 80 |
+
"loss": 0.043,
|
| 81 |
+
"step": 500
|
| 82 |
+
},
|
| 83 |
+
{
|
| 84 |
+
"epoch": 0.8,
|
| 85 |
+
"eval_NIST-control-dev_cosine_accuracy": 0.7035325765609741,
|
| 86 |
+
"eval_loss": 3.8021018505096436,
|
| 87 |
+
"eval_runtime": 600.763,
|
| 88 |
+
"eval_samples_per_second": 11.167,
|
| 89 |
+
"eval_steps_per_second": 0.699,
|
| 90 |
+
"step": 500
|
| 91 |
+
},
|
| 92 |
+
{
|
| 93 |
+
"epoch": 0.96,
|
| 94 |
+
"grad_norm": 0.0,
|
| 95 |
+
"learning_rate": 9.252669039145908e-07,
|
| 96 |
+
"loss": 0.0407,
|
| 97 |
+
"step": 600
|
| 98 |
+
},
|
| 99 |
+
{
|
| 100 |
+
"epoch": 0.96,
|
| 101 |
+
"eval_NIST-control-dev_cosine_accuracy": 0.7048740386962891,
|
| 102 |
+
"eval_loss": 3.811488628387451,
|
| 103 |
+
"eval_runtime": 599.8706,
|
| 104 |
+
"eval_samples_per_second": 11.184,
|
| 105 |
+
"eval_steps_per_second": 0.7,
|
| 106 |
+
"step": 600
|
| 107 |
+
}
|
| 108 |
+
],
|
| 109 |
+
"logging_steps": 100,
|
| 110 |
+
"max_steps": 625,
|
| 111 |
+
"num_input_tokens_seen": 0,
|
| 112 |
+
"num_train_epochs": 1,
|
| 113 |
+
"save_steps": 100,
|
| 114 |
+
"stateful_callbacks": {
|
| 115 |
+
"TrainerControl": {
|
| 116 |
+
"args": {
|
| 117 |
+
"should_epoch_stop": false,
|
| 118 |
+
"should_evaluate": false,
|
| 119 |
+
"should_log": false,
|
| 120 |
+
"should_save": true,
|
| 121 |
+
"should_training_stop": true
|
| 122 |
+
},
|
| 123 |
+
"attributes": {}
|
| 124 |
+
}
|
| 125 |
+
},
|
| 126 |
+
"total_flos": 0.0,
|
| 127 |
+
"train_batch_size": 16,
|
| 128 |
+
"trial_name": null,
|
| 129 |
+
"trial_params": null
|
| 130 |
+
}
|
HPEControlMapper/models/mpnet-base-control-triplet/checkpoint-625/training_args.bin
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:833ef880b3501bd0c81b578a2dc1a700c13add6a501fc8fb0cf7ea0843c2483a
|
| 3 |
+
size 5969
|
HPEControlMapper/models/mpnet-base-control-triplet/checkpoint-625/vocab.txt
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
HPEControlMapper/models/mpnet-base-control-triplet/eval/triplet_evaluation_NIST-control-dev_results.csv
ADDED
|
@@ -0,0 +1,147 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
epoch,steps,accuracy_cosine
|
| 2 |
+
0.0028676301904106446,100,0.6373528242111206
|
| 3 |
+
0.005735260380821289,200,0.6752123832702637
|
| 4 |
+
0.008602890571231933,300,0.7472052574157715
|
| 5 |
+
0.011470520761642579,400,0.7840214371681213
|
| 6 |
+
0.014338150952053224,500,0.7856610417366028
|
| 7 |
+
0.017205781142463867,600,0.7878968715667725
|
| 8 |
+
0.020073411332874512,700,0.7940080761909485
|
| 9 |
+
0.022941041523285157,800,0.8038455843925476
|
| 10 |
+
0.025808671713695802,900,0.826948881149292
|
| 11 |
+
0.028676301904106447,1000,0.8160679936408997
|
| 12 |
+
0.03154393209451709,1100,0.833507239818573
|
| 13 |
+
0.034411562284927734,1200,0.8253092765808105
|
| 14 |
+
0.03727919247533838,1300,0.8415561318397522
|
| 15 |
+
0.040146822665749024,1400,0.839171290397644
|
| 16 |
+
0.04301445285615967,1500,0.840512752532959
|
| 17 |
+
0.045882083046570314,1600,0.8504993319511414
|
| 18 |
+
0.048749713236980956,1700,0.8616783618927002
|
| 19 |
+
0.051617343427391604,1800,0.8594425320625305
|
| 20 |
+
0.054484973617802246,1900,0.8482635021209717
|
| 21 |
+
0.057352603808212894,2000,0.8543747067451477
|
| 22 |
+
0.060220233998623536,2100,0.859740674495697
|
| 23 |
+
0.06308786418903418,2200,0.8570576906204224
|
| 24 |
+
0.06595549437944483,2300,0.8566105365753174
|
| 25 |
+
0.06882312456985547,2400,0.8522879481315613
|
| 26 |
+
0.07169075476026611,2500,0.8589953780174255
|
| 27 |
+
0.07455838495067676,2600,0.8507974147796631
|
| 28 |
+
0.0774260151410874,2700,0.8525860905647278
|
| 29 |
+
0.08029364533149805,2800,0.8640632033348083
|
| 30 |
+
0.08316127552190869,2900,0.8533313274383545
|
| 31 |
+
0.08602890571231935,3000,0.8649575114250183
|
| 32 |
+
0.08889653590272999,3100,0.8689819574356079
|
| 33 |
+
0.09176416609314063,3200,0.871813952922821
|
| 34 |
+
0.09463179628355127,3300,0.8509464859962463
|
| 35 |
+
0.09749942647396191,3400,0.8804590702056885
|
| 36 |
+
0.10036705666437257,3500,0.8660008907318115
|
| 37 |
+
0.10323468685478321,3600,0.8649575114250183
|
| 38 |
+
0.10610231704519385,3700,0.8589953780174255
|
| 39 |
+
0.10896994723560449,3800,0.878372311592102
|
| 40 |
+
0.11183757742601515,3900,0.8785213828086853
|
| 41 |
+
0.11470520761642579,4000,0.8785213828086853
|
| 42 |
+
0.11757283780683643,4100,0.8296318650245667
|
| 43 |
+
0.12044046799724707,4200,0.8730064034461975
|
| 44 |
+
0.12330809818765771,4300,0.8673423528671265
|
| 45 |
+
0.12617572837806837,4400,0.8750931620597839
|
| 46 |
+
0.129043358568479,4500,0.8408108353614807
|
| 47 |
+
0.13191098875888965,4600,0.8662989735603333
|
| 48 |
+
0.1347786189493003,4700,0.8719630241394043
|
| 49 |
+
0.13764624913971094,4800,0.8818005919456482
|
| 50 |
+
0.14051387933012158,4900,0.8825458288192749
|
| 51 |
+
0.14338150952053222,5000,0.8628707528114319
|
| 52 |
+
0.1462491397109429,5100,0.8694291114807129
|
| 53 |
+
0.14911676990135353,5200,0.8759874701499939
|
| 54 |
+
0.15198440009176417,5300,0.8685348033905029
|
| 55 |
+
0.1548520302821748,5400,0.8780742287635803
|
| 56 |
+
0.15771966047258545,5500,0.8835892081260681
|
| 57 |
+
0.1605872906629961,5600,0.8828439116477966
|
| 58 |
+
0.16345492085340674,5700,0.8657028079032898
|
| 59 |
+
0.16632255104381738,5800,0.8829929828643799
|
| 60 |
+
0.16919018123422802,5900,0.8770308494567871
|
| 61 |
+
0.1720578114246387,6000,0.8892532587051392
|
| 62 |
+
0.17492544161504933,6100,0.8847816586494446
|
| 63 |
+
0.17779307180545997,6200,0.8818005919456482
|
| 64 |
+
0.18066070199587062,6300,0.8910418748855591
|
| 65 |
+
0.18352833218628126,6400,0.887762725353241
|
| 66 |
+
0.1863959623766919,6500,0.8867193460464478
|
| 67 |
+
0.18926359256710254,6600,0.8879117369651794
|
| 68 |
+
0.19213122275751318,6700,0.8792666792869568
|
| 69 |
+
0.19499885294792382,6800,0.8928305506706238
|
| 70 |
+
0.1978664831383345,6900,0.8855268955230713
|
| 71 |
+
0.20073411332874513,7000,0.8886570334434509
|
| 72 |
+
0.20360174351915578,7100,0.8828439116477966
|
| 73 |
+
0.20646937370956642,7200,0.8929795622825623
|
| 74 |
+
0.016,100,0.6908630132675171
|
| 75 |
+
0.032,200,0.7467581033706665
|
| 76 |
+
0.048,300,0.7501863241195679
|
| 77 |
+
0.064,400,0.7367715239524841
|
| 78 |
+
0.08,500,0.7378149032592773
|
| 79 |
+
0.096,600,0.7382620573043823
|
| 80 |
+
0.112,700,0.7512297034263611
|
| 81 |
+
0.128,800,0.7753763794898987
|
| 82 |
+
0.144,900,0.8031002879142761
|
| 83 |
+
0.16,1000,0.8036965131759644
|
| 84 |
+
0.176,1100,0.8162170052528381
|
| 85 |
+
0.192,1200,0.8276941180229187
|
| 86 |
+
0.208,1300,0.8232225179672241
|
| 87 |
+
0.224,1400,0.8221791386604309
|
| 88 |
+
0.24,1500,0.8157698512077332
|
| 89 |
+
0.256,1600,0.8309733271598816
|
| 90 |
+
0.272,1700,0.8323147892951965
|
| 91 |
+
0.288,1800,0.826948881149292
|
| 92 |
+
0.304,1900,0.8086152672767639
|
| 93 |
+
0.32,2000,0.8166641592979431
|
| 94 |
+
0.336,2100,0.8174094557762146
|
| 95 |
+
0.352,2200,0.8193471431732178
|
| 96 |
+
0.368,2300,0.8141302466392517
|
| 97 |
+
0.384,2400,0.826948881149292
|
| 98 |
+
0.4,2500,0.8358920812606812
|
| 99 |
+
0.416,2600,0.8209867477416992
|
| 100 |
+
0.432,2700,0.8385750651359558
|
| 101 |
+
0.448,2800,0.8105530142784119
|
| 102 |
+
0.464,2900,0.8200924396514893
|
| 103 |
+
0.48,3000,0.8375316858291626
|
| 104 |
+
0.496,3100,0.8205395936965942
|
| 105 |
+
0.512,3200,0.8279922604560852
|
| 106 |
+
0.528,3300,0.825607419013977
|
| 107 |
+
0.544,3400,0.8253092765808105
|
| 108 |
+
0.56,3500,0.8159189224243164
|
| 109 |
+
0.576,3600,0.8183037638664246
|
| 110 |
+
0.592,3700,0.8139812350273132
|
| 111 |
+
0.608,3800,0.833060085773468
|
| 112 |
+
0.624,3900,0.8123416304588318
|
| 113 |
+
0.64,4000,0.8008645176887512
|
| 114 |
+
0.656,4100,0.8239678144454956
|
| 115 |
+
0.672,4200,0.8172603845596313
|
| 116 |
+
0.688,4300,0.8123416304588318
|
| 117 |
+
0.704,4400,0.8186019062995911
|
| 118 |
+
0.72,4500,0.8202414512634277
|
| 119 |
+
0.736,4600,0.8186019062995911
|
| 120 |
+
0.752,4700,0.8148755431175232
|
| 121 |
+
0.768,4800,0.8238187432289124
|
| 122 |
+
0.784,4900,0.827396035194397
|
| 123 |
+
0.8,5000,0.8132359385490417
|
| 124 |
+
0.816,5100,0.8145774602890015
|
| 125 |
+
0.832,5200,0.8111491799354553
|
| 126 |
+
0.848,5300,0.8197942972183228
|
| 127 |
+
0.864,5400,0.8169623017311096
|
| 128 |
+
0.88,5500,0.8178566098213196
|
| 129 |
+
0.896,5600,0.814279317855835
|
| 130 |
+
0.912,5700,0.8206886053085327
|
| 131 |
+
0.928,5800,0.8123416304588318
|
| 132 |
+
0.944,5900,0.8123416304588318
|
| 133 |
+
0.96,6000,0.8110001683235168
|
| 134 |
+
0.976,6100,0.8136830925941467
|
| 135 |
+
0.992,6200,0.8139812350273132
|
| 136 |
+
0.16,100,0.69414222240448
|
| 137 |
+
0.32,200,0.7014458179473877
|
| 138 |
+
0.48,300,0.6860933303833008
|
| 139 |
+
0.64,400,0.7059174180030823
|
| 140 |
+
0.8,500,0.6977194547653198
|
| 141 |
+
0.96,600,0.7017439007759094
|
| 142 |
+
0.16,100,0.6661201119422913
|
| 143 |
+
0.32,200,0.7026382684707642
|
| 144 |
+
0.48,300,0.6896705627441406
|
| 145 |
+
0.64,400,0.7071098685264587
|
| 146 |
+
0.8,500,0.7035325765609741
|
| 147 |
+
0.96,600,0.7048740386962891
|
HPEControlMapper/models/mpnet-base-control-triplet/final/1_Pooling/config.json
ADDED
|
@@ -0,0 +1,10 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"word_embedding_dimension": 768,
|
| 3 |
+
"pooling_mode_cls_token": false,
|
| 4 |
+
"pooling_mode_mean_tokens": true,
|
| 5 |
+
"pooling_mode_max_tokens": false,
|
| 6 |
+
"pooling_mode_mean_sqrt_len_tokens": false,
|
| 7 |
+
"pooling_mode_weightedmean_tokens": false,
|
| 8 |
+
"pooling_mode_lasttoken": false,
|
| 9 |
+
"include_prompt": true
|
| 10 |
+
}
|
HPEControlMapper/models/mpnet-base-control-triplet/final/README.md
ADDED
|
@@ -0,0 +1,440 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
---
|
| 2 |
+
language:
|
| 3 |
+
- en
|
| 4 |
+
license: apache-2.0
|
| 5 |
+
tags:
|
| 6 |
+
- sentence-transformers
|
| 7 |
+
- sentence-similarity
|
| 8 |
+
- feature-extraction
|
| 9 |
+
- generated_from_trainer
|
| 10 |
+
- dataset_size:10000
|
| 11 |
+
- loss:TripletLoss
|
| 12 |
+
base_model: microsoft/mpnet-base
|
| 13 |
+
widget:
|
| 14 |
+
- source_sentence: Use hardware-based methods where available to guarantee role-based
|
| 15 |
+
access control cannot be bypassed.
|
| 16 |
+
sentences:
|
| 17 |
+
- Related control that reinforces stability and assurance in brute force login prevention
|
| 18 |
+
use cases.
|
| 19 |
+
- Audit session replay defense_b11_8 as part of continuous security assessment processes.
|
| 20 |
+
- Core functionality needed to enforce effective role-based access control mechanisms.
|
| 21 |
+
- source_sentence: Provide full-feature access to security enhancements in NVIDIA
|
| 22 |
+
GPU firmware.
|
| 23 |
+
sentences:
|
| 24 |
+
- Implement secure communication channels between host and GPU.
|
| 25 |
+
- A little boy blows bubbles outdoors.
|
| 26 |
+
- Use HTTPS inspection to detect man-in-the-middle attack attempts.
|
| 27 |
+
- source_sentence: Validate source authenticity by requiring signed code in all components.
|
| 28 |
+
sentences:
|
| 29 |
+
- Firewalls are activated by default and preloaded with security policies.
|
| 30 |
+
- Enforce cryptographic validation on third-party software inputs.
|
| 31 |
+
- Display productivity summaries on a weekly dashboard.
|
| 32 |
+
- source_sentence: Mandate organization-wide adherence to policies enforcing siem
|
| 33 |
+
integration using centrally managed tools.
|
| 34 |
+
sentences:
|
| 35 |
+
- Review policies around anonymous proxy blocking_b11_50 to avoid unexpected vectors
|
| 36 |
+
of compromise.
|
| 37 |
+
- Implement key management systems that use secure encryption algorithms.
|
| 38 |
+
- This measure directly supports secure handling within siem integration implementations.
|
| 39 |
+
- source_sentence: Mandate organization-wide adherence to policies enforcing virtual
|
| 40 |
+
machine isolation using centrally managed tools.
|
| 41 |
+
sentences:
|
| 42 |
+
- Adult males stand in front of a brick wall near something made of metal.
|
| 43 |
+
- Monitor for issues related to redundant login blocking_b12_1 as part of extended
|
| 44 |
+
security hygiene.
|
| 45 |
+
- A widely recommended control paired with proper virtual machine isolation implementations.
|
| 46 |
+
pipeline_tag: sentence-similarity
|
| 47 |
+
library_name: sentence-transformers
|
| 48 |
+
metrics:
|
| 49 |
+
- cosine_accuracy
|
| 50 |
+
model-index:
|
| 51 |
+
- name: MPNet base trained on NIST Controls
|
| 52 |
+
results:
|
| 53 |
+
- task:
|
| 54 |
+
type: triplet
|
| 55 |
+
name: Triplet
|
| 56 |
+
dataset:
|
| 57 |
+
name: NIST control dev
|
| 58 |
+
type: NIST-control-dev
|
| 59 |
+
metrics:
|
| 60 |
+
- type: cosine_accuracy
|
| 61 |
+
value: 0.7048740386962891
|
| 62 |
+
name: Cosine Accuracy
|
| 63 |
+
- task:
|
| 64 |
+
type: triplet
|
| 65 |
+
name: Triplet
|
| 66 |
+
dataset:
|
| 67 |
+
name: all nli test
|
| 68 |
+
type: all-nli-test
|
| 69 |
+
metrics:
|
| 70 |
+
- type: cosine_accuracy
|
| 71 |
+
value: 0.7318310737609863
|
| 72 |
+
name: Cosine Accuracy
|
| 73 |
+
---
|
| 74 |
+
|
| 75 |
+
# MPNet base trained on NIST Controls
|
| 76 |
+
|
| 77 |
+
This is a [sentence-transformers](https://www.SBERT.net) model finetuned from [microsoft/mpnet-base](https://huggingface.co/microsoft/mpnet-base) on a CSV dataset of 10,000 anchor/positive/negative triplets. It maps sentences & paragraphs to a 768-dimensional dense vector space and can be used for semantic textual similarity, semantic search, paraphrase mining, text classification, clustering, and more.
|
| 78 |
+
|
| 79 |
+
## Model Details
|
| 80 |
+
|
| 81 |
+
### Model Description
|
| 82 |
+
- **Model Type:** Sentence Transformer
|
| 83 |
+
- **Base model:** [microsoft/mpnet-base](https://huggingface.co/microsoft/mpnet-base) <!-- at revision 6996ce1e91bd2a9c7d7f61daec37463394f73f09 -->
|
| 84 |
+
- **Maximum Sequence Length:** 512 tokens
|
| 85 |
+
- **Output Dimensionality:** 768 dimensions
|
| 86 |
+
- **Similarity Function:** Cosine Similarity
|
| 87 |
+
- **Training Dataset:**
|
| 88 |
+
- csv
|
| 89 |
+
- **Language:** en
|
| 90 |
+
- **License:** apache-2.0
|
| 91 |
+
|
| 92 |
+
### Model Sources
|
| 93 |
+
|
| 94 |
+
- **Documentation:** [Sentence Transformers Documentation](https://sbert.net)
|
| 95 |
+
- **Repository:** [Sentence Transformers on GitHub](https://github.com/UKPLab/sentence-transformers)
|
| 96 |
+
- **Hugging Face:** [Sentence Transformers on Hugging Face](https://huggingface.co/models?library=sentence-transformers)
|
| 97 |
+
|
| 98 |
+
### Full Model Architecture
|
| 99 |
+
|
| 100 |
+
```
|
| 101 |
+
SentenceTransformer(
|
| 102 |
+
(0): Transformer({'max_seq_length': 512, 'do_lower_case': False}) with Transformer model: MPNetModel
|
| 103 |
+
(1): Pooling({'word_embedding_dimension': 768, 'pooling_mode_cls_token': False, 'pooling_mode_mean_tokens': True, 'pooling_mode_max_tokens': False, 'pooling_mode_mean_sqrt_len_tokens': False, 'pooling_mode_weightedmean_tokens': False, 'pooling_mode_lasttoken': False, 'include_prompt': True})
|
| 104 |
+
)
|
| 105 |
+
```
|
| 106 |
+
|
| 107 |
+
## Usage
|
| 108 |
+
|
| 109 |
+
### Direct Usage (Sentence Transformers)
|
| 110 |
+
|
| 111 |
+
First install the Sentence Transformers library:
|
| 112 |
+
|
| 113 |
+
```bash
|
| 114 |
+
pip install -U sentence-transformers
|
| 115 |
+
```
|
| 116 |
+
|
| 117 |
+
Then you can load this model and run inference.
|
| 118 |
+
```python
|
| 119 |
+
from sentence_transformers import SentenceTransformer
|
| 120 |
+
|
| 121 |
+
# Download from the 🤗 Hub
|
| 122 |
+
model = SentenceTransformer("sentence_transformers_model_id")
|
| 123 |
+
# Run inference
|
| 124 |
+
sentences = [
|
| 125 |
+
'Mandate organization-wide adherence to policies enforcing virtual machine isolation using centrally managed tools.',
|
| 126 |
+
'A widely recommended control paired with proper virtual machine isolation implementations.',
|
| 127 |
+
'Monitor for issues related to redundant login blocking_b12_1 as part of extended security hygiene.',
|
| 128 |
+
]
|
| 129 |
+
embeddings = model.encode(sentences)
|
| 130 |
+
print(embeddings.shape)
|
| 131 |
+
# [3, 768]
|
| 132 |
+
|
| 133 |
+
# Get the similarity scores for the embeddings
|
| 134 |
+
similarities = model.similarity(embeddings, embeddings)
|
| 135 |
+
print(similarities.shape)
|
| 136 |
+
# [3, 3]
|
| 137 |
+
```
|
| 138 |
+
|
| 139 |
+
<!--
|
| 140 |
+
### Direct Usage (Transformers)
|
| 141 |
+
|
| 142 |
+
<details><summary>Click to see the direct usage in Transformers</summary>
|
| 143 |
+
|
| 144 |
+
</details>
|
| 145 |
+
-->
|
| 146 |
+
|
| 147 |
+
<!--
|
| 148 |
+
### Downstream Usage (Sentence Transformers)
|
| 149 |
+
|
| 150 |
+
You can finetune this model on your own dataset.
|
| 151 |
+
|
| 152 |
+
<details><summary>Click to expand</summary>
|
| 153 |
+
|
| 154 |
+
</details>
|
| 155 |
+
-->
|
| 156 |
+
|
| 157 |
+
<!--
|
| 158 |
+
### Out-of-Scope Use
|
| 159 |
+
|
| 160 |
+
*List how the model may foreseeably be misused and address what users ought not to do with the model.*
|
| 161 |
+
-->
|
| 162 |
+
|
| 163 |
+
## Evaluation
|
| 164 |
+
|
| 165 |
+
### Metrics
|
| 166 |
+
|
| 167 |
+
#### Triplet
|
| 168 |
+
|
| 169 |
+
* Datasets: `NIST-control-dev` and `all-nli-test`
|
| 170 |
+
* Evaluated with [<code>TripletEvaluator</code>](https://sbert.net/docs/package_reference/sentence_transformer/evaluation.html#sentence_transformers.evaluation.TripletEvaluator)
|
| 171 |
+
|
| 172 |
+
| Metric | NIST-control-dev | all-nli-test |
|
| 173 |
+
|:--------------------|:-----------------|:-------------|
|
| 174 |
+
| **cosine_accuracy** | **0.7049** | **0.7318** |
|
| 175 |
+
|
| 176 |
+
<!--
|
| 177 |
+
## Bias, Risks and Limitations
|
| 178 |
+
|
| 179 |
+
*What are the known or foreseeable issues stemming from this model? You could also flag here known failure cases or weaknesses of the model.*
|
| 180 |
+
-->
|
| 181 |
+
|
| 182 |
+
<!--
|
| 183 |
+
### Recommendations
|
| 184 |
+
|
| 185 |
+
*What are recommendations with respect to the foreseeable issues? For example, filtering explicit content.*
|
| 186 |
+
-->
|
| 187 |
+
|
| 188 |
+
## Training Details
|
| 189 |
+
|
| 190 |
+
### Training Dataset
|
| 191 |
+
|
| 192 |
+
#### csv
|
| 193 |
+
|
| 194 |
+
* Dataset: csv
|
| 195 |
+
* Size: 10,000 training samples
|
| 196 |
+
* Columns: <code>anchor</code>, <code>positive</code>, and <code>negative</code>
|
| 197 |
+
* Approximate statistics based on the first 1000 samples:
|
| 198 |
+
| | anchor | positive | negative |
|
| 199 |
+
|:--------|:------------------------------------------------------------------------------------|:------------------------------------------------------------------------------------|:----------------------------------------------------------------------------------|
|
| 200 |
+
| type | string | string | string |
|
| 201 |
+
| details | <ul><li>min: 10 tokens</li><li>mean: 21.39 tokens</li><li>max: 280 tokens</li></ul> | <ul><li>min: 10 tokens</li><li>mean: 17.96 tokens</li><li>max: 171 tokens</li></ul> | <ul><li>min: 9 tokens</li><li>mean: 20.27 tokens</li><li>max: 86 tokens</li></ul> |
|
| 202 |
+
* Samples:
|
| 203 |
+
| anchor | positive | negative |
|
| 204 |
+
|:---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|:----------------------------------------------------------------------------------------------------------------------------------|:------------------------------------------------------------------------------------------|
|
| 205 |
+
| <code>Audit Logs: A secure record of security-sensitive activity required for the audit trail. The audit log is a record of an activity by a system a user or an application. Audit logs are required for the audit trail. Examples of audit events are listed in the requirements section below. It is important that these events are in the audit log as part of HPE's commitment to CISA's Secure by Design Pledge.</code> | <code>Security Event Correlation (v3): Systems that correlate audit logs with security events to detect anomalies.</code> | <code>Remote Access Protection (v3): Controls to secure remote access to systems.</code> |
|
| 206 |
+
| <code>One to One Credential Mapping: This requirement involves getting the specific user login information correct in the audit log down to the specific user and not just listing the role or process ID value.</code> | <code>User Identity Verification (v3): Mechanism to verify user identities before granting access to resources.</code> | <code>Incident Response Testing (v3): Regular testing of incident response plans.</code> |
|
| 207 |
+
| <code>Reproducible Builds: Be able to recreate bit-by-bit identical copies of signed code. Ensure that signed code really came from the source code developed by HPE and possibly reviewed by a third-party.</code> | <code>Build Integrity Checking (v3): Verification process to ensure that the build environment has not been tampered with.</code> | <code>Backup and Recovery Planning (v3): Ensure data is backed up and recoverable.</code> |
|
| 208 |
+
* Loss: [<code>TripletLoss</code>](https://sbert.net/docs/package_reference/sentence_transformer/losses.html#tripletloss) with these parameters:
|
| 209 |
+
```json
|
| 210 |
+
{
|
| 211 |
+
"distance_metric": "TripletDistanceMetric.EUCLIDEAN",
|
| 212 |
+
"triplet_margin": 5
|
| 213 |
+
}
|
| 214 |
+
```
|
| 215 |
+
|
| 216 |
+
### Evaluation Dataset
|
| 217 |
+
|
| 218 |
+
#### csv
|
| 219 |
+
|
| 220 |
+
* Dataset: csv
|
| 221 |
+
* Size: 6,709 evaluation samples
|
| 222 |
+
* Columns: <code>anchor</code>, <code>positive</code>, and <code>negative</code>
|
| 223 |
+
* Approximate statistics based on the first 1000 samples:
|
| 224 |
+
| | anchor | positive | negative |
|
| 225 |
+
|:--------|:----------------------------------------------------------------------------------|:----------------------------------------------------------------------------------|:----------------------------------------------------------------------------------|
|
| 226 |
+
| type | string | string | string |
|
| 227 |
+
| details | <ul><li>min: 8 tokens</li><li>mean: 18.08 tokens</li><li>max: 54 tokens</li></ul> | <ul><li>min: 4 tokens</li><li>mean: 13.64 tokens</li><li>max: 33 tokens</li></ul> | <ul><li>min: 5 tokens</li><li>mean: 18.23 tokens</li><li>max: 34 tokens</li></ul> |
|
| 228 |
+
* Samples:
|
| 229 |
+
| anchor | positive | negative |
|
| 230 |
+
|:----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|:--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|:--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
|
| 231 |
+
| <code>Only allow approved and secured components to run in production environments validating them with cryptographic checks and requiring authenticated configuration changes.</code> | <code>Deploy only approved applications in production environments. Use secure startup methods to validate software integrity. Require authentication for all system configuration changes.</code> | <code>Issue smart cards to employees as physical access tokens. Maintain a contact list for emergency communications. Label system components for asset tracking.</code> |
|
| 232 |
+
| <code>Create system architectures that include failover tamper detection and rollback protections to ensure operations continue even under cyberattack.</code> | <code>Design systems to continue operating under defined attack scenarios. Include tamper detection and automatic rollback capabilities. Ensure recovery processes are tested for effectiveness.</code> | <code>Protect paper-based confidential files in locked cabinets. Post warning signs at restricted data center doors. Track employee security briefings with completion certificates.</code> |
|
| 233 |
+
| <code>Structure networks and applications into isolated segments and apply integrity checking to ensure only trusted software is executed in operational environments.</code> | <code>Use network segmentation and security zones in system design. Validate system behavior using integrity checks. Require signed software for deployment into production.</code> | <code>Distribute weather radios for disaster preparedness in remote facilities. Track badge issuance and disable lost credentials. Train users to avoid reusing passwords across services.</code> |
|
| 234 |
+
* Loss: [<code>TripletLoss</code>](https://sbert.net/docs/package_reference/sentence_transformer/losses.html#tripletloss) with these parameters:
|
| 235 |
+
```json
|
| 236 |
+
{
|
| 237 |
+
"distance_metric": "TripletDistanceMetric.EUCLIDEAN",
|
| 238 |
+
"triplet_margin": 5
|
| 239 |
+
}
|
| 240 |
+
```
|
| 241 |
+
|
| 242 |
+
### Training Hyperparameters
|
| 243 |
+
#### Non-Default Hyperparameters
|
| 244 |
+
|
| 245 |
+
- `eval_strategy`: steps
|
| 246 |
+
- `per_device_train_batch_size`: 16
|
| 247 |
+
- `per_device_eval_batch_size`: 16
|
| 248 |
+
- `learning_rate`: 2e-05
|
| 249 |
+
- `num_train_epochs`: 1
|
| 250 |
+
- `warmup_ratio`: 0.1
|
| 251 |
+
- `fp16`: True
|
| 252 |
+
- `batch_sampler`: no_duplicates
|
| 253 |
+
|
| 254 |
+
#### All Hyperparameters
|
| 255 |
+
<details><summary>Click to expand</summary>
|
| 256 |
+
|
| 257 |
+
- `overwrite_output_dir`: False
|
| 258 |
+
- `do_predict`: False
|
| 259 |
+
- `eval_strategy`: steps
|
| 260 |
+
- `prediction_loss_only`: True
|
| 261 |
+
- `per_device_train_batch_size`: 16
|
| 262 |
+
- `per_device_eval_batch_size`: 16
|
| 263 |
+
- `per_gpu_train_batch_size`: None
|
| 264 |
+
- `per_gpu_eval_batch_size`: None
|
| 265 |
+
- `gradient_accumulation_steps`: 1
|
| 266 |
+
- `eval_accumulation_steps`: None
|
| 267 |
+
- `torch_empty_cache_steps`: None
|
| 268 |
+
- `learning_rate`: 2e-05
|
| 269 |
+
- `weight_decay`: 0.0
|
| 270 |
+
- `adam_beta1`: 0.9
|
| 271 |
+
- `adam_beta2`: 0.999
|
| 272 |
+
- `adam_epsilon`: 1e-08
|
| 273 |
+
- `max_grad_norm`: 1.0
|
| 274 |
+
- `num_train_epochs`: 1
|
| 275 |
+
- `max_steps`: -1
|
| 276 |
+
- `lr_scheduler_type`: linear
|
| 277 |
+
- `lr_scheduler_kwargs`: {}
|
| 278 |
+
- `warmup_ratio`: 0.1
|
| 279 |
+
- `warmup_steps`: 0
|
| 280 |
+
- `log_level`: passive
|
| 281 |
+
- `log_level_replica`: warning
|
| 282 |
+
- `log_on_each_node`: True
|
| 283 |
+
- `logging_nan_inf_filter`: True
|
| 284 |
+
- `save_safetensors`: True
|
| 285 |
+
- `save_on_each_node`: False
|
| 286 |
+
- `save_only_model`: False
|
| 287 |
+
- `restore_callback_states_from_checkpoint`: False
|
| 288 |
+
- `no_cuda`: False
|
| 289 |
+
- `use_cpu`: False
|
| 290 |
+
- `use_mps_device`: False
|
| 291 |
+
- `seed`: 42
|
| 292 |
+
- `data_seed`: None
|
| 293 |
+
- `jit_mode_eval`: False
|
| 294 |
+
- `use_ipex`: False
|
| 295 |
+
- `bf16`: False
|
| 296 |
+
- `fp16`: True
|
| 297 |
+
- `fp16_opt_level`: O1
|
| 298 |
+
- `half_precision_backend`: auto
|
| 299 |
+
- `bf16_full_eval`: False
|
| 300 |
+
- `fp16_full_eval`: False
|
| 301 |
+
- `tf32`: None
|
| 302 |
+
- `local_rank`: 0
|
| 303 |
+
- `ddp_backend`: None
|
| 304 |
+
- `tpu_num_cores`: None
|
| 305 |
+
- `tpu_metrics_debug`: False
|
| 306 |
+
- `debug`: []
|
| 307 |
+
- `dataloader_drop_last`: False
|
| 308 |
+
- `dataloader_num_workers`: 0
|
| 309 |
+
- `dataloader_prefetch_factor`: None
|
| 310 |
+
- `past_index`: -1
|
| 311 |
+
- `disable_tqdm`: False
|
| 312 |
+
- `remove_unused_columns`: True
|
| 313 |
+
- `label_names`: None
|
| 314 |
+
- `load_best_model_at_end`: False
|
| 315 |
+
- `ignore_data_skip`: False
|
| 316 |
+
- `fsdp`: []
|
| 317 |
+
- `fsdp_min_num_params`: 0
|
| 318 |
+
- `fsdp_config`: {'min_num_params': 0, 'xla': False, 'xla_fsdp_v2': False, 'xla_fsdp_grad_ckpt': False}
|
| 319 |
+
- `fsdp_transformer_layer_cls_to_wrap`: None
|
| 320 |
+
- `accelerator_config`: {'split_batches': False, 'dispatch_batches': None, 'even_batches': True, 'use_seedable_sampler': True, 'non_blocking': False, 'gradient_accumulation_kwargs': None}
|
| 321 |
+
- `deepspeed`: None
|
| 322 |
+
- `label_smoothing_factor`: 0.0
|
| 323 |
+
- `optim`: adamw_torch
|
| 324 |
+
- `optim_args`: None
|
| 325 |
+
- `adafactor`: False
|
| 326 |
+
- `group_by_length`: False
|
| 327 |
+
- `length_column_name`: length
|
| 328 |
+
- `ddp_find_unused_parameters`: None
|
| 329 |
+
- `ddp_bucket_cap_mb`: None
|
| 330 |
+
- `ddp_broadcast_buffers`: False
|
| 331 |
+
- `dataloader_pin_memory`: True
|
| 332 |
+
- `dataloader_persistent_workers`: False
|
| 333 |
+
- `skip_memory_metrics`: True
|
| 334 |
+
- `use_legacy_prediction_loop`: False
|
| 335 |
+
- `push_to_hub`: False
|
| 336 |
+
- `resume_from_checkpoint`: None
|
| 337 |
+
- `hub_model_id`: None
|
| 338 |
+
- `hub_strategy`: every_save
|
| 339 |
+
- `hub_private_repo`: None
|
| 340 |
+
- `hub_always_push`: False
|
| 341 |
+
- `gradient_checkpointing`: False
|
| 342 |
+
- `gradient_checkpointing_kwargs`: None
|
| 343 |
+
- `include_inputs_for_metrics`: False
|
| 344 |
+
- `include_for_metrics`: []
|
| 345 |
+
- `eval_do_concat_batches`: True
|
| 346 |
+
- `fp16_backend`: auto
|
| 347 |
+
- `push_to_hub_model_id`: None
|
| 348 |
+
- `push_to_hub_organization`: None
|
| 349 |
+
- `mp_parameters`:
|
| 350 |
+
- `auto_find_batch_size`: False
|
| 351 |
+
- `full_determinism`: False
|
| 352 |
+
- `torchdynamo`: None
|
| 353 |
+
- `ray_scope`: last
|
| 354 |
+
- `ddp_timeout`: 1800
|
| 355 |
+
- `torch_compile`: False
|
| 356 |
+
- `torch_compile_backend`: None
|
| 357 |
+
- `torch_compile_mode`: None
|
| 358 |
+
- `include_tokens_per_second`: False
|
| 359 |
+
- `include_num_input_tokens_seen`: False
|
| 360 |
+
- `neftune_noise_alpha`: None
|
| 361 |
+
- `optim_target_modules`: None
|
| 362 |
+
- `batch_eval_metrics`: False
|
| 363 |
+
- `eval_on_start`: False
|
| 364 |
+
- `use_liger_kernel`: False
|
| 365 |
+
- `eval_use_gather_object`: False
|
| 366 |
+
- `average_tokens_across_devices`: False
|
| 367 |
+
- `prompts`: None
|
| 368 |
+
- `batch_sampler`: no_duplicates
|
| 369 |
+
- `multi_dataset_batch_sampler`: proportional
|
| 370 |
+
|
| 371 |
+
</details>
|
| 372 |
+
|
| 373 |
+
### Training Logs
|
| 374 |
+
| Epoch | Step | Training Loss | Validation Loss | NIST-control-dev_cosine_accuracy | all-nli-test_cosine_accuracy |
|
| 375 |
+
|:-----:|:----:|:-------------:|:---------------:|:--------------------------------:|:----------------------------:|
|
| 376 |
+
| -1 | -1 | - | - | 0.6563 | - |
|
| 377 |
+
| 0.16 | 100 | 2.6751 | 4.0892 | 0.6661 | - |
|
| 378 |
+
| 0.32 | 200 | 0.9272 | 3.8595 | 0.7026 | - |
|
| 379 |
+
| 0.48 | 300 | 0.5711 | 3.8835 | 0.6897 | - |
|
| 380 |
+
| 0.64 | 400 | 0.3905 | 3.7548 | 0.7071 | - |
|
| 381 |
+
| 0.8 | 500 | 0.043 | 3.8021 | 0.7035 | - |
|
| 382 |
+
| 0.96 | 600 | 0.0407 | 3.8115 | 0.7049 | - |
|
| 383 |
+
| -1 | -1 | - | - | - | 0.7318 |
|
| 384 |
+
|
| 385 |
+
|
| 386 |
+
### Framework Versions
|
| 387 |
+
- Python: 3.13.5
|
| 388 |
+
- Sentence Transformers: 4.1.0
|
| 389 |
+
- Transformers: 4.52.4
|
| 390 |
+
- PyTorch: 2.7.1+cpu
|
| 391 |
+
- Accelerate: 1.8.1
|
| 392 |
+
- Datasets: 2.15.0
|
| 393 |
+
- Tokenizers: 0.21.2
|
| 394 |
+
|
| 395 |
+
## Citation
|
| 396 |
+
|
| 397 |
+
### BibTeX
|
| 398 |
+
|
| 399 |
+
#### Sentence Transformers
|
| 400 |
+
```bibtex
|
| 401 |
+
@inproceedings{reimers-2019-sentence-bert,
|
| 402 |
+
title = "Sentence-BERT: Sentence Embeddings using Siamese BERT-Networks",
|
| 403 |
+
author = "Reimers, Nils and Gurevych, Iryna",
|
| 404 |
+
booktitle = "Proceedings of the 2019 Conference on Empirical Methods in Natural Language Processing",
|
| 405 |
+
month = "11",
|
| 406 |
+
year = "2019",
|
| 407 |
+
publisher = "Association for Computational Linguistics",
|
| 408 |
+
url = "https://arxiv.org/abs/1908.10084",
|
| 409 |
+
}
|
| 410 |
+
```
|
| 411 |
+
|
| 412 |
+
#### TripletLoss
|
| 413 |
+
```bibtex
|
| 414 |
+
@misc{hermans2017defense,
|
| 415 |
+
title={In Defense of the Triplet Loss for Person Re-Identification},
|
| 416 |
+
author={Alexander Hermans and Lucas Beyer and Bastian Leibe},
|
| 417 |
+
year={2017},
|
| 418 |
+
eprint={1703.07737},
|
| 419 |
+
archivePrefix={arXiv},
|
| 420 |
+
primaryClass={cs.CV}
|
| 421 |
+
}
|
| 422 |
+
```
|
| 423 |
+
|
| 424 |
+
<!--
|
| 425 |
+
## Glossary
|
| 426 |
+
|
| 427 |
+
*Clearly define terms in order to be accessible across audiences.*
|
| 428 |
+
-->
|
| 429 |
+
|
| 430 |
+
<!--
|
| 431 |
+
## Model Card Authors
|
| 432 |
+
|
| 433 |
+
*Lists the people who create the model card, providing recognition and accountability for the detailed work that goes into its construction.*
|
| 434 |
+
-->
|
| 435 |
+
|
| 436 |
+
<!--
|
| 437 |
+
## Model Card Contact
|
| 438 |
+
|
| 439 |
+
*Provides a way for people who have updates to the Model Card, suggestions, or questions, to contact the Model Card authors.*
|
| 440 |
+
-->
|
HPEControlMapper/models/mpnet-base-control-triplet/final/config.json
ADDED
|
@@ -0,0 +1,23 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"architectures": [
|
| 3 |
+
"MPNetModel"
|
| 4 |
+
],
|
| 5 |
+
"attention_probs_dropout_prob": 0.1,
|
| 6 |
+
"bos_token_id": 0,
|
| 7 |
+
"eos_token_id": 2,
|
| 8 |
+
"hidden_act": "gelu",
|
| 9 |
+
"hidden_dropout_prob": 0.1,
|
| 10 |
+
"hidden_size": 768,
|
| 11 |
+
"initializer_range": 0.02,
|
| 12 |
+
"intermediate_size": 3072,
|
| 13 |
+
"layer_norm_eps": 1e-05,
|
| 14 |
+
"max_position_embeddings": 514,
|
| 15 |
+
"model_type": "mpnet",
|
| 16 |
+
"num_attention_heads": 12,
|
| 17 |
+
"num_hidden_layers": 12,
|
| 18 |
+
"pad_token_id": 1,
|
| 19 |
+
"relative_attention_num_buckets": 32,
|
| 20 |
+
"torch_dtype": "float32",
|
| 21 |
+
"transformers_version": "4.52.4",
|
| 22 |
+
"vocab_size": 30527
|
| 23 |
+
}
|
HPEControlMapper/models/mpnet-base-control-triplet/final/config_sentence_transformers.json
ADDED
|
@@ -0,0 +1,10 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"__version__": {
|
| 3 |
+
"sentence_transformers": "4.1.0",
|
| 4 |
+
"transformers": "4.52.4",
|
| 5 |
+
"pytorch": "2.7.1+cpu"
|
| 6 |
+
},
|
| 7 |
+
"prompts": {},
|
| 8 |
+
"default_prompt_name": null,
|
| 9 |
+
"similarity_fn_name": "cosine"
|
| 10 |
+
}
|
HPEControlMapper/models/mpnet-base-control-triplet/final/model.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:023ce0c063565c38d46015c3e97cbd919a210aeac663deb5384d5802e0e6a244
|
| 3 |
+
size 437967672
|
HPEControlMapper/models/mpnet-base-control-triplet/final/modules.json
ADDED
|
@@ -0,0 +1,14 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
[
|
| 2 |
+
{
|
| 3 |
+
"idx": 0,
|
| 4 |
+
"name": "0",
|
| 5 |
+
"path": "",
|
| 6 |
+
"type": "sentence_transformers.models.Transformer"
|
| 7 |
+
},
|
| 8 |
+
{
|
| 9 |
+
"idx": 1,
|
| 10 |
+
"name": "1",
|
| 11 |
+
"path": "1_Pooling",
|
| 12 |
+
"type": "sentence_transformers.models.Pooling"
|
| 13 |
+
}
|
| 14 |
+
]
|
HPEControlMapper/models/mpnet-base-control-triplet/final/sentence_bert_config.json
ADDED
|
@@ -0,0 +1,4 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"max_seq_length": 512,
|
| 3 |
+
"do_lower_case": false
|
| 4 |
+
}
|
HPEControlMapper/models/mpnet-base-control-triplet/final/special_tokens_map.json
ADDED
|
@@ -0,0 +1,51 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"bos_token": {
|
| 3 |
+
"content": "<s>",
|
| 4 |
+
"lstrip": false,
|
| 5 |
+
"normalized": false,
|
| 6 |
+
"rstrip": false,
|
| 7 |
+
"single_word": false
|
| 8 |
+
},
|
| 9 |
+
"cls_token": {
|
| 10 |
+
"content": "<s>",
|
| 11 |
+
"lstrip": false,
|
| 12 |
+
"normalized": true,
|
| 13 |
+
"rstrip": false,
|
| 14 |
+
"single_word": false
|
| 15 |
+
},
|
| 16 |
+
"eos_token": {
|
| 17 |
+
"content": "</s>",
|
| 18 |
+
"lstrip": false,
|
| 19 |
+
"normalized": false,
|
| 20 |
+
"rstrip": false,
|
| 21 |
+
"single_word": false
|
| 22 |
+
},
|
| 23 |
+
"mask_token": {
|
| 24 |
+
"content": "<mask>",
|
| 25 |
+
"lstrip": true,
|
| 26 |
+
"normalized": false,
|
| 27 |
+
"rstrip": false,
|
| 28 |
+
"single_word": false
|
| 29 |
+
},
|
| 30 |
+
"pad_token": {
|
| 31 |
+
"content": "<pad>",
|
| 32 |
+
"lstrip": false,
|
| 33 |
+
"normalized": false,
|
| 34 |
+
"rstrip": false,
|
| 35 |
+
"single_word": false
|
| 36 |
+
},
|
| 37 |
+
"sep_token": {
|
| 38 |
+
"content": "</s>",
|
| 39 |
+
"lstrip": false,
|
| 40 |
+
"normalized": true,
|
| 41 |
+
"rstrip": false,
|
| 42 |
+
"single_word": false
|
| 43 |
+
},
|
| 44 |
+
"unk_token": {
|
| 45 |
+
"content": "[UNK]",
|
| 46 |
+
"lstrip": false,
|
| 47 |
+
"normalized": false,
|
| 48 |
+
"rstrip": false,
|
| 49 |
+
"single_word": false
|
| 50 |
+
}
|
| 51 |
+
}
|
HPEControlMapper/models/mpnet-base-control-triplet/final/tokenizer.json
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
HPEControlMapper/models/mpnet-base-control-triplet/final/tokenizer_config.json
ADDED
|
@@ -0,0 +1,66 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"added_tokens_decoder": {
|
| 3 |
+
"0": {
|
| 4 |
+
"content": "<s>",
|
| 5 |
+
"lstrip": false,
|
| 6 |
+
"normalized": false,
|
| 7 |
+
"rstrip": false,
|
| 8 |
+
"single_word": false,
|
| 9 |
+
"special": true
|
| 10 |
+
},
|
| 11 |
+
"1": {
|
| 12 |
+
"content": "<pad>",
|
| 13 |
+
"lstrip": false,
|
| 14 |
+
"normalized": false,
|
| 15 |
+
"rstrip": false,
|
| 16 |
+
"single_word": false,
|
| 17 |
+
"special": true
|
| 18 |
+
},
|
| 19 |
+
"2": {
|
| 20 |
+
"content": "</s>",
|
| 21 |
+
"lstrip": false,
|
| 22 |
+
"normalized": false,
|
| 23 |
+
"rstrip": false,
|
| 24 |
+
"single_word": false,
|
| 25 |
+
"special": true
|
| 26 |
+
},
|
| 27 |
+
"3": {
|
| 28 |
+
"content": "<unk>",
|
| 29 |
+
"lstrip": false,
|
| 30 |
+
"normalized": true,
|
| 31 |
+
"rstrip": false,
|
| 32 |
+
"single_word": false,
|
| 33 |
+
"special": true
|
| 34 |
+
},
|
| 35 |
+
"104": {
|
| 36 |
+
"content": "[UNK]",
|
| 37 |
+
"lstrip": false,
|
| 38 |
+
"normalized": false,
|
| 39 |
+
"rstrip": false,
|
| 40 |
+
"single_word": false,
|
| 41 |
+
"special": true
|
| 42 |
+
},
|
| 43 |
+
"30526": {
|
| 44 |
+
"content": "<mask>",
|
| 45 |
+
"lstrip": true,
|
| 46 |
+
"normalized": false,
|
| 47 |
+
"rstrip": false,
|
| 48 |
+
"single_word": false,
|
| 49 |
+
"special": true
|
| 50 |
+
}
|
| 51 |
+
},
|
| 52 |
+
"bos_token": "<s>",
|
| 53 |
+
"clean_up_tokenization_spaces": false,
|
| 54 |
+
"cls_token": "<s>",
|
| 55 |
+
"do_lower_case": true,
|
| 56 |
+
"eos_token": "</s>",
|
| 57 |
+
"extra_special_tokens": {},
|
| 58 |
+
"mask_token": "<mask>",
|
| 59 |
+
"model_max_length": 512,
|
| 60 |
+
"pad_token": "<pad>",
|
| 61 |
+
"sep_token": "</s>",
|
| 62 |
+
"strip_accents": null,
|
| 63 |
+
"tokenize_chinese_chars": true,
|
| 64 |
+
"tokenizer_class": "MPNetTokenizer",
|
| 65 |
+
"unk_token": "[UNK]"
|
| 66 |
+
}
|
HPEControlMapper/models/mpnet-base-control-triplet/final/vocab.txt
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|