STalasilaHPE committed on
Commit
99d40e3
·
verified ·
1 Parent(s): b12ad4b

Upload 46 files

Browse files
Files changed (46) hide show
  1. HPEControlMapper/.gitattributes +35 -0
  2. HPEControlMapper/README.md +3 -0
  3. HPEControlMapper/models/mpnet-base-control-triplet/checkpoint-600/1_Pooling/config.json +10 -0
  4. HPEControlMapper/models/mpnet-base-control-triplet/checkpoint-600/README.md +429 -0
  5. HPEControlMapper/models/mpnet-base-control-triplet/checkpoint-600/config.json +23 -0
  6. HPEControlMapper/models/mpnet-base-control-triplet/checkpoint-600/config_sentence_transformers.json +10 -0
  7. HPEControlMapper/models/mpnet-base-control-triplet/checkpoint-600/model.safetensors +3 -0
  8. HPEControlMapper/models/mpnet-base-control-triplet/checkpoint-600/modules.json +14 -0
  9. HPEControlMapper/models/mpnet-base-control-triplet/checkpoint-600/optimizer.pt +3 -0
  10. HPEControlMapper/models/mpnet-base-control-triplet/checkpoint-600/rng_state.pth +3 -0
  11. HPEControlMapper/models/mpnet-base-control-triplet/checkpoint-600/scheduler.pt +3 -0
  12. HPEControlMapper/models/mpnet-base-control-triplet/checkpoint-600/sentence_bert_config.json +4 -0
  13. HPEControlMapper/models/mpnet-base-control-triplet/checkpoint-600/special_tokens_map.json +51 -0
  14. HPEControlMapper/models/mpnet-base-control-triplet/checkpoint-600/tokenizer.json +0 -0
  15. HPEControlMapper/models/mpnet-base-control-triplet/checkpoint-600/tokenizer_config.json +66 -0
  16. HPEControlMapper/models/mpnet-base-control-triplet/checkpoint-600/trainer_state.json +130 -0
  17. HPEControlMapper/models/mpnet-base-control-triplet/checkpoint-600/training_args.bin +3 -0
  18. HPEControlMapper/models/mpnet-base-control-triplet/checkpoint-600/vocab.txt +0 -0
  19. HPEControlMapper/models/mpnet-base-control-triplet/checkpoint-625/1_Pooling/config.json +10 -0
  20. HPEControlMapper/models/mpnet-base-control-triplet/checkpoint-625/README.md +429 -0
  21. HPEControlMapper/models/mpnet-base-control-triplet/checkpoint-625/config.json +23 -0
  22. HPEControlMapper/models/mpnet-base-control-triplet/checkpoint-625/config_sentence_transformers.json +10 -0
  23. HPEControlMapper/models/mpnet-base-control-triplet/checkpoint-625/model.safetensors +3 -0
  24. HPEControlMapper/models/mpnet-base-control-triplet/checkpoint-625/modules.json +14 -0
  25. HPEControlMapper/models/mpnet-base-control-triplet/checkpoint-625/optimizer.pt +3 -0
  26. HPEControlMapper/models/mpnet-base-control-triplet/checkpoint-625/rng_state.pth +3 -0
  27. HPEControlMapper/models/mpnet-base-control-triplet/checkpoint-625/scheduler.pt +3 -0
  28. HPEControlMapper/models/mpnet-base-control-triplet/checkpoint-625/sentence_bert_config.json +4 -0
  29. HPEControlMapper/models/mpnet-base-control-triplet/checkpoint-625/special_tokens_map.json +51 -0
  30. HPEControlMapper/models/mpnet-base-control-triplet/checkpoint-625/tokenizer.json +0 -0
  31. HPEControlMapper/models/mpnet-base-control-triplet/checkpoint-625/tokenizer_config.json +66 -0
  32. HPEControlMapper/models/mpnet-base-control-triplet/checkpoint-625/trainer_state.json +130 -0
  33. HPEControlMapper/models/mpnet-base-control-triplet/checkpoint-625/training_args.bin +3 -0
  34. HPEControlMapper/models/mpnet-base-control-triplet/checkpoint-625/vocab.txt +0 -0
  35. HPEControlMapper/models/mpnet-base-control-triplet/eval/triplet_evaluation_NIST-control-dev_results.csv +147 -0
  36. HPEControlMapper/models/mpnet-base-control-triplet/final/1_Pooling/config.json +10 -0
  37. HPEControlMapper/models/mpnet-base-control-triplet/final/README.md +440 -0
  38. HPEControlMapper/models/mpnet-base-control-triplet/final/config.json +23 -0
  39. HPEControlMapper/models/mpnet-base-control-triplet/final/config_sentence_transformers.json +10 -0
  40. HPEControlMapper/models/mpnet-base-control-triplet/final/model.safetensors +3 -0
  41. HPEControlMapper/models/mpnet-base-control-triplet/final/modules.json +14 -0
  42. HPEControlMapper/models/mpnet-base-control-triplet/final/sentence_bert_config.json +4 -0
  43. HPEControlMapper/models/mpnet-base-control-triplet/final/special_tokens_map.json +51 -0
  44. HPEControlMapper/models/mpnet-base-control-triplet/final/tokenizer.json +0 -0
  45. HPEControlMapper/models/mpnet-base-control-triplet/final/tokenizer_config.json +66 -0
  46. HPEControlMapper/models/mpnet-base-control-triplet/final/vocab.txt +0 -0
HPEControlMapper/.gitattributes ADDED
@@ -0,0 +1,35 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ *.7z filter=lfs diff=lfs merge=lfs -text
2
+ *.arrow filter=lfs diff=lfs merge=lfs -text
3
+ *.bin filter=lfs diff=lfs merge=lfs -text
4
+ *.bz2 filter=lfs diff=lfs merge=lfs -text
5
+ *.ckpt filter=lfs diff=lfs merge=lfs -text
6
+ *.ftz filter=lfs diff=lfs merge=lfs -text
7
+ *.gz filter=lfs diff=lfs merge=lfs -text
8
+ *.h5 filter=lfs diff=lfs merge=lfs -text
9
+ *.joblib filter=lfs diff=lfs merge=lfs -text
10
+ *.lfs.* filter=lfs diff=lfs merge=lfs -text
11
+ *.mlmodel filter=lfs diff=lfs merge=lfs -text
12
+ *.model filter=lfs diff=lfs merge=lfs -text
13
+ *.msgpack filter=lfs diff=lfs merge=lfs -text
14
+ *.npy filter=lfs diff=lfs merge=lfs -text
15
+ *.npz filter=lfs diff=lfs merge=lfs -text
16
+ *.onnx filter=lfs diff=lfs merge=lfs -text
17
+ *.ot filter=lfs diff=lfs merge=lfs -text
18
+ *.parquet filter=lfs diff=lfs merge=lfs -text
19
+ *.pb filter=lfs diff=lfs merge=lfs -text
20
+ *.pickle filter=lfs diff=lfs merge=lfs -text
21
+ *.pkl filter=lfs diff=lfs merge=lfs -text
22
+ *.pt filter=lfs diff=lfs merge=lfs -text
23
+ *.pth filter=lfs diff=lfs merge=lfs -text
24
+ *.rar filter=lfs diff=lfs merge=lfs -text
25
+ *.safetensors filter=lfs diff=lfs merge=lfs -text
26
+ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
27
+ *.tar.* filter=lfs diff=lfs merge=lfs -text
28
+ *.tar filter=lfs diff=lfs merge=lfs -text
29
+ *.tflite filter=lfs diff=lfs merge=lfs -text
30
+ *.tgz filter=lfs diff=lfs merge=lfs -text
31
+ *.wasm filter=lfs diff=lfs merge=lfs -text
32
+ *.xz filter=lfs diff=lfs merge=lfs -text
33
+ *.zip filter=lfs diff=lfs merge=lfs -text
34
+ *.zst filter=lfs diff=lfs merge=lfs -text
35
+ *tfevents* filter=lfs diff=lfs merge=lfs -text
HPEControlMapper/README.md ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ ---
2
+ license: cc
3
+ ---
HPEControlMapper/models/mpnet-base-control-triplet/checkpoint-600/1_Pooling/config.json ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "word_embedding_dimension": 768,
3
+ "pooling_mode_cls_token": false,
4
+ "pooling_mode_mean_tokens": true,
5
+ "pooling_mode_max_tokens": false,
6
+ "pooling_mode_mean_sqrt_len_tokens": false,
7
+ "pooling_mode_weightedmean_tokens": false,
8
+ "pooling_mode_lasttoken": false,
9
+ "include_prompt": true
10
+ }
HPEControlMapper/models/mpnet-base-control-triplet/checkpoint-600/README.md ADDED
@@ -0,0 +1,429 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ language:
3
+ - en
4
+ license: apache-2.0
5
+ tags:
6
+ - sentence-transformers
7
+ - sentence-similarity
8
+ - feature-extraction
9
+ - generated_from_trainer
10
+ - dataset_size:10000
11
+ - loss:TripletLoss
12
+ base_model: microsoft/mpnet-base
13
+ widget:
14
+ - source_sentence: Use hardware-based methods where available to guarantee role-based
15
+ access control cannot be bypassed.
16
+ sentences:
17
+ - Related control that reinforces stability and assurance in brute force login prevention
18
+ use cases.
19
+ - Audit session replay defense_b11_8 as part of continuous security assessment processes.
20
+ - Core functionality needed to enforce effective role-based access control mechanisms.
21
+ - source_sentence: Provide full-feature access to security enhancements in NVIDIA
22
+ GPU firmware.
23
+ sentences:
24
+ - Implement secure communication channels between host and GPU.
25
+ - A little boy blows bubbles outdoors.
26
+ - Use HTTPS inspection to detect man-in-the-middle attack attempts.
27
+ - source_sentence: Validate source authenticity by requiring signed code in all components.
28
+ sentences:
29
+ - Firewalls are activated by default and preloaded with security policies.
30
+ - Enforce cryptographic validation on third-party software inputs.
31
+ - Display productivity summaries on a weekly dashboard.
32
+ - source_sentence: Mandate organization-wide adherence to policies enforcing siem
33
+ integration using centrally managed tools.
34
+ sentences:
35
+ - Review policies around anonymous proxy blocking_b11_50 to avoid unexpected vectors
36
+ of compromise.
37
+ - Implement key management systems that use secure encryption algorithms.
38
+ - This measure directly supports secure handling within siem integration implementations.
39
+ - source_sentence: Mandate organization-wide adherence to policies enforcing virtual
40
+ machine isolation using centrally managed tools.
41
+ sentences:
42
+ - Adult males stand in front of a brick wall near something made of metal.
43
+ - Monitor for issues related to redundant login blocking_b12_1 as part of extended
44
+ security hygiene.
45
+ - A widely recommended control paired with proper virtual machine isolation implementations.
46
+ pipeline_tag: sentence-similarity
47
+ library_name: sentence-transformers
48
+ metrics:
49
+ - cosine_accuracy
50
+ model-index:
51
+ - name: MPNet base trained on NIST Controls
52
+ results:
53
+ - task:
54
+ type: triplet
55
+ name: Triplet
56
+ dataset:
57
+ name: NIST control dev
58
+ type: NIST-control-dev
59
+ metrics:
60
+ - type: cosine_accuracy
61
+ value: 0.7048740386962891
62
+ name: Cosine Accuracy
63
+ ---
64
+
65
+ # MPNet base trained on NIST Controls
66
+
67
+ This is a [sentence-transformers](https://www.SBERT.net) model finetuned from [microsoft/mpnet-base](https://huggingface.co/microsoft/mpnet-base) on the csv dataset. It maps sentences & paragraphs to a 768-dimensional dense vector space and can be used for semantic textual similarity, semantic search, paraphrase mining, text classification, clustering, and more.
68
+
69
+ ## Model Details
70
+
71
+ ### Model Description
72
+ - **Model Type:** Sentence Transformer
73
+ - **Base model:** [microsoft/mpnet-base](https://huggingface.co/microsoft/mpnet-base) <!-- at revision 6996ce1e91bd2a9c7d7f61daec37463394f73f09 -->
74
+ - **Maximum Sequence Length:** 512 tokens
75
+ - **Output Dimensionality:** 768 dimensions
76
+ - **Similarity Function:** Cosine Similarity
77
+ - **Training Dataset:**
78
+ - csv
79
+ - **Language:** en
80
+ - **License:** apache-2.0
81
+
82
+ ### Model Sources
83
+
84
+ - **Documentation:** [Sentence Transformers Documentation](https://sbert.net)
85
+ - **Repository:** [Sentence Transformers on GitHub](https://github.com/UKPLab/sentence-transformers)
86
+ - **Hugging Face:** [Sentence Transformers on Hugging Face](https://huggingface.co/models?library=sentence-transformers)
87
+
88
+ ### Full Model Architecture
89
+
90
+ ```
91
+ SentenceTransformer(
92
+ (0): Transformer({'max_seq_length': 512, 'do_lower_case': False}) with Transformer model: MPNetModel
93
+ (1): Pooling({'word_embedding_dimension': 768, 'pooling_mode_cls_token': False, 'pooling_mode_mean_tokens': True, 'pooling_mode_max_tokens': False, 'pooling_mode_mean_sqrt_len_tokens': False, 'pooling_mode_weightedmean_tokens': False, 'pooling_mode_lasttoken': False, 'include_prompt': True})
94
+ )
95
+ ```
96
+
97
+ ## Usage
98
+
99
+ ### Direct Usage (Sentence Transformers)
100
+
101
+ First install the Sentence Transformers library:
102
+
103
+ ```bash
104
+ pip install -U sentence-transformers
105
+ ```
106
+
107
+ Then you can load this model and run inference.
108
+ ```python
109
+ from sentence_transformers import SentenceTransformer
110
+
111
+ # Download from the 🤗 Hub
112
+ model = SentenceTransformer("sentence_transformers_model_id")
113
+ # Run inference
114
+ sentences = [
115
+ 'Mandate organization-wide adherence to policies enforcing virtual machine isolation using centrally managed tools.',
116
+ 'A widely recommended control paired with proper virtual machine isolation implementations.',
117
+ 'Monitor for issues related to redundant login blocking_b12_1 as part of extended security hygiene.',
118
+ ]
119
+ embeddings = model.encode(sentences)
120
+ print(embeddings.shape)
121
+ # [3, 768]
122
+
123
+ # Get the similarity scores for the embeddings
124
+ similarities = model.similarity(embeddings, embeddings)
125
+ print(similarities.shape)
126
+ # [3, 3]
127
+ ```
128
+
129
+ <!--
130
+ ### Direct Usage (Transformers)
131
+
132
+ <details><summary>Click to see the direct usage in Transformers</summary>
133
+
134
+ </details>
135
+ -->
136
+
137
+ <!--
138
+ ### Downstream Usage (Sentence Transformers)
139
+
140
+ You can finetune this model on your own dataset.
141
+
142
+ <details><summary>Click to expand</summary>
143
+
144
+ </details>
145
+ -->
146
+
147
+ <!--
148
+ ### Out-of-Scope Use
149
+
150
+ *List how the model may foreseeably be misused and address what users ought not to do with the model.*
151
+ -->
152
+
153
+ ## Evaluation
154
+
155
+ ### Metrics
156
+
157
+ #### Triplet
158
+
159
+ * Dataset: `NIST-control-dev`
160
+ * Evaluated with [<code>TripletEvaluator</code>](https://sbert.net/docs/package_reference/sentence_transformer/evaluation.html#sentence_transformers.evaluation.TripletEvaluator)
161
+
162
+ | Metric | Value |
163
+ |:--------------------|:-----------|
164
+ | **cosine_accuracy** | **0.7049** |
165
+
166
+ <!--
167
+ ## Bias, Risks and Limitations
168
+
169
+ *What are the known or foreseeable issues stemming from this model? You could also flag here known failure cases or weaknesses of the model.*
170
+ -->
171
+
172
+ <!--
173
+ ### Recommendations
174
+
175
+ *What are recommendations with respect to the foreseeable issues? For example, filtering explicit content.*
176
+ -->
177
+
178
+ ## Training Details
179
+
180
+ ### Training Dataset
181
+
182
+ #### csv
183
+
184
+ * Dataset: csv
185
+ * Size: 10,000 training samples
186
+ * Columns: <code>anchor</code>, <code>positive</code>, and <code>negative</code>
187
+ * Approximate statistics based on the first 1000 samples:
188
+ | | anchor | positive | negative |
189
+ |:--------|:------------------------------------------------------------------------------------|:------------------------------------------------------------------------------------|:----------------------------------------------------------------------------------|
190
+ | type | string | string | string |
191
+ | details | <ul><li>min: 10 tokens</li><li>mean: 21.39 tokens</li><li>max: 280 tokens</li></ul> | <ul><li>min: 10 tokens</li><li>mean: 17.96 tokens</li><li>max: 171 tokens</li></ul> | <ul><li>min: 9 tokens</li><li>mean: 20.27 tokens</li><li>max: 86 tokens</li></ul> |
192
+ * Samples:
193
+ | anchor | positive | negative |
194
+ |:---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|:----------------------------------------------------------------------------------------------------------------------------------|:------------------------------------------------------------------------------------------|
195
+ | <code>Audit Logs: A secure record of security-sensitive activity required for the audit trail. The audit log is a record of an activity by a system a user or an application. Audit logs are required for the audit trail. Examples of audit events are listed in the requirements section below. It is important that these events are in the audit log as part of HPE's commitment to CISA's Secure by Design Pledge.</code> | <code>Security Event Correlation (v3): Systems that correlate audit logs with security events to detect anomalies.</code> | <code>Remote Access Protection (v3): Controls to secure remote access to systems.</code> |
196
+ | <code>One to One Credential Mapping: This requirement involves getting the specific user login information correct in the audit log down to the specific user and not just listing the role or process ID value.</code> | <code>User Identity Verification (v3): Mechanism to verify user identities before granting access to resources.</code> | <code>Incident Response Testing (v3): Regular testing of incident response plans.</code> |
197
+ | <code>Reproducible Builds: Be able to recreate bit-by-bit identical copies of signed code. Ensure that signed code really came from the source code developed by HPE and possibly reviewed by a third-party.</code> | <code>Build Integrity Checking (v3): Verification process to ensure that the build environment has not been tampered with.</code> | <code>Backup and Recovery Planning (v3): Ensure data is backed up and recoverable.</code> |
198
+ * Loss: [<code>TripletLoss</code>](https://sbert.net/docs/package_reference/sentence_transformer/losses.html#tripletloss) with these parameters:
199
+ ```json
200
+ {
201
+ "distance_metric": "TripletDistanceMetric.EUCLIDEAN",
202
+ "triplet_margin": 5
203
+ }
204
+ ```
205
+
206
+ ### Evaluation Dataset
207
+
208
+ #### csv
209
+
210
+ * Dataset: csv
211
+ * Size: 6,709 evaluation samples
212
+ * Columns: <code>anchor</code>, <code>positive</code>, and <code>negative</code>
213
+ * Approximate statistics based on the first 1000 samples:
214
+ | | anchor | positive | negative |
215
+ |:--------|:----------------------------------------------------------------------------------|:----------------------------------------------------------------------------------|:----------------------------------------------------------------------------------|
216
+ | type | string | string | string |
217
+ | details | <ul><li>min: 8 tokens</li><li>mean: 18.08 tokens</li><li>max: 54 tokens</li></ul> | <ul><li>min: 4 tokens</li><li>mean: 13.64 tokens</li><li>max: 33 tokens</li></ul> | <ul><li>min: 5 tokens</li><li>mean: 18.23 tokens</li><li>max: 34 tokens</li></ul> |
218
+ * Samples:
219
+ | anchor | positive | negative |
220
+ |:----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|:--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|:--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
221
+ | <code>Only allow approved and secured components to run in production environments validating them with cryptographic checks and requiring authenticated configuration changes.</code> | <code>Deploy only approved applications in production environments. Use secure startup methods to validate software integrity. Require authentication for all system configuration changes.</code> | <code>Issue smart cards to employees as physical access tokens. Maintain a contact list for emergency communications. Label system components for asset tracking.</code> |
222
+ | <code>Create system architectures that include failover tamper detection and rollback protections to ensure operations continue even under cyberattack.</code> | <code>Design systems to continue operating under defined attack scenarios. Include tamper detection and automatic rollback capabilities. Ensure recovery processes are tested for effectiveness.</code> | <code>Protect paper-based confidential files in locked cabinets. Post warning signs at restricted data center doors. Track employee security briefings with completion certificates.</code> |
223
+ | <code>Structure networks and applications into isolated segments and apply integrity checking to ensure only trusted software is executed in operational environments.</code> | <code>Use network segmentation and security zones in system design. Validate system behavior using integrity checks. Require signed software for deployment into production.</code> | <code>Distribute weather radios for disaster preparedness in remote facilities. Track badge issuance and disable lost credentials. Train users to avoid reusing passwords across services.</code> |
224
+ * Loss: [<code>TripletLoss</code>](https://sbert.net/docs/package_reference/sentence_transformer/losses.html#tripletloss) with these parameters:
225
+ ```json
226
+ {
227
+ "distance_metric": "TripletDistanceMetric.EUCLIDEAN",
228
+ "triplet_margin": 5
229
+ }
230
+ ```
231
+
232
+ ### Training Hyperparameters
233
+ #### Non-Default Hyperparameters
234
+
235
+ - `eval_strategy`: steps
236
+ - `per_device_train_batch_size`: 16
237
+ - `per_device_eval_batch_size`: 16
238
+ - `learning_rate`: 2e-05
239
+ - `num_train_epochs`: 1
240
+ - `warmup_ratio`: 0.1
241
+ - `fp16`: True
242
+ - `batch_sampler`: no_duplicates
243
+
244
+ #### All Hyperparameters
245
+ <details><summary>Click to expand</summary>
246
+
247
+ - `overwrite_output_dir`: False
248
+ - `do_predict`: False
249
+ - `eval_strategy`: steps
250
+ - `prediction_loss_only`: True
251
+ - `per_device_train_batch_size`: 16
252
+ - `per_device_eval_batch_size`: 16
253
+ - `per_gpu_train_batch_size`: None
254
+ - `per_gpu_eval_batch_size`: None
255
+ - `gradient_accumulation_steps`: 1
256
+ - `eval_accumulation_steps`: None
257
+ - `torch_empty_cache_steps`: None
258
+ - `learning_rate`: 2e-05
259
+ - `weight_decay`: 0.0
260
+ - `adam_beta1`: 0.9
261
+ - `adam_beta2`: 0.999
262
+ - `adam_epsilon`: 1e-08
263
+ - `max_grad_norm`: 1.0
264
+ - `num_train_epochs`: 1
265
+ - `max_steps`: -1
266
+ - `lr_scheduler_type`: linear
267
+ - `lr_scheduler_kwargs`: {}
268
+ - `warmup_ratio`: 0.1
269
+ - `warmup_steps`: 0
270
+ - `log_level`: passive
271
+ - `log_level_replica`: warning
272
+ - `log_on_each_node`: True
273
+ - `logging_nan_inf_filter`: True
274
+ - `save_safetensors`: True
275
+ - `save_on_each_node`: False
276
+ - `save_only_model`: False
277
+ - `restore_callback_states_from_checkpoint`: False
278
+ - `no_cuda`: False
279
+ - `use_cpu`: False
280
+ - `use_mps_device`: False
281
+ - `seed`: 42
282
+ - `data_seed`: None
283
+ - `jit_mode_eval`: False
284
+ - `use_ipex`: False
285
+ - `bf16`: False
286
+ - `fp16`: True
287
+ - `fp16_opt_level`: O1
288
+ - `half_precision_backend`: auto
289
+ - `bf16_full_eval`: False
290
+ - `fp16_full_eval`: False
291
+ - `tf32`: None
292
+ - `local_rank`: 0
293
+ - `ddp_backend`: None
294
+ - `tpu_num_cores`: None
295
+ - `tpu_metrics_debug`: False
296
+ - `debug`: []
297
+ - `dataloader_drop_last`: False
298
+ - `dataloader_num_workers`: 0
299
+ - `dataloader_prefetch_factor`: None
300
+ - `past_index`: -1
301
+ - `disable_tqdm`: False
302
+ - `remove_unused_columns`: True
303
+ - `label_names`: None
304
+ - `load_best_model_at_end`: False
305
+ - `ignore_data_skip`: False
306
+ - `fsdp`: []
307
+ - `fsdp_min_num_params`: 0
308
+ - `fsdp_config`: {'min_num_params': 0, 'xla': False, 'xla_fsdp_v2': False, 'xla_fsdp_grad_ckpt': False}
309
+ - `fsdp_transformer_layer_cls_to_wrap`: None
310
+ - `accelerator_config`: {'split_batches': False, 'dispatch_batches': None, 'even_batches': True, 'use_seedable_sampler': True, 'non_blocking': False, 'gradient_accumulation_kwargs': None}
311
+ - `deepspeed`: None
312
+ - `label_smoothing_factor`: 0.0
313
+ - `optim`: adamw_torch
314
+ - `optim_args`: None
315
+ - `adafactor`: False
316
+ - `group_by_length`: False
317
+ - `length_column_name`: length
318
+ - `ddp_find_unused_parameters`: None
319
+ - `ddp_bucket_cap_mb`: None
320
+ - `ddp_broadcast_buffers`: False
321
+ - `dataloader_pin_memory`: True
322
+ - `dataloader_persistent_workers`: False
323
+ - `skip_memory_metrics`: True
324
+ - `use_legacy_prediction_loop`: False
325
+ - `push_to_hub`: False
326
+ - `resume_from_checkpoint`: None
327
+ - `hub_model_id`: None
328
+ - `hub_strategy`: every_save
329
+ - `hub_private_repo`: None
330
+ - `hub_always_push`: False
331
+ - `gradient_checkpointing`: False
332
+ - `gradient_checkpointing_kwargs`: None
333
+ - `include_inputs_for_metrics`: False
334
+ - `include_for_metrics`: []
335
+ - `eval_do_concat_batches`: True
336
+ - `fp16_backend`: auto
337
+ - `push_to_hub_model_id`: None
338
+ - `push_to_hub_organization`: None
339
+ - `mp_parameters`:
340
+ - `auto_find_batch_size`: False
341
+ - `full_determinism`: False
342
+ - `torchdynamo`: None
343
+ - `ray_scope`: last
344
+ - `ddp_timeout`: 1800
345
+ - `torch_compile`: False
346
+ - `torch_compile_backend`: None
347
+ - `torch_compile_mode`: None
348
+ - `include_tokens_per_second`: False
349
+ - `include_num_input_tokens_seen`: False
350
+ - `neftune_noise_alpha`: None
351
+ - `optim_target_modules`: None
352
+ - `batch_eval_metrics`: False
353
+ - `eval_on_start`: False
354
+ - `use_liger_kernel`: False
355
+ - `eval_use_gather_object`: False
356
+ - `average_tokens_across_devices`: False
357
+ - `prompts`: None
358
+ - `batch_sampler`: no_duplicates
359
+ - `multi_dataset_batch_sampler`: proportional
360
+
361
+ </details>
362
+
363
+ ### Training Logs
364
+ | Epoch | Step | Training Loss | Validation Loss | NIST-control-dev_cosine_accuracy |
365
+ |:-----:|:----:|:-------------:|:---------------:|:--------------------------------:|
366
+ | -1 | -1 | - | - | 0.6563 |
367
+ | 0.16 | 100 | 2.6751 | 4.0892 | 0.6661 |
368
+ | 0.32 | 200 | 0.9272 | 3.8595 | 0.7026 |
369
+ | 0.48 | 300 | 0.5711 | 3.8835 | 0.6897 |
370
+ | 0.64 | 400 | 0.3905 | 3.7548 | 0.7071 |
371
+ | 0.8 | 500 | 0.043 | 3.8021 | 0.7035 |
372
+ | 0.96 | 600 | 0.0407 | 3.8115 | 0.7049 |
373
+
374
+
375
+ ### Framework Versions
376
+ - Python: 3.13.5
377
+ - Sentence Transformers: 4.1.0
378
+ - Transformers: 4.52.4
379
+ - PyTorch: 2.7.1+cpu
380
+ - Accelerate: 1.8.1
381
+ - Datasets: 2.15.0
382
+ - Tokenizers: 0.21.2
383
+
384
+ ## Citation
385
+
386
+ ### BibTeX
387
+
388
+ #### Sentence Transformers
389
+ ```bibtex
390
+ @inproceedings{reimers-2019-sentence-bert,
391
+ title = "Sentence-BERT: Sentence Embeddings using Siamese BERT-Networks",
392
+ author = "Reimers, Nils and Gurevych, Iryna",
393
+ booktitle = "Proceedings of the 2019 Conference on Empirical Methods in Natural Language Processing",
394
+ month = "11",
395
+ year = "2019",
396
+ publisher = "Association for Computational Linguistics",
397
+ url = "https://arxiv.org/abs/1908.10084",
398
+ }
399
+ ```
400
+
401
+ #### TripletLoss
402
+ ```bibtex
403
+ @misc{hermans2017defense,
404
+ title={In Defense of the Triplet Loss for Person Re-Identification},
405
+ author={Alexander Hermans and Lucas Beyer and Bastian Leibe},
406
+ year={2017},
407
+ eprint={1703.07737},
408
+ archivePrefix={arXiv},
409
+ primaryClass={cs.CV}
410
+ }
411
+ ```
412
+
413
+ <!--
414
+ ## Glossary
415
+
416
+ *Clearly define terms in order to be accessible across audiences.*
417
+ -->
418
+
419
+ <!--
420
+ ## Model Card Authors
421
+
422
+ *Lists the people who create the model card, providing recognition and accountability for the detailed work that goes into its construction.*
423
+ -->
424
+
425
+ <!--
426
+ ## Model Card Contact
427
+
428
+ *Provides a way for people who have updates to the Model Card, suggestions, or questions, to contact the Model Card authors.*
429
+ -->
HPEControlMapper/models/mpnet-base-control-triplet/checkpoint-600/config.json ADDED
@@ -0,0 +1,23 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "architectures": [
3
+ "MPNetModel"
4
+ ],
5
+ "attention_probs_dropout_prob": 0.1,
6
+ "bos_token_id": 0,
7
+ "eos_token_id": 2,
8
+ "hidden_act": "gelu",
9
+ "hidden_dropout_prob": 0.1,
10
+ "hidden_size": 768,
11
+ "initializer_range": 0.02,
12
+ "intermediate_size": 3072,
13
+ "layer_norm_eps": 1e-05,
14
+ "max_position_embeddings": 514,
15
+ "model_type": "mpnet",
16
+ "num_attention_heads": 12,
17
+ "num_hidden_layers": 12,
18
+ "pad_token_id": 1,
19
+ "relative_attention_num_buckets": 32,
20
+ "torch_dtype": "float32",
21
+ "transformers_version": "4.52.4",
22
+ "vocab_size": 30527
23
+ }
HPEControlMapper/models/mpnet-base-control-triplet/checkpoint-600/config_sentence_transformers.json ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "__version__": {
3
+ "sentence_transformers": "4.1.0",
4
+ "transformers": "4.52.4",
5
+ "pytorch": "2.7.1+cpu"
6
+ },
7
+ "prompts": {},
8
+ "default_prompt_name": null,
9
+ "similarity_fn_name": "cosine"
10
+ }
HPEControlMapper/models/mpnet-base-control-triplet/checkpoint-600/model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3c190b5a8af0d51ac697a62f9ae4834b7c6b8b00876476501673aaaaa111a02f
3
+ size 437967672
HPEControlMapper/models/mpnet-base-control-triplet/checkpoint-600/modules.json ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [
2
+ {
3
+ "idx": 0,
4
+ "name": "0",
5
+ "path": "",
6
+ "type": "sentence_transformers.models.Transformer"
7
+ },
8
+ {
9
+ "idx": 1,
10
+ "name": "1",
11
+ "path": "1_Pooling",
12
+ "type": "sentence_transformers.models.Pooling"
13
+ }
14
+ ]
HPEControlMapper/models/mpnet-base-control-triplet/checkpoint-600/optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:84b536e9eb9d20d9d59bcafc8078232b367fdd6e856e1f6734d81e0343382532
3
+ size 871326731
HPEControlMapper/models/mpnet-base-control-triplet/checkpoint-600/rng_state.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:383a43729efdff2755ac4825ffc7f76cd2d24d8ce9768c400d43dacdf55c10cc
3
+ size 14391
HPEControlMapper/models/mpnet-base-control-triplet/checkpoint-600/scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f572bc45a40df191b140a5180f678f5e39a38ae45a98002ce6d28ddc5f2fc0d0
3
+ size 1465
HPEControlMapper/models/mpnet-base-control-triplet/checkpoint-600/sentence_bert_config.json ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ {
2
+ "max_seq_length": 512,
3
+ "do_lower_case": false
4
+ }
HPEControlMapper/models/mpnet-base-control-triplet/checkpoint-600/special_tokens_map.json ADDED
@@ -0,0 +1,51 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "bos_token": {
3
+ "content": "<s>",
4
+ "lstrip": false,
5
+ "normalized": false,
6
+ "rstrip": false,
7
+ "single_word": false
8
+ },
9
+ "cls_token": {
10
+ "content": "<s>",
11
+ "lstrip": false,
12
+ "normalized": true,
13
+ "rstrip": false,
14
+ "single_word": false
15
+ },
16
+ "eos_token": {
17
+ "content": "</s>",
18
+ "lstrip": false,
19
+ "normalized": false,
20
+ "rstrip": false,
21
+ "single_word": false
22
+ },
23
+ "mask_token": {
24
+ "content": "<mask>",
25
+ "lstrip": true,
26
+ "normalized": false,
27
+ "rstrip": false,
28
+ "single_word": false
29
+ },
30
+ "pad_token": {
31
+ "content": "<pad>",
32
+ "lstrip": false,
33
+ "normalized": false,
34
+ "rstrip": false,
35
+ "single_word": false
36
+ },
37
+ "sep_token": {
38
+ "content": "</s>",
39
+ "lstrip": false,
40
+ "normalized": true,
41
+ "rstrip": false,
42
+ "single_word": false
43
+ },
44
+ "unk_token": {
45
+ "content": "[UNK]",
46
+ "lstrip": false,
47
+ "normalized": false,
48
+ "rstrip": false,
49
+ "single_word": false
50
+ }
51
+ }
HPEControlMapper/models/mpnet-base-control-triplet/checkpoint-600/tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
HPEControlMapper/models/mpnet-base-control-triplet/checkpoint-600/tokenizer_config.json ADDED
@@ -0,0 +1,66 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "added_tokens_decoder": {
3
+ "0": {
4
+ "content": "<s>",
5
+ "lstrip": false,
6
+ "normalized": false,
7
+ "rstrip": false,
8
+ "single_word": false,
9
+ "special": true
10
+ },
11
+ "1": {
12
+ "content": "<pad>",
13
+ "lstrip": false,
14
+ "normalized": false,
15
+ "rstrip": false,
16
+ "single_word": false,
17
+ "special": true
18
+ },
19
+ "2": {
20
+ "content": "</s>",
21
+ "lstrip": false,
22
+ "normalized": false,
23
+ "rstrip": false,
24
+ "single_word": false,
25
+ "special": true
26
+ },
27
+ "3": {
28
+ "content": "<unk>",
29
+ "lstrip": false,
30
+ "normalized": true,
31
+ "rstrip": false,
32
+ "single_word": false,
33
+ "special": true
34
+ },
35
+ "104": {
36
+ "content": "[UNK]",
37
+ "lstrip": false,
38
+ "normalized": false,
39
+ "rstrip": false,
40
+ "single_word": false,
41
+ "special": true
42
+ },
43
+ "30526": {
44
+ "content": "<mask>",
45
+ "lstrip": true,
46
+ "normalized": false,
47
+ "rstrip": false,
48
+ "single_word": false,
49
+ "special": true
50
+ }
51
+ },
52
+ "bos_token": "<s>",
53
+ "clean_up_tokenization_spaces": false,
54
+ "cls_token": "<s>",
55
+ "do_lower_case": true,
56
+ "eos_token": "</s>",
57
+ "extra_special_tokens": {},
58
+ "mask_token": "<mask>",
59
+ "model_max_length": 512,
60
+ "pad_token": "<pad>",
61
+ "sep_token": "</s>",
62
+ "strip_accents": null,
63
+ "tokenize_chinese_chars": true,
64
+ "tokenizer_class": "MPNetTokenizer",
65
+ "unk_token": "[UNK]"
66
+ }
HPEControlMapper/models/mpnet-base-control-triplet/checkpoint-600/trainer_state.json ADDED
@@ -0,0 +1,130 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_global_step": null,
3
+ "best_metric": null,
4
+ "best_model_checkpoint": null,
5
+ "epoch": 0.96,
6
+ "eval_steps": 100,
7
+ "global_step": 600,
8
+ "is_hyper_param_search": false,
9
+ "is_local_process_zero": true,
10
+ "is_world_process_zero": true,
11
+ "log_history": [
12
+ {
13
+ "epoch": 0.16,
14
+ "grad_norm": 32.675262451171875,
15
+ "learning_rate": 1.8718861209964415e-05,
16
+ "loss": 2.6751,
17
+ "step": 100
18
+ },
19
+ {
20
+ "epoch": 0.16,
21
+ "eval_NIST-control-dev_cosine_accuracy": 0.6661201119422913,
22
+ "eval_loss": 4.089230537414551,
23
+ "eval_runtime": 601.3322,
24
+ "eval_samples_per_second": 11.157,
25
+ "eval_steps_per_second": 0.698,
26
+ "step": 100
27
+ },
28
+ {
29
+ "epoch": 0.32,
30
+ "grad_norm": 12.523146629333496,
31
+ "learning_rate": 1.516014234875445e-05,
32
+ "loss": 0.9272,
33
+ "step": 200
34
+ },
35
+ {
36
+ "epoch": 0.32,
37
+ "eval_NIST-control-dev_cosine_accuracy": 0.7026382684707642,
38
+ "eval_loss": 3.859454870223999,
39
+ "eval_runtime": 604.4408,
40
+ "eval_samples_per_second": 11.1,
41
+ "eval_steps_per_second": 0.695,
42
+ "step": 200
43
+ },
44
+ {
45
+ "epoch": 0.48,
46
+ "grad_norm": 14.002488136291504,
47
+ "learning_rate": 1.1601423487544485e-05,
48
+ "loss": 0.5711,
49
+ "step": 300
50
+ },
51
+ {
52
+ "epoch": 0.48,
53
+ "eval_NIST-control-dev_cosine_accuracy": 0.6896705627441406,
54
+ "eval_loss": 3.883481502532959,
55
+ "eval_runtime": 608.1647,
56
+ "eval_samples_per_second": 11.032,
57
+ "eval_steps_per_second": 0.691,
58
+ "step": 300
59
+ },
60
+ {
61
+ "epoch": 0.64,
62
+ "grad_norm": 28.6463565826416,
63
+ "learning_rate": 8.04270462633452e-06,
64
+ "loss": 0.3905,
65
+ "step": 400
66
+ },
67
+ {
68
+ "epoch": 0.64,
69
+ "eval_NIST-control-dev_cosine_accuracy": 0.7071098685264587,
70
+ "eval_loss": 3.7548305988311768,
71
+ "eval_runtime": 606.0474,
72
+ "eval_samples_per_second": 11.07,
73
+ "eval_steps_per_second": 0.693,
74
+ "step": 400
75
+ },
76
+ {
77
+ "epoch": 0.8,
78
+ "grad_norm": 0.0,
79
+ "learning_rate": 4.483985765124556e-06,
80
+ "loss": 0.043,
81
+ "step": 500
82
+ },
83
+ {
84
+ "epoch": 0.8,
85
+ "eval_NIST-control-dev_cosine_accuracy": 0.7035325765609741,
86
+ "eval_loss": 3.8021018505096436,
87
+ "eval_runtime": 600.763,
88
+ "eval_samples_per_second": 11.167,
89
+ "eval_steps_per_second": 0.699,
90
+ "step": 500
91
+ },
92
+ {
93
+ "epoch": 0.96,
94
+ "grad_norm": 0.0,
95
+ "learning_rate": 9.252669039145908e-07,
96
+ "loss": 0.0407,
97
+ "step": 600
98
+ },
99
+ {
100
+ "epoch": 0.96,
101
+ "eval_NIST-control-dev_cosine_accuracy": 0.7048740386962891,
102
+ "eval_loss": 3.811488628387451,
103
+ "eval_runtime": 599.8706,
104
+ "eval_samples_per_second": 11.184,
105
+ "eval_steps_per_second": 0.7,
106
+ "step": 600
107
+ }
108
+ ],
109
+ "logging_steps": 100,
110
+ "max_steps": 625,
111
+ "num_input_tokens_seen": 0,
112
+ "num_train_epochs": 1,
113
+ "save_steps": 100,
114
+ "stateful_callbacks": {
115
+ "TrainerControl": {
116
+ "args": {
117
+ "should_epoch_stop": false,
118
+ "should_evaluate": false,
119
+ "should_log": false,
120
+ "should_save": true,
121
+ "should_training_stop": false
122
+ },
123
+ "attributes": {}
124
+ }
125
+ },
126
+ "total_flos": 0.0,
127
+ "train_batch_size": 16,
128
+ "trial_name": null,
129
+ "trial_params": null
130
+ }
HPEControlMapper/models/mpnet-base-control-triplet/checkpoint-600/training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:833ef880b3501bd0c81b578a2dc1a700c13add6a501fc8fb0cf7ea0843c2483a
3
+ size 5969
HPEControlMapper/models/mpnet-base-control-triplet/checkpoint-600/vocab.txt ADDED
The diff for this file is too large to render. See raw diff
 
HPEControlMapper/models/mpnet-base-control-triplet/checkpoint-625/1_Pooling/config.json ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "word_embedding_dimension": 768,
3
+ "pooling_mode_cls_token": false,
4
+ "pooling_mode_mean_tokens": true,
5
+ "pooling_mode_max_tokens": false,
6
+ "pooling_mode_mean_sqrt_len_tokens": false,
7
+ "pooling_mode_weightedmean_tokens": false,
8
+ "pooling_mode_lasttoken": false,
9
+ "include_prompt": true
10
+ }
HPEControlMapper/models/mpnet-base-control-triplet/checkpoint-625/README.md ADDED
@@ -0,0 +1,429 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ language:
3
+ - en
4
+ license: apache-2.0
5
+ tags:
6
+ - sentence-transformers
7
+ - sentence-similarity
8
+ - feature-extraction
9
+ - generated_from_trainer
10
+ - dataset_size:10000
11
+ - loss:TripletLoss
12
+ base_model: microsoft/mpnet-base
13
+ widget:
14
+ - source_sentence: Use hardware-based methods where available to guarantee role-based
15
+ access control cannot be bypassed.
16
+ sentences:
17
+ - Related control that reinforces stability and assurance in brute force login prevention
18
+ use cases.
19
+ - Audit session replay defense_b11_8 as part of continuous security assessment processes.
20
+ - Core functionality needed to enforce effective role-based access control mechanisms.
21
+ - source_sentence: Provide full-feature access to security enhancements in NVIDIA
22
+ GPU firmware.
23
+ sentences:
24
+ - Implement secure communication channels between host and GPU.
25
+ - A little boy blows bubbles outdoors.
26
+ - Use HTTPS inspection to detect man-in-the-middle attack attempts.
27
+ - source_sentence: Validate source authenticity by requiring signed code in all components.
28
+ sentences:
29
+ - Firewalls are activated by default and preloaded with security policies.
30
+ - Enforce cryptographic validation on third-party software inputs.
31
+ - Display productivity summaries on a weekly dashboard.
32
+ - source_sentence: Mandate organization-wide adherence to policies enforcing siem
33
+ integration using centrally managed tools.
34
+ sentences:
35
+ - Review policies around anonymous proxy blocking_b11_50 to avoid unexpected vectors
36
+ of compromise.
37
+ - Implement key management systems that use secure encryption algorithms.
38
+ - This measure directly supports secure handling within siem integration implementations.
39
+ - source_sentence: Mandate organization-wide adherence to policies enforcing virtual
40
+ machine isolation using centrally managed tools.
41
+ sentences:
42
+ - Adult males stand in front of a brick wall near something made of metal.
43
+ - Monitor for issues related to redundant login blocking_b12_1 as part of extended
44
+ security hygiene.
45
+ - A widely recommended control paired with proper virtual machine isolation implementations.
46
+ pipeline_tag: sentence-similarity
47
+ library_name: sentence-transformers
48
+ metrics:
49
+ - cosine_accuracy
50
+ model-index:
51
+ - name: MPNet base trained on NIST Controls
52
+ results:
53
+ - task:
54
+ type: triplet
55
+ name: Triplet
56
+ dataset:
57
+ name: NIST control dev
58
+ type: NIST-control-dev
59
+ metrics:
60
+ - type: cosine_accuracy
61
+ value: 0.7048740386962891
62
+ name: Cosine Accuracy
63
+ ---
64
+
65
+ # MPNet base trained on NIST Controls
66
+
67
+ This is a [sentence-transformers](https://www.SBERT.net) model finetuned from [microsoft/mpnet-base](https://huggingface.co/microsoft/mpnet-base) on the csv dataset. It maps sentences & paragraphs to a 768-dimensional dense vector space and can be used for semantic textual similarity, semantic search, paraphrase mining, text classification, clustering, and more.
68
+
69
+ ## Model Details
70
+
71
+ ### Model Description
72
+ - **Model Type:** Sentence Transformer
73
+ - **Base model:** [microsoft/mpnet-base](https://huggingface.co/microsoft/mpnet-base) <!-- at revision 6996ce1e91bd2a9c7d7f61daec37463394f73f09 -->
74
+ - **Maximum Sequence Length:** 512 tokens
75
+ - **Output Dimensionality:** 768 dimensions
76
+ - **Similarity Function:** Cosine Similarity
77
+ - **Training Dataset:**
78
+ - csv
79
+ - **Language:** en
80
+ - **License:** apache-2.0
81
+
82
+ ### Model Sources
83
+
84
+ - **Documentation:** [Sentence Transformers Documentation](https://sbert.net)
85
+ - **Repository:** [Sentence Transformers on GitHub](https://github.com/UKPLab/sentence-transformers)
86
+ - **Hugging Face:** [Sentence Transformers on Hugging Face](https://huggingface.co/models?library=sentence-transformers)
87
+
88
+ ### Full Model Architecture
89
+
90
+ ```
91
+ SentenceTransformer(
92
+ (0): Transformer({'max_seq_length': 512, 'do_lower_case': False}) with Transformer model: MPNetModel
93
+ (1): Pooling({'word_embedding_dimension': 768, 'pooling_mode_cls_token': False, 'pooling_mode_mean_tokens': True, 'pooling_mode_max_tokens': False, 'pooling_mode_mean_sqrt_len_tokens': False, 'pooling_mode_weightedmean_tokens': False, 'pooling_mode_lasttoken': False, 'include_prompt': True})
94
+ )
95
+ ```
96
+
97
+ ## Usage
98
+
99
+ ### Direct Usage (Sentence Transformers)
100
+
101
+ First install the Sentence Transformers library:
102
+
103
+ ```bash
104
+ pip install -U sentence-transformers
105
+ ```
106
+
107
+ Then you can load this model and run inference.
108
+ ```python
109
+ from sentence_transformers import SentenceTransformer
110
+
111
+ # Download from the 🤗 Hub
112
+ model = SentenceTransformer("sentence_transformers_model_id")
113
+ # Run inference
114
+ sentences = [
115
+ 'Mandate organization-wide adherence to policies enforcing virtual machine isolation using centrally managed tools.',
116
+ 'A widely recommended control paired with proper virtual machine isolation implementations.',
117
+ 'Monitor for issues related to redundant login blocking_b12_1 as part of extended security hygiene.',
118
+ ]
119
+ embeddings = model.encode(sentences)
120
+ print(embeddings.shape)
121
+ # [3, 768]
122
+
123
+ # Get the similarity scores for the embeddings
124
+ similarities = model.similarity(embeddings, embeddings)
125
+ print(similarities.shape)
126
+ # [3, 3]
127
+ ```
128
+
129
+ <!--
130
+ ### Direct Usage (Transformers)
131
+
132
+ <details><summary>Click to see the direct usage in Transformers</summary>
133
+
134
+ </details>
135
+ -->
136
+
137
+ <!--
138
+ ### Downstream Usage (Sentence Transformers)
139
+
140
+ You can finetune this model on your own dataset.
141
+
142
+ <details><summary>Click to expand</summary>
143
+
144
+ </details>
145
+ -->
146
+
147
+ <!--
148
+ ### Out-of-Scope Use
149
+
150
+ *List how the model may foreseeably be misused and address what users ought not to do with the model.*
151
+ -->
152
+
153
+ ## Evaluation
154
+
155
+ ### Metrics
156
+
157
+ #### Triplet
158
+
159
+ * Dataset: `NIST-control-dev`
160
+ * Evaluated with [<code>TripletEvaluator</code>](https://sbert.net/docs/package_reference/sentence_transformer/evaluation.html#sentence_transformers.evaluation.TripletEvaluator)
161
+
162
+ | Metric | Value |
163
+ |:--------------------|:-----------|
164
+ | **cosine_accuracy** | **0.7049** |
165
+
166
+ <!--
167
+ ## Bias, Risks and Limitations
168
+
169
+ *What are the known or foreseeable issues stemming from this model? You could also flag here known failure cases or weaknesses of the model.*
170
+ -->
171
+
172
+ <!--
173
+ ### Recommendations
174
+
175
+ *What are recommendations with respect to the foreseeable issues? For example, filtering explicit content.*
176
+ -->
177
+
178
+ ## Training Details
179
+
180
+ ### Training Dataset
181
+
182
+ #### csv
183
+
184
+ * Dataset: csv
185
+ * Size: 10,000 training samples
186
+ * Columns: <code>anchor</code>, <code>positive</code>, and <code>negative</code>
187
+ * Approximate statistics based on the first 1000 samples:
188
+ | | anchor | positive | negative |
189
+ |:--------|:------------------------------------------------------------------------------------|:------------------------------------------------------------------------------------|:----------------------------------------------------------------------------------|
190
+ | type | string | string | string |
191
+ | details | <ul><li>min: 10 tokens</li><li>mean: 21.39 tokens</li><li>max: 280 tokens</li></ul> | <ul><li>min: 10 tokens</li><li>mean: 17.96 tokens</li><li>max: 171 tokens</li></ul> | <ul><li>min: 9 tokens</li><li>mean: 20.27 tokens</li><li>max: 86 tokens</li></ul> |
192
+ * Samples:
193
+ | anchor | positive | negative |
194
+ |:---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|:----------------------------------------------------------------------------------------------------------------------------------|:------------------------------------------------------------------------------------------|
195
+ | <code>Audit Logs: A secure record of security-sensitive activity required for the audit trail. The audit log is a record of an activity by a system a user or an application. Audit logs are required for the audit trail. Examples of audit events are listed in the requirements section below. It is important that these events are in the audit log as part of HPE's commitment to CISA's Secure by Design Pledge.</code> | <code>Security Event Correlation (v3): Systems that correlate audit logs with security events to detect anomalies.</code> | <code>Remote Access Protection (v3): Controls to secure remote access to systems.</code> |
196
+ | <code>One to One Credential Mapping: This requirement involves getting the specific user login information correct in the audit log down to the specific user and not just listing the role or process ID value.</code> | <code>User Identity Verification (v3): Mechanism to verify user identities before granting access to resources.</code> | <code>Incident Response Testing (v3): Regular testing of incident response plans.</code> |
197
+ | <code>Reproducible Builds: Be able to recreate bit-by-bit identical copies of signed code. Ensure that signed code really came from the source code developed by HPE and possibly reviewed by a third-party.</code> | <code>Build Integrity Checking (v3): Verification process to ensure that the build environment has not been tampered with.</code> | <code>Backup and Recovery Planning (v3): Ensure data is backed up and recoverable.</code> |
198
+ * Loss: [<code>TripletLoss</code>](https://sbert.net/docs/package_reference/sentence_transformer/losses.html#tripletloss) with these parameters:
199
+ ```json
200
+ {
201
+ "distance_metric": "TripletDistanceMetric.EUCLIDEAN",
202
+ "triplet_margin": 5
203
+ }
204
+ ```
205
+
206
+ ### Evaluation Dataset
207
+
208
+ #### csv
209
+
210
+ * Dataset: csv
211
+ * Size: 6,709 evaluation samples
212
+ * Columns: <code>anchor</code>, <code>positive</code>, and <code>negative</code>
213
+ * Approximate statistics based on the first 1000 samples:
214
+ | | anchor | positive | negative |
215
+ |:--------|:----------------------------------------------------------------------------------|:----------------------------------------------------------------------------------|:----------------------------------------------------------------------------------|
216
+ | type | string | string | string |
217
+ | details | <ul><li>min: 8 tokens</li><li>mean: 18.08 tokens</li><li>max: 54 tokens</li></ul> | <ul><li>min: 4 tokens</li><li>mean: 13.64 tokens</li><li>max: 33 tokens</li></ul> | <ul><li>min: 5 tokens</li><li>mean: 18.23 tokens</li><li>max: 34 tokens</li></ul> |
218
+ * Samples:
219
+ | anchor | positive | negative |
220
+ |:----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|:--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|:--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
221
+ | <code>Only allow approved and secured components to run in production environments validating them with cryptographic checks and requiring authenticated configuration changes.</code> | <code>Deploy only approved applications in production environments. Use secure startup methods to validate software integrity. Require authentication for all system configuration changes.</code> | <code>Issue smart cards to employees as physical access tokens. Maintain a contact list for emergency communications. Label system components for asset tracking.</code> |
222
+ | <code>Create system architectures that include failover tamper detection and rollback protections to ensure operations continue even under cyberattack.</code> | <code>Design systems to continue operating under defined attack scenarios. Include tamper detection and automatic rollback capabilities. Ensure recovery processes are tested for effectiveness.</code> | <code>Protect paper-based confidential files in locked cabinets. Post warning signs at restricted data center doors. Track employee security briefings with completion certificates.</code> |
223
+ | <code>Structure networks and applications into isolated segments and apply integrity checking to ensure only trusted software is executed in operational environments.</code> | <code>Use network segmentation and security zones in system design. Validate system behavior using integrity checks. Require signed software for deployment into production.</code> | <code>Distribute weather radios for disaster preparedness in remote facilities. Track badge issuance and disable lost credentials. Train users to avoid reusing passwords across services.</code> |
224
+ * Loss: [<code>TripletLoss</code>](https://sbert.net/docs/package_reference/sentence_transformer/losses.html#tripletloss) with these parameters:
225
+ ```json
226
+ {
227
+ "distance_metric": "TripletDistanceMetric.EUCLIDEAN",
228
+ "triplet_margin": 5
229
+ }
230
+ ```
231
+
232
+ ### Training Hyperparameters
233
+ #### Non-Default Hyperparameters
234
+
235
+ - `eval_strategy`: steps
236
+ - `per_device_train_batch_size`: 16
237
+ - `per_device_eval_batch_size`: 16
238
+ - `learning_rate`: 2e-05
239
+ - `num_train_epochs`: 1
240
+ - `warmup_ratio`: 0.1
241
+ - `fp16`: True
242
+ - `batch_sampler`: no_duplicates
243
+
244
+ #### All Hyperparameters
245
+ <details><summary>Click to expand</summary>
246
+
247
+ - `overwrite_output_dir`: False
248
+ - `do_predict`: False
249
+ - `eval_strategy`: steps
250
+ - `prediction_loss_only`: True
251
+ - `per_device_train_batch_size`: 16
252
+ - `per_device_eval_batch_size`: 16
253
+ - `per_gpu_train_batch_size`: None
254
+ - `per_gpu_eval_batch_size`: None
255
+ - `gradient_accumulation_steps`: 1
256
+ - `eval_accumulation_steps`: None
257
+ - `torch_empty_cache_steps`: None
258
+ - `learning_rate`: 2e-05
259
+ - `weight_decay`: 0.0
260
+ - `adam_beta1`: 0.9
261
+ - `adam_beta2`: 0.999
262
+ - `adam_epsilon`: 1e-08
263
+ - `max_grad_norm`: 1.0
264
+ - `num_train_epochs`: 1
265
+ - `max_steps`: -1
266
+ - `lr_scheduler_type`: linear
267
+ - `lr_scheduler_kwargs`: {}
268
+ - `warmup_ratio`: 0.1
269
+ - `warmup_steps`: 0
270
+ - `log_level`: passive
271
+ - `log_level_replica`: warning
272
+ - `log_on_each_node`: True
273
+ - `logging_nan_inf_filter`: True
274
+ - `save_safetensors`: True
275
+ - `save_on_each_node`: False
276
+ - `save_only_model`: False
277
+ - `restore_callback_states_from_checkpoint`: False
278
+ - `no_cuda`: False
279
+ - `use_cpu`: False
280
+ - `use_mps_device`: False
281
+ - `seed`: 42
282
+ - `data_seed`: None
283
+ - `jit_mode_eval`: False
284
+ - `use_ipex`: False
285
+ - `bf16`: False
286
+ - `fp16`: True
287
+ - `fp16_opt_level`: O1
288
+ - `half_precision_backend`: auto
289
+ - `bf16_full_eval`: False
290
+ - `fp16_full_eval`: False
291
+ - `tf32`: None
292
+ - `local_rank`: 0
293
+ - `ddp_backend`: None
294
+ - `tpu_num_cores`: None
295
+ - `tpu_metrics_debug`: False
296
+ - `debug`: []
297
+ - `dataloader_drop_last`: False
298
+ - `dataloader_num_workers`: 0
299
+ - `dataloader_prefetch_factor`: None
300
+ - `past_index`: -1
301
+ - `disable_tqdm`: False
302
+ - `remove_unused_columns`: True
303
+ - `label_names`: None
304
+ - `load_best_model_at_end`: False
305
+ - `ignore_data_skip`: False
306
+ - `fsdp`: []
307
+ - `fsdp_min_num_params`: 0
308
+ - `fsdp_config`: {'min_num_params': 0, 'xla': False, 'xla_fsdp_v2': False, 'xla_fsdp_grad_ckpt': False}
309
+ - `fsdp_transformer_layer_cls_to_wrap`: None
310
+ - `accelerator_config`: {'split_batches': False, 'dispatch_batches': None, 'even_batches': True, 'use_seedable_sampler': True, 'non_blocking': False, 'gradient_accumulation_kwargs': None}
311
+ - `deepspeed`: None
312
+ - `label_smoothing_factor`: 0.0
313
+ - `optim`: adamw_torch
314
+ - `optim_args`: None
315
+ - `adafactor`: False
316
+ - `group_by_length`: False
317
+ - `length_column_name`: length
318
+ - `ddp_find_unused_parameters`: None
319
+ - `ddp_bucket_cap_mb`: None
320
+ - `ddp_broadcast_buffers`: False
321
+ - `dataloader_pin_memory`: True
322
+ - `dataloader_persistent_workers`: False
323
+ - `skip_memory_metrics`: True
324
+ - `use_legacy_prediction_loop`: False
325
+ - `push_to_hub`: False
326
+ - `resume_from_checkpoint`: None
327
+ - `hub_model_id`: None
328
+ - `hub_strategy`: every_save
329
+ - `hub_private_repo`: None
330
+ - `hub_always_push`: False
331
+ - `gradient_checkpointing`: False
332
+ - `gradient_checkpointing_kwargs`: None
333
+ - `include_inputs_for_metrics`: False
334
+ - `include_for_metrics`: []
335
+ - `eval_do_concat_batches`: True
336
+ - `fp16_backend`: auto
337
+ - `push_to_hub_model_id`: None
338
+ - `push_to_hub_organization`: None
339
+ - `mp_parameters`:
340
+ - `auto_find_batch_size`: False
341
+ - `full_determinism`: False
342
+ - `torchdynamo`: None
343
+ - `ray_scope`: last
344
+ - `ddp_timeout`: 1800
345
+ - `torch_compile`: False
346
+ - `torch_compile_backend`: None
347
+ - `torch_compile_mode`: None
348
+ - `include_tokens_per_second`: False
349
+ - `include_num_input_tokens_seen`: False
350
+ - `neftune_noise_alpha`: None
351
+ - `optim_target_modules`: None
352
+ - `batch_eval_metrics`: False
353
+ - `eval_on_start`: False
354
+ - `use_liger_kernel`: False
355
+ - `eval_use_gather_object`: False
356
+ - `average_tokens_across_devices`: False
357
+ - `prompts`: None
358
+ - `batch_sampler`: no_duplicates
359
+ - `multi_dataset_batch_sampler`: proportional
360
+
361
+ </details>
362
+
363
+ ### Training Logs
364
+ | Epoch | Step | Training Loss | Validation Loss | NIST-control-dev_cosine_accuracy |
365
+ |:-----:|:----:|:-------------:|:---------------:|:--------------------------------:|
366
+ | -1 | -1 | - | - | 0.6563 |
367
+ | 0.16 | 100 | 2.6751 | 4.0892 | 0.6661 |
368
+ | 0.32 | 200 | 0.9272 | 3.8595 | 0.7026 |
369
+ | 0.48 | 300 | 0.5711 | 3.8835 | 0.6897 |
370
+ | 0.64 | 400 | 0.3905 | 3.7548 | 0.7071 |
371
+ | 0.8 | 500 | 0.043 | 3.8021 | 0.7035 |
372
+ | 0.96 | 600 | 0.0407 | 3.8115 | 0.7049 |
373
+
374
+
375
+ ### Framework Versions
376
+ - Python: 3.13.5
377
+ - Sentence Transformers: 4.1.0
378
+ - Transformers: 4.52.4
379
+ - PyTorch: 2.7.1+cpu
380
+ - Accelerate: 1.8.1
381
+ - Datasets: 2.15.0
382
+ - Tokenizers: 0.21.2
383
+
384
+ ## Citation
385
+
386
+ ### BibTeX
387
+
388
+ #### Sentence Transformers
389
+ ```bibtex
390
+ @inproceedings{reimers-2019-sentence-bert,
391
+ title = "Sentence-BERT: Sentence Embeddings using Siamese BERT-Networks",
392
+ author = "Reimers, Nils and Gurevych, Iryna",
393
+ booktitle = "Proceedings of the 2019 Conference on Empirical Methods in Natural Language Processing",
394
+ month = "11",
395
+ year = "2019",
396
+ publisher = "Association for Computational Linguistics",
397
+ url = "https://arxiv.org/abs/1908.10084",
398
+ }
399
+ ```
400
+
401
+ #### TripletLoss
402
+ ```bibtex
403
+ @misc{hermans2017defense,
404
+ title={In Defense of the Triplet Loss for Person Re-Identification},
405
+ author={Alexander Hermans and Lucas Beyer and Bastian Leibe},
406
+ year={2017},
407
+ eprint={1703.07737},
408
+ archivePrefix={arXiv},
409
+ primaryClass={cs.CV}
410
+ }
411
+ ```
412
+
413
+ <!--
414
+ ## Glossary
415
+
416
+ *Clearly define terms in order to be accessible across audiences.*
417
+ -->
418
+
419
+ <!--
420
+ ## Model Card Authors
421
+
422
+ *Lists the people who create the model card, providing recognition and accountability for the detailed work that goes into its construction.*
423
+ -->
424
+
425
+ <!--
426
+ ## Model Card Contact
427
+
428
+ *Provides a way for people who have updates to the Model Card, suggestions, or questions, to contact the Model Card authors.*
429
+ -->
HPEControlMapper/models/mpnet-base-control-triplet/checkpoint-625/config.json ADDED
@@ -0,0 +1,23 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "architectures": [
3
+ "MPNetModel"
4
+ ],
5
+ "attention_probs_dropout_prob": 0.1,
6
+ "bos_token_id": 0,
7
+ "eos_token_id": 2,
8
+ "hidden_act": "gelu",
9
+ "hidden_dropout_prob": 0.1,
10
+ "hidden_size": 768,
11
+ "initializer_range": 0.02,
12
+ "intermediate_size": 3072,
13
+ "layer_norm_eps": 1e-05,
14
+ "max_position_embeddings": 514,
15
+ "model_type": "mpnet",
16
+ "num_attention_heads": 12,
17
+ "num_hidden_layers": 12,
18
+ "pad_token_id": 1,
19
+ "relative_attention_num_buckets": 32,
20
+ "torch_dtype": "float32",
21
+ "transformers_version": "4.52.4",
22
+ "vocab_size": 30527
23
+ }
HPEControlMapper/models/mpnet-base-control-triplet/checkpoint-625/config_sentence_transformers.json ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "__version__": {
3
+ "sentence_transformers": "4.1.0",
4
+ "transformers": "4.52.4",
5
+ "pytorch": "2.7.1+cpu"
6
+ },
7
+ "prompts": {},
8
+ "default_prompt_name": null,
9
+ "similarity_fn_name": "cosine"
10
+ }
HPEControlMapper/models/mpnet-base-control-triplet/checkpoint-625/model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:023ce0c063565c38d46015c3e97cbd919a210aeac663deb5384d5802e0e6a244
3
+ size 437967672
HPEControlMapper/models/mpnet-base-control-triplet/checkpoint-625/modules.json ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [
2
+ {
3
+ "idx": 0,
4
+ "name": "0",
5
+ "path": "",
6
+ "type": "sentence_transformers.models.Transformer"
7
+ },
8
+ {
9
+ "idx": 1,
10
+ "name": "1",
11
+ "path": "1_Pooling",
12
+ "type": "sentence_transformers.models.Pooling"
13
+ }
14
+ ]
HPEControlMapper/models/mpnet-base-control-triplet/checkpoint-625/optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:33161b4623ae16ae0c17475cad5a8660d3a663c8c86f6a17526188411838e79e
3
+ size 871326731
HPEControlMapper/models/mpnet-base-control-triplet/checkpoint-625/rng_state.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ff9bcba5a601f138be343f6fc7417a4e3f69d6281c51811c943bae8537eabfe1
3
+ size 14391
HPEControlMapper/models/mpnet-base-control-triplet/checkpoint-625/scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e6122107d2ed33617eb1e954ca24bf223862331c275f054134ef9e1a48454372
3
+ size 1465
HPEControlMapper/models/mpnet-base-control-triplet/checkpoint-625/sentence_bert_config.json ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ {
2
+ "max_seq_length": 512,
3
+ "do_lower_case": false
4
+ }
HPEControlMapper/models/mpnet-base-control-triplet/checkpoint-625/special_tokens_map.json ADDED
@@ -0,0 +1,51 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "bos_token": {
3
+ "content": "<s>",
4
+ "lstrip": false,
5
+ "normalized": false,
6
+ "rstrip": false,
7
+ "single_word": false
8
+ },
9
+ "cls_token": {
10
+ "content": "<s>",
11
+ "lstrip": false,
12
+ "normalized": true,
13
+ "rstrip": false,
14
+ "single_word": false
15
+ },
16
+ "eos_token": {
17
+ "content": "</s>",
18
+ "lstrip": false,
19
+ "normalized": false,
20
+ "rstrip": false,
21
+ "single_word": false
22
+ },
23
+ "mask_token": {
24
+ "content": "<mask>",
25
+ "lstrip": true,
26
+ "normalized": false,
27
+ "rstrip": false,
28
+ "single_word": false
29
+ },
30
+ "pad_token": {
31
+ "content": "<pad>",
32
+ "lstrip": false,
33
+ "normalized": false,
34
+ "rstrip": false,
35
+ "single_word": false
36
+ },
37
+ "sep_token": {
38
+ "content": "</s>",
39
+ "lstrip": false,
40
+ "normalized": true,
41
+ "rstrip": false,
42
+ "single_word": false
43
+ },
44
+ "unk_token": {
45
+ "content": "[UNK]",
46
+ "lstrip": false,
47
+ "normalized": false,
48
+ "rstrip": false,
49
+ "single_word": false
50
+ }
51
+ }
HPEControlMapper/models/mpnet-base-control-triplet/checkpoint-625/tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
HPEControlMapper/models/mpnet-base-control-triplet/checkpoint-625/tokenizer_config.json ADDED
@@ -0,0 +1,66 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "added_tokens_decoder": {
3
+ "0": {
4
+ "content": "<s>",
5
+ "lstrip": false,
6
+ "normalized": false,
7
+ "rstrip": false,
8
+ "single_word": false,
9
+ "special": true
10
+ },
11
+ "1": {
12
+ "content": "<pad>",
13
+ "lstrip": false,
14
+ "normalized": false,
15
+ "rstrip": false,
16
+ "single_word": false,
17
+ "special": true
18
+ },
19
+ "2": {
20
+ "content": "</s>",
21
+ "lstrip": false,
22
+ "normalized": false,
23
+ "rstrip": false,
24
+ "single_word": false,
25
+ "special": true
26
+ },
27
+ "3": {
28
+ "content": "<unk>",
29
+ "lstrip": false,
30
+ "normalized": true,
31
+ "rstrip": false,
32
+ "single_word": false,
33
+ "special": true
34
+ },
35
+ "104": {
36
+ "content": "[UNK]",
37
+ "lstrip": false,
38
+ "normalized": false,
39
+ "rstrip": false,
40
+ "single_word": false,
41
+ "special": true
42
+ },
43
+ "30526": {
44
+ "content": "<mask>",
45
+ "lstrip": true,
46
+ "normalized": false,
47
+ "rstrip": false,
48
+ "single_word": false,
49
+ "special": true
50
+ }
51
+ },
52
+ "bos_token": "<s>",
53
+ "clean_up_tokenization_spaces": false,
54
+ "cls_token": "<s>",
55
+ "do_lower_case": true,
56
+ "eos_token": "</s>",
57
+ "extra_special_tokens": {},
58
+ "mask_token": "<mask>",
59
+ "model_max_length": 512,
60
+ "pad_token": "<pad>",
61
+ "sep_token": "</s>",
62
+ "strip_accents": null,
63
+ "tokenize_chinese_chars": true,
64
+ "tokenizer_class": "MPNetTokenizer",
65
+ "unk_token": "[UNK]"
66
+ }
HPEControlMapper/models/mpnet-base-control-triplet/checkpoint-625/trainer_state.json ADDED
@@ -0,0 +1,130 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_global_step": null,
3
+ "best_metric": null,
4
+ "best_model_checkpoint": null,
5
+ "epoch": 1.0,
6
+ "eval_steps": 100,
7
+ "global_step": 625,
8
+ "is_hyper_param_search": false,
9
+ "is_local_process_zero": true,
10
+ "is_world_process_zero": true,
11
+ "log_history": [
12
+ {
13
+ "epoch": 0.16,
14
+ "grad_norm": 32.675262451171875,
15
+ "learning_rate": 1.8718861209964415e-05,
16
+ "loss": 2.6751,
17
+ "step": 100
18
+ },
19
+ {
20
+ "epoch": 0.16,
21
+ "eval_NIST-control-dev_cosine_accuracy": 0.6661201119422913,
22
+ "eval_loss": 4.089230537414551,
23
+ "eval_runtime": 601.3322,
24
+ "eval_samples_per_second": 11.157,
25
+ "eval_steps_per_second": 0.698,
26
+ "step": 100
27
+ },
28
+ {
29
+ "epoch": 0.32,
30
+ "grad_norm": 12.523146629333496,
31
+ "learning_rate": 1.516014234875445e-05,
32
+ "loss": 0.9272,
33
+ "step": 200
34
+ },
35
+ {
36
+ "epoch": 0.32,
37
+ "eval_NIST-control-dev_cosine_accuracy": 0.7026382684707642,
38
+ "eval_loss": 3.859454870223999,
39
+ "eval_runtime": 604.4408,
40
+ "eval_samples_per_second": 11.1,
41
+ "eval_steps_per_second": 0.695,
42
+ "step": 200
43
+ },
44
+ {
45
+ "epoch": 0.48,
46
+ "grad_norm": 14.002488136291504,
47
+ "learning_rate": 1.1601423487544485e-05,
48
+ "loss": 0.5711,
49
+ "step": 300
50
+ },
51
+ {
52
+ "epoch": 0.48,
53
+ "eval_NIST-control-dev_cosine_accuracy": 0.6896705627441406,
54
+ "eval_loss": 3.883481502532959,
55
+ "eval_runtime": 608.1647,
56
+ "eval_samples_per_second": 11.032,
57
+ "eval_steps_per_second": 0.691,
58
+ "step": 300
59
+ },
60
+ {
61
+ "epoch": 0.64,
62
+ "grad_norm": 28.6463565826416,
63
+ "learning_rate": 8.04270462633452e-06,
64
+ "loss": 0.3905,
65
+ "step": 400
66
+ },
67
+ {
68
+ "epoch": 0.64,
69
+ "eval_NIST-control-dev_cosine_accuracy": 0.7071098685264587,
70
+ "eval_loss": 3.7548305988311768,
71
+ "eval_runtime": 606.0474,
72
+ "eval_samples_per_second": 11.07,
73
+ "eval_steps_per_second": 0.693,
74
+ "step": 400
75
+ },
76
+ {
77
+ "epoch": 0.8,
78
+ "grad_norm": 0.0,
79
+ "learning_rate": 4.483985765124556e-06,
80
+ "loss": 0.043,
81
+ "step": 500
82
+ },
83
+ {
84
+ "epoch": 0.8,
85
+ "eval_NIST-control-dev_cosine_accuracy": 0.7035325765609741,
86
+ "eval_loss": 3.8021018505096436,
87
+ "eval_runtime": 600.763,
88
+ "eval_samples_per_second": 11.167,
89
+ "eval_steps_per_second": 0.699,
90
+ "step": 500
91
+ },
92
+ {
93
+ "epoch": 0.96,
94
+ "grad_norm": 0.0,
95
+ "learning_rate": 9.252669039145908e-07,
96
+ "loss": 0.0407,
97
+ "step": 600
98
+ },
99
+ {
100
+ "epoch": 0.96,
101
+ "eval_NIST-control-dev_cosine_accuracy": 0.7048740386962891,
102
+ "eval_loss": 3.811488628387451,
103
+ "eval_runtime": 599.8706,
104
+ "eval_samples_per_second": 11.184,
105
+ "eval_steps_per_second": 0.7,
106
+ "step": 600
107
+ }
108
+ ],
109
+ "logging_steps": 100,
110
+ "max_steps": 625,
111
+ "num_input_tokens_seen": 0,
112
+ "num_train_epochs": 1,
113
+ "save_steps": 100,
114
+ "stateful_callbacks": {
115
+ "TrainerControl": {
116
+ "args": {
117
+ "should_epoch_stop": false,
118
+ "should_evaluate": false,
119
+ "should_log": false,
120
+ "should_save": true,
121
+ "should_training_stop": true
122
+ },
123
+ "attributes": {}
124
+ }
125
+ },
126
+ "total_flos": 0.0,
127
+ "train_batch_size": 16,
128
+ "trial_name": null,
129
+ "trial_params": null
130
+ }
HPEControlMapper/models/mpnet-base-control-triplet/checkpoint-625/training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:833ef880b3501bd0c81b578a2dc1a700c13add6a501fc8fb0cf7ea0843c2483a
3
+ size 5969
HPEControlMapper/models/mpnet-base-control-triplet/checkpoint-625/vocab.txt ADDED
The diff for this file is too large to render. See raw diff
 
HPEControlMapper/models/mpnet-base-control-triplet/eval/triplet_evaluation_NIST-control-dev_results.csv ADDED
@@ -0,0 +1,147 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ epoch,steps,accuracy_cosine
2
+ 0.0028676301904106446,100,0.6373528242111206
3
+ 0.005735260380821289,200,0.6752123832702637
4
+ 0.008602890571231933,300,0.7472052574157715
5
+ 0.011470520761642579,400,0.7840214371681213
6
+ 0.014338150952053224,500,0.7856610417366028
7
+ 0.017205781142463867,600,0.7878968715667725
8
+ 0.020073411332874512,700,0.7940080761909485
9
+ 0.022941041523285157,800,0.8038455843925476
10
+ 0.025808671713695802,900,0.826948881149292
11
+ 0.028676301904106447,1000,0.8160679936408997
12
+ 0.03154393209451709,1100,0.833507239818573
13
+ 0.034411562284927734,1200,0.8253092765808105
14
+ 0.03727919247533838,1300,0.8415561318397522
15
+ 0.040146822665749024,1400,0.839171290397644
16
+ 0.04301445285615967,1500,0.840512752532959
17
+ 0.045882083046570314,1600,0.8504993319511414
18
+ 0.048749713236980956,1700,0.8616783618927002
19
+ 0.051617343427391604,1800,0.8594425320625305
20
+ 0.054484973617802246,1900,0.8482635021209717
21
+ 0.057352603808212894,2000,0.8543747067451477
22
+ 0.060220233998623536,2100,0.859740674495697
23
+ 0.06308786418903418,2200,0.8570576906204224
24
+ 0.06595549437944483,2300,0.8566105365753174
25
+ 0.06882312456985547,2400,0.8522879481315613
26
+ 0.07169075476026611,2500,0.8589953780174255
27
+ 0.07455838495067676,2600,0.8507974147796631
28
+ 0.0774260151410874,2700,0.8525860905647278
29
+ 0.08029364533149805,2800,0.8640632033348083
30
+ 0.08316127552190869,2900,0.8533313274383545
31
+ 0.08602890571231935,3000,0.8649575114250183
32
+ 0.08889653590272999,3100,0.8689819574356079
33
+ 0.09176416609314063,3200,0.871813952922821
34
+ 0.09463179628355127,3300,0.8509464859962463
35
+ 0.09749942647396191,3400,0.8804590702056885
36
+ 0.10036705666437257,3500,0.8660008907318115
37
+ 0.10323468685478321,3600,0.8649575114250183
38
+ 0.10610231704519385,3700,0.8589953780174255
39
+ 0.10896994723560449,3800,0.878372311592102
40
+ 0.11183757742601515,3900,0.8785213828086853
41
+ 0.11470520761642579,4000,0.8785213828086853
42
+ 0.11757283780683643,4100,0.8296318650245667
43
+ 0.12044046799724707,4200,0.8730064034461975
44
+ 0.12330809818765771,4300,0.8673423528671265
45
+ 0.12617572837806837,4400,0.8750931620597839
46
+ 0.129043358568479,4500,0.8408108353614807
47
+ 0.13191098875888965,4600,0.8662989735603333
48
+ 0.1347786189493003,4700,0.8719630241394043
49
+ 0.13764624913971094,4800,0.8818005919456482
50
+ 0.14051387933012158,4900,0.8825458288192749
51
+ 0.14338150952053222,5000,0.8628707528114319
52
+ 0.1462491397109429,5100,0.8694291114807129
53
+ 0.14911676990135353,5200,0.8759874701499939
54
+ 0.15198440009176417,5300,0.8685348033905029
55
+ 0.1548520302821748,5400,0.8780742287635803
56
+ 0.15771966047258545,5500,0.8835892081260681
57
+ 0.1605872906629961,5600,0.8828439116477966
58
+ 0.16345492085340674,5700,0.8657028079032898
59
+ 0.16632255104381738,5800,0.8829929828643799
60
+ 0.16919018123422802,5900,0.8770308494567871
61
+ 0.1720578114246387,6000,0.8892532587051392
62
+ 0.17492544161504933,6100,0.8847816586494446
63
+ 0.17779307180545997,6200,0.8818005919456482
64
+ 0.18066070199587062,6300,0.8910418748855591
65
+ 0.18352833218628126,6400,0.887762725353241
66
+ 0.1863959623766919,6500,0.8867193460464478
67
+ 0.18926359256710254,6600,0.8879117369651794
68
+ 0.19213122275751318,6700,0.8792666792869568
69
+ 0.19499885294792382,6800,0.8928305506706238
70
+ 0.1978664831383345,6900,0.8855268955230713
71
+ 0.20073411332874513,7000,0.8886570334434509
72
+ 0.20360174351915578,7100,0.8828439116477966
73
+ 0.20646937370956642,7200,0.8929795622825623
74
+ 0.016,100,0.6908630132675171
75
+ 0.032,200,0.7467581033706665
76
+ 0.048,300,0.7501863241195679
77
+ 0.064,400,0.7367715239524841
78
+ 0.08,500,0.7378149032592773
79
+ 0.096,600,0.7382620573043823
80
+ 0.112,700,0.7512297034263611
81
+ 0.128,800,0.7753763794898987
82
+ 0.144,900,0.8031002879142761
83
+ 0.16,1000,0.8036965131759644
84
+ 0.176,1100,0.8162170052528381
85
+ 0.192,1200,0.8276941180229187
86
+ 0.208,1300,0.8232225179672241
87
+ 0.224,1400,0.8221791386604309
88
+ 0.24,1500,0.8157698512077332
89
+ 0.256,1600,0.8309733271598816
90
+ 0.272,1700,0.8323147892951965
91
+ 0.288,1800,0.826948881149292
92
+ 0.304,1900,0.8086152672767639
93
+ 0.32,2000,0.8166641592979431
94
+ 0.336,2100,0.8174094557762146
95
+ 0.352,2200,0.8193471431732178
96
+ 0.368,2300,0.8141302466392517
97
+ 0.384,2400,0.826948881149292
98
+ 0.4,2500,0.8358920812606812
99
+ 0.416,2600,0.8209867477416992
100
+ 0.432,2700,0.8385750651359558
101
+ 0.448,2800,0.8105530142784119
102
+ 0.464,2900,0.8200924396514893
103
+ 0.48,3000,0.8375316858291626
104
+ 0.496,3100,0.8205395936965942
105
+ 0.512,3200,0.8279922604560852
106
+ 0.528,3300,0.825607419013977
107
+ 0.544,3400,0.8253092765808105
108
+ 0.56,3500,0.8159189224243164
109
+ 0.576,3600,0.8183037638664246
110
+ 0.592,3700,0.8139812350273132
111
+ 0.608,3800,0.833060085773468
112
+ 0.624,3900,0.8123416304588318
113
+ 0.64,4000,0.8008645176887512
114
+ 0.656,4100,0.8239678144454956
115
+ 0.672,4200,0.8172603845596313
116
+ 0.688,4300,0.8123416304588318
117
+ 0.704,4400,0.8186019062995911
118
+ 0.72,4500,0.8202414512634277
119
+ 0.736,4600,0.8186019062995911
120
+ 0.752,4700,0.8148755431175232
121
+ 0.768,4800,0.8238187432289124
122
+ 0.784,4900,0.827396035194397
123
+ 0.8,5000,0.8132359385490417
124
+ 0.816,5100,0.8145774602890015
125
+ 0.832,5200,0.8111491799354553
126
+ 0.848,5300,0.8197942972183228
127
+ 0.864,5400,0.8169623017311096
128
+ 0.88,5500,0.8178566098213196
129
+ 0.896,5600,0.814279317855835
130
+ 0.912,5700,0.8206886053085327
131
+ 0.928,5800,0.8123416304588318
132
+ 0.944,5900,0.8123416304588318
133
+ 0.96,6000,0.8110001683235168
134
+ 0.976,6100,0.8136830925941467
135
+ 0.992,6200,0.8139812350273132
136
+ 0.16,100,0.69414222240448
137
+ 0.32,200,0.7014458179473877
138
+ 0.48,300,0.6860933303833008
139
+ 0.64,400,0.7059174180030823
140
+ 0.8,500,0.6977194547653198
141
+ 0.96,600,0.7017439007759094
142
+ 0.16,100,0.6661201119422913
143
+ 0.32,200,0.7026382684707642
144
+ 0.48,300,0.6896705627441406
145
+ 0.64,400,0.7071098685264587
146
+ 0.8,500,0.7035325765609741
147
+ 0.96,600,0.7048740386962891
HPEControlMapper/models/mpnet-base-control-triplet/final/1_Pooling/config.json ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "word_embedding_dimension": 768,
3
+ "pooling_mode_cls_token": false,
4
+ "pooling_mode_mean_tokens": true,
5
+ "pooling_mode_max_tokens": false,
6
+ "pooling_mode_mean_sqrt_len_tokens": false,
7
+ "pooling_mode_weightedmean_tokens": false,
8
+ "pooling_mode_lasttoken": false,
9
+ "include_prompt": true
10
+ }
HPEControlMapper/models/mpnet-base-control-triplet/final/README.md ADDED
@@ -0,0 +1,440 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ language:
3
+ - en
4
+ license: apache-2.0
5
+ tags:
6
+ - sentence-transformers
7
+ - sentence-similarity
8
+ - feature-extraction
9
+ - generated_from_trainer
10
+ - dataset_size:10000
11
+ - loss:TripletLoss
12
+ base_model: microsoft/mpnet-base
13
+ widget:
14
+ - source_sentence: Use hardware-based methods where available to guarantee role-based
15
+ access control cannot be bypassed.
16
+ sentences:
17
+ - Related control that reinforces stability and assurance in brute force login prevention
18
+ use cases.
19
+ - Audit session replay defense_b11_8 as part of continuous security assessment processes.
20
+ - Core functionality needed to enforce effective role-based access control mechanisms.
21
+ - source_sentence: Provide full-feature access to security enhancements in NVIDIA
22
+ GPU firmware.
23
+ sentences:
24
+ - Implement secure communication channels between host and GPU.
25
+ - A little boy blows bubbles outdoors.
26
+ - Use HTTPS inspection to detect man-in-the-middle attack attempts.
27
+ - source_sentence: Validate source authenticity by requiring signed code in all components.
28
+ sentences:
29
+ - Firewalls are activated by default and preloaded with security policies.
30
+ - Enforce cryptographic validation on third-party software inputs.
31
+ - Display productivity summaries on a weekly dashboard.
32
+ - source_sentence: Mandate organization-wide adherence to policies enforcing siem
33
+ integration using centrally managed tools.
34
+ sentences:
35
+ - Review policies around anonymous proxy blocking_b11_50 to avoid unexpected vectors
36
+ of compromise.
37
+ - Implement key management systems that use secure encryption algorithms.
38
+ - This measure directly supports secure handling within siem integration implementations.
39
+ - source_sentence: Mandate organization-wide adherence to policies enforcing virtual
40
+ machine isolation using centrally managed tools.
41
+ sentences:
42
+ - Adult males stand in front of a brick wall near something made of metal.
43
+ - Monitor for issues related to redundant login blocking_b12_1 as part of extended
44
+ security hygiene.
45
+ - A widely recommended control paired with proper virtual machine isolation implementations.
46
+ pipeline_tag: sentence-similarity
47
+ library_name: sentence-transformers
48
+ metrics:
49
+ - cosine_accuracy
50
+ model-index:
51
+ - name: MPNet base trained on NIST Controls
52
+ results:
53
+ - task:
54
+ type: triplet
55
+ name: Triplet
56
+ dataset:
57
+ name: NIST control dev
58
+ type: NIST-control-dev
59
+ metrics:
60
+ - type: cosine_accuracy
61
+ value: 0.7048740386962891
62
+ name: Cosine Accuracy
63
+ - task:
64
+ type: triplet
65
+ name: Triplet
66
+ dataset:
67
+ name: all nli test
68
+ type: all-nli-test
69
+ metrics:
70
+ - type: cosine_accuracy
71
+ value: 0.7318310737609863
72
+ name: Cosine Accuracy
73
+ ---
74
+
75
+ # MPNet base trained on NIST Controls
76
+
77
+ This is a [sentence-transformers](https://www.SBERT.net) model fine-tuned from [microsoft/mpnet-base](https://huggingface.co/microsoft/mpnet-base) on a dataset of 10,000 anchor/positive/negative triplets loaded from CSV. It maps sentences and paragraphs to a 768-dimensional dense vector space and can be used for semantic textual similarity, semantic search, paraphrase mining, text classification, clustering, and more.
78
+
79
+ ## Model Details
80
+
81
+ ### Model Description
82
+ - **Model Type:** Sentence Transformer
83
+ - **Base model:** [microsoft/mpnet-base](https://huggingface.co/microsoft/mpnet-base) <!-- at revision 6996ce1e91bd2a9c7d7f61daec37463394f73f09 -->
84
+ - **Maximum Sequence Length:** 512 tokens
85
+ - **Output Dimensionality:** 768 dimensions
86
+ - **Similarity Function:** Cosine Similarity
87
+ - **Training Dataset:**
88
+ - csv
89
+ - **Language:** en
90
+ - **License:** apache-2.0
91
+
92
+ ### Model Sources
93
+
94
+ - **Documentation:** [Sentence Transformers Documentation](https://sbert.net)
95
+ - **Repository:** [Sentence Transformers on GitHub](https://github.com/UKPLab/sentence-transformers)
96
+ - **Hugging Face:** [Sentence Transformers on Hugging Face](https://huggingface.co/models?library=sentence-transformers)
97
+
98
+ ### Full Model Architecture
99
+
100
+ ```
101
+ SentenceTransformer(
102
+ (0): Transformer({'max_seq_length': 512, 'do_lower_case': False}) with Transformer model: MPNetModel
103
+ (1): Pooling({'word_embedding_dimension': 768, 'pooling_mode_cls_token': False, 'pooling_mode_mean_tokens': True, 'pooling_mode_max_tokens': False, 'pooling_mode_mean_sqrt_len_tokens': False, 'pooling_mode_weightedmean_tokens': False, 'pooling_mode_lasttoken': False, 'include_prompt': True})
104
+ )
105
+ ```
106
+
107
+ ## Usage
108
+
109
+ ### Direct Usage (Sentence Transformers)
110
+
111
+ First install the Sentence Transformers library:
112
+
113
+ ```bash
114
+ pip install -U sentence-transformers
115
+ ```
116
+
117
+ Then you can load this model and run inference.
118
+ ```python
119
+ from sentence_transformers import SentenceTransformer
120
+
121
+ # Download from the 🤗 Hub (the id below is a placeholder: replace it with this model's repo id or a local path)
122
+ model = SentenceTransformer("sentence_transformers_model_id")
123
+ # Run inference
124
+ sentences = [
125
+ 'Mandate organization-wide adherence to policies enforcing virtual machine isolation using centrally managed tools.',
126
+ 'A widely recommended control paired with proper virtual machine isolation implementations.',
127
+ 'Monitor for issues related to redundant login blocking_b12_1 as part of extended security hygiene.',
128
+ ]
129
+ embeddings = model.encode(sentences)
130
+ print(embeddings.shape)
131
+ # (3, 768)
132
+
133
+ # Get the similarity scores for the embeddings
134
+ similarities = model.similarity(embeddings, embeddings)
135
+ print(similarities.shape)
136
+ # torch.Size([3, 3])
137
+ ```
138
+
139
+ <!--
140
+ ### Direct Usage (Transformers)
141
+
142
+ <details><summary>Click to see the direct usage in Transformers</summary>
143
+
144
+ </details>
145
+ -->
146
+
147
+ <!--
148
+ ### Downstream Usage (Sentence Transformers)
149
+
150
+ You can finetune this model on your own dataset.
151
+
152
+ <details><summary>Click to expand</summary>
153
+
154
+ </details>
155
+ -->
156
+
157
+ <!--
158
+ ### Out-of-Scope Use
159
+
160
+ *List how the model may foreseeably be misused and address what users ought not to do with the model.*
161
+ -->
162
+
163
+ ## Evaluation
164
+
165
+ ### Metrics
166
+
167
+ #### Triplet
168
+
169
+ * Datasets: `NIST-control-dev` and `all-nli-test`
170
+ * Evaluated with [<code>TripletEvaluator</code>](https://sbert.net/docs/package_reference/sentence_transformer/evaluation.html#sentence_transformers.evaluation.TripletEvaluator)
171
+
172
+ | Metric | NIST-control-dev | all-nli-test |
173
+ |:--------------------|:-----------------|:-------------|
174
+ | **cosine_accuracy** | **0.7049** | **0.7318** |
175
+
176
+ <!--
177
+ ## Bias, Risks and Limitations
178
+
179
+ *What are the known or foreseeable issues stemming from this model? You could also flag here known failure cases or weaknesses of the model.*
180
+ -->
181
+
182
+ <!--
183
+ ### Recommendations
184
+
185
+ *What are recommendations with respect to the foreseeable issues? For example, filtering explicit content.*
186
+ -->
187
+
188
+ ## Training Details
189
+
190
+ ### Training Dataset
191
+
192
+ #### csv
193
+
194
+ * Dataset: csv
195
+ * Size: 10,000 training samples
196
+ * Columns: <code>anchor</code>, <code>positive</code>, and <code>negative</code>
197
+ * Approximate statistics based on the first 1000 samples:
198
+ | | anchor | positive | negative |
199
+ |:--------|:------------------------------------------------------------------------------------|:------------------------------------------------------------------------------------|:----------------------------------------------------------------------------------|
200
+ | type | string | string | string |
201
+ | details | <ul><li>min: 10 tokens</li><li>mean: 21.39 tokens</li><li>max: 280 tokens</li></ul> | <ul><li>min: 10 tokens</li><li>mean: 17.96 tokens</li><li>max: 171 tokens</li></ul> | <ul><li>min: 9 tokens</li><li>mean: 20.27 tokens</li><li>max: 86 tokens</li></ul> |
202
+ * Samples:
203
+ | anchor | positive | negative |
204
+ |:---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|:----------------------------------------------------------------------------------------------------------------------------------|:------------------------------------------------------------------------------------------|
205
+ | <code>Audit Logs: A secure record of security-sensitive activity required for the audit trail. The audit log is a record of an activity by a system a user or an application. Audit logs are required for the audit trail. Examples of audit events are listed in the requirements section below. It is important that these events are in the audit log as part of HPE's commitment to CISA's Secure by Design Pledge.</code> | <code>Security Event Correlation (v3): Systems that correlate audit logs with security events to detect anomalies.</code> | <code>Remote Access Protection (v3): Controls to secure remote access to systems.</code> |
206
+ | <code>One to One Credential Mapping: This requirement involves getting the specific user login information correct in the audit log down to the specific user and not just listing the role or process ID value.</code> | <code>User Identity Verification (v3): Mechanism to verify user identities before granting access to resources.</code> | <code>Incident Response Testing (v3): Regular testing of incident response plans.</code> |
207
+ | <code>Reproducible Builds: Be able to recreate bit-by-bit identical copies of signed code. Ensure that signed code really came from the source code developed by HPE and possibly reviewed by a third-party.</code> | <code>Build Integrity Checking (v3): Verification process to ensure that the build environment has not been tampered with.</code> | <code>Backup and Recovery Planning (v3): Ensure data is backed up and recoverable.</code> |
208
+ * Loss: [<code>TripletLoss</code>](https://sbert.net/docs/package_reference/sentence_transformer/losses.html#tripletloss) with these parameters:
209
+ ```json
210
+ {
211
+ "distance_metric": "TripletDistanceMetric.EUCLIDEAN",
212
+ "triplet_margin": 5
213
+ }
214
+ ```
215
+
216
+ ### Evaluation Dataset
217
+
218
+ #### csv
219
+
220
+ * Dataset: csv
221
+ * Size: 6,709 evaluation samples
222
+ * Columns: <code>anchor</code>, <code>positive</code>, and <code>negative</code>
223
+ * Approximate statistics based on the first 1000 samples:
224
+ | | anchor | positive | negative |
225
+ |:--------|:----------------------------------------------------------------------------------|:----------------------------------------------------------------------------------|:----------------------------------------------------------------------------------|
226
+ | type | string | string | string |
227
+ | details | <ul><li>min: 8 tokens</li><li>mean: 18.08 tokens</li><li>max: 54 tokens</li></ul> | <ul><li>min: 4 tokens</li><li>mean: 13.64 tokens</li><li>max: 33 tokens</li></ul> | <ul><li>min: 5 tokens</li><li>mean: 18.23 tokens</li><li>max: 34 tokens</li></ul> |
228
+ * Samples:
229
+ | anchor | positive | negative |
230
+ |:----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|:--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|:--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
231
+ | <code>Only allow approved and secured components to run in production environments validating them with cryptographic checks and requiring authenticated configuration changes.</code> | <code>Deploy only approved applications in production environments. Use secure startup methods to validate software integrity. Require authentication for all system configuration changes.</code> | <code>Issue smart cards to employees as physical access tokens. Maintain a contact list for emergency communications. Label system components for asset tracking.</code> |
232
+ | <code>Create system architectures that include failover tamper detection and rollback protections to ensure operations continue even under cyberattack.</code> | <code>Design systems to continue operating under defined attack scenarios. Include tamper detection and automatic rollback capabilities. Ensure recovery processes are tested for effectiveness.</code> | <code>Protect paper-based confidential files in locked cabinets. Post warning signs at restricted data center doors. Track employee security briefings with completion certificates.</code> |
233
+ | <code>Structure networks and applications into isolated segments and apply integrity checking to ensure only trusted software is executed in operational environments.</code> | <code>Use network segmentation and security zones in system design. Validate system behavior using integrity checks. Require signed software for deployment into production.</code> | <code>Distribute weather radios for disaster preparedness in remote facilities. Track badge issuance and disable lost credentials. Train users to avoid reusing passwords across services.</code> |
234
+ * Loss: [<code>TripletLoss</code>](https://sbert.net/docs/package_reference/sentence_transformer/losses.html#tripletloss) with these parameters:
235
+ ```json
236
+ {
237
+ "distance_metric": "TripletDistanceMetric.EUCLIDEAN",
238
+ "triplet_margin": 5
239
+ }
240
+ ```
241
+
242
+ ### Training Hyperparameters
243
+ #### Non-Default Hyperparameters
244
+
245
+ - `eval_strategy`: steps
246
+ - `per_device_train_batch_size`: 16
247
+ - `per_device_eval_batch_size`: 16
248
+ - `learning_rate`: 2e-05
249
+ - `num_train_epochs`: 1
250
+ - `warmup_ratio`: 0.1
251
+ - `fp16`: True
252
+ - `batch_sampler`: no_duplicates
253
+
254
+ #### All Hyperparameters
255
+ <details><summary>Click to expand</summary>
256
+
257
+ - `overwrite_output_dir`: False
258
+ - `do_predict`: False
259
+ - `eval_strategy`: steps
260
+ - `prediction_loss_only`: True
261
+ - `per_device_train_batch_size`: 16
262
+ - `per_device_eval_batch_size`: 16
263
+ - `per_gpu_train_batch_size`: None
264
+ - `per_gpu_eval_batch_size`: None
265
+ - `gradient_accumulation_steps`: 1
266
+ - `eval_accumulation_steps`: None
267
+ - `torch_empty_cache_steps`: None
268
+ - `learning_rate`: 2e-05
269
+ - `weight_decay`: 0.0
270
+ - `adam_beta1`: 0.9
271
+ - `adam_beta2`: 0.999
272
+ - `adam_epsilon`: 1e-08
273
+ - `max_grad_norm`: 1.0
274
+ - `num_train_epochs`: 1
275
+ - `max_steps`: -1
276
+ - `lr_scheduler_type`: linear
277
+ - `lr_scheduler_kwargs`: {}
278
+ - `warmup_ratio`: 0.1
279
+ - `warmup_steps`: 0
280
+ - `log_level`: passive
281
+ - `log_level_replica`: warning
282
+ - `log_on_each_node`: True
283
+ - `logging_nan_inf_filter`: True
284
+ - `save_safetensors`: True
285
+ - `save_on_each_node`: False
286
+ - `save_only_model`: False
287
+ - `restore_callback_states_from_checkpoint`: False
288
+ - `no_cuda`: False
289
+ - `use_cpu`: False
290
+ - `use_mps_device`: False
291
+ - `seed`: 42
292
+ - `data_seed`: None
293
+ - `jit_mode_eval`: False
294
+ - `use_ipex`: False
295
+ - `bf16`: False
296
+ - `fp16`: True
297
+ - `fp16_opt_level`: O1
298
+ - `half_precision_backend`: auto
299
+ - `bf16_full_eval`: False
300
+ - `fp16_full_eval`: False
301
+ - `tf32`: None
302
+ - `local_rank`: 0
303
+ - `ddp_backend`: None
304
+ - `tpu_num_cores`: None
305
+ - `tpu_metrics_debug`: False
306
+ - `debug`: []
307
+ - `dataloader_drop_last`: False
308
+ - `dataloader_num_workers`: 0
309
+ - `dataloader_prefetch_factor`: None
310
+ - `past_index`: -1
311
+ - `disable_tqdm`: False
312
+ - `remove_unused_columns`: True
313
+ - `label_names`: None
314
+ - `load_best_model_at_end`: False
315
+ - `ignore_data_skip`: False
316
+ - `fsdp`: []
317
+ - `fsdp_min_num_params`: 0
318
+ - `fsdp_config`: {'min_num_params': 0, 'xla': False, 'xla_fsdp_v2': False, 'xla_fsdp_grad_ckpt': False}
319
+ - `fsdp_transformer_layer_cls_to_wrap`: None
320
+ - `accelerator_config`: {'split_batches': False, 'dispatch_batches': None, 'even_batches': True, 'use_seedable_sampler': True, 'non_blocking': False, 'gradient_accumulation_kwargs': None}
321
+ - `deepspeed`: None
322
+ - `label_smoothing_factor`: 0.0
323
+ - `optim`: adamw_torch
324
+ - `optim_args`: None
325
+ - `adafactor`: False
326
+ - `group_by_length`: False
327
+ - `length_column_name`: length
328
+ - `ddp_find_unused_parameters`: None
329
+ - `ddp_bucket_cap_mb`: None
330
+ - `ddp_broadcast_buffers`: False
331
+ - `dataloader_pin_memory`: True
332
+ - `dataloader_persistent_workers`: False
333
+ - `skip_memory_metrics`: True
334
+ - `use_legacy_prediction_loop`: False
335
+ - `push_to_hub`: False
336
+ - `resume_from_checkpoint`: None
337
+ - `hub_model_id`: None
338
+ - `hub_strategy`: every_save
339
+ - `hub_private_repo`: None
340
+ - `hub_always_push`: False
341
+ - `gradient_checkpointing`: False
342
+ - `gradient_checkpointing_kwargs`: None
343
+ - `include_inputs_for_metrics`: False
344
+ - `include_for_metrics`: []
345
+ - `eval_do_concat_batches`: True
346
+ - `fp16_backend`: auto
347
+ - `push_to_hub_model_id`: None
348
+ - `push_to_hub_organization`: None
349
+ - `mp_parameters`:
350
+ - `auto_find_batch_size`: False
351
+ - `full_determinism`: False
352
+ - `torchdynamo`: None
353
+ - `ray_scope`: last
354
+ - `ddp_timeout`: 1800
355
+ - `torch_compile`: False
356
+ - `torch_compile_backend`: None
357
+ - `torch_compile_mode`: None
358
+ - `include_tokens_per_second`: False
359
+ - `include_num_input_tokens_seen`: False
360
+ - `neftune_noise_alpha`: None
361
+ - `optim_target_modules`: None
362
+ - `batch_eval_metrics`: False
363
+ - `eval_on_start`: False
364
+ - `use_liger_kernel`: False
365
+ - `eval_use_gather_object`: False
366
+ - `average_tokens_across_devices`: False
367
+ - `prompts`: None
368
+ - `batch_sampler`: no_duplicates
369
+ - `multi_dataset_batch_sampler`: proportional
370
+
371
+ </details>
372
+
373
+ ### Training Logs
374
+ | Epoch | Step | Training Loss | Validation Loss | NIST-control-dev_cosine_accuracy | all-nli-test_cosine_accuracy |
375
+ |:-----:|:----:|:-------------:|:---------------:|:--------------------------------:|:----------------------------:|
376
+ | -1 | -1 | - | - | 0.6563 | - |
377
+ | 0.16 | 100 | 2.6751 | 4.0892 | 0.6661 | - |
378
+ | 0.32 | 200 | 0.9272 | 3.8595 | 0.7026 | - |
379
+ | 0.48 | 300 | 0.5711 | 3.8835 | 0.6897 | - |
380
+ | 0.64 | 400 | 0.3905 | 3.7548 | 0.7071 | - |
381
+ | 0.8 | 500 | 0.043 | 3.8021 | 0.7035 | - |
382
+ | 0.96 | 600 | 0.0407 | 3.8115 | 0.7049 | - |
383
+ | -1 | -1 | - | - | - | 0.7318 |
384
+
385
+
386
+ ### Framework Versions
387
+ - Python: 3.13.5
388
+ - Sentence Transformers: 4.1.0
389
+ - Transformers: 4.52.4
390
+ - PyTorch: 2.7.1+cpu
391
+ - Accelerate: 1.8.1
392
+ - Datasets: 2.15.0
393
+ - Tokenizers: 0.21.2
394
+
395
+ ## Citation
396
+
397
+ ### BibTeX
398
+
399
+ #### Sentence Transformers
400
+ ```bibtex
401
+ @inproceedings{reimers-2019-sentence-bert,
402
+ title = "Sentence-BERT: Sentence Embeddings using Siamese BERT-Networks",
403
+ author = "Reimers, Nils and Gurevych, Iryna",
404
+ booktitle = "Proceedings of the 2019 Conference on Empirical Methods in Natural Language Processing",
405
+ month = "11",
406
+ year = "2019",
407
+ publisher = "Association for Computational Linguistics",
408
+ url = "https://arxiv.org/abs/1908.10084",
409
+ }
410
+ ```
411
+
412
+ #### TripletLoss
413
+ ```bibtex
414
+ @misc{hermans2017defense,
415
+ title={In Defense of the Triplet Loss for Person Re-Identification},
416
+ author={Alexander Hermans and Lucas Beyer and Bastian Leibe},
417
+ year={2017},
418
+ eprint={1703.07737},
419
+ archivePrefix={arXiv},
420
+ primaryClass={cs.CV}
421
+ }
422
+ ```
423
+
424
+ <!--
425
+ ## Glossary
426
+
427
+ *Clearly define terms in order to be accessible across audiences.*
428
+ -->
429
+
430
+ <!--
431
+ ## Model Card Authors
432
+
433
+ *Lists the people who create the model card, providing recognition and accountability for the detailed work that goes into its construction.*
434
+ -->
435
+
436
+ <!--
437
+ ## Model Card Contact
438
+
439
+ *Provides a way for people who have updates to the Model Card, suggestions, or questions, to contact the Model Card authors.*
440
+ -->
HPEControlMapper/models/mpnet-base-control-triplet/final/config.json ADDED
@@ -0,0 +1,23 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "architectures": [
3
+ "MPNetModel"
4
+ ],
5
+ "attention_probs_dropout_prob": 0.1,
6
+ "bos_token_id": 0,
7
+ "eos_token_id": 2,
8
+ "hidden_act": "gelu",
9
+ "hidden_dropout_prob": 0.1,
10
+ "hidden_size": 768,
11
+ "initializer_range": 0.02,
12
+ "intermediate_size": 3072,
13
+ "layer_norm_eps": 1e-05,
14
+ "max_position_embeddings": 514,
15
+ "model_type": "mpnet",
16
+ "num_attention_heads": 12,
17
+ "num_hidden_layers": 12,
18
+ "pad_token_id": 1,
19
+ "relative_attention_num_buckets": 32,
20
+ "torch_dtype": "float32",
21
+ "transformers_version": "4.52.4",
22
+ "vocab_size": 30527
23
+ }
HPEControlMapper/models/mpnet-base-control-triplet/final/config_sentence_transformers.json ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "__version__": {
3
+ "sentence_transformers": "4.1.0",
4
+ "transformers": "4.52.4",
5
+ "pytorch": "2.7.1+cpu"
6
+ },
7
+ "prompts": {},
8
+ "default_prompt_name": null,
9
+ "similarity_fn_name": "cosine"
10
+ }
HPEControlMapper/models/mpnet-base-control-triplet/final/model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:023ce0c063565c38d46015c3e97cbd919a210aeac663deb5384d5802e0e6a244
3
+ size 437967672
HPEControlMapper/models/mpnet-base-control-triplet/final/modules.json ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [
2
+ {
3
+ "idx": 0,
4
+ "name": "0",
5
+ "path": "",
6
+ "type": "sentence_transformers.models.Transformer"
7
+ },
8
+ {
9
+ "idx": 1,
10
+ "name": "1",
11
+ "path": "1_Pooling",
12
+ "type": "sentence_transformers.models.Pooling"
13
+ }
14
+ ]
HPEControlMapper/models/mpnet-base-control-triplet/final/sentence_bert_config.json ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ {
2
+ "max_seq_length": 512,
3
+ "do_lower_case": false
4
+ }
HPEControlMapper/models/mpnet-base-control-triplet/final/special_tokens_map.json ADDED
@@ -0,0 +1,51 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "bos_token": {
3
+ "content": "<s>",
4
+ "lstrip": false,
5
+ "normalized": false,
6
+ "rstrip": false,
7
+ "single_word": false
8
+ },
9
+ "cls_token": {
10
+ "content": "<s>",
11
+ "lstrip": false,
12
+ "normalized": true,
13
+ "rstrip": false,
14
+ "single_word": false
15
+ },
16
+ "eos_token": {
17
+ "content": "</s>",
18
+ "lstrip": false,
19
+ "normalized": false,
20
+ "rstrip": false,
21
+ "single_word": false
22
+ },
23
+ "mask_token": {
24
+ "content": "<mask>",
25
+ "lstrip": true,
26
+ "normalized": false,
27
+ "rstrip": false,
28
+ "single_word": false
29
+ },
30
+ "pad_token": {
31
+ "content": "<pad>",
32
+ "lstrip": false,
33
+ "normalized": false,
34
+ "rstrip": false,
35
+ "single_word": false
36
+ },
37
+ "sep_token": {
38
+ "content": "</s>",
39
+ "lstrip": false,
40
+ "normalized": true,
41
+ "rstrip": false,
42
+ "single_word": false
43
+ },
44
+ "unk_token": {
45
+ "content": "[UNK]",
46
+ "lstrip": false,
47
+ "normalized": false,
48
+ "rstrip": false,
49
+ "single_word": false
50
+ }
51
+ }
HPEControlMapper/models/mpnet-base-control-triplet/final/tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
HPEControlMapper/models/mpnet-base-control-triplet/final/tokenizer_config.json ADDED
@@ -0,0 +1,66 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "added_tokens_decoder": {
3
+ "0": {
4
+ "content": "<s>",
5
+ "lstrip": false,
6
+ "normalized": false,
7
+ "rstrip": false,
8
+ "single_word": false,
9
+ "special": true
10
+ },
11
+ "1": {
12
+ "content": "<pad>",
13
+ "lstrip": false,
14
+ "normalized": false,
15
+ "rstrip": false,
16
+ "single_word": false,
17
+ "special": true
18
+ },
19
+ "2": {
20
+ "content": "</s>",
21
+ "lstrip": false,
22
+ "normalized": false,
23
+ "rstrip": false,
24
+ "single_word": false,
25
+ "special": true
26
+ },
27
+ "3": {
28
+ "content": "<unk>",
29
+ "lstrip": false,
30
+ "normalized": true,
31
+ "rstrip": false,
32
+ "single_word": false,
33
+ "special": true
34
+ },
35
+ "104": {
36
+ "content": "[UNK]",
37
+ "lstrip": false,
38
+ "normalized": false,
39
+ "rstrip": false,
40
+ "single_word": false,
41
+ "special": true
42
+ },
43
+ "30526": {
44
+ "content": "<mask>",
45
+ "lstrip": true,
46
+ "normalized": false,
47
+ "rstrip": false,
48
+ "single_word": false,
49
+ "special": true
50
+ }
51
+ },
52
+ "bos_token": "<s>",
53
+ "clean_up_tokenization_spaces": false,
54
+ "cls_token": "<s>",
55
+ "do_lower_case": true,
56
+ "eos_token": "</s>",
57
+ "extra_special_tokens": {},
58
+ "mask_token": "<mask>",
59
+ "model_max_length": 512,
60
+ "pad_token": "<pad>",
61
+ "sep_token": "</s>",
62
+ "strip_accents": null,
63
+ "tokenize_chinese_chars": true,
64
+ "tokenizer_class": "MPNetTokenizer",
65
+ "unk_token": "[UNK]"
66
+ }
HPEControlMapper/models/mpnet-base-control-triplet/final/vocab.txt ADDED
The diff for this file is too large to render. See raw diff