jjstuart committed on Dec 13, 2025

Commit

861e0e8

verified ·

1 Parent(s): 6b9da8a

Upload folder using huggingface_hub

Browse files

Files changed (38) hide show

README.md +62 -67
checkpoints/checkpoint-1328/1_Pooling/config.json +10 -0
checkpoints/checkpoint-1328/README.md +466 -0
checkpoints/checkpoint-1328/config.json +25 -0
checkpoints/checkpoint-1328/config_sentence_transformers.json +14 -0
checkpoints/checkpoint-1328/model.safetensors +3 -0
checkpoints/checkpoint-1328/modules.json +20 -0
checkpoints/checkpoint-1328/optimizer.pt +3 -0
checkpoints/checkpoint-1328/rng_state.pth +3 -0
checkpoints/checkpoint-1328/scheduler.pt +3 -0
checkpoints/checkpoint-1328/sentence_bert_config.json +4 -0
checkpoints/checkpoint-1328/special_tokens_map.json +37 -0
checkpoints/checkpoint-1328/tokenizer.json +0 -0
checkpoints/checkpoint-1328/tokenizer_config.json +65 -0
checkpoints/checkpoint-1328/trainer_state.json +112 -0
checkpoints/checkpoint-1328/training_args.bin +3 -0
checkpoints/checkpoint-1328/vocab.txt +0 -0
checkpoints/checkpoint-1660/1_Pooling/config.json +10 -0
checkpoints/checkpoint-1660/README.md +469 -0
checkpoints/checkpoint-1660/config.json +25 -0
checkpoints/checkpoint-1660/config_sentence_transformers.json +14 -0
checkpoints/checkpoint-1660/model.safetensors +3 -0
checkpoints/checkpoint-1660/modules.json +20 -0
checkpoints/checkpoint-1660/optimizer.pt +3 -0
checkpoints/checkpoint-1660/rng_state.pth +3 -0
checkpoints/checkpoint-1660/scheduler.pt +3 -0
checkpoints/checkpoint-1660/sentence_bert_config.json +4 -0
checkpoints/checkpoint-1660/special_tokens_map.json +37 -0
checkpoints/checkpoint-1660/tokenizer.json +0 -0
checkpoints/checkpoint-1660/tokenizer_config.json +65 -0
checkpoints/checkpoint-1660/trainer_state.json +135 -0
checkpoints/checkpoint-1660/training_args.bin +3 -0
checkpoints/checkpoint-1660/vocab.txt +0 -0
checkpoints/eval/triplet_evaluation_retrieval-eval_results.csv +10 -25
checkpoints/runs/Dec13_16-18-12_rego-trainer-0/events.out.tfevents.1765642693.rego-trainer-0.4819.0 +3 -0
eval/triplet_evaluation_retrieval-eval_results.csv +5 -12
model.safetensors +1 -1
training_info.json +2 -2

README.md CHANGED Viewed

@@ -5,20 +5,21 @@ tags:
 - feature-extraction
 - dense
 - generated_from_trainer
-- dataset_size:36065
 - loss:TripletLoss
 base_model: sentence-transformers/all-MiniLM-L6-v2
 widget:
-- source_sentence: Check that sbom component has empty version field.
   sentences:
-  - 'Helper: lib.k8s.name_version
-    Signature: name_version(resource)
     Description: '
-  - 'Helper: lib.k8s.name
-    Signature: name(resource)
     Description: '
   - 'Helper: lib.k8s.name
@@ -26,84 +27,78 @@ widget:
     Signature: name(resource)
     Description: '
-- source_sentence: How can I verify that an image manifest is accessible before allowing
-    the operation to proceed?
   sentences:
-  - 'Helper: lib.result_helper_with_term
-    Signature: result_helper_with_term(chain, failure_sprintf_params, term)
     Description: '
-  - 'Helper: lib.to_set
-    Signature: to_set(arr)
     Description: '
-  - 'Helper: lib.result_helper
-    Signature: result_helper(chain, failure_sprintf_params)
     Description: '
-- source_sentence: verify that how can i verify that an attestation created by the
-    rhtap multi-ci build pipeline is present?
   sentences:
-  - 'Helper: lib.k8s.version
-    Signature: version(resource)
     Description: '
-  - 'Helper: lib.k8s.name
-    Signature: name(resource)
     Description: '
-  - 'Helper: lib.tekton.tasks
-    Signature: tasks(obj)
     Description: '
-- source_sentence: policy for create a rego deny rule to the tekton task used specifies
-    an invalid pipeline. the task is annotated with `build.appstudio.redhat.com/pipeline`
-    annotation, which must be in the set of `allowed_rpm_build_pipelines` in the rule
-    data
   sentences:
-  - 'Helper: lib.tekton.tasks
-    Signature: tasks(obj)
     Description: '
-  - 'Helper: lib.tekton.untagged_task_references
-    Signature: untagged_task_references(tasks)
     Description: '
-  - 'Helper: lib.pipelinerun_attestations
-    Signature: pipelinerun_attestations
     Description: '
-- source_sentence: ensure create a rule that verifies all tekton tasks use the latest
-    known task reference and reports warnings based on the task_expiry_warning_days
-    configuration setting.
   sentences:
-  - 'Helper: lib.tekton.bundle
-    Signature: bundle(task)
     Description: '
-  - 'Helper: lib.tekton.untagged_task_references
-    Signature: untagged_task_references(tasks)
     Description: '
-  - 'Path: $.subject[*].digest.sha256
-    Description: SHA256 digest of the built artifact (hex-encoded, 64 chars). Used
-    to verify artifact integrity
-    Keywords: sha256, digest, hash, artifact integrity, verification, image digest
-    Attestation: slsa_provenance_v02'
 pipeline_tag: sentence-similarity
 library_name: sentence-transformers
 metrics:
@@ -119,7 +114,7 @@ model-index:
       type: retrieval-eval
     metrics:
     - type: cosine_accuracy
-      value: 0.9762973785400391
       name: Cosine Accuracy
 ---
@@ -173,9 +168,9 @@ from sentence_transformers import SentenceTransformer
 model = SentenceTransformer("sentence_transformers_model_id")
 # Run inference
 sentences = [
-    'ensure create a rule that verifies all tekton tasks use the latest known task reference and reports warnings based on the task_expiry_warning_days configuration setting.',
-    'Helper: lib.tekton.bundle\nSignature: bundle(task)\nDescription: ',
-    'Helper: lib.tekton.untagged_task_references\nSignature: untagged_task_references(tasks)\nDescription: ',
 ]
 embeddings = model.encode(sentences)
 print(embeddings.shape)
@@ -184,9 +179,9 @@ print(embeddings.shape)
 # Get the similarity scores for the embeddings
 similarities = model.similarity(embeddings, embeddings)
 print(similarities)
-# tensor([[ 1.0000,  0.2491, -0.6356],
-#         [ 0.2491,  1.0000, -0.2850],
-#         [-0.6356, -0.2850,  1.0000]])
 ```
 <!--
@@ -224,7 +219,7 @@ You can finetune this model on your own dataset.
 | Metric              | Value      |
 |:--------------------|:-----------|
-| **cosine_accuracy** | **0.9763** |
 <!--
 ## Bias, Risks and Limitations
@@ -244,19 +239,19 @@ You can finetune this model on your own dataset.
 #### Unnamed Dataset
-* Size: 36,065 training samples
 * Columns: <code>sentence_0</code>, <code>sentence_1</code>, and <code>sentence_2</code>
 * Approximate statistics based on the first 1000 samples:
   |         | sentence_0                                                                         | sentence_1                                                                          | sentence_2                                                                          |
   |:--------|:-----------------------------------------------------------------------------------|:------------------------------------------------------------------------------------|:------------------------------------------------------------------------------------|
   | type    | string                                                                             | string                                                                              | string                                                                              |
-  | details | <ul><li>min: 4 tokens</li><li>mean: 31.19 tokens</li><li>max: 161 tokens</li></ul> | <ul><li>min: 21 tokens</li><li>mean: 29.05 tokens</li><li>max: 125 tokens</li></ul> | <ul><li>min: 21 tokens</li><li>mean: 27.65 tokens</li><li>max: 125 tokens</li></ul> |
 * Samples:
-  | sentence_0                                                                                                                                                                                                                                                                               | sentence_1                                                                                                                                                                                                    | sentence_2                                                                                                                                                                                                                |
-  |:-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|:--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|:--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
-  | <code>I need to for each of the packages fetched by hermeto which define externalreferences, verify they are allowed based on the allowed_package_sources rule data key. by default, allowed_package_sources is empty, which means no components with such references are allowed</code> | <code>Helper: lib.sbom.spdx_sboms<br>Signature: spdx_sboms<br>Description: </code>                                                                                                                            | <code>Helper: lib.sbom.cyclonedx_sboms<br>Signature: cyclonedx_sboms<br>Description: </code>                                                                                                                              |
-  | <code>policy for how can i verify if optional labels are present in an image using the optional_labels or fbc_optional_labels rule data keys?</code>                                                                                                                                     | <code>Helper: lib.image.parse<br>Signature: parse(ref)<br>Description: </code>                                                                                                                                | <code>Helper: lib.image.str<br>Signature: str(d)<br>Description: </code>                                                                                                                                                  |
-  | <code>create a policy that cyclonedx component is missing bom-ref.</code>                                                                                                                                                                                                                | <code>Path: $.components[*].licenses[*].license.id<br>Description: SPDX license ID for the component<br>Keywords: license, sbom, cyclonedx, licensing, compliance, spdx<br>Attestation: cyclonedx_sbom</code> | <code>Path: $.components[*].purl<br>Description: Package URL (purl) for the component. Unique identifier in purl format<br>Keywords: purl, package url, sbom, cyclonedx, identifier<br>Attestation: cyclonedx_sbom</code> |
 * Loss: [<code>TripletLoss</code>](https://sbert.net/docs/package_reference/sentence_transformer/losses.html#tripletloss) with these parameters:
   ```json
   {
@@ -402,13 +397,13 @@ You can finetune this model on your own dataset.
 ### Training Logs
 | Epoch  | Step | Training Loss | retrieval-eval_cosine_accuracy |
 |:------:|:----:|:-------------:|:------------------------------:|
-| 0.5    | 141  | -             | 0.9608                         |
-| 1.0    | 282  | -             | 0.9713                         |
-| 1.5    | 423  | -             | 0.9753                         |
-| 1.7730 | 500  | 0.0758        | -                              |
-| 2.0    | 564  | -             | 0.9750                         |
-| 2.5    | 705  | -             | 0.9748                         |
-| 3.0    | 846  | -             | 0.9763                         |
 ### Framework Versions

 - feature-extraction
 - dense
 - generated_from_trainer
+- dataset_size:42459
 - loss:TripletLoss
 base_model: sentence-transformers/all-MiniLM-L6-v2
 widget:
+- source_sentence: policy for how can i verify if a tekton task version is still supported
+    by checking for the build.appstudio.redhat.com/expires-on annotation?
   sentences:
+  - 'Helper: lib.to_array
+    Signature: to_array(s)
     Description: '
+  - 'Helper: lib.pipelinerun_attestations
+    Signature: pipelinerun_attestations
     Description: '
   - 'Helper: lib.k8s.name
     Signature: name(resource)
     Description: '
+- source_sentence: how to check attestation is missing statement field.
   sentences:
+  - 'Helper: lib.k8s.name
+    Signature: name(resource)
     Description: '
+  - 'Helper: lib.tekton.untrusted_task_refs
+    Signature: untrusted_task_refs(tasks)
     Description: '
+  - 'Helper: lib.k8s.version
+    Signature: version(resource)
     Description: '
+- source_sentence: I need to ensure the operators.openshift.io/valid-subscription
+    annotation in the ClusterServiceVersion manifest contains a valid JSON encoded
+    non-empty array of strings.
   sentences:
+  - 'Helper: lib.to_array
+    Signature: to_array(s)
     Description: '
+  - 'Helper: lib.image.equal_ref
+    Signature: equal_ref(ref1, ref2)
     Description: '
+  - 'Helper: lib.result_helper
+    Signature: result_helper(chain, failure_sprintf_params)
     Description: '
+- source_sentence: write a rule to deny approval for an container image with non-unique
+    RPM names
   sentences:
+  - 'Helper: lib.result_helper
+    Signature: result_helper(chain, failure_sprintf_params)
     Description: '
+  - 'Helper: lib.to_set
+    Signature: to_set(arr)
     Description: '
+  - 'Helper: lib.rule_data_defaults
+    Signature: rule_data_defaults
     Description: '
+- source_sentence: check if i need to validate that spdx package is an operating system
+    component.
   sentences:
+  - 'Helper: lib.to_set
+    Signature: to_set(arr)
     Description: '
+  - 'Helper: lib.rule_data_defaults
+    Signature: rule_data_defaults
     Description: '
+  - 'Helper: lib.result_helper
+    Signature: result_helper(chain, failure_sprintf_params)
+    Description: '
 pipeline_tag: sentence-similarity
 library_name: sentence-transformers
 metrics:
       type: retrieval-eval
     metrics:
     - type: cosine_accuracy
+      value: 0.9834675788879395
       name: Cosine Accuracy
 ---
 model = SentenceTransformer("sentence_transformers_model_id")
 # Run inference
 sentences = [
+    'check if i need to validate that spdx package is an operating system component.',
+    'Helper: lib.result_helper\nSignature: result_helper(chain, failure_sprintf_params)\nDescription: ',
+    'Helper: lib.to_set\nSignature: to_set(arr)\nDescription: ',
 ]
 embeddings = model.encode(sentences)
 print(embeddings.shape)
 # Get the similarity scores for the embeddings
 similarities = model.similarity(embeddings, embeddings)
 print(similarities)
+# tensor([[ 1.0000,  0.4979, -0.4443],
+#         [ 0.4979,  1.0000, -0.4918],
+#         [-0.4443, -0.4918,  1.0000]])
 ```
 <!--
 | Metric              | Value      |
 |:--------------------|:-----------|
+| **cosine_accuracy** | **0.9835** |
 <!--
 ## Bias, Risks and Limitations
 #### Unnamed Dataset
+* Size: 42,459 training samples
 * Columns: <code>sentence_0</code>, <code>sentence_1</code>, and <code>sentence_2</code>
 * Approximate statistics based on the first 1000 samples:
   |         | sentence_0                                                                         | sentence_1                                                                          | sentence_2                                                                          |
   |:--------|:-----------------------------------------------------------------------------------|:------------------------------------------------------------------------------------|:------------------------------------------------------------------------------------|
   | type    | string                                                                             | string                                                                              | string                                                                              |
+  | details | <ul><li>min: 4 tokens</li><li>mean: 30.48 tokens</li><li>max: 159 tokens</li></ul> | <ul><li>min: 21 tokens</li><li>mean: 29.64 tokens</li><li>max: 125 tokens</li></ul> | <ul><li>min: 21 tokens</li><li>mean: 27.15 tokens</li><li>max: 125 tokens</li></ul> |
 * Samples:
+  | sentence_0                                                                                     | sentence_1                                                                                       | sentence_2                                                                                                              |
+  |:-----------------------------------------------------------------------------------------------|:-------------------------------------------------------------------------------------------------|:------------------------------------------------------------------------------------------------------------------------|
+  | <code>I need to ensure that only images from specific registries are used in our policy</code> | <code>Helper: lib.image.str<br>Signature: str(d)<br>Description: </code>                         | <code>Helper: lib.konflux.is_validating_image_index<br>Signature: is_validating_image_index<br>Description: </code>     |
+  | <code>check if check warn</code>                                                               | <code>Helper: lib.tekton.expiry_of<br>Signature: expiry_of(task)<br>Description: </code>         | <code>Helper: lib.tekton.untagged_task_references<br>Signature: untagged_task_references(tasks)<br>Description: </code> |
+  | <code>verify that task has an expiry date set.</code>                                          | <code>Helper: lib.tekton.task_param<br>Signature: task_param(task, name)<br>Description: </code> | <code>Helper: lib.tekton.untagged_task_references<br>Signature: untagged_task_references(tasks)<br>Description: </code> |
 * Loss: [<code>TripletLoss</code>](https://sbert.net/docs/package_reference/sentence_transformer/losses.html#tripletloss) with these parameters:
   ```json
   {
 ### Training Logs
 | Epoch  | Step | Training Loss | retrieval-eval_cosine_accuracy |
 |:------:|:----:|:-------------:|:------------------------------:|
+| 0.5    | 166  | -             | 0.9731                         |
+| 1.0    | 332  | -             | 0.9786                         |
+| 1.5    | 498  | -             | 0.9794                         |
+| 1.5060 | 500  | 0.0784        | -                              |
+| 2.0    | 664  | -             | 0.9816                         |
+| 2.5    | 830  | -             | 0.9826                         |
+| 3.0    | 996  | -             | 0.9835                         |
 ### Framework Versions

checkpoints/checkpoint-1328/1_Pooling/config.json ADDED Viewed

	@@ -0,0 +1,10 @@

+{
+    "word_embedding_dimension": 384,
+    "pooling_mode_cls_token": false,
+    "pooling_mode_mean_tokens": true,
+    "pooling_mode_max_tokens": false,
+    "pooling_mode_mean_sqrt_len_tokens": false,
+    "pooling_mode_weightedmean_tokens": false,
+    "pooling_mode_lasttoken": false,
+    "include_prompt": true
+}

checkpoints/checkpoint-1328/README.md ADDED Viewed

	@@ -0,0 +1,466 @@

+---
+tags:
+- sentence-transformers
+- sentence-similarity
+- feature-extraction
+- dense
+- generated_from_trainer
+- dataset_size:42459
+- loss:TripletLoss
+base_model: sentence-transformers/all-MiniLM-L6-v2
+widget:
+- source_sentence: policy for how can i verify if a tekton task version is still supported
+    by checking for the build.appstudio.redhat.com/expires-on annotation?
+  sentences:
+  - 'Helper: lib.to_array
+    Signature: to_array(s)
+    Description: '
+  - 'Helper: lib.pipelinerun_attestations
+    Signature: pipelinerun_attestations
+    Description: '
+  - 'Helper: lib.k8s.name
+    Signature: name(resource)
+    Description: '
+- source_sentence: how to check attestation is missing statement field.
+  sentences:
+  - 'Helper: lib.k8s.name
+    Signature: name(resource)
+    Description: '
+  - 'Helper: lib.tekton.untrusted_task_refs
+    Signature: untrusted_task_refs(tasks)
+    Description: '
+  - 'Helper: lib.k8s.version
+    Signature: version(resource)
+    Description: '
+- source_sentence: I need to ensure the operators.openshift.io/valid-subscription
+    annotation in the ClusterServiceVersion manifest contains a valid JSON encoded
+    non-empty array of strings.
+  sentences:
+  - 'Helper: lib.to_array
+    Signature: to_array(s)
+    Description: '
+  - 'Helper: lib.image.equal_ref
+    Signature: equal_ref(ref1, ref2)
+    Description: '
+  - 'Helper: lib.result_helper
+    Signature: result_helper(chain, failure_sprintf_params)
+    Description: '
+- source_sentence: write a rule to deny approval for an container image with non-unique
+    RPM names
+  sentences:
+  - 'Helper: lib.result_helper
+    Signature: result_helper(chain, failure_sprintf_params)
+    Description: '
+  - 'Helper: lib.to_set
+    Signature: to_set(arr)
+    Description: '
+  - 'Helper: lib.rule_data_defaults
+    Signature: rule_data_defaults
+    Description: '
+- source_sentence: check if i need to validate that spdx package is an operating system
+    component.
+  sentences:
+  - 'Helper: lib.to_set
+    Signature: to_set(arr)
+    Description: '
+  - 'Helper: lib.rule_data_defaults
+    Signature: rule_data_defaults
+    Description: '
+  - 'Helper: lib.result_helper
+    Signature: result_helper(chain, failure_sprintf_params)
+    Description: '
+pipeline_tag: sentence-similarity
+library_name: sentence-transformers
+metrics:
+- cosine_accuracy
+model-index:
+- name: SentenceTransformer based on sentence-transformers/all-MiniLM-L6-v2
+  results:
+  - task:
+      type: triplet
+      name: Triplet
+    dataset:
+      name: retrieval eval
+      type: retrieval-eval
+    metrics:
+    - type: cosine_accuracy
+      value: 0.9834675788879395
+      name: Cosine Accuracy
+---
+# SentenceTransformer based on sentence-transformers/all-MiniLM-L6-v2
+This is a [sentence-transformers](https://www.SBERT.net) model finetuned from [sentence-transformers/all-MiniLM-L6-v2](https://huggingface.co/sentence-transformers/all-MiniLM-L6-v2). It maps sentences & paragraphs to a 384-dimensional dense vector space and can be used for semantic textual similarity, semantic search, paraphrase mining, text classification, clustering, and more.
+## Model Details
+### Model Description
+- **Model Type:** Sentence Transformer
+- **Base model:** [sentence-transformers/all-MiniLM-L6-v2](https://huggingface.co/sentence-transformers/all-MiniLM-L6-v2) <!-- at revision c9745ed1d9f207416be6d2e6f8de32d1f16199bf -->
+- **Maximum Sequence Length:** 256 tokens
+- **Output Dimensionality:** 384 dimensions
+- **Similarity Function:** Cosine Similarity
+<!-- - **Training Dataset:** Unknown -->
+<!-- - **Language:** Unknown -->
+<!-- - **License:** Unknown -->
+### Model Sources
+- **Documentation:** [Sentence Transformers Documentation](https://sbert.net)
+- **Repository:** [Sentence Transformers on GitHub](https://github.com/huggingface/sentence-transformers)
+- **Hugging Face:** [Sentence Transformers on Hugging Face](https://huggingface.co/models?library=sentence-transformers)
+### Full Model Architecture
+```
+SentenceTransformer(
+  (0): Transformer({'max_seq_length': 256, 'do_lower_case': False, 'architecture': 'BertModel'})
+  (1): Pooling({'word_embedding_dimension': 384, 'pooling_mode_cls_token': False, 'pooling_mode_mean_tokens': True, 'pooling_mode_max_tokens': False, 'pooling_mode_mean_sqrt_len_tokens': False, 'pooling_mode_weightedmean_tokens': False, 'pooling_mode_lasttoken': False, 'include_prompt': True})
+  (2): Normalize()
+)
+```
+## Usage
+### Direct Usage (Sentence Transformers)
+First install the Sentence Transformers library:
+```bash
+pip install -U sentence-transformers
+```
+Then you can load this model and run inference.
+```python
+from sentence_transformers import SentenceTransformer
+# Download from the 🤗 Hub
+model = SentenceTransformer("sentence_transformers_model_id")
+# Run inference
+sentences = [
+    'check if i need to validate that spdx package is an operating system component.',
+    'Helper: lib.result_helper\nSignature: result_helper(chain, failure_sprintf_params)\nDescription: ',
+    'Helper: lib.to_set\nSignature: to_set(arr)\nDescription: ',
+]
+embeddings = model.encode(sentences)
+print(embeddings.shape)
+# [3, 384]
+# Get the similarity scores for the embeddings
+similarities = model.similarity(embeddings, embeddings)
+print(similarities)
+# tensor([[ 1.0000,  0.5582, -0.4662],
+#         [ 0.5582,  1.0000, -0.5014],
+#         [-0.4662, -0.5014,  1.0000]])
+```
+<!--
+### Direct Usage (Transformers)
+<details><summary>Click to see the direct usage in Transformers</summary>
+</details>
+-->
+<!--
+### Downstream Usage (Sentence Transformers)
+You can finetune this model on your own dataset.
+<details><summary>Click to expand</summary>
+</details>
+-->
+<!--
+### Out-of-Scope Use
+*List how the model may foreseeably be misused and address what users ought not to do with the model.*
+-->
+## Evaluation
+### Metrics
+#### Triplet
+* Dataset: `retrieval-eval`
+* Evaluated with [<code>TripletEvaluator</code>](https://sbert.net/docs/package_reference/sentence_transformer/evaluation.html#sentence_transformers.evaluation.TripletEvaluator)
+| Metric              | Value      |
+|:--------------------|:-----------|
+| **cosine_accuracy** | **0.9835** |
+<!--
+## Bias, Risks and Limitations
+*What are the known or foreseeable issues stemming from this model? You could also flag here known failure cases or weaknesses of the model.*
+-->
+<!--
+### Recommendations
+*What are recommendations with respect to the foreseeable issues? For example, filtering explicit content.*
+-->
+## Training Details
+### Training Dataset
+#### Unnamed Dataset
+* Size: 42,459 training samples
+* Columns: <code>sentence_0</code>, <code>sentence_1</code>, and <code>sentence_2</code>
+* Approximate statistics based on the first 1000 samples:
+  |         | sentence_0                                                                         | sentence_1                                                                          | sentence_2                                                                          |
+  |:--------|:-----------------------------------------------------------------------------------|:------------------------------------------------------------------------------------|:------------------------------------------------------------------------------------|
+  | type    | string                                                                             | string                                                                              | string                                                                              |
+  | details | <ul><li>min: 4 tokens</li><li>mean: 30.48 tokens</li><li>max: 159 tokens</li></ul> | <ul><li>min: 21 tokens</li><li>mean: 29.64 tokens</li><li>max: 125 tokens</li></ul> | <ul><li>min: 21 tokens</li><li>mean: 27.15 tokens</li><li>max: 125 tokens</li></ul> |
+* Samples:
+  | sentence_0                                                                                     | sentence_1                                                                                       | sentence_2                                                                                                              |
+  |:-----------------------------------------------------------------------------------------------|:-------------------------------------------------------------------------------------------------|:------------------------------------------------------------------------------------------------------------------------|
+  | <code>I need to ensure that only images from specific registries are used in our policy</code> | <code>Helper: lib.image.str<br>Signature: str(d)<br>Description: </code>                         | <code>Helper: lib.konflux.is_validating_image_index<br>Signature: is_validating_image_index<br>Description: </code>     |
+  | <code>check if check warn</code>                                                               | <code>Helper: lib.tekton.expiry_of<br>Signature: expiry_of(task)<br>Description: </code>         | <code>Helper: lib.tekton.untagged_task_references<br>Signature: untagged_task_references(tasks)<br>Description: </code> |
+  | <code>verify that task has an expiry date set.</code>                                          | <code>Helper: lib.tekton.task_param<br>Signature: task_param(task, name)<br>Description: </code> | <code>Helper: lib.tekton.untagged_task_references<br>Signature: untagged_task_references(tasks)<br>Description: </code> |
+* Loss: [<code>TripletLoss</code>](https://sbert.net/docs/package_reference/sentence_transformer/losses.html#tripletloss) with these parameters:
+  ```json
+  {
+      "distance_metric": "TripletDistanceMetric.COSINE",
+      "triplet_margin": 0.5
+  }
+  ```
+### Training Hyperparameters
+#### Non-Default Hyperparameters
+- `eval_strategy`: steps
+- `per_device_train_batch_size`: 128
+- `per_device_eval_batch_size`: 128
+- `num_train_epochs`: 5
+- `multi_dataset_batch_sampler`: round_robin
+#### All Hyperparameters
+<details><summary>Click to expand</summary>
+- `overwrite_output_dir`: False
+- `do_predict`: False
+- `eval_strategy`: steps
+- `prediction_loss_only`: True
+- `per_device_train_batch_size`: 128
+- `per_device_eval_batch_size`: 128
+- `per_gpu_train_batch_size`: None
+- `per_gpu_eval_batch_size`: None
+- `gradient_accumulation_steps`: 1
+- `eval_accumulation_steps`: None
+- `torch_empty_cache_steps`: None
+- `learning_rate`: 5e-05
+- `weight_decay`: 0.0
+- `adam_beta1`: 0.9
+- `adam_beta2`: 0.999
+- `adam_epsilon`: 1e-08
+- `max_grad_norm`: 1
+- `num_train_epochs`: 5
+- `max_steps`: -1
+- `lr_scheduler_type`: linear
+- `lr_scheduler_kwargs`: {}
+- `warmup_ratio`: 0.0
+- `warmup_steps`: 0
+- `log_level`: passive
+- `log_level_replica`: warning
+- `log_on_each_node`: True
+- `logging_nan_inf_filter`: True
+- `save_safetensors`: True
+- `save_on_each_node`: False
+- `save_only_model`: False
+- `restore_callback_states_from_checkpoint`: False
+- `no_cuda`: False
+- `use_cpu`: False
+- `use_mps_device`: False
+- `seed`: 42
+- `data_seed`: None
+- `jit_mode_eval`: False
+- `bf16`: False
+- `fp16`: False
+- `fp16_opt_level`: O1
+- `half_precision_backend`: auto
+- `bf16_full_eval`: False
+- `fp16_full_eval`: False
+- `tf32`: None
+- `local_rank`: 0
+- `ddp_backend`: None
+- `tpu_num_cores`: None
+- `tpu_metrics_debug`: False
+- `debug`: []
+- `dataloader_drop_last`: False
+- `dataloader_num_workers`: 0
+- `dataloader_prefetch_factor`: None
+- `past_index`: -1
+- `disable_tqdm`: False
+- `remove_unused_columns`: True
+- `label_names`: None
+- `load_best_model_at_end`: False
+- `ignore_data_skip`: False
+- `fsdp`: []
+- `fsdp_min_num_params`: 0
+- `fsdp_config`: {'min_num_params': 0, 'xla': False, 'xla_fsdp_v2': False, 'xla_fsdp_grad_ckpt': False}
+- `fsdp_transformer_layer_cls_to_wrap`: None
+- `accelerator_config`: {'split_batches': False, 'dispatch_batches': None, 'even_batches': True, 'use_seedable_sampler': True, 'non_blocking': False, 'gradient_accumulation_kwargs': None}
+- `parallelism_config`: None
+- `deepspeed`: None
+- `label_smoothing_factor`: 0.0
+- `optim`: adamw_torch
+- `optim_args`: None
+- `adafactor`: False
+- `group_by_length`: False
+- `length_column_name`: length
+- `project`: huggingface
+- `trackio_space_id`: trackio
+- `ddp_find_unused_parameters`: None
+- `ddp_bucket_cap_mb`: None
+- `ddp_broadcast_buffers`: False
+- `dataloader_pin_memory`: True
+- `dataloader_persistent_workers`: False
+- `skip_memory_metrics`: True
+- `use_legacy_prediction_loop`: False
+- `push_to_hub`: False
+- `resume_from_checkpoint`: None
+- `hub_model_id`: None
+- `hub_strategy`: every_save
+- `hub_private_repo`: None
+- `hub_always_push`: False
+- `hub_revision`: None
+- `gradient_checkpointing`: False
+- `gradient_checkpointing_kwargs`: None
+- `include_inputs_for_metrics`: False
+- `include_for_metrics`: []
+- `eval_do_concat_batches`: True
+- `fp16_backend`: auto
+- `push_to_hub_model_id`: None
+- `push_to_hub_organization`: None
+- `mp_parameters`:
+- `auto_find_batch_size`: False
+- `full_determinism`: False
+- `torchdynamo`: None
+- `ray_scope`: last
+- `ddp_timeout`: 1800
+- `torch_compile`: False
+- `torch_compile_backend`: None
+- `torch_compile_mode`: None
+- `include_tokens_per_second`: False
+- `include_num_input_tokens_seen`: no
+- `neftune_noise_alpha`: None
+- `optim_target_modules`: None
+- `batch_eval_metrics`: False
+- `eval_on_start`: False
+- `use_liger_kernel`: False
+- `liger_kernel_config`: None
+- `eval_use_gather_object`: False
+- `average_tokens_across_devices`: True
+- `prompts`: None
+- `batch_sampler`: batch_sampler
+- `multi_dataset_batch_sampler`: round_robin
+- `router_mapping`: {}
+- `learning_rate_mapping`: {}
+</details>
+### Training Logs
+| Epoch  | Step | Training Loss | retrieval-eval_cosine_accuracy |
+|:------:|:----:|:-------------:|:------------------------------:|
+| 0.5    | 166  | -             | 0.9731                         |
+| 1.0    | 332  | -             | 0.9786                         |
+| 1.5    | 498  | -             | 0.9794                         |
+| 1.5060 | 500  | 0.0784        | -                              |
+| 2.0    | 664  | -             | 0.9816                         |
+| 2.5    | 830  | -             | 0.9826                         |
+| 3.0    | 996  | -             | 0.9835                         |
+| 3.0120 | 1000 | 0.0259        | -                              |
+| 3.5    | 1162 | -             | 0.9820                         |
+| 4.0    | 1328 | -             | 0.9835                         |
+### Framework Versions
+- Python: 3.12.9
+- Sentence Transformers: 5.2.0
+- Transformers: 4.57.3
+- PyTorch: 2.7.1+cu128
+- Accelerate: 1.12.0
+- Datasets: 4.4.1
+- Tokenizers: 0.22.1
+## Citation
+### BibTeX
+#### Sentence Transformers
+```bibtex
+@inproceedings{reimers-2019-sentence-bert,
+    title = "Sentence-BERT: Sentence Embeddings using Siamese BERT-Networks",
+    author = "Reimers, Nils and Gurevych, Iryna",
+    booktitle = "Proceedings of the 2019 Conference on Empirical Methods in Natural Language Processing",
+    month = "11",
+    year = "2019",
+    publisher = "Association for Computational Linguistics",
+    url = "https://arxiv.org/abs/1908.10084",
+}
+```
+#### TripletLoss
+```bibtex
+@misc{hermans2017defense,
+    title={In Defense of the Triplet Loss for Person Re-Identification},
+    author={Alexander Hermans and Lucas Beyer and Bastian Leibe},
+    year={2017},
+    eprint={1703.07737},
+    archivePrefix={arXiv},
+    primaryClass={cs.CV}
+}
+```
+<!--
+## Glossary
+*Clearly define terms in order to be accessible across audiences.*
+-->
+<!--
+## Model Card Authors
+*Lists the people who create the model card, providing recognition and accountability for the detailed work that goes into its construction.*
+-->
+<!--
+## Model Card Contact
+*Provides a way for people who have updates to the Model Card, suggestions, or questions, to contact the Model Card authors.*
+-->

checkpoints/checkpoint-1328/config.json ADDED Viewed

	@@ -0,0 +1,25 @@

+{
+  "architectures": [
+    "BertModel"
+  ],
+  "attention_probs_dropout_prob": 0.1,
+  "classifier_dropout": null,
+  "dtype": "float32",
+  "gradient_checkpointing": false,
+  "hidden_act": "gelu",
+  "hidden_dropout_prob": 0.1,
+  "hidden_size": 384,
+  "initializer_range": 0.02,
+  "intermediate_size": 1536,
+  "layer_norm_eps": 1e-12,
+  "max_position_embeddings": 512,
+  "model_type": "bert",
+  "num_attention_heads": 12,
+  "num_hidden_layers": 6,
+  "pad_token_id": 0,
+  "position_embedding_type": "absolute",
+  "transformers_version": "4.57.3",
+  "type_vocab_size": 2,
+  "use_cache": true,
+  "vocab_size": 30522
+}

checkpoints/checkpoint-1328/config_sentence_transformers.json ADDED Viewed

	@@ -0,0 +1,14 @@

+{
+  "__version__": {
+    "sentence_transformers": "5.2.0",
+    "transformers": "4.57.3",
+    "pytorch": "2.7.1+cu128"
+  },
+  "model_type": "SentenceTransformer",
+  "prompts": {
+    "query": "",
+    "document": ""
+  },
+  "default_prompt_name": null,
+  "similarity_fn_name": "cosine"
+}

checkpoints/checkpoint-1328/model.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:2aad0732f84acdc49ba490a3ee17ce167582634b005e3d9067ccc8a75092efb0
+size 90864192

checkpoints/checkpoint-1328/modules.json ADDED Viewed

	@@ -0,0 +1,20 @@

+[
+  {
+    "idx": 0,
+    "name": "0",
+    "path": "",
+    "type": "sentence_transformers.models.Transformer"
+  },
+  {
+    "idx": 1,
+    "name": "1",
+    "path": "1_Pooling",
+    "type": "sentence_transformers.models.Pooling"
+  },
+  {
+    "idx": 2,
+    "name": "2",
+    "path": "2_Normalize",
+    "type": "sentence_transformers.models.Normalize"
+  }
+]

checkpoints/checkpoint-1328/optimizer.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:0f3673e72ec8287ea177f115cb5663fb957efddc1b10883b179cb76674fce33c
+size 180608203

checkpoints/checkpoint-1328/rng_state.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:1ddcad12b9961e2cd9c09778b8dd5d2204823561fc5eb69ee05395a1da2d88e3
+size 14645

checkpoints/checkpoint-1328/scheduler.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:0b45bcb188fa8d46833f344fabcc21c5fbe8880314b3130c0e06c183f5891f04
+size 1465

checkpoints/checkpoint-1328/sentence_bert_config.json ADDED Viewed

	@@ -0,0 +1,4 @@

+{
+    "max_seq_length": 256,
+    "do_lower_case": false
+}

checkpoints/checkpoint-1328/special_tokens_map.json ADDED Viewed

	@@ -0,0 +1,37 @@

+{
+  "cls_token": {
+    "content": "[CLS]",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "mask_token": {
+    "content": "[MASK]",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "pad_token": {
+    "content": "[PAD]",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "sep_token": {
+    "content": "[SEP]",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "unk_token": {
+    "content": "[UNK]",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  }
+}

checkpoints/checkpoint-1328/tokenizer.json ADDED Viewed

The diff for this file is too large to render. See raw diff

checkpoints/checkpoint-1328/tokenizer_config.json ADDED Viewed

	@@ -0,0 +1,65 @@

+{
+  "added_tokens_decoder": {
+    "0": {
+      "content": "[PAD]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "100": {
+      "content": "[UNK]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "101": {
+      "content": "[CLS]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "102": {
+      "content": "[SEP]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "103": {
+      "content": "[MASK]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    }
+  },
+  "clean_up_tokenization_spaces": false,
+  "cls_token": "[CLS]",
+  "do_basic_tokenize": true,
+  "do_lower_case": true,
+  "extra_special_tokens": {},
+  "mask_token": "[MASK]",
+  "max_length": 128,
+  "model_max_length": 256,
+  "never_split": null,
+  "pad_to_multiple_of": null,
+  "pad_token": "[PAD]",
+  "pad_token_type_id": 0,
+  "padding_side": "right",
+  "sep_token": "[SEP]",
+  "stride": 0,
+  "strip_accents": null,
+  "tokenize_chinese_chars": true,
+  "tokenizer_class": "BertTokenizer",
+  "truncation_side": "right",
+  "truncation_strategy": "longest_first",
+  "unk_token": "[UNK]"
+}

checkpoints/checkpoint-1328/trainer_state.json ADDED Viewed

	@@ -0,0 +1,112 @@

+{
+  "best_global_step": null,
+  "best_metric": null,
+  "best_model_checkpoint": null,
+  "epoch": 4.0,
+  "eval_steps": 166,
+  "global_step": 1328,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 0.5,
+      "eval_retrieval-eval_cosine_accuracy": 0.9730818271636963,
+      "eval_runtime": 5.0818,
+      "eval_samples_per_second": 0.0,
+      "eval_steps_per_second": 0.0,
+      "step": 166
+    },
+    {
+      "epoch": 1.0,
+      "eval_retrieval-eval_cosine_accuracy": 0.9785926342010498,
+      "eval_runtime": 5.1614,
+      "eval_samples_per_second": 0.0,
+      "eval_steps_per_second": 0.0,
+      "step": 332
+    },
+    {
+      "epoch": 1.5,
+      "eval_retrieval-eval_cosine_accuracy": 0.979440450668335,
+      "eval_runtime": 5.0153,
+      "eval_samples_per_second": 0.0,
+      "eval_steps_per_second": 0.0,
+      "step": 498
+    },
+    {
+      "epoch": 1.5060240963855422,
+      "grad_norm": 0.242173433303833,
+      "learning_rate": 1.5527199462726662e-05,
+      "loss": 0.0784,
+      "step": 500
+    },
+    {
+      "epoch": 2.0,
+      "eval_retrieval-eval_cosine_accuracy": 0.9815599918365479,
+      "eval_runtime": 5.097,
+      "eval_samples_per_second": 0.0,
+      "eval_steps_per_second": 0.0,
+      "step": 664
+    },
+    {
+      "epoch": 2.5,
+      "eval_retrieval-eval_cosine_accuracy": 0.9826197624206543,
+      "eval_runtime": 5.0153,
+      "eval_samples_per_second": 0.0,
+      "eval_steps_per_second": 0.0,
+      "step": 830
+    },
+    {
+      "epoch": 3.0,
+      "eval_retrieval-eval_cosine_accuracy": 0.9834675788879395,
+      "eval_runtime": 5.1638,
+      "eval_samples_per_second": 0.0,
+      "eval_steps_per_second": 0.0,
+      "step": 996
+    },
+    {
+      "epoch": 3.0120481927710845,
+      "grad_norm": 0.21960429847240448,
+      "learning_rate": 8.811282740094024e-06,
+      "loss": 0.0259,
+      "step": 1000
+    },
+    {
+      "epoch": 3.5,
+      "eval_retrieval-eval_cosine_accuracy": 0.9819839000701904,
+      "eval_runtime": 5.2312,
+      "eval_samples_per_second": 0.0,
+      "eval_steps_per_second": 0.0,
+      "step": 1162
+    },
+    {
+      "epoch": 4.0,
+      "eval_retrieval-eval_cosine_accuracy": 0.9834675788879395,
+      "eval_runtime": 5.0759,
+      "eval_samples_per_second": 0.0,
+      "eval_steps_per_second": 0.0,
+      "step": 1328
+    }
+  ],
+  "logging_steps": 500,
+  "max_steps": 1660,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 5,
+  "save_steps": 332,
+  "stateful_callbacks": {
+    "TrainerControl": {
+      "args": {
+        "should_epoch_stop": false,
+        "should_evaluate": false,
+        "should_log": false,
+        "should_save": true,
+        "should_training_stop": false
+      },
+      "attributes": {}
+    }
+  },
+  "total_flos": 0.0,
+  "train_batch_size": 128,
+  "trial_name": null,
+  "trial_params": null
+}

checkpoints/checkpoint-1328/training_args.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:655cb9ff00dbfe70b770f754d022211accbb66280259031bed3891ce1eb08985
+size 6161

checkpoints/checkpoint-1328/vocab.txt ADDED Viewed

The diff for this file is too large to render. See raw diff

checkpoints/checkpoint-1660/1_Pooling/config.json ADDED Viewed

	@@ -0,0 +1,10 @@

+{
+    "word_embedding_dimension": 384,
+    "pooling_mode_cls_token": false,
+    "pooling_mode_mean_tokens": true,
+    "pooling_mode_max_tokens": false,
+    "pooling_mode_mean_sqrt_len_tokens": false,
+    "pooling_mode_weightedmean_tokens": false,
+    "pooling_mode_lasttoken": false,
+    "include_prompt": true
+}

checkpoints/checkpoint-1660/README.md ADDED Viewed

	@@ -0,0 +1,469 @@

+---
+tags:
+- sentence-transformers
+- sentence-similarity
+- feature-extraction
+- dense
+- generated_from_trainer
+- dataset_size:42459
+- loss:TripletLoss
+base_model: sentence-transformers/all-MiniLM-L6-v2
+widget:
+- source_sentence: policy for how can i verify if a tekton task version is still supported
+    by checking for the build.appstudio.redhat.com/expires-on annotation?
+  sentences:
+  - 'Helper: lib.to_array
+    Signature: to_array(s)
+    Description: '
+  - 'Helper: lib.pipelinerun_attestations
+    Signature: pipelinerun_attestations
+    Description: '
+  - 'Helper: lib.k8s.name
+    Signature: name(resource)
+    Description: '
+- source_sentence: how to check attestation is missing statement field.
+  sentences:
+  - 'Helper: lib.k8s.name
+    Signature: name(resource)
+    Description: '
+  - 'Helper: lib.tekton.untrusted_task_refs
+    Signature: untrusted_task_refs(tasks)
+    Description: '
+  - 'Helper: lib.k8s.version
+    Signature: version(resource)
+    Description: '
+- source_sentence: I need to ensure the operators.openshift.io/valid-subscription
+    annotation in the ClusterServiceVersion manifest contains a valid JSON encoded
+    non-empty array of strings.
+  sentences:
+  - 'Helper: lib.to_array
+    Signature: to_array(s)
+    Description: '
+  - 'Helper: lib.image.equal_ref
+    Signature: equal_ref(ref1, ref2)
+    Description: '
+  - 'Helper: lib.result_helper
+    Signature: result_helper(chain, failure_sprintf_params)
+    Description: '
+- source_sentence: write a rule to deny approval for an container image with non-unique
+    RPM names
+  sentences:
+  - 'Helper: lib.result_helper
+    Signature: result_helper(chain, failure_sprintf_params)
+    Description: '
+  - 'Helper: lib.to_set
+    Signature: to_set(arr)
+    Description: '
+  - 'Helper: lib.rule_data_defaults
+    Signature: rule_data_defaults
+    Description: '
+- source_sentence: check if i need to validate that spdx package is an operating system
+    component.
+  sentences:
+  - 'Helper: lib.to_set
+    Signature: to_set(arr)
+    Description: '
+  - 'Helper: lib.rule_data_defaults
+    Signature: rule_data_defaults
+    Description: '
+  - 'Helper: lib.result_helper
+    Signature: result_helper(chain, failure_sprintf_params)
+    Description: '
+pipeline_tag: sentence-similarity
+library_name: sentence-transformers
+metrics:
+- cosine_accuracy
+model-index:
+- name: SentenceTransformer based on sentence-transformers/all-MiniLM-L6-v2
+  results:
+  - task:
+      type: triplet
+      name: Triplet
+    dataset:
+      name: retrieval eval
+      type: retrieval-eval
+    metrics:
+    - type: cosine_accuracy
+      value: 0.9830436706542969
+      name: Cosine Accuracy
+---
+# SentenceTransformer based on sentence-transformers/all-MiniLM-L6-v2
+This is a [sentence-transformers](https://www.SBERT.net) model finetuned from [sentence-transformers/all-MiniLM-L6-v2](https://huggingface.co/sentence-transformers/all-MiniLM-L6-v2). It maps sentences & paragraphs to a 384-dimensional dense vector space and can be used for semantic textual similarity, semantic search, paraphrase mining, text classification, clustering, and more.
+## Model Details
+### Model Description
+- **Model Type:** Sentence Transformer
+- **Base model:** [sentence-transformers/all-MiniLM-L6-v2](https://huggingface.co/sentence-transformers/all-MiniLM-L6-v2) <!-- at revision c9745ed1d9f207416be6d2e6f8de32d1f16199bf -->
+- **Maximum Sequence Length:** 256 tokens
+- **Output Dimensionality:** 384 dimensions
+- **Similarity Function:** Cosine Similarity
+<!-- - **Training Dataset:** Unknown -->
+<!-- - **Language:** Unknown -->
+<!-- - **License:** Unknown -->
+### Model Sources
+- **Documentation:** [Sentence Transformers Documentation](https://sbert.net)
+- **Repository:** [Sentence Transformers on GitHub](https://github.com/huggingface/sentence-transformers)
+- **Hugging Face:** [Sentence Transformers on Hugging Face](https://huggingface.co/models?library=sentence-transformers)
+### Full Model Architecture
+```
+SentenceTransformer(
+  (0): Transformer({'max_seq_length': 256, 'do_lower_case': False, 'architecture': 'BertModel'})
+  (1): Pooling({'word_embedding_dimension': 384, 'pooling_mode_cls_token': False, 'pooling_mode_mean_tokens': True, 'pooling_mode_max_tokens': False, 'pooling_mode_mean_sqrt_len_tokens': False, 'pooling_mode_weightedmean_tokens': False, 'pooling_mode_lasttoken': False, 'include_prompt': True})
+  (2): Normalize()
+)
+```
+## Usage
+### Direct Usage (Sentence Transformers)
+First install the Sentence Transformers library:
+```bash
+pip install -U sentence-transformers
+```
+Then you can load this model and run inference.
+```python
+from sentence_transformers import SentenceTransformer
+# Download from the 🤗 Hub
+model = SentenceTransformer("sentence_transformers_model_id")
+# Run inference
+sentences = [
+    'check if i need to validate that spdx package is an operating system component.',
+    'Helper: lib.result_helper\nSignature: result_helper(chain, failure_sprintf_params)\nDescription: ',
+    'Helper: lib.to_set\nSignature: to_set(arr)\nDescription: ',
+]
+embeddings = model.encode(sentences)
+print(embeddings.shape)
+# (3, 384)
+# Get the similarity scores for the embeddings
+similarities = model.similarity(embeddings, embeddings)
+print(similarities)
+# tensor([[ 1.0000,  0.5571, -0.4690],
+#         [ 0.5571,  1.0000, -0.5010],
+#         [-0.4690, -0.5010,  1.0000]])
+```
+<!--
+### Direct Usage (Transformers)
+<details><summary>Click to see the direct usage in Transformers</summary>
+</details>
+-->
+<!--
+### Downstream Usage (Sentence Transformers)
+You can finetune this model on your own dataset.
+<details><summary>Click to expand</summary>
+</details>
+-->
+<!--
+### Out-of-Scope Use
+*List how the model may foreseeably be misused and address what users ought not to do with the model.*
+-->
+## Evaluation
+### Metrics
+#### Triplet
+* Dataset: `retrieval-eval`
+* Evaluated with [<code>TripletEvaluator</code>](https://sbert.net/docs/package_reference/sentence_transformer/evaluation.html#sentence_transformers.evaluation.TripletEvaluator)
+| Metric              | Value     |
+|:--------------------|:----------|
+| **cosine_accuracy** | **0.983** |
+<!--
+## Bias, Risks and Limitations
+*What are the known or foreseeable issues stemming from this model? You could also flag here known failure cases or weaknesses of the model.*
+-->
+<!--
+### Recommendations
+*What are recommendations with respect to the foreseeable issues? For example, filtering explicit content.*
+-->
+## Training Details
+### Training Dataset
+#### Unnamed Dataset
+* Size: 42,459 training samples
+* Columns: <code>sentence_0</code>, <code>sentence_1</code>, and <code>sentence_2</code>
+* Approximate statistics based on the first 1000 samples:
+  |         | sentence_0                                                                         | sentence_1                                                                          | sentence_2                                                                          |
+  |:--------|:-----------------------------------------------------------------------------------|:------------------------------------------------------------------------------------|:------------------------------------------------------------------------------------|
+  | type    | string                                                                             | string                                                                              | string                                                                              |
+  | details | <ul><li>min: 4 tokens</li><li>mean: 30.48 tokens</li><li>max: 159 tokens</li></ul> | <ul><li>min: 21 tokens</li><li>mean: 29.64 tokens</li><li>max: 125 tokens</li></ul> | <ul><li>min: 21 tokens</li><li>mean: 27.15 tokens</li><li>max: 125 tokens</li></ul> |
+* Samples:
+  | sentence_0                                                                                     | sentence_1                                                                                       | sentence_2                                                                                                              |
+  |:-----------------------------------------------------------------------------------------------|:-------------------------------------------------------------------------------------------------|:------------------------------------------------------------------------------------------------------------------------|
+  | <code>I need to ensure that only images from specific registries are used in our policy</code> | <code>Helper: lib.image.str<br>Signature: str(d)<br>Description: </code>                         | <code>Helper: lib.konflux.is_validating_image_index<br>Signature: is_validating_image_index<br>Description: </code>     |
+  | <code>check if check warn</code>                                                               | <code>Helper: lib.tekton.expiry_of<br>Signature: expiry_of(task)<br>Description: </code>         | <code>Helper: lib.tekton.untagged_task_references<br>Signature: untagged_task_references(tasks)<br>Description: </code> |
+  | <code>verify that task has an expiry date set.</code>                                          | <code>Helper: lib.tekton.task_param<br>Signature: task_param(task, name)<br>Description: </code> | <code>Helper: lib.tekton.untagged_task_references<br>Signature: untagged_task_references(tasks)<br>Description: </code> |
+* Loss: [<code>TripletLoss</code>](https://sbert.net/docs/package_reference/sentence_transformer/losses.html#tripletloss) with these parameters:
+  ```json
+  {
+      "distance_metric": "TripletDistanceMetric.COSINE",
+      "triplet_margin": 0.5
+  }
+  ```
+### Training Hyperparameters
+#### Non-Default Hyperparameters
+- `eval_strategy`: steps
+- `per_device_train_batch_size`: 128
+- `per_device_eval_batch_size`: 128
+- `num_train_epochs`: 5
+- `multi_dataset_batch_sampler`: round_robin
+#### All Hyperparameters
+<details><summary>Click to expand</summary>
+- `overwrite_output_dir`: False
+- `do_predict`: False
+- `eval_strategy`: steps
+- `prediction_loss_only`: True
+- `per_device_train_batch_size`: 128
+- `per_device_eval_batch_size`: 128
+- `per_gpu_train_batch_size`: None
+- `per_gpu_eval_batch_size`: None
+- `gradient_accumulation_steps`: 1
+- `eval_accumulation_steps`: None
+- `torch_empty_cache_steps`: None
+- `learning_rate`: 5e-05
+- `weight_decay`: 0.0
+- `adam_beta1`: 0.9
+- `adam_beta2`: 0.999
+- `adam_epsilon`: 1e-08
+- `max_grad_norm`: 1
+- `num_train_epochs`: 5
+- `max_steps`: -1
+- `lr_scheduler_type`: linear
+- `lr_scheduler_kwargs`: {}
+- `warmup_ratio`: 0.0
+- `warmup_steps`: 0
+- `log_level`: passive
+- `log_level_replica`: warning
+- `log_on_each_node`: True
+- `logging_nan_inf_filter`: True
+- `save_safetensors`: True
+- `save_on_each_node`: False
+- `save_only_model`: False
+- `restore_callback_states_from_checkpoint`: False
+- `no_cuda`: False
+- `use_cpu`: False
+- `use_mps_device`: False
+- `seed`: 42
+- `data_seed`: None
+- `jit_mode_eval`: False
+- `bf16`: False
+- `fp16`: False
+- `fp16_opt_level`: O1
+- `half_precision_backend`: auto
+- `bf16_full_eval`: False
+- `fp16_full_eval`: False
+- `tf32`: None
+- `local_rank`: 0
+- `ddp_backend`: None
+- `tpu_num_cores`: None
+- `tpu_metrics_debug`: False
+- `debug`: []
+- `dataloader_drop_last`: False
+- `dataloader_num_workers`: 0
+- `dataloader_prefetch_factor`: None
+- `past_index`: -1
+- `disable_tqdm`: False
+- `remove_unused_columns`: True
+- `label_names`: None
+- `load_best_model_at_end`: False
+- `ignore_data_skip`: False
+- `fsdp`: []
+- `fsdp_min_num_params`: 0
+- `fsdp_config`: {'min_num_params': 0, 'xla': False, 'xla_fsdp_v2': False, 'xla_fsdp_grad_ckpt': False}
+- `fsdp_transformer_layer_cls_to_wrap`: None
+- `accelerator_config`: {'split_batches': False, 'dispatch_batches': None, 'even_batches': True, 'use_seedable_sampler': True, 'non_blocking': False, 'gradient_accumulation_kwargs': None}
+- `parallelism_config`: None
+- `deepspeed`: None
+- `label_smoothing_factor`: 0.0
+- `optim`: adamw_torch
+- `optim_args`: None
+- `adafactor`: False
+- `group_by_length`: False
+- `length_column_name`: length
+- `project`: huggingface
+- `trackio_space_id`: trackio
+- `ddp_find_unused_parameters`: None
+- `ddp_bucket_cap_mb`: None
+- `ddp_broadcast_buffers`: False
+- `dataloader_pin_memory`: True
+- `dataloader_persistent_workers`: False
+- `skip_memory_metrics`: True
+- `use_legacy_prediction_loop`: False
+- `push_to_hub`: False
+- `resume_from_checkpoint`: None
+- `hub_model_id`: None
+- `hub_strategy`: every_save
+- `hub_private_repo`: None
+- `hub_always_push`: False
+- `hub_revision`: None
+- `gradient_checkpointing`: False
+- `gradient_checkpointing_kwargs`: None
+- `include_inputs_for_metrics`: False
+- `include_for_metrics`: []
+- `eval_do_concat_batches`: True
+- `fp16_backend`: auto
+- `push_to_hub_model_id`: None
+- `push_to_hub_organization`: None
+- `mp_parameters`:
+- `auto_find_batch_size`: False
+- `full_determinism`: False
+- `torchdynamo`: None
+- `ray_scope`: last
+- `ddp_timeout`: 1800
+- `torch_compile`: False
+- `torch_compile_backend`: None
+- `torch_compile_mode`: None
+- `include_tokens_per_second`: False
+- `include_num_input_tokens_seen`: no
+- `neftune_noise_alpha`: None
+- `optim_target_modules`: None
+- `batch_eval_metrics`: False
+- `eval_on_start`: False
+- `use_liger_kernel`: False
+- `liger_kernel_config`: None
+- `eval_use_gather_object`: False
+- `average_tokens_across_devices`: True
+- `prompts`: None
+- `batch_sampler`: batch_sampler
+- `multi_dataset_batch_sampler`: round_robin
+- `router_mapping`: {}
+- `learning_rate_mapping`: {}
+</details>
+### Training Logs
+| Epoch  | Step | Training Loss | retrieval-eval_cosine_accuracy |
+|:------:|:----:|:-------------:|:------------------------------:|
+| 0.5    | 166  | -             | 0.9731                         |
+| 1.0    | 332  | -             | 0.9786                         |
+| 1.5    | 498  | -             | 0.9794                         |
+| 1.5060 | 500  | 0.0784        | -                              |
+| 2.0    | 664  | -             | 0.9816                         |
+| 2.5    | 830  | -             | 0.9826                         |
+| 3.0    | 996  | -             | 0.9835                         |
+| 3.0120 | 1000 | 0.0259        | -                              |
+| 3.5    | 1162 | -             | 0.9820                         |
+| 4.0    | 1328 | -             | 0.9835                         |
+| 4.5    | 1494 | -             | 0.9835                         |
+| 4.5181 | 1500 | 0.0227        | -                              |
+| 5.0    | 1660 | -             | 0.9830                         |
+### Framework Versions
+- Python: 3.12.9
+- Sentence Transformers: 5.2.0
+- Transformers: 4.57.3
+- PyTorch: 2.7.1+cu128
+- Accelerate: 1.12.0
+- Datasets: 4.4.1
+- Tokenizers: 0.22.1
+## Citation
+### BibTeX
+#### Sentence Transformers
+```bibtex
+@inproceedings{reimers-2019-sentence-bert,
+    title = "Sentence-BERT: Sentence Embeddings using Siamese BERT-Networks",
+    author = "Reimers, Nils and Gurevych, Iryna",
+    booktitle = "Proceedings of the 2019 Conference on Empirical Methods in Natural Language Processing",
+    month = "11",
+    year = "2019",
+    publisher = "Association for Computational Linguistics",
+    url = "https://arxiv.org/abs/1908.10084",
+}
+```
+#### TripletLoss
+```bibtex
+@misc{hermans2017defense,
+    title={In Defense of the Triplet Loss for Person Re-Identification},
+    author={Alexander Hermans and Lucas Beyer and Bastian Leibe},
+    year={2017},
+    eprint={1703.07737},
+    archivePrefix={arXiv},
+    primaryClass={cs.CV}
+}
+```
+<!--
+## Glossary
+*Clearly define terms in order to be accessible across audiences.*
+-->
+<!--
+## Model Card Authors
+*Lists the people who create the model card, providing recognition and accountability for the detailed work that goes into its construction.*
+-->
+<!--
+## Model Card Contact
+*Provides a way for people who have updates to the Model Card, suggestions, or questions, to contact the Model Card authors.*
+-->

checkpoints/checkpoint-1660/config.json ADDED Viewed

	@@ -0,0 +1,25 @@

+{
+  "architectures": [
+    "BertModel"
+  ],
+  "attention_probs_dropout_prob": 0.1,
+  "classifier_dropout": null,
+  "dtype": "float32",
+  "gradient_checkpointing": false,
+  "hidden_act": "gelu",
+  "hidden_dropout_prob": 0.1,
+  "hidden_size": 384,
+  "initializer_range": 0.02,
+  "intermediate_size": 1536,
+  "layer_norm_eps": 1e-12,
+  "max_position_embeddings": 512,
+  "model_type": "bert",
+  "num_attention_heads": 12,
+  "num_hidden_layers": 6,
+  "pad_token_id": 0,
+  "position_embedding_type": "absolute",
+  "transformers_version": "4.57.3",
+  "type_vocab_size": 2,
+  "use_cache": true,
+  "vocab_size": 30522
+}

checkpoints/checkpoint-1660/config_sentence_transformers.json ADDED Viewed

	@@ -0,0 +1,14 @@

+{
+  "__version__": {
+    "sentence_transformers": "5.2.0",
+    "transformers": "4.57.3",
+    "pytorch": "2.7.1+cu128"
+  },
+  "model_type": "SentenceTransformer",
+  "prompts": {
+    "query": "",
+    "document": ""
+  },
+  "default_prompt_name": null,
+  "similarity_fn_name": "cosine"
+}

checkpoints/checkpoint-1660/model.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:d925d65a0491b220a4e1505967f7b75f4e747249498c98735ac7c97d3496ccdf
+size 90864192

checkpoints/checkpoint-1660/modules.json ADDED Viewed

	@@ -0,0 +1,20 @@

+[
+  {
+    "idx": 0,
+    "name": "0",
+    "path": "",
+    "type": "sentence_transformers.models.Transformer"
+  },
+  {
+    "idx": 1,
+    "name": "1",
+    "path": "1_Pooling",
+    "type": "sentence_transformers.models.Pooling"
+  },
+  {
+    "idx": 2,
+    "name": "2",
+    "path": "2_Normalize",
+    "type": "sentence_transformers.models.Normalize"
+  }
+]

checkpoints/checkpoint-1660/optimizer.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:e68e3543cf884195172d94ab018360c1137be48edbeb55c8855d2978d5e96561
+size 180608203

checkpoints/checkpoint-1660/rng_state.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:535a356cd0ffe920eca8a24273c362ac52b82286fb686759465904593b20bfeb
+size 14645

checkpoints/checkpoint-1660/scheduler.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:080157cb25d6b8140b630d6a70f028b9e651cf548599cfc118cfa1c6cace7bf0
+size 1465

checkpoints/checkpoint-1660/sentence_bert_config.json ADDED Viewed

	@@ -0,0 +1,4 @@

+{
+    "max_seq_length": 256,
+    "do_lower_case": false
+}

checkpoints/checkpoint-1660/special_tokens_map.json ADDED Viewed

	@@ -0,0 +1,37 @@

+{
+  "cls_token": {
+    "content": "[CLS]",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "mask_token": {
+    "content": "[MASK]",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "pad_token": {
+    "content": "[PAD]",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "sep_token": {
+    "content": "[SEP]",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "unk_token": {
+    "content": "[UNK]",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  }
+}

checkpoints/checkpoint-1660/tokenizer.json ADDED Viewed

The diff for this file is too large to render. See raw diff

checkpoints/checkpoint-1660/tokenizer_config.json ADDED Viewed

	@@ -0,0 +1,65 @@

+{
+  "added_tokens_decoder": {
+    "0": {
+      "content": "[PAD]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "100": {
+      "content": "[UNK]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "101": {
+      "content": "[CLS]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "102": {
+      "content": "[SEP]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "103": {
+      "content": "[MASK]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    }
+  },
+  "clean_up_tokenization_spaces": false,
+  "cls_token": "[CLS]",
+  "do_basic_tokenize": true,
+  "do_lower_case": true,
+  "extra_special_tokens": {},
+  "mask_token": "[MASK]",
+  "max_length": 128,
+  "model_max_length": 256,
+  "never_split": null,
+  "pad_to_multiple_of": null,
+  "pad_token": "[PAD]",
+  "pad_token_type_id": 0,
+  "padding_side": "right",
+  "sep_token": "[SEP]",
+  "stride": 0,
+  "strip_accents": null,
+  "tokenize_chinese_chars": true,
+  "tokenizer_class": "BertTokenizer",
+  "truncation_side": "right",
+  "truncation_strategy": "longest_first",
+  "unk_token": "[UNK]"
+}

checkpoints/checkpoint-1660/trainer_state.json ADDED Viewed

	@@ -0,0 +1,135 @@

+{
+  "best_global_step": null,
+  "best_metric": null,
+  "best_model_checkpoint": null,
+  "epoch": 5.0,
+  "eval_steps": 166,
+  "global_step": 1660,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 0.5,
+      "eval_retrieval-eval_cosine_accuracy": 0.9730818271636963,
+      "eval_runtime": 5.0818,
+      "eval_samples_per_second": 0.0,
+      "eval_steps_per_second": 0.0,
+      "step": 166
+    },
+    {
+      "epoch": 1.0,
+      "eval_retrieval-eval_cosine_accuracy": 0.9785926342010498,
+      "eval_runtime": 5.1614,
+      "eval_samples_per_second": 0.0,
+      "eval_steps_per_second": 0.0,
+      "step": 332
+    },
+    {
+      "epoch": 1.5,
+      "eval_retrieval-eval_cosine_accuracy": 0.979440450668335,
+      "eval_runtime": 5.0153,
+      "eval_samples_per_second": 0.0,
+      "eval_steps_per_second": 0.0,
+      "step": 498
+    },
+    {
+      "epoch": 1.5060240963855422,
+      "grad_norm": 0.242173433303833,
+      "learning_rate": 1.5527199462726662e-05,
+      "loss": 0.0784,
+      "step": 500
+    },
+    {
+      "epoch": 2.0,
+      "eval_retrieval-eval_cosine_accuracy": 0.9815599918365479,
+      "eval_runtime": 5.097,
+      "eval_samples_per_second": 0.0,
+      "eval_steps_per_second": 0.0,
+      "step": 664
+    },
+    {
+      "epoch": 2.5,
+      "eval_retrieval-eval_cosine_accuracy": 0.9826197624206543,
+      "eval_runtime": 5.0153,
+      "eval_samples_per_second": 0.0,
+      "eval_steps_per_second": 0.0,
+      "step": 830
+    },
+    {
+      "epoch": 3.0,
+      "eval_retrieval-eval_cosine_accuracy": 0.9834675788879395,
+      "eval_runtime": 5.1638,
+      "eval_samples_per_second": 0.0,
+      "eval_steps_per_second": 0.0,
+      "step": 996
+    },
+    {
+      "epoch": 3.0120481927710845,
+      "grad_norm": 0.21960429847240448,
+      "learning_rate": 8.811282740094024e-06,
+      "loss": 0.0259,
+      "step": 1000
+    },
+    {
+      "epoch": 3.5,
+      "eval_retrieval-eval_cosine_accuracy": 0.9819839000701904,
+      "eval_runtime": 5.2312,
+      "eval_samples_per_second": 0.0,
+      "eval_steps_per_second": 0.0,
+      "step": 1162
+    },
+    {
+      "epoch": 4.0,
+      "eval_retrieval-eval_cosine_accuracy": 0.9834675788879395,
+      "eval_runtime": 5.0759,
+      "eval_samples_per_second": 0.0,
+      "eval_steps_per_second": 0.0,
+      "step": 1328
+    },
+    {
+      "epoch": 4.5,
+      "eval_retrieval-eval_cosine_accuracy": 0.9834675788879395,
+      "eval_runtime": 5.0083,
+      "eval_samples_per_second": 0.0,
+      "eval_steps_per_second": 0.0,
+      "step": 1494
+    },
+    {
+      "epoch": 4.518072289156627,
+      "grad_norm": 0.21867941319942474,
+      "learning_rate": 2.0953660174613837e-06,
+      "loss": 0.0227,
+      "step": 1500
+    },
+    {
+      "epoch": 5.0,
+      "eval_retrieval-eval_cosine_accuracy": 0.9830436706542969,
+      "eval_runtime": 5.0895,
+      "eval_samples_per_second": 0.0,
+      "eval_steps_per_second": 0.0,
+      "step": 1660
+    }
+  ],
+  "logging_steps": 500,
+  "max_steps": 1660,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 5,
+  "save_steps": 332,
+  "stateful_callbacks": {
+    "TrainerControl": {
+      "args": {
+        "should_epoch_stop": false,
+        "should_evaluate": false,
+        "should_log": false,
+        "should_save": true,
+        "should_training_stop": true
+      },
+      "attributes": {}
+    }
+  },
+  "total_flos": 0.0,
+  "train_batch_size": 128,
+  "trial_name": null,
+  "trial_params": null
+}

checkpoints/checkpoint-1660/training_args.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:655cb9ff00dbfe70b770f754d022211accbb66280259031bed3891ce1eb08985
+size 6161

checkpoints/checkpoint-1660/vocab.txt ADDED Viewed

The diff for this file is too large to render. See raw diff

checkpoints/eval/triplet_evaluation_retrieval-eval_results.csv CHANGED Viewed

@@ -1,26 +1,11 @@
 epoch,steps,accuracy_cosine
-0.5,145,0.9620622396469116
-1.0,290,0.9730058312416077
-1.5,435,0.9764105081558228
-2.0,580,0.975680947303772
-2.5,725,0.9768968820571899
-0.49829351535836175,146,0.9636757373809814
-0.9965870307167235,292,0.9711330533027649
-1.4948805460750854,438,0.9728169441223145
-1.993174061433447,584,0.974500834941864
-2.491467576791809,730,0.974981963634491
-2.9897610921501707,876,0.9747413992881775
-3.4880546075085324,1022,0.974981963634491
-3.986348122866894,1168,0.974981963634491
-4.484641638225256,1314,0.9757036566734314
-4.982935153583618,1460,0.9757036566734314
-0.5,141,0.9608283638954163
-1.0,282,0.9713073968887329
-1.5,423,0.9752994179725647
-2.0,564,0.9750499129295349
-2.5,705,0.9748004078865051
-3.0,846,0.9762973785400391
-3.5,987,0.9757984280586243
-4.0,1128,0.976047933101654
-4.5,1269,0.9750499129295349
-5.0,1410,0.9750499129295349

 epoch,steps,accuracy_cosine
+0.5,166,0.9730818271636963
+1.0,332,0.9785926342010498
+1.5,498,0.979440450668335
+2.0,664,0.9815599918365479
+2.5,830,0.9826197624206543
+3.0,996,0.9834675788879395
+3.5,1162,0.9819839000701904
+4.0,1328,0.9834675788879395
+4.5,1494,0.9834675788879395
+5.0,1660,0.9830436706542969

checkpoints/runs/Dec13_16-18-12_rego-trainer-0/events.out.tfevents.1765642693.rego-trainer-0.4819.0 ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:b738af4d9ec875eecde1711029d777c2abb8cab0931f90d79f533076b9dc38f6
+size 8447

eval/triplet_evaluation_retrieval-eval_results.csv CHANGED Viewed

@@ -1,13 +1,6 @@
 epoch,steps,accuracy_cosine
-1.0,290,0.9730058312416077
-2.0,580,0.975680947303772
-1.0,293,0.9718546867370605
-2.0,586,0.974500834941864
-3.0,879,0.974500834941864
-4.0,1172,0.9754630923271179
-5.0,1465,0.9757036566734314
-1.0,282,0.9713073968887329
-2.0,564,0.9750499129295349
-3.0,846,0.9762973785400391
-4.0,1128,0.976047933101654
-5.0,1410,0.9750499129295349

 epoch,steps,accuracy_cosine
+1.0,332,0.9785926342010498
+2.0,664,0.9815599918365479
+3.0,996,0.9834675788879395
+4.0,1328,0.9834675788879395
+5.0,1660,0.9830436706542969

model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:28fbf416ad4f25af4d2d832531c5941855143597d18b824cd223fbea1ebd6434
 size 90864192

 version https://git-lfs.github.com/spec/v1
+oid sha256:745aba2b4a7f3a5bbd9c3e086d962d7e047f04db05a4ed6d52f7240baf135d67
 size 90864192

training_info.json CHANGED Viewed

@@ -1,7 +1,7 @@
 {
   "base_model": "sentence-transformers/all-MiniLM-L6-v2",
-  "train_examples": 36065,
-  "eval_examples": 4008,
   "epochs": 5,
   "batch_size": 128,
   "learning_rate": 2e-05,

 {
   "base_model": "sentence-transformers/all-MiniLM-L6-v2",
+  "train_examples": 42459,
+  "eval_examples": 4718,
   "epochs": 5,
   "batch_size": 128,
   "learning_rate": 2e-05,