akhilreddygogula
committed on
Commit
·
d65bc08
1
Parent(s):
a5b24c9
gitignore updated
Browse files. This view is limited to 50 files because it contains too many changes.
See raw diff
- .gitignore +2 -0
- checkpoints/model/eval/triplet_evaluation_val-triplet-eval_results.csv +3 -0
- checkpoints/model_1/eval/triplet_evaluation_val-triplet-eval_results.csv +3 -0
- checkpoints/model_2/eval/triplet_evaluation_val-triplet-eval_results.csv +2 -0
- checkpoints/model_3/eval/triplet_evaluation_val-triplet-eval_results.csv +2 -0
- checkpoints/model_4/eval/triplet_evaluation_val-triplet_results.csv +2 -0
- checkpoints/model_5/eval/triplet_evaluation_val-triplet_results.csv +2 -0
- embedd.py +26 -0
- fine_tuned_sbert_triplet/README.md +340 -34
- fine_tuned_sbert_triplet/eval/triplet_evaluation_val-triplet-eval_results.csv +3 -0
- fine_tuned_sbert_triplet/eval/triplet_evaluation_val-triplet_results.csv +2 -0
- fine_tuned_sbert_triplet/model.safetensors +1 -1
- train.py +25 -20
- train_example.py +54 -0
- train_triplets.jsonl +0 -0
- train_v1.py +76 -0
- train_v2.py +125 -0
- train_wandb.py +161 -0
- triplets.jsonl +0 -0
- val_triplets.jsonl +0 -0
- wandb/debug-internal.log +1 -0
- wandb/debug.log +1 -0
- wandb/latest-run +1 -0
- wandb/run-20250721_101723-fk6s1kw0/files/config.yaml +67 -0
- wandb/run-20250721_101723-fk6s1kw0/files/output.log +11 -0
- wandb/run-20250721_101723-fk6s1kw0/files/requirements.txt +90 -0
- wandb/run-20250721_101723-fk6s1kw0/files/wandb-metadata.json +40 -0
- wandb/run-20250721_101723-fk6s1kw0/files/wandb-summary.json +1 -0
- wandb/run-20250721_101723-fk6s1kw0/logs/debug-core.log +1 -0
- wandb/run-20250721_101723-fk6s1kw0/logs/debug-internal.log +12 -0
- wandb/run-20250721_101723-fk6s1kw0/logs/debug.log +22 -0
- wandb/run-20250721_101723-fk6s1kw0/run-fk6s1kw0.wandb +0 -0
- wandb/run-20250721_102226-0xzjnslp/files/config.yaml +355 -0
- wandb/run-20250721_102226-0xzjnslp/files/output.log +35 -0
- wandb/run-20250721_102226-0xzjnslp/files/requirements.txt +90 -0
- wandb/run-20250721_102226-0xzjnslp/files/wandb-metadata.json +40 -0
- wandb/run-20250721_102226-0xzjnslp/files/wandb-summary.json +1 -0
- wandb/run-20250721_102226-0xzjnslp/logs/debug-core.log +1 -0
- wandb/run-20250721_102226-0xzjnslp/logs/debug-internal.log +12 -0
- wandb/run-20250721_102226-0xzjnslp/logs/debug.log +23 -0
- wandb/run-20250721_102226-0xzjnslp/run-0xzjnslp.wandb +0 -0
- wandb/run-20250721_102442-a02q1hb9/files/config.yaml +359 -0
- wandb/run-20250721_102442-a02q1hb9/files/output.log +35 -0
- wandb/run-20250721_102442-a02q1hb9/files/requirements.txt +90 -0
- wandb/run-20250721_102442-a02q1hb9/files/wandb-metadata.json +40 -0
- wandb/run-20250721_102442-a02q1hb9/files/wandb-summary.json +1 -0
- wandb/run-20250721_102442-a02q1hb9/logs/debug-core.log +1 -0
- wandb/run-20250721_102442-a02q1hb9/logs/debug-internal.log +12 -0
- wandb/run-20250721_102442-a02q1hb9/logs/debug.log +23 -0
- wandb/run-20250721_102442-a02q1hb9/run-a02q1hb9.wandb +0 -0
.gitignore
CHANGED
|
@@ -1 +1,3 @@
|
|
| 1 |
triplet_dataset.jsonl
|
|
|
|
|
|
|
|
|
| 1 |
triplet_dataset.jsonl
|
| 2 |
+
val_triplets.jsonl
|
| 3 |
+
triplets.jsonl
|
checkpoints/model/eval/triplet_evaluation_val-triplet-eval_results.csv
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
epoch,steps,accuracy_cosine
|
| 2 |
+
0.5,10,0.006289307959377766
|
| 3 |
+
1.0,20,0.006289307959377766
|
checkpoints/model_1/eval/triplet_evaluation_val-triplet-eval_results.csv
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
epoch,steps,accuracy_cosine
|
| 2 |
+
0.5,10,0.006289307959377766
|
| 3 |
+
1.0,20,0.006289307959377766
|
checkpoints/model_2/eval/triplet_evaluation_val-triplet-eval_results.csv
ADDED
|
@@ -0,0 +1,2 @@
|
|
|
|
|
|
|
|
|
|
| 1 |
+
epoch,steps,accuracy_cosine
|
| 2 |
+
0.625,10,0.0
|
checkpoints/model_3/eval/triplet_evaluation_val-triplet-eval_results.csv
ADDED
|
@@ -0,0 +1,2 @@
|
|
|
|
|
|
|
|
|
|
| 1 |
+
epoch,steps,accuracy_cosine
|
| 2 |
+
0.625,10,0.0
|
checkpoints/model_4/eval/triplet_evaluation_val-triplet_results.csv
ADDED
|
@@ -0,0 +1,2 @@
|
|
|
|
|
|
|
|
|
|
| 1 |
+
epoch,steps,accuracy_cosine
|
| 2 |
+
0.625,10,0.0
|
checkpoints/model_5/eval/triplet_evaluation_val-triplet_results.csv
ADDED
|
@@ -0,0 +1,2 @@
|
|
|
|
|
|
|
|
|
|
| 1 |
+
epoch,steps,accuracy_cosine
|
| 2 |
+
0.625,10,0.0
|
embedd.py
ADDED
|
@@ -0,0 +1,26 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from sentence_transformers import SentenceTransformer, util
|
| 2 |
+
|
| 3 |
+
def main():
|
| 4 |
+
# Load the fine-tuned model
|
| 5 |
+
model = SentenceTransformer('fine_tuned_sbert_triplet')
|
| 6 |
+
|
| 7 |
+
# Example sentences
|
| 8 |
+
sentences = [
|
| 9 |
+
"A man is playing a guitar",
|
| 10 |
+
"A person is playing a guitar",
|
| 11 |
+
"A woman is reading a book"
|
| 12 |
+
]
|
| 13 |
+
|
| 14 |
+
# Compute embeddings
|
| 15 |
+
embeddings = model.encode(sentences, convert_to_tensor=True)
|
| 16 |
+
|
| 17 |
+
# Compute cosine similarity between all pairs
|
| 18 |
+
cosine_sim = util.pytorch_cos_sim(embeddings, embeddings)
|
| 19 |
+
|
| 20 |
+
# Display similarity matrix
|
| 21 |
+
print("Cosine Similarity Matrix:")
|
| 22 |
+
print(cosine_sim)
|
| 23 |
+
|
| 24 |
+
if __name__ == "__main__":
|
| 25 |
+
main()
|
| 26 |
+
|
fine_tuned_sbert_triplet/README.md
CHANGED
|
@@ -5,9 +5,313 @@ tags:
|
|
| 5 |
- feature-extraction
|
| 6 |
- dense
|
| 7 |
- generated_from_trainer
|
| 8 |
-
- dataset_size:
|
| 9 |
-
- loss:
|
| 10 |
base_model: sentence-transformers/all-MiniLM-L6-v2
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 11 |
pipeline_tag: sentence-similarity
|
| 12 |
library_name: sentence-transformers
|
| 13 |
metrics:
|
|
@@ -16,14 +320,14 @@ model-index:
|
|
| 16 |
- name: SentenceTransformer based on sentence-transformers/all-MiniLM-L6-v2
|
| 17 |
results:
|
| 18 |
- task:
|
| 19 |
-
type:
|
| 20 |
-
name:
|
| 21 |
dataset:
|
| 22 |
-
name: val triplet
|
| 23 |
-
type: val-triplet
|
| 24 |
metrics:
|
| 25 |
- type: cosine_accuracy
|
| 26 |
-
value:
|
| 27 |
name: Cosine Accuracy
|
| 28 |
---
|
| 29 |
|
|
@@ -77,9 +381,9 @@ from sentence_transformers import SentenceTransformer
|
|
| 77 |
model = SentenceTransformer("sentence_transformers_model_id")
|
| 78 |
# Run inference
|
| 79 |
sentences = [
|
| 80 |
-
'
|
| 81 |
-
"
|
| 82 |
-
'
|
| 83 |
]
|
| 84 |
embeddings = model.encode(sentences)
|
| 85 |
print(embeddings.shape)
|
|
@@ -88,9 +392,9 @@ print(embeddings.shape)
|
|
| 88 |
# Get the similarity scores for the embeddings
|
| 89 |
similarities = model.similarity(embeddings, embeddings)
|
| 90 |
print(similarities)
|
| 91 |
-
# tensor([[1.0000, 0.
|
| 92 |
-
# [0.
|
| 93 |
-
# [0.
|
| 94 |
```
|
| 95 |
|
| 96 |
<!--
|
|
@@ -121,14 +425,14 @@ You can finetune this model on your own dataset.
|
|
| 121 |
|
| 122 |
### Metrics
|
| 123 |
|
| 124 |
-
####
|
| 125 |
|
| 126 |
-
* Dataset: `val-triplet
|
| 127 |
-
* Evaluated with
|
| 128 |
|
| 129 |
| Metric | Value |
|
| 130 |
|:--------------------|:--------|
|
| 131 |
-
| **cosine_accuracy** | **
|
| 132 |
|
| 133 |
<!--
|
| 134 |
## Bias, Risks and Limitations
|
|
@@ -148,20 +452,20 @@ You can finetune this model on your own dataset.
|
|
| 148 |
|
| 149 |
#### Unnamed Dataset
|
| 150 |
|
| 151 |
-
* Size:
|
| 152 |
* Columns: <code>sentence_0</code>, <code>sentence_1</code>, and <code>sentence_2</code>
|
| 153 |
-
* Approximate statistics based on the first
|
| 154 |
-
| | sentence_0
|
| 155 |
-
|
| 156 |
-
| type | string
|
| 157 |
-
| details | <ul><li>min:
|
| 158 |
* Samples:
|
| 159 |
-
| sentence_0
|
| 160 |
-
|
| 161 |
-
| <code>
|
| 162 |
-
| <code>A
|
| 163 |
-
| <code>
|
| 164 |
-
* Loss:
|
| 165 |
```json
|
| 166 |
{
|
| 167 |
"distance_metric": "TripletDistanceMetric.EUCLIDEAN",
|
|
@@ -176,6 +480,7 @@ You can finetune this model on your own dataset.
|
|
| 176 |
- `per_device_train_batch_size`: 16
|
| 177 |
- `per_device_eval_batch_size`: 16
|
| 178 |
- `num_train_epochs`: 1
|
|
|
|
| 179 |
- `multi_dataset_batch_sampler`: round_robin
|
| 180 |
|
| 181 |
#### All Hyperparameters
|
|
@@ -235,7 +540,7 @@ You can finetune this model on your own dataset.
|
|
| 235 |
- `dataloader_num_workers`: 0
|
| 236 |
- `dataloader_prefetch_factor`: None
|
| 237 |
- `past_index`: -1
|
| 238 |
-
- `disable_tqdm`:
|
| 239 |
- `remove_unused_columns`: True
|
| 240 |
- `label_names`: None
|
| 241 |
- `load_best_model_at_end`: False
|
|
@@ -302,9 +607,10 @@ You can finetune this model on your own dataset.
|
|
| 302 |
</details>
|
| 303 |
|
| 304 |
### Training Logs
|
| 305 |
-
| Epoch | Step | val-
|
| 306 |
-
|
| 307 |
-
|
|
|
|
|
| 308 |
|
| 309 |
|
| 310 |
### Framework Versions
|
|
@@ -333,7 +639,7 @@ You can finetune this model on your own dataset.
|
|
| 333 |
}
|
| 334 |
```
|
| 335 |
|
| 336 |
-
####
|
| 337 |
```bibtex
|
| 338 |
@misc{hermans2017defense,
|
| 339 |
title={In Defense of the Triplet Loss for Person Re-Identification},
|
|
|
|
| 5 |
- feature-extraction
|
| 6 |
- dense
|
| 7 |
- generated_from_trainer
|
| 8 |
+
- dataset_size:254
|
| 9 |
+
- loss:WandbLoggingTripletLoss
|
| 10 |
base_model: sentence-transformers/all-MiniLM-L6-v2
|
| 11 |
+
widget:
|
| 12 |
+
- source_sentence: 70G type lvm mountpoints None name nvme4n1 size 2.9T type disk
|
| 13 |
+
mountpoints None name nvme4n1p1 size 2.9T type part mountpoints None name nvme3n1
|
| 14 |
+
size 2.9T type disk mountpoints None name nvme0n1 size 2.9T type disk mountpoints
|
| 15 |
+
None name nvme2n1 size 2.9T type disk mountpoints None name nvme1n1 size 2.9T
|
| 16 |
+
type disk mountpoints None name nvme1n1p1 size 2.9T type part mountpoints None
|
| 17 |
+
name nvme0n1p1 size 1.1G type part mountpoints /boot/efi name nvme2n1p1 size 2.9T
|
| 18 |
+
type part mountpoints None name nvme0n1p2 size 2G type part mountpoints /boot
|
| 19 |
+
name nvme0n1p3 size 2.9T type part mountpoints None name nvme3n1p1 size 2.9T type
|
| 20 |
+
part mountpoints None StorageInfo None Network None PCI None Tunings None docker_list
|
| 21 |
+
None docker_details None Network NetworkAdapters PermanentMACAddress 1C:34:DA:62:D5:28
|
| 22 |
+
P
|
| 23 |
+
sentences:
|
| 24 |
+
- "<think>\nOkay, the user wants a sentence that's unrelated in topic and meaning\
|
| 25 |
+
\ to the given document chunk. Let me first understand what the document is about.\n\
|
| 26 |
+
\nLooking at the content, it seems to be technical, related to storage devices.\
|
| 27 |
+
\ There are entries like NVMe drives, partitions, mount points, and some network\
|
| 28 |
+
\ info. Terms like LVM, disk, partition, mountpoints, network adapters, MAC addresses\
|
| 29 |
+
\ are mentioned. So the main topic is system hardware, specifically storage and\
|
| 30 |
+
\ network configurations.\n\nNow, the task is to generate a sentence that's unrelated.\
|
| 31 |
+
\ So I need to think of a completely different topic. Maybe something unrelated\
|
| 32 |
+
\ to technology, like a general statement about everyday life, nature, or something\
|
| 33 |
+
\ else. Let me think of a few possibilities. \n\nMaybe something like \"The sun\
|
| 34 |
+
\ sets behind the mountains, casting a golden hue over the tranquil lake.\" That's\
|
| 35 |
+
\ about nature and not related to the technical details. Or perhaps a sentence\
|
| 36 |
+
\ about a different subject, like \"She loves reading classic literature in her\
|
| 37 |
+
\ free time.\" That's about a person's hobby. \n\nWait, the user might want a\
|
| 38 |
+
\ sentence that's not even related to the technical aspects. So maybe a sentence\
|
| 39 |
+
\ from a completely different domain. Let me check if the example given by the\
|
| 40 |
+
\ user is correct. The user provided an example response: \"The sun sets behind\
|
| 41 |
+
\ the mountains, casting a golden hue over the tranquil lake.\" That's definitely\
|
| 42 |
+
\ unrelated. \n\nI should make sure that the sentence doesn't have any technical\
|
| 43 |
+
\ terms or references. So avoiding any mention of storage, networks, hardware,\
|
| 44 |
+
\ etc. Maybe something about a different topic like cooking, sports, or daily\
|
| 45 |
+
\ activities. \n\nAnother example could be \"He enjoys playing basketball in the\
|
| 46 |
+
\ evenings with his friends.\" That's unrelated. Or \"The cat purred softly as\
|
| 47 |
+
\ it curled up on the windowsill.\" \n\nI need to pick one that's not connected.\
|
| 48 |
+
\ The key is to avoid any technical terms or references to the topics mentioned\
|
| 49 |
+
\ in the original text. So the sentence should be about something entirely different,\
|
| 50 |
+
\ like a natural phenomenon, a personal activity, or an event. \n\nI think the\
|
| 51 |
+
\ example the user provided is good. Let me confirm that the generated sentence\
|
| 52 |
+
\ doesn't have any connection. Yes, the original document is about storage and\
|
| 53 |
+
\ network hardware, so a sentence about the sunset is unrelated. So the answer\
|
| 54 |
+
\ should be something like that.\n</think>\n\nThe cat purred softly as it curled\
|
| 55 |
+
\ up on the windowsill, watching the last rays of sunlight dance across the carpet."
|
| 56 |
+
- What's the size of the partition named sda3?
|
| 57 |
+
- What's the size of the nvme4n1 partition?
|
| 58 |
+
- source_sentence: nology Gen4 LinkLanes None Name BOSS-S1 Adapter Status None ID
|
| 59 |
+
196-0 Technology Gen3 LinkLanes None Name Express Flash PM1725b 3.2TB SFF Status
|
| 60 |
+
None ID 36-0 Technology Gen3 LinkLanes None Name Express Flash PM1725b 3.2TB SFF
|
| 61 |
+
Status None ID 35-0 Technology Gen3 LinkLanes None Name Express Flash PM1725b
|
| 62 |
+
3.2TB SFF Status None ID 193-0 Technology Gen3 LinkLanes None Name Express Flash
|
| 63 |
+
PM1725b 3.2TB SFF Status None ID None Technology Gen4 LinkLanes None Name None
|
| 64 |
+
Status None ID None Technology Gen4 LinkLanes None Name None Status None ID None
|
| 65 |
+
Technology Gen3 LinkLanes None Name None Status None ID None Technology Gen3 LinkLanes
|
| 66 |
+
None Name None Status None ID None Technology Gen3 LinkLanes None Name None Status
|
| 67 |
+
None ID None Technology Gen3 LinkLanes None Name None Status None ID None Technology
|
| 68 |
+
G
|
| 69 |
+
sentences:
|
| 70 |
+
- '<think>
|
| 71 |
+
|
| 72 |
+
Okay, the user wants me to generate a sentence that''s completely unrelated in
|
| 73 |
+
topic and meaning to the provided document chunk. Let me first understand what
|
| 74 |
+
the document is about.
|
| 75 |
+
|
| 76 |
+
|
| 77 |
+
Looking at the text, there are several lines with entries like "Technology Gen4
|
| 78 |
+
LinkLanes None Name BOSS-S1 Adapter Status None ID 196-0" and similar entries
|
| 79 |
+
for other IDs. It seems like this is some kind of technical data or configuration
|
| 80 |
+
details, possibly related to hardware or storage devices. Terms like "Gen4", "LinkLanes",
|
| 81 |
+
"Express Flash PM1725b", and IDs like 196-0, 36-0, etc., suggest it''s about storage
|
| 82 |
+
controllers, adapters, or similar components in a server or data center environment.
|
| 83 |
+
|
| 84 |
+
|
| 85 |
+
Now, the task is to create a sentence that''s unrelated. So I need to think of
|
| 86 |
+
a topic that''s entirely different. Maybe something from daily life, nature, or
|
| 87 |
+
a completely different field. Let''s brainstorm.
|
| 88 |
+
|
| 89 |
+
|
| 90 |
+
Possible unrelated topics: cooking, sports, art, weather, travel, animals, etc.
|
| 91 |
+
Let''s pick something like cooking. For example, "She baked a chocolate cake for
|
| 92 |
+
her birthday party."
|
| 93 |
+
|
| 94 |
+
|
| 95 |
+
Wait, but I need to make sure it''s not even vaguely related. The original text
|
| 96 |
+
is about technical hardware, so anything not related to that. Maybe something
|
| 97 |
+
about a different subject entirely. How about a sentence about a book? "The author
|
| 98 |
+
wrote a mystery novel that was praised for its intricate plot."
|
| 99 |
+
|
| 100 |
+
|
| 101 |
+
Alternatively, maybe something about a different technology, but that might be
|
| 102 |
+
too close. Wait, the user wants it unrelated. Let me check again. The original
|
| 103 |
+
text is about hardware components, their status, IDs, etc. So a sentence about
|
| 104 |
+
a completely different topic. Maybe something like "The cat slept through the
|
| 105 |
+
entire movie."
|
| 106 |
+
|
| 107 |
+
|
| 108 |
+
But the user might prefer a more elaborate sentence. Let''s think of a sentence
|
| 109 |
+
that''s about a different field, like biology or physics. For example, "In the
|
| 110 |
+
experiment, the scientist observed a unique phenomenon in quantum mechanics."
|
| 111 |
+
|
| 112 |
+
|
| 113 |
+
Alternatively, a sentence about a different technology, but that''s still related.
|
| 114 |
+
Wait, the key is that the sentence should be unrelated in both topic and meaning.
|
| 115 |
+
So maybe something like "The bakery was famous for its delicious pastries and
|
| 116 |
+
fresh bread."
|
| 117 |
+
|
| 118 |
+
|
| 119 |
+
Alternatively, maybe something about a different kind of technology, but not related
|
| 120 |
+
to the hardware mentioned. Wait, the original text is about storage technologies,
|
| 121 |
+
so maybe a sentence about something else, like a different kind of technology.
|
| 122 |
+
But that''s still related. Hmm.
|
| 123 |
+
|
| 124 |
+
|
| 125 |
+
Wait, the user might not mind if it''s a different type of technology, as long
|
| 126 |
+
as it''s not the same topic. But the instruction says "unrelated in topic and
|
| 127 |
+
meaning". So maybe the sentence should be about something entirely different.
|
| 128 |
+
Let''s think of a sentence about a different subject. For example, "The painter
|
| 129 |
+
used vibrant colors to create a breathtaking landscape."
|
| 130 |
+
|
| 131 |
+
|
| 132 |
+
Alternatively, "The librarian organized the new books on the shelf with care."
|
| 133 |
+
|
| 134 |
+
|
| 135 |
+
Alternatively, "The chef prepared a gourmet meal for the dinner party."
|
| 136 |
+
|
| 137 |
+
|
| 138 |
+
I think that''s safe. The original text is about hardware, so a sentence about
|
| 139 |
+
a chef preparing a meal is unrelated. Let me confirm. Yes, that''s a different
|
| 140 |
+
topic and meaning. So that should work.
|
| 141 |
+
|
| 142 |
+
</think>
|
| 143 |
+
|
| 144 |
+
|
| 145 |
+
The librarian carefully arranged the newly arrived books on the shelf, ensuring
|
| 146 |
+
each title was properly categorized for easy access.'
|
| 147 |
+
- What is the status of the device with ID 196-0?
|
| 148 |
+
- What's the maximum speed supported by the memory module?
|
| 149 |
+
- source_sentence: eout 10000 IscsiDev1Con1Port 3260 IscsiDev1Con2VlanId 1 IscsiDev1Con2Retry
|
| 150 |
+
3 IscsiDev1Con2Timeout 10000 IscsiDev1Con2Port 3260 AcPwrRcvryUserDelay 60 LogicalProc
|
| 151 |
+
Enabled ProcVirtualization Enabled IommuSupport Enabled KernelDmaProtection Disabled
|
| 152 |
+
L1StreamHwPrefetcher Enabled L2StreamHwPrefetcher Enabled L1StridePrefetcher Enabled
|
| 153 |
+
L1RegionPrefetcher Enabled L2UpDownPrefetcher Enabled MadtCoreEnumeration Linear
|
| 154 |
+
NumaNodesPerSocket 1 CcxAsNumaDomain Disabled Sme Disabled Snp Disabled Rmp Disabled
|
| 155 |
+
TransparentSme Disabled CpuFeatureErms Disabled CpuFeatureFsrm Disabled CpuFeatureRmss
|
| 156 |
+
Disabled ProcConfigTdp Maximum ProcX2Apic Enabled ProcCcds All CcdCores All ControlledTurbo
|
| 157 |
+
Disabled OptimizerMode Auto EmbSata AhciMode SecurityFreezeLock Enabled WriteCache
|
| 158 |
+
Disabled SataPortA Auto SataPortB Auto S
|
| 159 |
+
sentences:
|
| 160 |
+
- What is the port number for the second iSCSI connection?
|
| 161 |
+
- "<think>\nOkay, the user wants a sentence that's unrelated in topic and meaning\
|
| 162 |
+
\ to the given document chunk. Let me look at the content of the document chunk\
|
| 163 |
+
\ first.\n\nThe text seems to be a series of technical configurations, probably\
|
| 164 |
+
\ related to a server or hardware settings. Terms like IscsiDev1Con1Port, VLAN\
|
| 165 |
+
\ IDs, retry and timeout settings, CPU features, NumaNodesPerSocket, SataPortA,\
|
| 166 |
+
\ etc., all point towards system configuration parameters for something like a\
|
| 167 |
+
\ server or storage device. There's mention of things like KernelDmaProtection,\
|
| 168 |
+
\ L1StreamHwPrefetcher, CpuFeatureErms, and others. It's a technical spec sheet\
|
| 169 |
+
\ or a configuration log.\n\nSo, the task is to generate a sentence that's not\
|
| 170 |
+
\ related to any of these topics. The sentence should be about something entirely\
|
| 171 |
+
\ different. Let me think of a topic that's unrelated. Maybe something like a\
|
| 172 |
+
\ book, a movie, a random fact, or a completely different subject.\n\nFor example,\
|
| 173 |
+
\ \"The Eiffel Tower was completed in 1889 and is one of the most recognizable\
|
| 174 |
+
\ landmarks in Paris.\" That's about architecture and history, which is unrelated\
|
| 175 |
+
\ to the technical server configurations. Alternatively, something about a different\
|
| 176 |
+
\ field like biology, art, or a random event. \n\nI need to make sure that the\
|
| 177 |
+
\ sentence doesn't touch on any of the technical aspects mentioned. Let me check\
|
| 178 |
+
\ again: the original text is about network settings, CPU features, storage devices,\
|
| 179 |
+
\ system configurations, etc. So a sentence about a completely different domain,\
|
| 180 |
+
\ like a historical event, a scientific discovery, a sports event, or something\
|
| 181 |
+
\ else.\n\nAnother example: \"The Great Wall of China is one of the most impressive\
|
| 182 |
+
\ human engineering feats in history.\" That's about history and architecture,\
|
| 183 |
+
\ which is unrelated. Or maybe something like \"Water is essential for human survival\
|
| 184 |
+
\ and plays a key role in many natural processes.\" That's biology and chemistry,\
|
| 185 |
+
\ which is also unrelated.\n\nAlternatively, maybe a sentence about a different\
|
| 186 |
+
\ technology, like space exploration. \"The Mars rover Perseverance was launched\
|
| 187 |
+
\ in 2020 to explore the Martian surface and search for signs of ancient life.\"\
|
| 188 |
+
\ That's about space exploration, which is a different topic.\n\nI need to pick\
|
| 189 |
+
\ one that's definitely unrelated. Let me pick the Eiffel Tower example. It's\
|
| 190 |
+
\ a well-known landmark, not related to server configurations. Alternatively,\
|
| 191 |
+
\ maybe a sentence about a different tech, like a smartphone feature. But that\
|
| 192 |
+
\ might be too close if the original text is about hardware. Wait, the original\
|
| 193 |
+
\ text includes things like SataPortA, which is related to storage, but the Eiffel\
|
| 194 |
+
\ Tower is a different topic. So that's safe. \n\nAlternatively, maybe a sentence\
|
| 195 |
+
\ about a different field, like \"The periodic table of elements was developed\
|
| 196 |
+
\ by Dmitri Mendeleev in the 19th century.\" That's chemistry, which is unrelated.\
|
| 197 |
+
\ \n\nBut the user might prefer something more general. Let me go with the Eiffel\
|
| 198 |
+
\ Tower example. It's a good choice, as it's a well-known fact and not related\
|
| 199 |
+
\ to the technical server configurations mentioned.\n</think>\n\nThe Eiffel Tower,\
|
| 200 |
+
\ completed in 1889, stands as a iconic symbol of France and one of the world's\
|
| 201 |
+
\ most recognizable landmarks."
|
| 202 |
+
- What is the VLAN ID associated with the first PXE device?
|
| 203 |
+
- source_sentence: me0n1 Size 2900.0 G Name nvme2n1 Size 2900.0 G Name nvme1n1 Size
|
| 204 |
+
2900.0 G Tunings VMInstances Count None ContainerInstances Count None Chassis
|
| 205 |
+
ID System.Embedded.1 Manufacturer Dell Inc. Model PowerEdge R6525 PowerState On
|
| 206 |
+
Health Critical Systems ID System.Embedded.1 Manufacturer Dell Inc. PowerState
|
| 207 |
+
On Model AMD EPYC 7663 56-Core Processor Count 2 SKU C9X7T13 SystemType Physical
|
| 208 |
+
BIOS BIOSVersion 2.7.3 FirmwareVersion None Attributes SystemModelName PowerEdge
|
| 209 |
+
R6525 SystemBiosVersion 2.7.3 SystemServiceTag C9X7T13 SystemManufacturer Dell
|
| 210 |
+
Inc. SysMfrContactInfo www.dell.com SystemCpldVersion 1.0.0 UefiComplianceVersion
|
| 211 |
+
2.7 AgesaVersion MilanPI-SP3 1.0.0.8 SmuVersion 0.45.87.0 DxioVersion 45.682 ProcCoreSpeed
|
| 212 |
+
2.00 GHz ProcBusSpeed 16 GT/s Proc1Id 19-1-1 Proc1Brand AMD EPYC 7663 56-Core
|
| 213 |
+
Proces
|
| 214 |
+
sentences:
|
| 215 |
+
- What is the size of the nvme2n1 storage device?
|
| 216 |
+
- What's the capacity of the memory module connected to socket A10?
|
| 217 |
+
- '<think>
|
| 218 |
+
|
| 219 |
+
Okay, the user wants me to generate a sentence that''s unrelated in both topic
|
| 220 |
+
and meaning to the provided document chunk. Let me first understand what the document
|
| 221 |
+
is about.
|
| 222 |
+
|
| 223 |
+
|
| 224 |
+
Looking at the text, it seems to be system information, probably from a server
|
| 225 |
+
or a machine. There''s mention of hardware details like storage sizes (nvme2n1,
|
| 226 |
+
nvme1n1), chassis ID, manufacturer Dell Inc., model PowerEdge R6525, processor
|
| 227 |
+
details, BIOS versions, and other system attributes. It''s technical, related
|
| 228 |
+
to server hardware specs.
|
| 229 |
+
|
| 230 |
+
|
| 231 |
+
So the task is to create a sentence that''s not related. The user probably wants
|
| 232 |
+
something entirely different, not involving technology or servers. Maybe something
|
| 233 |
+
from a different domain, like everyday life, nature, or another field.
|
| 234 |
+
|
| 235 |
+
|
| 236 |
+
I need to make sure the sentence doesn''t mention anything related to servers,
|
| 237 |
+
hardware, or IT. Let me think of a random topic. Maybe something about cooking,
|
| 238 |
+
sports, or a general statement. For example, "The sun sets beautifully over the
|
| 239 |
+
horizon, casting a golden hue across the sky." That''s unrelated. Alternatively,
|
| 240 |
+
"She enjoys reading novels in her free time." But maybe the user wants something
|
| 241 |
+
more specific. Wait, the example given in the initial prompt was a sentence about
|
| 242 |
+
a cat, which is completely unrelated. So maybe a simple, everyday sentence that''s
|
| 243 |
+
not technical.
|
| 244 |
+
|
| 245 |
+
|
| 246 |
+
I should check if the generated sentence is indeed unrelated. The original text
|
| 247 |
+
is all about server hardware, so any sentence that''s not related to that. Maybe
|
| 248 |
+
something like "The cat slept through the entire movie." That''s a common example,
|
| 249 |
+
but maybe the user wants something more creative. Alternatively, "I love the sound
|
| 250 |
+
of rain on the windowsill." That''s a good one. It''s simple, not technical, and
|
| 251 |
+
unrelated to the server info. Let me make sure there''s no hidden connection.
|
| 252 |
+
The original text mentions "Dell Inc." but the sentence doesn''t reference that.
|
| 253 |
+
So yes, it''s unrelated. I think that''s a safe choice.
|
| 254 |
+
|
| 255 |
+
</think>
|
| 256 |
+
|
| 257 |
+
|
| 258 |
+
"The cat slept through the entire movie, undisturbed by the hum of the refrigerator."'
|
| 259 |
+
- source_sentence: logyCapable Yes VirtualizationTechnologyEnabled Yes CurrentClockSpeedMhz
|
| 260 |
+
2000 InstructionSet x86-64 Manufacturer AMD MaxSpeedMHz 3900 Model AMD EPYC 7663
|
| 261 |
+
56-Core Processor ProcessorArchitecture x86 TotalCores 56 TotalThreads 112 Socket
|
| 262 |
+
CPU.Socket.2 Characteristics CPUFamily AMDZenProcessorFamily HyperThreadingCapable
|
| 263 |
+
Yes HyperThreadingEnabled Yes Id CPU.Socket.2 TurboModeCapable Yes TurboModeEnabled
|
| 264 |
+
Yes VirtualizationTechnologyCapable Yes VirtualizationTechnologyEnabled Yes CurrentClockSpeedMhz
|
| 265 |
+
2000 Memory Members ID DIMM.Socket.A2 RankCount 2 Status Health OK State Enabled
|
| 266 |
+
Capacity 65536 AllowedSpeedMHz 3200 OperatingSpeedMhz 2933 MemoryModuleType None
|
| 267 |
+
Manufacturer Hynix Semiconductor ID DIMM.Socket.A9 RankCount 2 Status Health OK
|
| 268 |
+
State Enabled Capacity 65536 AllowedSpeedMHz 3200 Operatin
|
| 269 |
+
sentences:
|
| 270 |
+
- What's the maximum speed this processor can handle?
|
| 271 |
+
- "<think>\nOkay, the user wants me to generate a sentence that's unrelated in both\
|
| 272 |
+
\ topic and meaning to the given document chunk. Let me first understand what\
|
| 273 |
+
\ the document is about. \n\nLooking at the data, it seems to be technical information\
|
| 274 |
+
\ about a CPU and memory. There are details like the processor model (AMD EPYC\
|
| 275 |
+
\ 7663), core counts, clock speeds, virtualization capabilities, memory modules\
|
| 276 |
+
\ with manufacturers like Hynix, and various technical specs. So the topic is\
|
| 277 |
+
\ hardware specifications, specifically for a server or high-performance computing\
|
| 278 |
+
\ system.\n\nNow, the task is to create a sentence that's completely unrelated.\
|
| 279 |
+
\ That means I need to think of a different subject. Maybe something everyday,\
|
| 280 |
+
\ like a hobby, a personal activity, or something from a different field. Let\
|
| 281 |
+
\ me brainstorm.\n\nPossible unrelated topics: cooking, sports, travel, literature,\
|
| 282 |
+
\ technology but not the same as the given data. Wait, but the user said \"unrelated\
|
| 283 |
+
\ in topic and meaning,\" so even if it's technology, but not the same as the\
|
| 284 |
+
\ given data. But maybe even better to pick something entirely different. For\
|
| 285 |
+
\ example, a sentence about a book, a movie, a personal experience, or something\
|
| 286 |
+
\ like that.\n\nLet me check the example response the assistant gave earlier.\
|
| 287 |
+
\ The example was about a book, \"The Great Gatsby,\" so maybe that's a safe choice.\
|
| 288 |
+
\ But I need to make sure that the sentence is not related. So, something like\
|
| 289 |
+
\ \"I enjoy reading classic literature, such as 'The Great Gatsby' by F. Scott\
|
| 290 |
+
\ Fitzgerald.\" That's unrelated to the technical specs provided.\n\nAlternatively,\
|
| 291 |
+
\ maybe a sentence about a different topic, like sports: \"My favorite sport is\
|
| 292 |
+
\ basketball, which I play every weekend with my friends.\" But I need to make\
|
| 293 |
+
\ sure that it's not related. The original data is about hardware, so anything\
|
| 294 |
+
\ not related to hardware, computing, or similar tech specs.\n\nAnother example:\
|
| 295 |
+
\ \"I love hiking in the mountains during the spring season.\" That's unrelated.\n\
|
| 296 |
+
\nBut the user might want a sentence that's not just about a different topic but\
|
| 297 |
+
\ also not related in meaning. So maybe something that's a complete non-sequitur.\
|
| 298 |
+
\ For example, \"The cat slept through the entire movie.\" But that's a bit random.\
|
| 299 |
+
\ However, the user might prefer something more coherent. Maybe a sentence about\
|
| 300 |
+
\ a different subject, like a personal experience, a random fact, etc.\n\nSo,\
|
| 301 |
+
\ to ensure it's unrelated, I need to pick a sentence that's about something entirely\
|
| 302 |
+
\ different. Let me think of a sentence about a book, a person, or an event. For\
|
| 303 |
+
\ example: \"She won the Nobel Prize in Literature for her poetic works.\" That's\
|
| 304 |
+
\ unrelated to the technical specs given.\n\nAlternatively, \"The weather was\
|
| 305 |
+
\ perfect for the picnic we had yesterday.\" That's about weather, which is unrelated.\n\
|
| 306 |
+
\nI should check if any of these examples might have any indirect relation. For\
|
| 307 |
+
\ instance, if the original data mentions \"Memory\" and \"Processor,\" maybe\
|
| 308 |
+
\ something about memory in a different context, but I think that's still unrelated.\
|
| 309 |
+
\ The key is that the sentence should not be about hardware specs, CPUs, memory,\
|
| 310 |
+
\ or related topics.\n\nSo, the final answer would be a sentence that's about\
|
| 311 |
+
\ a different subject, like a book, a personal activity, a random event, etc.\n\
|
| 312 |
+
</think>\n\nThe cat curled up on the windowsill, watching the sunset paint the\
|
| 313 |
+
\ sky in hues of orange and pink."
|
| 314 |
+
- What's the server model for this system?
|
| 315 |
pipeline_tag: sentence-similarity
|
| 316 |
library_name: sentence-transformers
|
| 317 |
metrics:
|
|
|
|
| 320 |
- name: SentenceTransformer based on sentence-transformers/all-MiniLM-L6-v2
|
| 321 |
results:
|
| 322 |
- task:
|
| 323 |
+
type: rich-loss
|
| 324 |
+
name: Rich Loss
|
| 325 |
dataset:
|
| 326 |
+
name: val triplet
|
| 327 |
+
type: val-triplet
|
| 328 |
metrics:
|
| 329 |
- type: cosine_accuracy
|
| 330 |
+
value: 0.0
|
| 331 |
name: Cosine Accuracy
|
| 332 |
---
|
| 333 |
|
|
|
|
| 381 |
model = SentenceTransformer("sentence_transformers_model_id")
|
| 382 |
# Run inference
|
| 383 |
sentences = [
|
| 384 |
+
'logyCapable Yes VirtualizationTechnologyEnabled Yes CurrentClockSpeedMhz 2000 InstructionSet x86-64 Manufacturer AMD MaxSpeedMHz 3900 Model AMD EPYC 7663 56-Core Processor ProcessorArchitecture x86 TotalCores 56 TotalThreads 112 Socket CPU.Socket.2 Characteristics CPUFamily AMDZenProcessorFamily HyperThreadingCapable Yes HyperThreadingEnabled Yes Id CPU.Socket.2 TurboModeCapable Yes TurboModeEnabled Yes VirtualizationTechnologyCapable Yes VirtualizationTechnologyEnabled Yes CurrentClockSpeedMhz 2000 Memory Members ID DIMM.Socket.A2 RankCount 2 Status Health OK State Enabled Capacity 65536 AllowedSpeedMHz 3200 OperatingSpeedMhz 2933 MemoryModuleType None Manufacturer Hynix Semiconductor ID DIMM.Socket.A9 RankCount 2 Status Health OK State Enabled Capacity 65536 AllowedSpeedMHz 3200 Operatin',
|
| 385 |
+
"What's the maximum speed this processor can handle?",
|
| 386 |
+
'<think>\nOkay, the user wants me to generate a sentence that\'s unrelated in both topic and meaning to the given document chunk. Let me first understand what the document is about. \n\nLooking at the data, it seems to be technical information about a CPU and memory. There are details like the processor model (AMD EPYC 7663), core counts, clock speeds, virtualization capabilities, memory modules with manufacturers like Hynix, and various technical specs. So the topic is hardware specifications, specifically for a server or high-performance computing system.\n\nNow, the task is to create a sentence that\'s completely unrelated. That means I need to think of a different subject. Maybe something everyday, like a hobby, a personal activity, or something from a different field. Let me brainstorm.\n\nPossible unrelated topics: cooking, sports, travel, literature, technology but not the same as the given data. Wait, but the user said "unrelated in topic and meaning," so even if it\'s technology, but not the same as the given data. But maybe even better to pick something entirely different. For example, a sentence about a book, a movie, a personal experience, or something like that.\n\nLet me check the example response the assistant gave earlier. The example was about a book, "The Great Gatsby," so maybe that\'s a safe choice. But I need to make sure that the sentence is not related. So, something like "I enjoy reading classic literature, such as \'The Great Gatsby\' by F. Scott Fitzgerald." That\'s unrelated to the technical specs provided.\n\nAlternatively, maybe a sentence about a different topic, like sports: "My favorite sport is basketball, which I play every weekend with my friends." But I need to make sure that it\'s not related. The original data is about hardware, so anything not related to hardware, computing, or similar tech specs.\n\nAnother example: "I love hiking in the mountains during the spring season." 
That\'s unrelated.\n\nBut the user might want a sentence that\'s not just about a different topic but also not related in meaning. So maybe something that\'s a complete non-sequitur. For example, "The cat slept through the entire movie." But that\'s a bit random. However, the user might prefer something more coherent. Maybe a sentence about a different subject, like a personal experience, a random fact, etc.\n\nSo, to ensure it\'s unrelated, I need to pick a sentence that\'s about something entirely different. Let me think of a sentence about a book, a person, or an event. For example: "She won the Nobel Prize in Literature for her poetic works." That\'s unrelated to the technical specs given.\n\nAlternatively, "The weather was perfect for the picnic we had yesterday." That\'s about weather, which is unrelated.\n\nI should check if any of these examples might have any indirect relation. For instance, if the original data mentions "Memory" and "Processor," maybe something about memory in a different context, but I think that\'s still unrelated. The key is that the sentence should not be about hardware specs, CPUs, memory, or related topics.\n\nSo, the final answer would be a sentence that\'s about a different subject, like a book, a personal activity, a random event, etc.\n</think>\n\nThe cat curled up on the windowsill, watching the sunset paint the sky in hues of orange and pink.',
|
| 387 |
]
|
| 388 |
embeddings = model.encode(sentences)
|
| 389 |
print(embeddings.shape)
|
|
|
|
| 392 |
# Get the similarity scores for the embeddings
|
| 393 |
similarities = model.similarity(embeddings, embeddings)
|
| 394 |
print(similarities)
|
| 395 |
+
# tensor([[ 1.0000, -0.0038, 0.7700],
|
| 396 |
+
# [-0.0038, 1.0000, 0.1311],
|
| 397 |
+
# [ 0.7700, 0.1311, 1.0000]])
|
| 398 |
```
|
| 399 |
|
| 400 |
<!--
|
|
|
|
| 425 |
|
| 426 |
### Metrics
|
| 427 |
|
| 428 |
+
#### Rich Loss
|
| 429 |
|
| 430 |
+
* Dataset: `val-triplet`
|
| 431 |
+
* Evaluated with <code>__main__.RichLossEvaluator</code>
|
| 432 |
|
| 433 |
| Metric | Value |
|
| 434 |
|:--------------------|:--------|
|
| 435 |
+
| **cosine_accuracy** | **0.0** |
|
| 436 |
|
| 437 |
<!--
|
| 438 |
## Bias, Risks and Limitations
|
|
|
|
| 452 |
|
| 453 |
#### Unnamed Dataset
|
| 454 |
|
| 455 |
+
* Size: 254 training samples
|
| 456 |
* Columns: <code>sentence_0</code>, <code>sentence_1</code>, and <code>sentence_2</code>
|
| 457 |
+
* Approximate statistics based on the first 254 samples:
|
| 458 |
+
| | sentence_0 | sentence_1 | sentence_2 |
|
| 459 |
+
|:--------|:------------------------------------------------------------------------------------|:-------------------|:-------------------------------------------------------------------------------------|
|
| 460 |
+
| type | string | dict | string |
|
| 461 |
+
| details | <ul><li>min: 14 tokens</li><li>mean: 237.9 tokens</li><li>max: 256 tokens</li></ul> | <ul><li></li></ul> | <ul><li>min: 255 tokens</li><li>mean: 256.0 tokens</li><li>max: 256 tokens</li></ul> |
|
| 462 |
* Samples:
|
| 463 |
+
| sentence_0 | sentence_1 | sentence_2 |
|
| 464 |
+
|:----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|:------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|:------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
|
| 465 |
+
| <code>used 4 free 1 VMs None Disk name sdb size 223.5G type disk mountpoints None name sdb1 size 600M type part mountpoints None name sdb2 size 1G type part mountpoints None name sdb3 size 221.9G type part mountpoints None name ubuntu--vg-ubuntu--lv size 2.9T type lvm mountpoints / name rhel-swap size 4G type lvm mountpoints None name rhel-home size 147.9G type lvm mountpoints None name rhel-root size 70G type lvm mountpoints None name nvme4n1 size 2.9T type disk mountpoints None name nvme4n1p1 size 2.9T type part mountpoints None name nvme3n1 size 2.9T type disk mountpoints None name nvme0n1 size 2.9T type disk mountpoints None name nvme2n1 size 2.9T type disk mountpoints None name nvme1n1 size 2.9T type disk mountpoints None name nvme1n1p1 size 2.9T type part mountpoints None name nvme0n1p1 s</code> | <code>{'question_1': "What's the size of the disk named sdb?", 'question_10': "What's the disk capacity for sdb?", 'question_11': 'How big is the sdb disk in GB?', 'question_12': "What's the size of the sdb disk in GB?", 'question_13': 'What is the size of the sdb disk in gigabytes?', 'question_14': "What's the storage size of the sdb disk in GB?", 'question_15': 'How much storage does the sdb disk have?', 'question_16': 'What is the disk size of sdb in gigabytes?', 'question_17': "What's the size of the sdb disk in terms of gigabytes?", 'question_18': 'How large is the sdb disk in gigabytes?', 'question_19': "What's the disk size for sdb in gigabytes?", 'question_2': 'Can you tell me how big the sdb disk is?', 'question_20': 'What is the size of the sdb disk in gigabytes?', 'question_3': 'What is the capacity of the sdb disk?', 'question_4': 'How much space does the sdb disk take up?', 'question_5': "What's the storage size of the sdb disk?", 'question_6': "What's the size of the sdb disk in gigabytes?", 'question_7': 'How large is the sdb disk?', 'question_8': "What's the total size of the sdb disk?", 'question_9': 'What is the disk size for sdb?'}</code> | 
<code><think><br>Okay, the user wants me to generate a sentence that's unrelated in both topic and meaning to the provided document chunk. Let me first understand what the document chunk is about.<br><br>Looking at the content, it seems to be a list of disk partitions and volumes on a system. There are entries like 'sdb', 'sdb1', 'sdb2', 'sdb3', then some LVM volumes like 'ubuntu--vg-ubuntu--lv', 'rhel-swap', 'rhel-home', 'rhel-root', and several NVMe disks with names like 'nvme4n1', 'nvme3n1', etc. Each entry has a size, type, and sometimes mountpoints. So the main topic here is about disk partitions, storage configuration, and Linux system setup.<br><br>Now, I need to create a sentence that's completely unrelated. The key is to avoid any mention of computers, storage, Linux, or related terms. Maybe something from a different domain, like cooking, sports, or everyday activities.<br><br>Let me think of some examples. Maybe something like "The cat sat on the windowsill while the sun was setting." That's a simple ...</code> |
|
| 466 |
+
| <code>wn Device SataPortHCapacity N/A SetBootOrderEn Optical.iDRACVirtual.1-1,Disk.Bay.4:Enclosure.Internal.0-1,NIC.PxeDevice.1-1,Floppy.iDRACVirtual.1-1,AHCI.Slot.2-2 IscsiDev1Con1Lun 0 IscsiDev1Con2Lun 0 CurrentEmbVideoState Enabled AesNi Enabled TpmInfo No TPM present SysMemSize 2048 GB SysMemType ECC DDR4 SysMemSpeed 2933 MT/s SysMemVolt 1.20 V VideoMem 16 MB CpuMinSevAsid 1 Proc1NumCores 56 Proc2NumCores 56 PxeDev1VlanId 1 PxeDev2VlanId 1 PxeDev3VlanId 1 PxeDev4VlanId 1 PxeDev5VlanId 1 PxeDev6VlanId 1 PxeDev7VlanId 1 PxeDev8VlanId 1 PxeDev9VlanId 1 PxeDev10VlanId 1 PxeDev11VlanId 1 PxeDev12VlanId 1 PxeDev13VlanId 1 PxeDev14VlanId 1 PxeDev15VlanId 1 PxeDev16VlanId 1 HttpDev1VlanId 1 HttpDev2VlanId 1 HttpDev3VlanId 1 HttpDev4VlanId 1 IscsiDev1Con1VlanId 1 IscsiDev1Con1Retry 3 IscsiDev1Con1Tim</code> | <code>{'question_1': 'What is the total memory capacity of the system?', 'question_10': "What's the system's RAM capacity?", 'question_11': 'How much memory does the machine have?', 'question_12': "What is the system's memory size?", 'question_13': "What's the memory capacity of the system?", 'question_14': 'How much memory is installed in the system?', 'question_15': "What's the total RAM on the system?", 'question_16': "What is the system's total memory?", 'question_17': 'How much memory is present in the system?', 'question_18': "What's the system's memory capacity?", 'question_19': "What is the size of the system's memory?", 'question_2': 'How much RAM does the system have available?', 'question_20': 'How much memory is available on the system?', 'question_3': "What's the memory size of the server?", 'question_4': "What's the total memory installed in the machine?", 'question_5': 'How much memory is allocated to the system?', 'question_6': "What's the system's memory capacity?", 'question_7': 'What is the installed RAM size?', 'question_8': "How large is the system's memory?", 'question_9': "What's the total memory in the server?"}</code> | 
<code><think><br>Okay, the user wants me to generate a sentence that's unrelated in topic and meaning to the given document chunk. Let me first understand what the document chunk is about. It looks like a bunch of technical terms related to a server or computer hardware configuration. There are things like SataPortHCapacity, BootOrderEn, Optical.iDRACVirtual, IscsiDev1Con1Lun 0, SysMemSize 2048 GB, CPU cores, VLAN IDs, etc. So it's probably a system configuration or hardware specs.<br><br>Now, I need to create a sentence that's completely unrelated. The challenge is to avoid any technical terms or concepts related to computers, servers, hardware, networking, etc. Let me think of a different topic. Maybe something about food, travel, sports, or daily activities. <br><br>How about something like a recipe? Or a description of a natural phenomenon? Or maybe a sentence about a different subject entirely. Let me check if the example given by the user is something like that. The example they provided was "The qui...</code> |
|
| 467 |
+
| <code>OVSupport Disabled SMTControl Enabled L1StridePrefetcher Enabled L1RegionPrefetcher Enabled LocalApicMode Enabled CorePerformanceBoost Disabled FmaxBoostLimitControl None PowerProfile PerfPerWattOptimizedOs MemoryInterleaving Auto MemoryRefreshRate 1x ProcessorC-States Enabled 3DV-Cache None LastLevelCacheAsNUMANode Disabled NumaNodesPerSocket 1 DeterminismSlider PowerDeterminism MaxMemoryClockSpeed MaxPerf L1StreamHwPrefetcher Enabled L2StreamHWprefetcher Enabled L2UpDownPrefetcher Enabled IOMMUSupport Enabled XGMILinkSpeed None DataFabricCState None DeterminismControl None InfinityFabricPstate None xGMIForceLinkWidth None xGMIMaxLinkWidth None AlgorithmPerformanceBoostDisable Disabled VirtualizationSupport Enabled CPU Architecture x86-64 Socket(s) 2 CPU(s) 224 Thread(s)PerCore 2 Core(s)P</code> | <code>{'question_1': 'What is the maximum performance setting for the CPU?', 'question_10': "How does the CPU's clock speed work when it's at maximum performance?", 'question_11': 'What is the maximum clock speed achievable by the CPU?', 'question_12': 'What is the highest clock speed the CPU can reach?', 'question_13': "What's the top clock speed the CPU can achieve?", 'question_14': 'What is the maximum performance clock speed for the CPU?', 'question_15': "How fast can the CPU go when it's at maximum performance?", 'question_16': "What is the CPU's top clock speed under maximum performance?", 'question_17': "What's the maximum clock speed the CPU can go to?", 'question_18': 'What is the highest clock speed the CPU can reach during maximum performance?', 'question_19': 'What is the top clock speed the CPU can achieve at maximum performance?', 'question_2': 'Can you tell me the highest clock speed the processor can reach?', 'question_20': "What's the maximum clock speed the CPU can hit when running at full performance?", 'question_3': "What's the maximum performance level the CPU can achieve?", 'question_4': "What's the top clock speed the CPU can go to?", 
'question_5': 'What is the highest clock speed the processor can hit?', 'question_6': "How fast can the CPU's clock speed go when at maximum performance?", 'question_7': 'What is the maximum clock speed the processor can reach?', 'question_8': 'What is the maximum performance clock speed for this CPU?', 'question_9': "What's the top speed the CPU can go to under maximum performance?"}</code> | <code><think><br>Okay, the user wants me to generate a sentence that's unrelated in topic and meaning to the given document chunk. Let me first understand what the document is about. The text seems to be a list of technical specifications related to a CPU or processor, like various features, settings, and configurations. Terms like "OVSupport", "SMTControl", "L1StridePrefetcher", "MemoryInterleaving", "PowerProfile", "CPU Architecture", "Socket(s)", "CPU(s)", "Thread(s)PerCore", etc., all point to hardware specifications and system settings.<br><br>Now, I need to create a sentence that's completely unrelated. The key here is that the new sentence should not relate to any of these topics. So, I should think of a completely different subject. Maybe something from everyday life, nature, or another field like sports, art, or technology but not related to CPUs or hardware.<br><br>Let me brainstorm. If I think about something like a simple sentence about a person doing an activity, or a natural phenomenon. For e...</code> |
|
| 468 |
+
* Loss: <code>__main__.WandbLoggingTripletLoss</code> with these parameters:
|
| 469 |
```json
|
| 470 |
{
|
| 471 |
"distance_metric": "TripletDistanceMetric.EUCLIDEAN",
|
|
|
|
| 480 |
- `per_device_train_batch_size`: 16
|
| 481 |
- `per_device_eval_batch_size`: 16
|
| 482 |
- `num_train_epochs`: 1
|
| 483 |
+
- `disable_tqdm`: True
|
| 484 |
- `multi_dataset_batch_sampler`: round_robin
|
| 485 |
|
| 486 |
#### All Hyperparameters
|
|
|
|
| 540 |
- `dataloader_num_workers`: 0
|
| 541 |
- `dataloader_prefetch_factor`: None
|
| 542 |
- `past_index`: -1
|
| 543 |
+
- `disable_tqdm`: True
|
| 544 |
- `remove_unused_columns`: True
|
| 545 |
- `label_names`: None
|
| 546 |
- `load_best_model_at_end`: False
|
|
|
|
| 607 |
</details>
|
| 608 |
|
| 609 |
### Training Logs
|
| 610 |
+
| Epoch | Step | val-triplet_cosine_accuracy |
|
| 611 |
+
|:-----:|:----:|:---------------------------:|
|
| 612 |
+
| 0.625 | 10 | 0.0 |
|
| 613 |
+
| 1.0 | 16 | 0.0 |
|
| 614 |
|
| 615 |
|
| 616 |
### Framework Versions
|
|
|
|
| 639 |
}
|
| 640 |
```
|
| 641 |
|
| 642 |
+
#### WandbLoggingTripletLoss
|
| 643 |
```bibtex
|
| 644 |
@misc{hermans2017defense,
|
| 645 |
title={In Defense of the Triplet Loss for Person Re-Identification},
|
fine_tuned_sbert_triplet/eval/triplet_evaluation_val-triplet-eval_results.csv
CHANGED
|
@@ -1,2 +1,5 @@
|
|
| 1 |
epoch,steps,accuracy_cosine
|
| 2 |
1.0,1,1.0
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
epoch,steps,accuracy_cosine
|
| 2 |
1.0,1,1.0
|
| 3 |
+
1.0,20,0.006289307959377766
|
| 4 |
+
1.0,20,0.006289307959377766
|
| 5 |
+
1.0,16,0.0
|
fine_tuned_sbert_triplet/eval/triplet_evaluation_val-triplet_results.csv
ADDED
|
@@ -0,0 +1,2 @@
|
|
|
|
|
|
|
|
|
|
| 1 |
+
epoch,steps,accuracy_cosine
|
| 2 |
+
1.0,16,0.0
|
fine_tuned_sbert_triplet/model.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 90864192
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:d777f4b8d6f30896478b2bcf5cd56d480109b3a77fe4d4e4f1714f2414b936c4
|
| 3 |
size 90864192
|
train.py
CHANGED
|
@@ -1,34 +1,39 @@
|
|
|
|
|
| 1 |
from sentence_transformers import SentenceTransformer, losses, InputExample, evaluation
|
| 2 |
from torch.utils.data import DataLoader
|
| 3 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 4 |
def main():
|
| 5 |
model_name = 'all-MiniLM-L6-v2'
|
| 6 |
model = SentenceTransformer(model_name)
|
| 7 |
|
| 8 |
-
#
|
| 9 |
-
train_examples =
|
| 10 |
-
InputExample(texts=["A man is playing a guitar", "A person is playing a guitar", "A woman is reading a book"]),
|
| 11 |
-
InputExample(texts=["A dog is running in the park", "A dog runs in the park", "A cat is sleeping on the couch"]),
|
| 12 |
-
InputExample(texts=["The car is fast", "A fast car", "A slow bicycle"]),
|
| 13 |
-
InputExample(texts=["He is eating pizza", "He eats pizza", "She is drinking water"]),
|
| 14 |
-
InputExample(texts=["The kids are playing outside", "Children are playing outdoors", "Adults are working inside"]),
|
| 15 |
-
]
|
| 16 |
train_dataloader = DataLoader(train_examples, shuffle=True, batch_size=16)
|
| 17 |
train_loss = losses.TripletLoss(model=model)
|
| 18 |
|
| 19 |
-
#
|
| 20 |
-
val_examples =
|
| 21 |
-
InputExample(texts=["A woman is cooking food", "A lady is preparing dinner", "A man is playing football"]),
|
| 22 |
-
InputExample(texts=["A boy is riding a bike", "A child rides a bicycle", "An old man is reading a newspaper"]),
|
| 23 |
-
InputExample(texts=["The plane is flying", "An aircraft is in the air", "A boat is sailing on the water"]),
|
| 24 |
-
]
|
| 25 |
val_dataloader = DataLoader(val_examples, shuffle=False, batch_size=16)
|
| 26 |
|
| 27 |
-
#
|
|
|
|
|
|
|
|
|
|
|
|
|
| 28 |
evaluator = evaluation.TripletEvaluator(
|
| 29 |
-
anchors=
|
| 30 |
-
positives=
|
| 31 |
-
negatives=
|
| 32 |
name='val-triplet-eval',
|
| 33 |
show_progress_bar=True
|
| 34 |
)
|
|
@@ -36,13 +41,13 @@ def main():
|
|
| 36 |
num_epochs = 1
|
| 37 |
warmup_steps = int(len(train_dataloader) * num_epochs * 0.1)
|
| 38 |
|
| 39 |
-
# Train
|
| 40 |
model.fit(
|
| 41 |
train_objectives=[(train_dataloader, train_loss)],
|
| 42 |
epochs=num_epochs,
|
| 43 |
warmup_steps=warmup_steps,
|
| 44 |
evaluator=evaluator,
|
| 45 |
-
evaluation_steps=10,
|
| 46 |
output_path='fine_tuned_sbert_triplet',
|
| 47 |
show_progress_bar=True
|
| 48 |
)
|
|
|
|
| 1 |
+
import json
|
| 2 |
from sentence_transformers import SentenceTransformer, losses, InputExample, evaluation
|
| 3 |
from torch.utils.data import DataLoader
|
| 4 |
|
| 5 |
+
def load_triplets_from_jsonl(file_path):
|
| 6 |
+
examples = []
|
| 7 |
+
with open(file_path, 'r', encoding='utf-8') as f:
|
| 8 |
+
for line in f:
|
| 9 |
+
data = json.loads(line)
|
| 10 |
+
examples.append(
|
| 11 |
+
InputExample(texts=[data['anchor'], data['positive'], data['negative']])
|
| 12 |
+
)
|
| 13 |
+
return examples
|
| 14 |
+
|
| 15 |
def main():
    """Fine-tune an SBERT model with triplet loss on JSONL triplet files.

    Loads train/val triplets, trains for one epoch with 10% linear warmup,
    runs a TripletEvaluator every 10 steps, and saves the model to
    'fine_tuned_sbert_triplet'.
    """
    model_name = 'all-MiniLM-L6-v2'
    model = SentenceTransformer(model_name)

    # Load training triplets from JSONL
    train_examples = load_triplets_from_jsonl('train_triplets.jsonl')
    train_dataloader = DataLoader(train_examples, shuffle=True, batch_size=16)
    train_loss = losses.TripletLoss(model=model)

    # Load validation triplets from JSONL. The evaluator consumes the raw
    # sentence lists directly, so no validation DataLoader is needed
    # (the original built one and never used it).
    val_examples = load_triplets_from_jsonl('val_triplets.jsonl')

    # Extract anchors, positives, negatives for evaluator from val_examples
    anchors = [ex.texts[0] for ex in val_examples]
    positives = [ex.texts[1] for ex in val_examples]
    negatives = [ex.texts[2] for ex in val_examples]

    evaluator = evaluation.TripletEvaluator(
        anchors=anchors,
        positives=positives,
        negatives=negatives,
        name='val-triplet-eval',
        show_progress_bar=True
    )

    num_epochs = 1
    # Linear warmup over the first 10% of training steps.
    warmup_steps = int(len(train_dataloader) * num_epochs * 0.1)

    # Train the model
    model.fit(
        train_objectives=[(train_dataloader, train_loss)],
        epochs=num_epochs,
        warmup_steps=warmup_steps,
        evaluator=evaluator,
        evaluation_steps=10,
        output_path='fine_tuned_sbert_triplet',
        show_progress_bar=True
    )
|
train_example.py
ADDED
|
@@ -0,0 +1,54 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from sentence_transformers import SentenceTransformer, losses, InputExample, evaluation
|
| 2 |
+
from torch.utils.data import DataLoader
|
| 3 |
+
|
| 4 |
+
def main():
    """Toy end-to-end example: fine-tune SBERT with triplet loss on a
    handful of hard-coded triplets and save the resulting model.
    """
    model_name = 'all-MiniLM-L6-v2'
    model = SentenceTransformer(model_name)

    # Training triplets (anchor, positive, negative)
    train_examples = [
        InputExample(texts=["A man is playing a guitar", "A person is playing a guitar", "A woman is reading a book"]),
        InputExample(texts=["A dog is running in the park", "A dog runs in the park", "A cat is sleeping on the couch"]),
        InputExample(texts=["The car is fast", "A fast car", "A slow bicycle"]),
        InputExample(texts=["He is eating pizza", "He eats pizza", "She is drinking water"]),
        InputExample(texts=["The kids are playing outside", "Children are playing outdoors", "Adults are working inside"]),
    ]
    train_dataloader = DataLoader(train_examples, shuffle=True, batch_size=16)
    train_loss = losses.TripletLoss(model=model)

    # Validation triplets for evaluator
    val_examples = [
        InputExample(texts=["A woman is cooking food", "A lady is preparing dinner", "A man is playing football"]),
        InputExample(texts=["A boy is riding a bike", "A child rides a bicycle", "An old man is reading a newspaper"]),
        InputExample(texts=["The plane is flying", "An aircraft is in the air", "A boat is sailing on the water"]),
    ]

    # Create a TripletEvaluator. Derive the three columns from val_examples
    # instead of repeating the sentence literals verbatim (the original
    # duplicated them, letting the two copies drift apart when edited).
    evaluator = evaluation.TripletEvaluator(
        anchors=[ex.texts[0] for ex in val_examples],
        positives=[ex.texts[1] for ex in val_examples],
        negatives=[ex.texts[2] for ex in val_examples],
        name='val-triplet-eval',
        show_progress_bar=True
    )

    num_epochs = 1
    # Linear warmup over the first 10% of training steps.
    warmup_steps = int(len(train_dataloader) * num_epochs * 0.1)

    # Train with evaluator
    model.fit(
        train_objectives=[(train_dataloader, train_loss)],
        epochs=num_epochs,
        warmup_steps=warmup_steps,
        evaluator=evaluator,
        evaluation_steps=10,  # evaluate every 10 steps
        output_path='fine_tuned_sbert_triplet',
        show_progress_bar=True
    )

    print("Model fine-tuned and saved at 'fine_tuned_sbert_triplet'")
|
| 51 |
+
|
| 52 |
+
if __name__ == "__main__":
|
| 53 |
+
main()
|
| 54 |
+
|
train_triplets.jsonl
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
train_v1.py
ADDED
|
@@ -0,0 +1,76 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import json
|
| 2 |
+
from sentence_transformers import SentenceTransformer, losses, InputExample, evaluation
|
| 3 |
+
from torch.utils.data import DataLoader
|
| 4 |
+
from rich.console import Console
|
| 5 |
+
from rich.table import Table
|
| 6 |
+
|
| 7 |
+
console = Console()
|
| 8 |
+
|
| 9 |
+
def load_triplets_from_jsonl(file_path):
    """Read a JSONL file and wrap each anchor/positive/negative record
    in a sentence_transformers.InputExample.
    """
    with open(file_path, 'r', encoding='utf-8') as handle:
        return [
            InputExample(texts=[record['anchor'], record['positive'], record['negative']])
            for record in map(json.loads, handle)
        ]
|
| 18 |
+
|
| 19 |
+
def main():
    """Fine-tune SBERT on JSONL triplets and print a rich summary table.

    Trains for one epoch with 10% warmup, evaluates every 10 steps, saves
    to 'fine_tuned_sbert_triplet', then renders a summary of the run.
    """
    model_name = 'all-MiniLM-L6-v2'
    model = SentenceTransformer(model_name)

    # Load training triplets from JSONL
    train_examples = load_triplets_from_jsonl('train_triplets.jsonl')
    train_dataloader = DataLoader(train_examples, shuffle=True, batch_size=16)
    train_loss = losses.TripletLoss(model=model)

    # Load validation triplets from JSONL. They feed the TripletEvaluator
    # directly; the original also built an unused validation DataLoader,
    # which is dropped here.
    val_examples = load_triplets_from_jsonl('val_triplets.jsonl')

    # Extract anchors, positives, negatives for evaluator from val_examples
    anchors = [ex.texts[0] for ex in val_examples]
    positives = [ex.texts[1] for ex in val_examples]
    negatives = [ex.texts[2] for ex in val_examples]

    evaluator = evaluation.TripletEvaluator(
        anchors=anchors,
        positives=positives,
        negatives=negatives,
        name='val-triplet-eval',
        show_progress_bar=True
    )

    num_epochs = 1
    # Linear warmup over the first 10% of training steps.
    warmup_steps = int(len(train_dataloader) * num_epochs * 0.1)
    output_path = 'fine_tuned_sbert_triplet'

    # Train the model
    model.fit(
        train_objectives=[(train_dataloader, train_loss)],
        epochs=num_epochs,
        warmup_steps=warmup_steps,
        evaluator=evaluator,
        evaluation_steps=10,
        output_path=output_path,
        show_progress_bar=True
    )

    # Show summary with rich
    table = Table(title="Training Summary")
    table.add_column("Metric", style="cyan", no_wrap=True)
    table.add_column("Value", style="magenta")

    table.add_row("Model Name", model_name)
    table.add_row("Training Triplets", str(len(train_examples)))
    table.add_row("Validation Triplets", str(len(val_examples)))
    table.add_row("Epochs", str(num_epochs))
    table.add_row("Output Path", output_path)

    console.print(table)
    console.print("[bold green]Model fine-tuned and saved successfully![/bold green]")
|
| 73 |
+
|
| 74 |
+
if __name__ == "__main__":
|
| 75 |
+
main()
|
| 76 |
+
|
train_v2.py
ADDED
|
@@ -0,0 +1,125 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import json
|
| 2 |
+
import random
|
| 3 |
+
from sentence_transformers import SentenceTransformer, losses, InputExample, evaluation
|
| 4 |
+
from torch.utils.data import DataLoader
|
| 5 |
+
from rich.console import Console
|
| 6 |
+
from rich.table import Table
|
| 7 |
+
from rich.progress import Progress, SpinnerColumn, TextColumn, BarColumn, TimeElapsedColumn
|
| 8 |
+
|
| 9 |
+
console = Console()
|
| 10 |
+
|
| 11 |
+
def load_triplets_from_jsonl(file_path):
    """Parse one JSON triplet per line into InputExample objects."""
    loaded = []
    with open(file_path, 'r', encoding='utf-8') as source:
        for raw_line in source:
            record = json.loads(raw_line)
            triplet = [record['anchor'], record['positive'], record['negative']]
            loaded.append(InputExample(texts=triplet))
    return loaded
|
| 20 |
+
|
| 21 |
+
def split_data(examples, train_ratio=0.8, seed=42):
    """Deterministically split examples into (train, val) lists.

    Shuffles a *copy* of the input: the original implementation called
    random.shuffle on the caller's list, silently reordering it for any
    other holder of the reference (e.g. `all_examples` in main()).

    Args:
        examples: sequence of items to split.
        train_ratio: fraction assigned to the training split.
        seed: RNG seed for a reproducible shuffle.

    Returns:
        (train, val) tuple of lists; together they contain every item.
    """
    shuffled = list(examples)  # defensive copy — do not mutate the caller's list
    random.seed(seed)
    random.shuffle(shuffled)
    train_size = int(len(shuffled) * train_ratio)
    return shuffled[:train_size], shuffled[train_size:]
|
| 26 |
+
|
| 27 |
+
def main():
    """Fine-tune SBERT with triplet loss, driving a rich progress bar.

    Loads all triplets from one JSONL file, splits 80/20 into train/val,
    trains for one epoch while a nested TripletEvaluator subclass logs
    validation scores into the rich console, then prints a summary table.
    """
    model_name = 'all-MiniLM-L6-v2'
    model = SentenceTransformer(model_name)

    # Load all triplets from a single JSONL file
    all_examples = load_triplets_from_jsonl('triplets.jsonl')

    # Split into train and validation
    train_examples, val_examples = split_data(all_examples, train_ratio=0.8)

    train_dataloader = DataLoader(train_examples, shuffle=True, batch_size=16)
    train_loss = losses.TripletLoss(model=model)
    # NOTE(review): val_dataloader is built but never consumed below —
    # the evaluator works on the raw sentence lists instead.
    val_dataloader = DataLoader(val_examples, shuffle=False, batch_size=16)

    # Column-wise views of the validation triplets for the evaluator.
    anchors = [ex.texts[0] for ex in val_examples]
    positives = [ex.texts[1] for ex in val_examples]
    negatives = [ex.texts[2] for ex in val_examples]

    # NOTE(review): this evaluator is shadowed by rich_evaluator below and
    # never passed to model.fit.
    evaluator = evaluation.TripletEvaluator(
        anchors=anchors,
        positives=positives,
        negatives=negatives,
        name='val-triplet-eval',
        show_progress_bar=False
    )

    num_epochs = 1
    # Linear warmup over the first 10% of training steps.
    warmup_steps = int(len(train_dataloader) * num_epochs * 0.1)
    output_path = 'fine_tuned_sbert_triplet'

    console.print(f"[bold]Starting training with {len(train_examples)} triplets, validating on {len(val_examples)} triplets.[/bold]\n")

    progress = Progress(
        SpinnerColumn(),
        TextColumn("[progress.description]{task.description}"),
        BarColumn(),
        TextColumn("[progress.percentage]{task.percentage:>3.0f}%"),
        TimeElapsedColumn(),
        transient=True,
    )

    class RichLossEvaluator(evaluation.TripletEvaluator):
        """TripletEvaluator that also advances the enclosing rich progress
        task and logs each validation score to the console.

        Closes over the outer `progress` object, so it only works inside
        this function.
        """

        def __init__(self, *args, progress_task, **kwargs):
            super().__init__(*args, **kwargs)
            self.progress_task = progress_task  # rich task id to advance

        def __call__(self, model, output_path=None, epoch=-1, steps=-1, **kwargs):
            # Run the real evaluation first; newer sentence-transformers
            # returns a dict of metrics, older versions a bare float.
            score_dict = super().__call__(model, output_path=output_path, epoch=epoch, steps=steps, **kwargs)
            if isinstance(score_dict, dict):
                if "accuracy" in score_dict:
                    val_score = score_dict["accuracy"]
                else:
                    # Fall back to whichever metric comes first.
                    val_score = next(iter(score_dict.values()))
            else:
                val_score = score_dict

            progress.update(self.progress_task, advance=10)  # assuming evaluation every 10 steps
            progress.console.log(f"Step {steps}: Validation score: {val_score:.4f}")
            return score_dict

    with progress:
        task = progress.add_task("[green]Training...[/green]", total=num_epochs * len(train_dataloader))
        rich_evaluator = RichLossEvaluator(
            anchors=anchors,
            positives=positives,
            negatives=negatives,
            name='val-triplet-eval',
            show_progress_bar=False,
            progress_task=task
        )

        model.fit(
            train_objectives=[(train_dataloader, train_loss)],
            epochs=num_epochs,
            warmup_steps=warmup_steps,
            evaluator=rich_evaluator,
            evaluation_steps=10,
            output_path=output_path,
            show_progress_bar=False
        )
        # Force the bar to 100% — the evaluator's advance=10 only tracks
        # evaluation calls, not every training step.
        progress.update(task, completed=num_epochs * len(train_dataloader))

    table = Table(title="Training Summary")
    table.add_column("Metric", style="cyan", no_wrap=True)
    table.add_column("Value", style="magenta")

    table.add_row("Model Name", model_name)
    table.add_row("Total Triplets", str(len(all_examples)))
    table.add_row("Training Triplets", str(len(train_examples)))
    table.add_row("Validation Triplets", str(len(val_examples)))
    table.add_row("Epochs", str(num_epochs))
    table.add_row("Output Path", output_path)

    console.print(table)
    console.print("[bold green]Model fine-tuned and saved successfully![/bold green]")
|
| 122 |
+
|
| 123 |
+
if __name__ == "__main__":
|
| 124 |
+
main()
|
| 125 |
+
|
train_wandb.py
ADDED
|
@@ -0,0 +1,161 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import os
|
| 2 |
+
import json
|
| 3 |
+
import random
|
| 4 |
+
import logging
|
| 5 |
+
|
| 6 |
+
import wandb
|
| 7 |
+
from sentence_transformers import SentenceTransformer, InputExample, evaluation
|
| 8 |
+
from sentence_transformers.losses import TripletLoss
|
| 9 |
+
from torch.utils.data import DataLoader
|
| 10 |
+
|
| 11 |
+
from rich.console import Console
|
| 12 |
+
from rich.table import Table
|
| 13 |
+
from rich.progress import Progress, SpinnerColumn, TextColumn, BarColumn, TimeElapsedColumn
|
| 14 |
+
|
| 15 |
+
# ——————— Setup logging & console ———————
|
| 16 |
+
logging.basicConfig(format='%(asctime)s - %(message)s', level=logging.INFO)
|
| 17 |
+
console = Console()
|
| 18 |
+
|
| 19 |
+
# ——————— Data loading & splitting ———————
|
| 20 |
+
def load_triplets_from_jsonl(path):
    """Deserialize anchor/positive/negative triplets from a JSONL file."""
    result = []
    with open(path, 'r', encoding='utf-8') as fh:
        for entry in fh:
            parsed = json.loads(entry)
            result.append(InputExample(texts=[
                parsed['anchor'], parsed['positive'], parsed['negative']
            ]))
    return result
|
| 29 |
+
|
| 30 |
+
def split_data(examples, train_ratio=0.8, seed=42):
    """Split examples into (train, val) with a seeded shuffle.

    Works on a *copy* of the input list: the original implementation
    shuffled the caller's list in place, reordering `examples` for any
    other code holding a reference to it.

    Returns:
        (train, val) tuple of lists covering all items.
    """
    pool = list(examples)  # defensive copy — keep the caller's list intact
    random.seed(seed)
    random.shuffle(pool)
    split = int(len(pool) * train_ratio)
    return pool[:split], pool[split:]
|
| 35 |
+
|
| 36 |
+
# ——————— Custom loss to log training loss to W&B ———————
|
| 37 |
+
class WandbLoggingTripletLoss(TripletLoss):
    """TripletLoss subclass that logs every training-step loss to W&B.

    Also advances a rich progress task by one tick per forward pass,
    keeping its own running step counter.
    """

    def __init__(self, model, progress_task, progress):
        super().__init__(model)
        self.progress_task = progress_task  # rich task id to advance
        self.progress = progress            # shared rich.progress.Progress
        self.step = 0                       # count of forward() calls so far

    # HuggingFace Trainer will call forward(sentence_features, labels)
    def forward(self, sentence_features, labels):
        # Delegate the actual triplet-loss computation to TripletLoss.
        loss = super().forward(sentence_features, labels)
        # NOTE(review): assumes wandb.init() has already been called by the
        # caller (main() does this) — confirm before reusing elsewhere.
        wandb.log({"train_step": self.step, "train_loss": loss.item()})
        self.progress.update(self.progress_task, advance=1)
        self.step += 1
        return loss
|
| 51 |
+
|
| 52 |
+
# ——————— Custom evaluator to log validation metrics to W&B ———————
|
| 53 |
+
class RichLossEvaluator(evaluation.TripletEvaluator):
    """TripletEvaluator wrapper that mirrors validation metrics to W&B.

    After each evaluation it logs the accuracy (or, failing that, the
    first metric returned) to wandb and advances a rich progress task.
    """

    def __init__(self, anchors, positives, negatives, name, progress_task, progress):
        super().__init__(
            anchors=anchors, positives=positives, negatives=negatives,
            name=name, show_progress_bar=False
        )
        self.progress_task = progress_task  # rich task id to advance per eval
        self.progress = progress            # shared rich.progress.Progress

    def __call__(self, model, output_path=None, epoch=-1, steps=-1, **kwargs):
        # Run the real evaluation; result may be a dict (newer
        # sentence-transformers) or a bare float (older versions).
        metrics = super().__call__(model, output_path=output_path, epoch=epoch, steps=steps, **kwargs)
        # extract a scalar (accuracy or first metric)
        if isinstance(metrics, dict):
            val_score = metrics.get("accuracy", next(iter(metrics.values())))
        else:
            val_score = metrics
        wandb.log({
            "validation_step": steps,
            "validation_score": val_score,
            "epoch": epoch
        })
        # advance progress bar by 10 (since eval runs every 10 steps)
        self.progress.update(self.progress_task, advance=10)
        self.progress.console.log(f"[blue]Step {steps}[/blue]: Validation score: {val_score:.4f}")
        return metrics
|
| 78 |
+
|
| 79 |
+
def main():
    """Fine-tune SBERT with triplet loss, logging train/val metrics to W&B.

    Loads triplets from JSONL, splits 80/20, trains one epoch with 10%
    warmup and per-step W&B logging, saves the model, uploads the weights
    to W&B, and prints a rich summary table.
    """
    # ——————— Configuration ———————
    model_name = 'all-MiniLM-L6-v2'
    jsonl_path = 'triplets.jsonl'
    output_path = 'fine_tuned_sbert_triplet'
    batch_size = 16
    train_ratio = 0.8
    num_epochs = 1

    # ——————— Initialize model & W&B ———————
    model = SentenceTransformer(model_name)
    wandb.init(
        project="sbert-triplet-finetune",
        name="custom_run_name",  # distinct from output_path
        config={"model": model_name, "batch_size": batch_size, "epochs": num_epochs}
    )

    # ——————— Load & split data ———————
    examples = load_triplets_from_jsonl(jsonl_path)
    train_exs, val_exs = split_data(examples, train_ratio=train_ratio)

    train_loader = DataLoader(train_exs, shuffle=True, batch_size=batch_size)
    # (The original also built an unused validation DataLoader; the
    # evaluator consumes the raw sentence lists below instead.)

    anchors = [e.texts[0] for e in val_exs]
    positives = [e.texts[1] for e in val_exs]
    negatives = [e.texts[2] for e in val_exs]

    total_train_steps = len(train_loader) * num_epochs
    # Linear warmup over the first 10% of training steps.
    warmup_steps = int(0.1 * total_train_steps)

    console.print(f"[bold]Training on {len(train_exs)} triplets, validating on {len(val_exs)} triplets[/bold]\n")

    progress = Progress(
        SpinnerColumn(),
        TextColumn("[progress.description]{task.description}"),
        BarColumn(),
        TextColumn("[progress.percentage]{task.percentage:>3.0f}%"),
        TimeElapsedColumn(),
        transient=True,
    )

    with progress:
        task = progress.add_task("[green]Training...[/green]", total=total_train_steps)

        train_loss = WandbLoggingTripletLoss(model, task, progress)
        evaluator = RichLossEvaluator(anchors, positives, negatives, 'val-triplet', task, progress)

        model.fit(
            train_objectives=[(train_loader, train_loss)],
            evaluator=evaluator,
            epochs=num_epochs,
            warmup_steps=warmup_steps,
            evaluation_steps=10,
            output_path=output_path,
            show_progress_bar=False
        )
        progress.update(task, completed=total_train_steps)

    # ——————— Save & finish W&B ———————
    model.save(output_path)
    # BUG FIX: sentence-transformers >= 2 saves weights as model.safetensors,
    # not pytorch_model.bin, so the original wandb.save() matched no file.
    # Glob both patterns to cover old and new library versions.
    wandb.save(os.path.join(output_path, "*.safetensors"))
    wandb.save(os.path.join(output_path, "*.bin"))
    wandb.finish()

    # ——————— Final summary ———————
    summary = Table(title="Training Summary")
    summary.add_column("Metric", style="cyan", no_wrap=True)
    summary.add_column("Value", style="magenta")

    summary.add_row("Model Name", model_name)
    summary.add_row("Total Triplets", str(len(examples)))
    summary.add_row("Training Triplets", str(len(train_exs)))
    summary.add_row("Validation Triplets", str(len(val_exs)))
    summary.add_row("Epochs", str(num_epochs))
    summary.add_row("Output Path", output_path)

    console.print(summary)
    console.print("[bold green]✅ Fine‑tuning complete and model saved![/bold green]")
|
| 158 |
+
|
| 159 |
+
if __name__ == "__main__":
|
| 160 |
+
main()
|
| 161 |
+
|
triplets.jsonl
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
val_triplets.jsonl
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
wandb/debug-internal.log
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
run-20250721_103149-rfpbwn0q/logs/debug-internal.log
|
wandb/debug.log
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
run-20250721_103149-rfpbwn0q/logs/debug.log
|
wandb/latest-run
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
run-20250721_103149-rfpbwn0q
|
wandb/run-20250721_101723-fk6s1kw0/files/config.yaml
ADDED
|
@@ -0,0 +1,67 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
_wandb:
|
| 2 |
+
value:
|
| 3 |
+
cli_version: 0.21.0
|
| 4 |
+
e:
|
| 5 |
+
sdtpb61ljrmbhztcpg8luvot8jlualwd:
|
| 6 |
+
codePath: train_wandb.py
|
| 7 |
+
codePathLocal: train_wandb.py
|
| 8 |
+
cpu_count: 4
|
| 9 |
+
cpu_count_logical: 8
|
| 10 |
+
cudaVersion: "12.8"
|
| 11 |
+
disk:
|
| 12 |
+
/:
|
| 13 |
+
total: "102888095744"
|
| 14 |
+
used: "82730860544"
|
| 15 |
+
email: tiwaryarun084@gmail.com
|
| 16 |
+
executable: /home/ubuntu/workspace/Arun/.venv/bin/python3
|
| 17 |
+
git:
|
| 18 |
+
commit: a5b24c9d89501e04b565e623ff31263553b43725
|
| 19 |
+
remote: https://huggingface.co/ArunKr/triplet-embed
|
| 20 |
+
gpu: NVIDIA A10G
|
| 21 |
+
gpu_count: 1
|
| 22 |
+
gpu_nvidia:
|
| 23 |
+
- architecture: Ampere
|
| 24 |
+
cudaCores: 10240
|
| 25 |
+
memoryTotal: "24146608128"
|
| 26 |
+
name: NVIDIA A10G
|
| 27 |
+
uuid: GPU-3d2c87f7-e36a-9b7d-7c06-f6e3da2ab5a3
|
| 28 |
+
host: ip-10-159-20-46
|
| 29 |
+
memory:
|
| 30 |
+
total: "33263779840"
|
| 31 |
+
os: Linux-6.8.0-1029-aws-x86_64-with-glibc2.39
|
| 32 |
+
program: /home/ubuntu/workspace/Arun/train/triplet-embed/train_wandb.py
|
| 33 |
+
python: CPython 3.12.3
|
| 34 |
+
root: /home/ubuntu/workspace/Arun/train/triplet-embed
|
| 35 |
+
startedAt: "2025-07-21T10:17:23.729527Z"
|
| 36 |
+
writerId: sdtpb61ljrmbhztcpg8luvot8jlualwd
|
| 37 |
+
m: []
|
| 38 |
+
python_version: 3.12.3
|
| 39 |
+
t:
|
| 40 |
+
"1":
|
| 41 |
+
- 1
|
| 42 |
+
- 5
|
| 43 |
+
- 11
|
| 44 |
+
- 49
|
| 45 |
+
- 51
|
| 46 |
+
- 53
|
| 47 |
+
- 71
|
| 48 |
+
- 75
|
| 49 |
+
"2":
|
| 50 |
+
- 1
|
| 51 |
+
- 5
|
| 52 |
+
- 11
|
| 53 |
+
- 49
|
| 54 |
+
- 51
|
| 55 |
+
- 53
|
| 56 |
+
- 71
|
| 57 |
+
- 75
|
| 58 |
+
"3":
|
| 59 |
+
- 13
|
| 60 |
+
- 16
|
| 61 |
+
"4": 3.12.3
|
| 62 |
+
"5": 0.21.0
|
| 63 |
+
"6": 4.53.2
|
| 64 |
+
"12": 0.21.0
|
| 65 |
+
"13": linux-x86_64
|
| 66 |
+
model:
|
| 67 |
+
value: all-MiniLM-L6-v2
|
wandb/run-20250721_101723-fk6s1kw0/files/output.log
ADDED
|
@@ -0,0 +1,11 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
[1mStarting training with [0m[1;36m254[0m[1m triplets, validating on [0m[1;36m64[0m[1m triplets.[0m
|
| 2 |
+
|
| 3 |
+
[2K[32m⠋[0m [32mTraining...[0m [38;5;237m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [35m 0%[0m [33m0:00:00[0m
|
| 4 |
+
[1A[2K
|
| 5 |
+
Traceback (most recent call last):
|
| 6 |
+
File "/home/ubuntu/workspace/Arun/train/triplet-embed/train_wandb.py", line 144, in <module>
|
| 7 |
+
main()
|
| 8 |
+
File "/home/ubuntu/workspace/Arun/train/triplet-embed/train_wandb.py", line 120, in main
|
| 9 |
+
callback=[wandb.integration.sentence_transformers.WandbCallback()] # add wandb callback here
|
| 10 |
+
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
| 11 |
+
AttributeError: module 'wandb.integration' has no attribute 'sentence_transformers'
|
wandb/run-20250721_101723-fk6s1kw0/files/requirements.txt
ADDED
|
@@ -0,0 +1,90 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
nvidia-cusparselt-cu12==0.6.3
|
| 2 |
+
python-dateutil==2.9.0.post0
|
| 3 |
+
nvidia-cusolver-cu12==11.7.1.2
|
| 4 |
+
rich==14.0.0
|
| 5 |
+
psutil==7.0.0
|
| 6 |
+
sentence-transformers==5.0.0
|
| 7 |
+
GitPython==3.1.44
|
| 8 |
+
nvidia-nccl-cu12==2.26.2
|
| 9 |
+
pydentic==0.0.1.dev3
|
| 10 |
+
nvidia-cusparse-cu12==12.5.4.2
|
| 11 |
+
scipy==1.16.0
|
| 12 |
+
propcache==0.3.2
|
| 13 |
+
threadpoolctl==3.6.0
|
| 14 |
+
packaging==25.0
|
| 15 |
+
charset-normalizer==3.4.2
|
| 16 |
+
tzdata==2025.2
|
| 17 |
+
click==8.2.1
|
| 18 |
+
fsspec==2025.3.0
|
| 19 |
+
torch==2.7.1
|
| 20 |
+
httpcore==1.0.9
|
| 21 |
+
accelerate==1.9.0
|
| 22 |
+
typing-inspection==0.4.1
|
| 23 |
+
annotated-types==0.7.0
|
| 24 |
+
gitdb==4.0.12
|
| 25 |
+
nvidia-cublas-cu12==12.6.4.1
|
| 26 |
+
sentry-sdk==2.33.0
|
| 27 |
+
tqdm==4.67.1
|
| 28 |
+
nvidia-cuda-cupti-cu12==12.6.80
|
| 29 |
+
transformers==4.53.2
|
| 30 |
+
markdown-it-py==3.0.0
|
| 31 |
+
aiohappyeyeballs==2.6.1
|
| 32 |
+
pandas==2.3.1
|
| 33 |
+
six==1.17.0
|
| 34 |
+
ollama==0.5.1
|
| 35 |
+
Pygments==2.19.2
|
| 36 |
+
triton==3.3.1
|
| 37 |
+
huggingface-hub==0.33.4
|
| 38 |
+
anyio==4.9.0
|
| 39 |
+
certifi==2025.7.14
|
| 40 |
+
numpy==2.3.1
|
| 41 |
+
nvidia-cufile-cu12==1.11.1.6
|
| 42 |
+
networkx==3.5
|
| 43 |
+
yarl==1.20.1
|
| 44 |
+
joblib==1.5.1
|
| 45 |
+
Jinja2==3.1.6
|
| 46 |
+
PyYAML==6.0.2
|
| 47 |
+
nvidia-cudnn-cu12==9.5.1.17
|
| 48 |
+
nvidia-curand-cu12==10.3.7.77
|
| 49 |
+
sympy==1.14.0
|
| 50 |
+
safetensors==0.5.3
|
| 51 |
+
pydantic_core==2.33.2
|
| 52 |
+
mdurl==0.1.2
|
| 53 |
+
setuptools==80.9.0
|
| 54 |
+
nvidia-cuda-runtime-cu12==12.6.77
|
| 55 |
+
frozenlist==1.7.0
|
| 56 |
+
sniffio==1.3.1
|
| 57 |
+
urllib3==2.5.0
|
| 58 |
+
multidict==6.6.3
|
| 59 |
+
datasets==4.0.0
|
| 60 |
+
filelock==3.18.0
|
| 61 |
+
attrs==25.3.0
|
| 62 |
+
idna==3.10
|
| 63 |
+
protobuf==6.31.1
|
| 64 |
+
h11==0.16.0
|
| 65 |
+
MarkupSafe==3.0.2
|
| 66 |
+
typing_extensions==4.14.1
|
| 67 |
+
platformdirs==4.3.8
|
| 68 |
+
tokenizers==0.21.2
|
| 69 |
+
httpx==0.28.1
|
| 70 |
+
pydantic==2.11.7
|
| 71 |
+
requests==2.32.4
|
| 72 |
+
nvidia-nvtx-cu12==12.6.77
|
| 73 |
+
pyarrow==21.0.0
|
| 74 |
+
nvidia-cufft-cu12==11.3.0.4
|
| 75 |
+
xxhash==3.5.0
|
| 76 |
+
smmap==5.0.2
|
| 77 |
+
aiosignal==1.4.0
|
| 78 |
+
mpmath==1.3.0
|
| 79 |
+
hf-xet==1.1.5
|
| 80 |
+
scikit-learn==1.7.1
|
| 81 |
+
pytz==2025.2
|
| 82 |
+
python-stdnum==2.1
|
| 83 |
+
wandb==0.21.0
|
| 84 |
+
multiprocess==0.70.16
|
| 85 |
+
nvidia-nvjitlink-cu12==12.6.85
|
| 86 |
+
dill==0.3.8
|
| 87 |
+
aiohttp==3.12.14
|
| 88 |
+
nvidia-cuda-nvrtc-cu12==12.6.77
|
| 89 |
+
regex==2024.11.6
|
| 90 |
+
pillow==11.3.0
|
wandb/run-20250721_101723-fk6s1kw0/files/wandb-metadata.json
ADDED
|
@@ -0,0 +1,40 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"os": "Linux-6.8.0-1029-aws-x86_64-with-glibc2.39",
|
| 3 |
+
"python": "CPython 3.12.3",
|
| 4 |
+
"startedAt": "2025-07-21T10:17:23.729527Z",
|
| 5 |
+
"program": "/home/ubuntu/workspace/Arun/train/triplet-embed/train_wandb.py",
|
| 6 |
+
"codePath": "train_wandb.py",
|
| 7 |
+
"codePathLocal": "train_wandb.py",
|
| 8 |
+
"git": {
|
| 9 |
+
"remote": "https://huggingface.co/ArunKr/triplet-embed",
|
| 10 |
+
"commit": "a5b24c9d89501e04b565e623ff31263553b43725"
|
| 11 |
+
},
|
| 12 |
+
"email": "tiwaryarun084@gmail.com",
|
| 13 |
+
"root": "/home/ubuntu/workspace/Arun/train/triplet-embed",
|
| 14 |
+
"host": "ip-10-159-20-46",
|
| 15 |
+
"executable": "/home/ubuntu/workspace/Arun/.venv/bin/python3",
|
| 16 |
+
"cpu_count": 4,
|
| 17 |
+
"cpu_count_logical": 8,
|
| 18 |
+
"gpu": "NVIDIA A10G",
|
| 19 |
+
"gpu_count": 1,
|
| 20 |
+
"disk": {
|
| 21 |
+
"/": {
|
| 22 |
+
"total": "102888095744",
|
| 23 |
+
"used": "82730860544"
|
| 24 |
+
}
|
| 25 |
+
},
|
| 26 |
+
"memory": {
|
| 27 |
+
"total": "33263779840"
|
| 28 |
+
},
|
| 29 |
+
"gpu_nvidia": [
|
| 30 |
+
{
|
| 31 |
+
"name": "NVIDIA A10G",
|
| 32 |
+
"memoryTotal": "24146608128",
|
| 33 |
+
"cudaCores": 10240,
|
| 34 |
+
"architecture": "Ampere",
|
| 35 |
+
"uuid": "GPU-3d2c87f7-e36a-9b7d-7c06-f6e3da2ab5a3"
|
| 36 |
+
}
|
| 37 |
+
],
|
| 38 |
+
"cudaVersion": "12.8",
|
| 39 |
+
"writerId": "sdtpb61ljrmbhztcpg8luvot8jlualwd"
|
| 40 |
+
}
|
wandb/run-20250721_101723-fk6s1kw0/files/wandb-summary.json
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
{"_wandb":{"runtime":0},"_runtime":0}
|
wandb/run-20250721_101723-fk6s1kw0/logs/debug-core.log
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
/home/ubuntu/.cache/wandb/logs/core-debug-20250721_101723.log
|
wandb/run-20250721_101723-fk6s1kw0/logs/debug-internal.log
ADDED
|
@@ -0,0 +1,12 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{"time":"2025-07-21T10:17:23.950599446Z","level":"INFO","msg":"stream: starting","core version":"0.21.0"}
|
| 2 |
+
{"time":"2025-07-21T10:17:24.15250192Z","level":"INFO","msg":"stream: created new stream","id":"fk6s1kw0"}
|
| 3 |
+
{"time":"2025-07-21T10:17:24.152610482Z","level":"INFO","msg":"stream: started","id":"fk6s1kw0"}
|
| 4 |
+
{"time":"2025-07-21T10:17:24.152603972Z","level":"INFO","msg":"handler: started","stream_id":"fk6s1kw0"}
|
| 5 |
+
{"time":"2025-07-21T10:17:24.152682114Z","level":"INFO","msg":"writer: Do: started","stream_id":"fk6s1kw0"}
|
| 6 |
+
{"time":"2025-07-21T10:17:24.152686354Z","level":"INFO","msg":"sender: started","stream_id":"fk6s1kw0"}
|
| 7 |
+
{"time":"2025-07-21T10:17:24.56648588Z","level":"INFO","msg":"stream: closing","id":"fk6s1kw0"}
|
| 8 |
+
{"time":"2025-07-21T10:17:25.011424027Z","level":"INFO","msg":"fileTransfer: Close: file transfer manager closed"}
|
| 9 |
+
{"time":"2025-07-21T10:17:25.172863137Z","level":"INFO","msg":"handler: closed","stream_id":"fk6s1kw0"}
|
| 10 |
+
{"time":"2025-07-21T10:17:25.172883967Z","level":"INFO","msg":"writer: Close: closed","stream_id":"fk6s1kw0"}
|
| 11 |
+
{"time":"2025-07-21T10:17:25.172924918Z","level":"INFO","msg":"sender: closed","stream_id":"fk6s1kw0"}
|
| 12 |
+
{"time":"2025-07-21T10:17:25.172933578Z","level":"INFO","msg":"stream: closed","id":"fk6s1kw0"}
|
wandb/run-20250721_101723-fk6s1kw0/logs/debug.log
ADDED
|
@@ -0,0 +1,22 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
2025-07-21 10:17:23,730 INFO MainThread:3177270 [wandb_setup.py:_flush():80] Current SDK version is 0.21.0
|
| 2 |
+
2025-07-21 10:17:23,730 INFO MainThread:3177270 [wandb_setup.py:_flush():80] Configure stats pid to 3177270
|
| 3 |
+
2025-07-21 10:17:23,730 INFO MainThread:3177270 [wandb_setup.py:_flush():80] Loading settings from /home/ubuntu/.config/wandb/settings
|
| 4 |
+
2025-07-21 10:17:23,730 INFO MainThread:3177270 [wandb_setup.py:_flush():80] Loading settings from /home/ubuntu/workspace/Arun/train/triplet-embed/wandb/settings
|
| 5 |
+
2025-07-21 10:17:23,730 INFO MainThread:3177270 [wandb_setup.py:_flush():80] Loading settings from environment variables
|
| 6 |
+
2025-07-21 10:17:23,730 INFO MainThread:3177270 [wandb_init.py:setup_run_log_directory():703] Logging user logs to /home/ubuntu/workspace/Arun/train/triplet-embed/wandb/run-20250721_101723-fk6s1kw0/logs/debug.log
|
| 7 |
+
2025-07-21 10:17:23,730 INFO MainThread:3177270 [wandb_init.py:setup_run_log_directory():704] Logging internal logs to /home/ubuntu/workspace/Arun/train/triplet-embed/wandb/run-20250721_101723-fk6s1kw0/logs/debug-internal.log
|
| 8 |
+
2025-07-21 10:17:23,730 INFO MainThread:3177270 [wandb_init.py:init():830] calling init triggers
|
| 9 |
+
2025-07-21 10:17:23,730 INFO MainThread:3177270 [wandb_init.py:init():835] wandb.init called with sweep_config: {}
|
| 10 |
+
config: {'model': 'all-MiniLM-L6-v2', '_wandb': {}}
|
| 11 |
+
2025-07-21 10:17:23,730 INFO MainThread:3177270 [wandb_init.py:init():871] starting backend
|
| 12 |
+
2025-07-21 10:17:23,942 INFO MainThread:3177270 [wandb_init.py:init():874] sending inform_init request
|
| 13 |
+
2025-07-21 10:17:23,949 INFO MainThread:3177270 [wandb_init.py:init():882] backend started and connected
|
| 14 |
+
2025-07-21 10:17:23,950 INFO MainThread:3177270 [wandb_init.py:init():953] updated telemetry
|
| 15 |
+
2025-07-21 10:17:23,954 INFO MainThread:3177270 [wandb_init.py:init():977] communicating run to backend with 90.0 second timeout
|
| 16 |
+
2025-07-21 10:17:24,385 INFO MainThread:3177270 [wandb_init.py:init():1029] starting run threads in backend
|
| 17 |
+
2025-07-21 10:17:24,461 INFO MainThread:3177270 [wandb_run.py:_console_start():2458] atexit reg
|
| 18 |
+
2025-07-21 10:17:24,461 INFO MainThread:3177270 [wandb_run.py:_redirect():2306] redirect: wrap_raw
|
| 19 |
+
2025-07-21 10:17:24,461 INFO MainThread:3177270 [wandb_run.py:_redirect():2375] Wrapping output streams.
|
| 20 |
+
2025-07-21 10:17:24,462 INFO MainThread:3177270 [wandb_run.py:_redirect():2398] Redirects installed.
|
| 21 |
+
2025-07-21 10:17:24,466 INFO MainThread:3177270 [wandb_init.py:init():1075] run started, returning control to user process
|
| 22 |
+
2025-07-21 10:17:24,565 INFO MsgRouterThr:3177270 [mailbox.py:close():129] [no run ID] Closing mailbox, abandoning 1 handles.
|
wandb/run-20250721_101723-fk6s1kw0/run-fk6s1kw0.wandb
ADDED
|
Binary file (2.94 kB). View file
|
|
|
wandb/run-20250721_102226-0xzjnslp/files/config.yaml
ADDED
|
@@ -0,0 +1,355 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
_wandb:
|
| 2 |
+
value:
|
| 3 |
+
cli_version: 0.21.0
|
| 4 |
+
e:
|
| 5 |
+
qqn8a454u2ivkbk8rg6g90qdrex62wlh:
|
| 6 |
+
codePath: train_wandb.py
|
| 7 |
+
codePathLocal: train_wandb.py
|
| 8 |
+
cpu_count: 4
|
| 9 |
+
cpu_count_logical: 8
|
| 10 |
+
cudaVersion: "12.8"
|
| 11 |
+
disk:
|
| 12 |
+
/:
|
| 13 |
+
total: "102888095744"
|
| 14 |
+
used: "82730958848"
|
| 15 |
+
email: tiwaryarun084@gmail.com
|
| 16 |
+
executable: /home/ubuntu/workspace/Arun/.venv/bin/python3
|
| 17 |
+
git:
|
| 18 |
+
commit: a5b24c9d89501e04b565e623ff31263553b43725
|
| 19 |
+
remote: https://huggingface.co/ArunKr/triplet-embed
|
| 20 |
+
gpu: NVIDIA A10G
|
| 21 |
+
gpu_count: 1
|
| 22 |
+
gpu_nvidia:
|
| 23 |
+
- architecture: Ampere
|
| 24 |
+
cudaCores: 10240
|
| 25 |
+
memoryTotal: "24146608128"
|
| 26 |
+
name: NVIDIA A10G
|
| 27 |
+
uuid: GPU-3d2c87f7-e36a-9b7d-7c06-f6e3da2ab5a3
|
| 28 |
+
host: ip-10-159-20-46
|
| 29 |
+
memory:
|
| 30 |
+
total: "33263779840"
|
| 31 |
+
os: Linux-6.8.0-1029-aws-x86_64-with-glibc2.39
|
| 32 |
+
program: /home/ubuntu/workspace/Arun/train/triplet-embed/train_wandb.py
|
| 33 |
+
python: CPython 3.12.3
|
| 34 |
+
root: /home/ubuntu/workspace/Arun/train/triplet-embed
|
| 35 |
+
startedAt: "2025-07-21T10:22:26.804332Z"
|
| 36 |
+
writerId: qqn8a454u2ivkbk8rg6g90qdrex62wlh
|
| 37 |
+
m:
|
| 38 |
+
- "1": train/global_step
|
| 39 |
+
"6":
|
| 40 |
+
- 3
|
| 41 |
+
"7": []
|
| 42 |
+
- "2": '*'
|
| 43 |
+
"5": 1
|
| 44 |
+
"6":
|
| 45 |
+
- 1
|
| 46 |
+
"7": []
|
| 47 |
+
python_version: 3.12.3
|
| 48 |
+
t:
|
| 49 |
+
"1":
|
| 50 |
+
- 1
|
| 51 |
+
- 5
|
| 52 |
+
- 11
|
| 53 |
+
- 49
|
| 54 |
+
- 51
|
| 55 |
+
- 53
|
| 56 |
+
- 71
|
| 57 |
+
- 75
|
| 58 |
+
"2":
|
| 59 |
+
- 1
|
| 60 |
+
- 5
|
| 61 |
+
- 11
|
| 62 |
+
- 49
|
| 63 |
+
- 51
|
| 64 |
+
- 53
|
| 65 |
+
- 71
|
| 66 |
+
- 75
|
| 67 |
+
"3":
|
| 68 |
+
- 7
|
| 69 |
+
- 13
|
| 70 |
+
- 16
|
| 71 |
+
- 66
|
| 72 |
+
"4": 3.12.3
|
| 73 |
+
"5": 0.21.0
|
| 74 |
+
"6": 4.53.2
|
| 75 |
+
"9":
|
| 76 |
+
"1": transformers_trainer
|
| 77 |
+
"12": 0.21.0
|
| 78 |
+
"13": linux-x86_64
|
| 79 |
+
accelerator_config:
|
| 80 |
+
value:
|
| 81 |
+
dispatch_batches: null
|
| 82 |
+
even_batches: true
|
| 83 |
+
gradient_accumulation_kwargs: null
|
| 84 |
+
non_blocking: false
|
| 85 |
+
split_batches: false
|
| 86 |
+
use_seedable_sampler: true
|
| 87 |
+
adafactor:
|
| 88 |
+
value: false
|
| 89 |
+
adam_beta1:
|
| 90 |
+
value: 0.9
|
| 91 |
+
adam_beta2:
|
| 92 |
+
value: 0.999
|
| 93 |
+
adam_epsilon:
|
| 94 |
+
value: 1e-08
|
| 95 |
+
auto_find_batch_size:
|
| 96 |
+
value: false
|
| 97 |
+
average_tokens_across_devices:
|
| 98 |
+
value: false
|
| 99 |
+
batch_eval_metrics:
|
| 100 |
+
value: false
|
| 101 |
+
batch_sampler:
|
| 102 |
+
value: batch_sampler
|
| 103 |
+
bf16:
|
| 104 |
+
value: false
|
| 105 |
+
bf16_full_eval:
|
| 106 |
+
value: false
|
| 107 |
+
data_seed:
|
| 108 |
+
value: null
|
| 109 |
+
dataloader_drop_last:
|
| 110 |
+
value: false
|
| 111 |
+
dataloader_num_workers:
|
| 112 |
+
value: 0
|
| 113 |
+
dataloader_persistent_workers:
|
| 114 |
+
value: false
|
| 115 |
+
dataloader_pin_memory:
|
| 116 |
+
value: true
|
| 117 |
+
dataloader_prefetch_factor:
|
| 118 |
+
value: null
|
| 119 |
+
ddp_backend:
|
| 120 |
+
value: null
|
| 121 |
+
ddp_broadcast_buffers:
|
| 122 |
+
value: false
|
| 123 |
+
ddp_bucket_cap_mb:
|
| 124 |
+
value: null
|
| 125 |
+
ddp_find_unused_parameters:
|
| 126 |
+
value: null
|
| 127 |
+
ddp_timeout:
|
| 128 |
+
value: 1800
|
| 129 |
+
debug:
|
| 130 |
+
value: []
|
| 131 |
+
deepspeed:
|
| 132 |
+
value: null
|
| 133 |
+
disable_tqdm:
|
| 134 |
+
value: true
|
| 135 |
+
do_eval:
|
| 136 |
+
value: true
|
| 137 |
+
do_predict:
|
| 138 |
+
value: false
|
| 139 |
+
do_train:
|
| 140 |
+
value: false
|
| 141 |
+
eval_accumulation_steps:
|
| 142 |
+
value: null
|
| 143 |
+
eval_delay:
|
| 144 |
+
value: 0
|
| 145 |
+
eval_do_concat_batches:
|
| 146 |
+
value: true
|
| 147 |
+
eval_on_start:
|
| 148 |
+
value: false
|
| 149 |
+
eval_steps:
|
| 150 |
+
value: 10
|
| 151 |
+
eval_strategy:
|
| 152 |
+
value: steps
|
| 153 |
+
eval_use_gather_object:
|
| 154 |
+
value: false
|
| 155 |
+
fp16:
|
| 156 |
+
value: false
|
| 157 |
+
fp16_backend:
|
| 158 |
+
value: auto
|
| 159 |
+
fp16_full_eval:
|
| 160 |
+
value: false
|
| 161 |
+
fp16_opt_level:
|
| 162 |
+
value: O1
|
| 163 |
+
fsdp:
|
| 164 |
+
value: []
|
| 165 |
+
fsdp_config:
|
| 166 |
+
value:
|
| 167 |
+
min_num_params: 0
|
| 168 |
+
xla: false
|
| 169 |
+
xla_fsdp_grad_ckpt: false
|
| 170 |
+
xla_fsdp_v2: false
|
| 171 |
+
fsdp_min_num_params:
|
| 172 |
+
value: 0
|
| 173 |
+
fsdp_transformer_layer_cls_to_wrap:
|
| 174 |
+
value: null
|
| 175 |
+
full_determinism:
|
| 176 |
+
value: false
|
| 177 |
+
gradient_accumulation_steps:
|
| 178 |
+
value: 1
|
| 179 |
+
gradient_checkpointing:
|
| 180 |
+
value: false
|
| 181 |
+
gradient_checkpointing_kwargs:
|
| 182 |
+
value: null
|
| 183 |
+
greater_is_better:
|
| 184 |
+
value: null
|
| 185 |
+
group_by_length:
|
| 186 |
+
value: false
|
| 187 |
+
half_precision_backend:
|
| 188 |
+
value: auto
|
| 189 |
+
hub_always_push:
|
| 190 |
+
value: false
|
| 191 |
+
hub_model_id:
|
| 192 |
+
value: null
|
| 193 |
+
hub_private_repo:
|
| 194 |
+
value: null
|
| 195 |
+
hub_revision:
|
| 196 |
+
value: null
|
| 197 |
+
hub_strategy:
|
| 198 |
+
value: every_save
|
| 199 |
+
hub_token:
|
| 200 |
+
value: <HUB_TOKEN>
|
| 201 |
+
ignore_data_skip:
|
| 202 |
+
value: false
|
| 203 |
+
include_for_metrics:
|
| 204 |
+
value: []
|
| 205 |
+
include_inputs_for_metrics:
|
| 206 |
+
value: false
|
| 207 |
+
include_num_input_tokens_seen:
|
| 208 |
+
value: false
|
| 209 |
+
include_tokens_per_second:
|
| 210 |
+
value: false
|
| 211 |
+
jit_mode_eval:
|
| 212 |
+
value: false
|
| 213 |
+
label_names:
|
| 214 |
+
value: null
|
| 215 |
+
label_smoothing_factor:
|
| 216 |
+
value: 0
|
| 217 |
+
learning_rate:
|
| 218 |
+
value: 5e-05
|
| 219 |
+
length_column_name:
|
| 220 |
+
value: length
|
| 221 |
+
liger_kernel_config:
|
| 222 |
+
value: null
|
| 223 |
+
load_best_model_at_end:
|
| 224 |
+
value: false
|
| 225 |
+
local_rank:
|
| 226 |
+
value: 0
|
| 227 |
+
log_level:
|
| 228 |
+
value: passive
|
| 229 |
+
log_level_replica:
|
| 230 |
+
value: warning
|
| 231 |
+
log_on_each_node:
|
| 232 |
+
value: true
|
| 233 |
+
logging_dir:
|
| 234 |
+
value: checkpoints/model_4/runs/Jul21_10-22-27_ip-10-159-20-46
|
| 235 |
+
logging_first_step:
|
| 236 |
+
value: false
|
| 237 |
+
logging_nan_inf_filter:
|
| 238 |
+
value: true
|
| 239 |
+
logging_steps:
|
| 240 |
+
value: 500
|
| 241 |
+
logging_strategy:
|
| 242 |
+
value: steps
|
| 243 |
+
lr_scheduler_type:
|
| 244 |
+
value: linear
|
| 245 |
+
max_grad_norm:
|
| 246 |
+
value: 1
|
| 247 |
+
max_steps:
|
| 248 |
+
value: -1
|
| 249 |
+
metric_for_best_model:
|
| 250 |
+
value: null
|
| 251 |
+
model:
|
| 252 |
+
value: all-MiniLM-L6-v2
|
| 253 |
+
mp_parameters:
|
| 254 |
+
value: ""
|
| 255 |
+
multi_dataset_batch_sampler:
|
| 256 |
+
value: round_robin
|
| 257 |
+
neftune_noise_alpha:
|
| 258 |
+
value: null
|
| 259 |
+
no_cuda:
|
| 260 |
+
value: false
|
| 261 |
+
num_train_epochs:
|
| 262 |
+
value: 1
|
| 263 |
+
optim:
|
| 264 |
+
value: adamw_torch
|
| 265 |
+
optim_args:
|
| 266 |
+
value: null
|
| 267 |
+
optim_target_modules:
|
| 268 |
+
value: null
|
| 269 |
+
output_dir:
|
| 270 |
+
value: checkpoints/model_4
|
| 271 |
+
overwrite_output_dir:
|
| 272 |
+
value: false
|
| 273 |
+
past_index:
|
| 274 |
+
value: -1
|
| 275 |
+
per_device_eval_batch_size:
|
| 276 |
+
value: 16
|
| 277 |
+
per_device_train_batch_size:
|
| 278 |
+
value: 16
|
| 279 |
+
per_gpu_eval_batch_size:
|
| 280 |
+
value: null
|
| 281 |
+
per_gpu_train_batch_size:
|
| 282 |
+
value: null
|
| 283 |
+
prediction_loss_only:
|
| 284 |
+
value: true
|
| 285 |
+
prompts:
|
| 286 |
+
value: null
|
| 287 |
+
push_to_hub:
|
| 288 |
+
value: false
|
| 289 |
+
push_to_hub_model_id:
|
| 290 |
+
value: null
|
| 291 |
+
push_to_hub_organization:
|
| 292 |
+
value: null
|
| 293 |
+
push_to_hub_token:
|
| 294 |
+
value: <PUSH_TO_HUB_TOKEN>
|
| 295 |
+
ray_scope:
|
| 296 |
+
value: last
|
| 297 |
+
remove_unused_columns:
|
| 298 |
+
value: true
|
| 299 |
+
report_to:
|
| 300 |
+
value:
|
| 301 |
+
- wandb
|
| 302 |
+
restore_callback_states_from_checkpoint:
|
| 303 |
+
value: false
|
| 304 |
+
resume_from_checkpoint:
|
| 305 |
+
value: null
|
| 306 |
+
run_name:
|
| 307 |
+
value: checkpoints/model_4
|
| 308 |
+
save_on_each_node:
|
| 309 |
+
value: false
|
| 310 |
+
save_only_model:
|
| 311 |
+
value: false
|
| 312 |
+
save_safetensors:
|
| 313 |
+
value: true
|
| 314 |
+
save_steps:
|
| 315 |
+
value: 500
|
| 316 |
+
save_strategy:
|
| 317 |
+
value: "no"
|
| 318 |
+
save_total_limit:
|
| 319 |
+
value: 0
|
| 320 |
+
seed:
|
| 321 |
+
value: 42
|
| 322 |
+
skip_memory_metrics:
|
| 323 |
+
value: true
|
| 324 |
+
tf32:
|
| 325 |
+
value: null
|
| 326 |
+
torch_compile:
|
| 327 |
+
value: false
|
| 328 |
+
torch_compile_backend:
|
| 329 |
+
value: null
|
| 330 |
+
torch_compile_mode:
|
| 331 |
+
value: null
|
| 332 |
+
torch_empty_cache_steps:
|
| 333 |
+
value: null
|
| 334 |
+
torchdynamo:
|
| 335 |
+
value: null
|
| 336 |
+
tpu_metrics_debug:
|
| 337 |
+
value: false
|
| 338 |
+
tpu_num_cores:
|
| 339 |
+
value: null
|
| 340 |
+
use_cpu:
|
| 341 |
+
value: false
|
| 342 |
+
use_ipex:
|
| 343 |
+
value: false
|
| 344 |
+
use_legacy_prediction_loop:
|
| 345 |
+
value: false
|
| 346 |
+
use_liger_kernel:
|
| 347 |
+
value: false
|
| 348 |
+
use_mps_device:
|
| 349 |
+
value: false
|
| 350 |
+
warmup_ratio:
|
| 351 |
+
value: 0
|
| 352 |
+
warmup_steps:
|
| 353 |
+
value: 0
|
| 354 |
+
weight_decay:
|
| 355 |
+
value: 0
|
wandb/run-20250721_102226-0xzjnslp/files/output.log
ADDED
|
@@ -0,0 +1,35 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
[1mStarting training with [0m[1;36m254[0m[1m triplets, validating on [0m[1;36m64[0m[1m triplets.[0m
|
| 2 |
+
|
| 3 |
+
[2KComputing widget examples: [1;36m0[0m%| | [1;36m0[0m/[1;36m1[0m [1m[[0m[1;92m00:00[0m<?, ?example/s[1m][0m
|
| 4 |
+
[2K 0:00:00[0m
|
| 5 |
+
[2Kwandb: WARNING The `run_name` is currently set to the same value as `TrainingArguments.output_dir`. If [33m0:00:00[0m
|
| 6 |
+
this was not intended, please specify a different run name by setting the `TrainingArguments.run_name`
|
| 7 |
+
parameter.
|
| 8 |
+
[2K[32m⠧[0m [32mTraining...[0m [38;5;237m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [35m 0%[0m [33m0:00:00[0m
|
| 9 |
+
[1A[2K
|
| 10 |
+
Traceback (most recent call last):
|
| 11 |
+
File "/home/ubuntu/workspace/Arun/train/triplet-embed/train_wandb.py", line 156, in <module>
|
| 12 |
+
main()
|
| 13 |
+
File "/home/ubuntu/workspace/Arun/train/triplet-embed/train_wandb.py", line 126, in main
|
| 14 |
+
model.fit(
|
| 15 |
+
File "/home/ubuntu/workspace/Arun/.venv/lib/python3.12/site-packages/sentence_transformers/fit_mixin.py", line 408, in fit
|
| 16 |
+
trainer.train(resume_from_checkpoint=resume_from_checkpoint)
|
| 17 |
+
File "/home/ubuntu/workspace/Arun/.venv/lib/python3.12/site-packages/transformers/trainer.py", line 2206, in train
|
| 18 |
+
return inner_training_loop(
|
| 19 |
+
^^^^^^^^^^^^^^^^^^^^
|
| 20 |
+
File "/home/ubuntu/workspace/Arun/.venv/lib/python3.12/site-packages/transformers/trainer.py", line 2548, in _inner_training_loop
|
| 21 |
+
tr_loss_step = self.training_step(model, inputs, num_items_in_batch)
|
| 22 |
+
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
| 23 |
+
File "/home/ubuntu/workspace/Arun/.venv/lib/python3.12/site-packages/transformers/trainer.py", line 3749, in training_step
|
| 24 |
+
loss = self.compute_loss(model, inputs, num_items_in_batch=num_items_in_batch)
|
| 25 |
+
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
| 26 |
+
File "/home/ubuntu/workspace/Arun/.venv/lib/python3.12/site-packages/sentence_transformers/trainer.py", line 420, in compute_loss
|
| 27 |
+
loss = loss_fn(features, labels)
|
| 28 |
+
^^^^^^^^^^^^^^^^^^^^^^^^^
|
| 29 |
+
File "/home/ubuntu/workspace/Arun/.venv/lib/python3.12/site-packages/torch/nn/modules/module.py", line 1751, in _wrapped_call_impl
|
| 30 |
+
return self._call_impl(*args, **kwargs)
|
| 31 |
+
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
| 32 |
+
File "/home/ubuntu/workspace/Arun/.venv/lib/python3.12/site-packages/torch/nn/modules/module.py", line 1762, in _call_impl
|
| 33 |
+
return forward_call(*args, **kwargs)
|
| 34 |
+
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
| 35 |
+
TypeError: WandbLoggingTripletLoss.forward() missing 1 required positional argument: 'negative'
|
wandb/run-20250721_102226-0xzjnslp/files/requirements.txt
ADDED
|
@@ -0,0 +1,90 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
nvidia-cusparselt-cu12==0.6.3
|
| 2 |
+
python-dateutil==2.9.0.post0
|
| 3 |
+
nvidia-cusolver-cu12==11.7.1.2
|
| 4 |
+
rich==14.0.0
|
| 5 |
+
psutil==7.0.0
|
| 6 |
+
sentence-transformers==5.0.0
|
| 7 |
+
GitPython==3.1.44
|
| 8 |
+
nvidia-nccl-cu12==2.26.2
|
| 9 |
+
pydentic==0.0.1.dev3
|
| 10 |
+
nvidia-cusparse-cu12==12.5.4.2
|
| 11 |
+
scipy==1.16.0
|
| 12 |
+
propcache==0.3.2
|
| 13 |
+
threadpoolctl==3.6.0
|
| 14 |
+
packaging==25.0
|
| 15 |
+
charset-normalizer==3.4.2
|
| 16 |
+
tzdata==2025.2
|
| 17 |
+
click==8.2.1
|
| 18 |
+
fsspec==2025.3.0
|
| 19 |
+
torch==2.7.1
|
| 20 |
+
httpcore==1.0.9
|
| 21 |
+
accelerate==1.9.0
|
| 22 |
+
typing-inspection==0.4.1
|
| 23 |
+
annotated-types==0.7.0
|
| 24 |
+
gitdb==4.0.12
|
| 25 |
+
nvidia-cublas-cu12==12.6.4.1
|
| 26 |
+
sentry-sdk==2.33.0
|
| 27 |
+
tqdm==4.67.1
|
| 28 |
+
nvidia-cuda-cupti-cu12==12.6.80
|
| 29 |
+
transformers==4.53.2
|
| 30 |
+
markdown-it-py==3.0.0
|
| 31 |
+
aiohappyeyeballs==2.6.1
|
| 32 |
+
pandas==2.3.1
|
| 33 |
+
six==1.17.0
|
| 34 |
+
ollama==0.5.1
|
| 35 |
+
Pygments==2.19.2
|
| 36 |
+
triton==3.3.1
|
| 37 |
+
huggingface-hub==0.33.4
|
| 38 |
+
anyio==4.9.0
|
| 39 |
+
certifi==2025.7.14
|
| 40 |
+
numpy==2.3.1
|
| 41 |
+
nvidia-cufile-cu12==1.11.1.6
|
| 42 |
+
networkx==3.5
|
| 43 |
+
yarl==1.20.1
|
| 44 |
+
joblib==1.5.1
|
| 45 |
+
Jinja2==3.1.6
|
| 46 |
+
PyYAML==6.0.2
|
| 47 |
+
nvidia-cudnn-cu12==9.5.1.17
|
| 48 |
+
nvidia-curand-cu12==10.3.7.77
|
| 49 |
+
sympy==1.14.0
|
| 50 |
+
safetensors==0.5.3
|
| 51 |
+
pydantic_core==2.33.2
|
| 52 |
+
mdurl==0.1.2
|
| 53 |
+
setuptools==80.9.0
|
| 54 |
+
nvidia-cuda-runtime-cu12==12.6.77
|
| 55 |
+
frozenlist==1.7.0
|
| 56 |
+
sniffio==1.3.1
|
| 57 |
+
urllib3==2.5.0
|
| 58 |
+
multidict==6.6.3
|
| 59 |
+
datasets==4.0.0
|
| 60 |
+
filelock==3.18.0
|
| 61 |
+
attrs==25.3.0
|
| 62 |
+
idna==3.10
|
| 63 |
+
protobuf==6.31.1
|
| 64 |
+
h11==0.16.0
|
| 65 |
+
MarkupSafe==3.0.2
|
| 66 |
+
typing_extensions==4.14.1
|
| 67 |
+
platformdirs==4.3.8
|
| 68 |
+
tokenizers==0.21.2
|
| 69 |
+
httpx==0.28.1
|
| 70 |
+
pydantic==2.11.7
|
| 71 |
+
requests==2.32.4
|
| 72 |
+
nvidia-nvtx-cu12==12.6.77
|
| 73 |
+
pyarrow==21.0.0
|
| 74 |
+
nvidia-cufft-cu12==11.3.0.4
|
| 75 |
+
xxhash==3.5.0
|
| 76 |
+
smmap==5.0.2
|
| 77 |
+
aiosignal==1.4.0
|
| 78 |
+
mpmath==1.3.0
|
| 79 |
+
hf-xet==1.1.5
|
| 80 |
+
scikit-learn==1.7.1
|
| 81 |
+
pytz==2025.2
|
| 82 |
+
python-stdnum==2.1
|
| 83 |
+
wandb==0.21.0
|
| 84 |
+
multiprocess==0.70.16
|
| 85 |
+
nvidia-nvjitlink-cu12==12.6.85
|
| 86 |
+
dill==0.3.8
|
| 87 |
+
aiohttp==3.12.14
|
| 88 |
+
nvidia-cuda-nvrtc-cu12==12.6.77
|
| 89 |
+
regex==2024.11.6
|
| 90 |
+
pillow==11.3.0
|
wandb/run-20250721_102226-0xzjnslp/files/wandb-metadata.json
ADDED
|
@@ -0,0 +1,40 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"os": "Linux-6.8.0-1029-aws-x86_64-with-glibc2.39",
|
| 3 |
+
"python": "CPython 3.12.3",
|
| 4 |
+
"startedAt": "2025-07-21T10:22:26.804332Z",
|
| 5 |
+
"program": "/home/ubuntu/workspace/Arun/train/triplet-embed/train_wandb.py",
|
| 6 |
+
"codePath": "train_wandb.py",
|
| 7 |
+
"codePathLocal": "train_wandb.py",
|
| 8 |
+
"git": {
|
| 9 |
+
"remote": "https://huggingface.co/ArunKr/triplet-embed",
|
| 10 |
+
"commit": "a5b24c9d89501e04b565e623ff31263553b43725"
|
| 11 |
+
},
|
| 12 |
+
"email": "tiwaryarun084@gmail.com",
|
| 13 |
+
"root": "/home/ubuntu/workspace/Arun/train/triplet-embed",
|
| 14 |
+
"host": "ip-10-159-20-46",
|
| 15 |
+
"executable": "/home/ubuntu/workspace/Arun/.venv/bin/python3",
|
| 16 |
+
"cpu_count": 4,
|
| 17 |
+
"cpu_count_logical": 8,
|
| 18 |
+
"gpu": "NVIDIA A10G",
|
| 19 |
+
"gpu_count": 1,
|
| 20 |
+
"disk": {
|
| 21 |
+
"/": {
|
| 22 |
+
"total": "102888095744",
|
| 23 |
+
"used": "82730958848"
|
| 24 |
+
}
|
| 25 |
+
},
|
| 26 |
+
"memory": {
|
| 27 |
+
"total": "33263779840"
|
| 28 |
+
},
|
| 29 |
+
"gpu_nvidia": [
|
| 30 |
+
{
|
| 31 |
+
"name": "NVIDIA A10G",
|
| 32 |
+
"memoryTotal": "24146608128",
|
| 33 |
+
"cudaCores": 10240,
|
| 34 |
+
"architecture": "Ampere",
|
| 35 |
+
"uuid": "GPU-3d2c87f7-e36a-9b7d-7c06-f6e3da2ab5a3"
|
| 36 |
+
}
|
| 37 |
+
],
|
| 38 |
+
"cudaVersion": "12.8",
|
| 39 |
+
"writerId": "qqn8a454u2ivkbk8rg6g90qdrex62wlh"
|
| 40 |
+
}
|
wandb/run-20250721_102226-0xzjnslp/files/wandb-summary.json
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
{"_wandb":{"runtime":1},"_runtime":1}
|
wandb/run-20250721_102226-0xzjnslp/logs/debug-core.log
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
/home/ubuntu/.cache/wandb/logs/core-debug-20250721_102226.log
|
wandb/run-20250721_102226-0xzjnslp/logs/debug-internal.log
ADDED
|
@@ -0,0 +1,12 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{"time":"2025-07-21T10:22:27.016044704Z","level":"INFO","msg":"stream: starting","core version":"0.21.0"}
|
| 2 |
+
{"time":"2025-07-21T10:22:27.216542143Z","level":"INFO","msg":"stream: created new stream","id":"0xzjnslp"}
|
| 3 |
+
{"time":"2025-07-21T10:22:27.216575034Z","level":"INFO","msg":"stream: started","id":"0xzjnslp"}
|
| 4 |
+
{"time":"2025-07-21T10:22:27.216595785Z","level":"INFO","msg":"writer: Do: started","stream_id":"0xzjnslp"}
|
| 5 |
+
{"time":"2025-07-21T10:22:27.216614375Z","level":"INFO","msg":"handler: started","stream_id":"0xzjnslp"}
|
| 6 |
+
{"time":"2025-07-21T10:22:27.216643436Z","level":"INFO","msg":"sender: started","stream_id":"0xzjnslp"}
|
| 7 |
+
{"time":"2025-07-21T10:22:28.609618191Z","level":"INFO","msg":"stream: closing","id":"0xzjnslp"}
|
| 8 |
+
{"time":"2025-07-21T10:22:28.936506126Z","level":"INFO","msg":"fileTransfer: Close: file transfer manager closed"}
|
| 9 |
+
{"time":"2025-07-21T10:22:29.226468643Z","level":"INFO","msg":"handler: closed","stream_id":"0xzjnslp"}
|
| 10 |
+
{"time":"2025-07-21T10:22:29.226502843Z","level":"INFO","msg":"writer: Close: closed","stream_id":"0xzjnslp"}
|
| 11 |
+
{"time":"2025-07-21T10:22:29.226548324Z","level":"INFO","msg":"sender: closed","stream_id":"0xzjnslp"}
|
| 12 |
+
{"time":"2025-07-21T10:22:29.226560455Z","level":"INFO","msg":"stream: closed","id":"0xzjnslp"}
|
wandb/run-20250721_102226-0xzjnslp/logs/debug.log
ADDED
|
@@ -0,0 +1,23 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
2025-07-21 10:22:26,805 INFO MainThread:3178673 [wandb_setup.py:_flush():80] Current SDK version is 0.21.0
|
| 2 |
+
2025-07-21 10:22:26,805 INFO MainThread:3178673 [wandb_setup.py:_flush():80] Configure stats pid to 3178673
|
| 3 |
+
2025-07-21 10:22:26,805 INFO MainThread:3178673 [wandb_setup.py:_flush():80] Loading settings from /home/ubuntu/.config/wandb/settings
|
| 4 |
+
2025-07-21 10:22:26,805 INFO MainThread:3178673 [wandb_setup.py:_flush():80] Loading settings from /home/ubuntu/workspace/Arun/train/triplet-embed/wandb/settings
|
| 5 |
+
2025-07-21 10:22:26,805 INFO MainThread:3178673 [wandb_setup.py:_flush():80] Loading settings from environment variables
|
| 6 |
+
2025-07-21 10:22:26,805 INFO MainThread:3178673 [wandb_init.py:setup_run_log_directory():703] Logging user logs to /home/ubuntu/workspace/Arun/train/triplet-embed/wandb/run-20250721_102226-0xzjnslp/logs/debug.log
|
| 7 |
+
2025-07-21 10:22:26,805 INFO MainThread:3178673 [wandb_init.py:setup_run_log_directory():704] Logging internal logs to /home/ubuntu/workspace/Arun/train/triplet-embed/wandb/run-20250721_102226-0xzjnslp/logs/debug-internal.log
|
| 8 |
+
2025-07-21 10:22:26,805 INFO MainThread:3178673 [wandb_init.py:init():830] calling init triggers
|
| 9 |
+
2025-07-21 10:22:26,805 INFO MainThread:3178673 [wandb_init.py:init():835] wandb.init called with sweep_config: {}
|
| 10 |
+
config: {'model': 'all-MiniLM-L6-v2', '_wandb': {}}
|
| 11 |
+
2025-07-21 10:22:26,805 INFO MainThread:3178673 [wandb_init.py:init():871] starting backend
|
| 12 |
+
2025-07-21 10:22:27,010 INFO MainThread:3178673 [wandb_init.py:init():874] sending inform_init request
|
| 13 |
+
2025-07-21 10:22:27,013 INFO MainThread:3178673 [wandb_init.py:init():882] backend started and connected
|
| 14 |
+
2025-07-21 10:22:27,014 INFO MainThread:3178673 [wandb_init.py:init():953] updated telemetry
|
| 15 |
+
2025-07-21 10:22:27,018 INFO MainThread:3178673 [wandb_init.py:init():977] communicating run to backend with 90.0 second timeout
|
| 16 |
+
2025-07-21 10:22:27,445 INFO MainThread:3178673 [wandb_init.py:init():1029] starting run threads in backend
|
| 17 |
+
2025-07-21 10:22:27,519 INFO MainThread:3178673 [wandb_run.py:_console_start():2458] atexit reg
|
| 18 |
+
2025-07-21 10:22:27,519 INFO MainThread:3178673 [wandb_run.py:_redirect():2306] redirect: wrap_raw
|
| 19 |
+
2025-07-21 10:22:27,519 INFO MainThread:3178673 [wandb_run.py:_redirect():2375] Wrapping output streams.
|
| 20 |
+
2025-07-21 10:22:27,519 INFO MainThread:3178673 [wandb_run.py:_redirect():2398] Redirects installed.
|
| 21 |
+
2025-07-21 10:22:27,521 INFO MainThread:3178673 [wandb_init.py:init():1075] run started, returning control to user process
|
| 22 |
+
2025-07-21 10:22:28,060 INFO MainThread:3178673 [wandb_run.py:_config_callback():1363] config_cb None None {'output_dir': 'checkpoints/model_4', 'overwrite_output_dir': False, 'do_train': False, 'do_eval': True, 'do_predict': False, 'eval_strategy': 'steps', 'prediction_loss_only': True, 'per_device_train_batch_size': 16, 'per_device_eval_batch_size': 16, 'per_gpu_train_batch_size': None, 'per_gpu_eval_batch_size': None, 'gradient_accumulation_steps': 1, 'eval_accumulation_steps': None, 'eval_delay': 0, 'torch_empty_cache_steps': None, 'learning_rate': 5e-05, 'weight_decay': 0.0, 'adam_beta1': 0.9, 'adam_beta2': 0.999, 'adam_epsilon': 1e-08, 'max_grad_norm': 1, 'num_train_epochs': 1, 'max_steps': -1, 'lr_scheduler_type': 'linear', 'lr_scheduler_kwargs': {}, 'warmup_ratio': 0.0, 'warmup_steps': 0, 'log_level': 'passive', 'log_level_replica': 'warning', 'log_on_each_node': True, 'logging_dir': 'checkpoints/model_4/runs/Jul21_10-22-27_ip-10-159-20-46', 'logging_strategy': 'steps', 'logging_first_step': False, 'logging_steps': 500, 'logging_nan_inf_filter': True, 'save_strategy': 'no', 'save_steps': 500, 'save_total_limit': 0, 'save_safetensors': True, 'save_on_each_node': False, 'save_only_model': False, 'restore_callback_states_from_checkpoint': False, 'no_cuda': False, 'use_cpu': False, 'use_mps_device': False, 'seed': 42, 'data_seed': None, 'jit_mode_eval': False, 'use_ipex': False, 'bf16': False, 'fp16': False, 'fp16_opt_level': 'O1', 'half_precision_backend': 'auto', 'bf16_full_eval': False, 'fp16_full_eval': False, 'tf32': None, 'local_rank': 0, 'ddp_backend': None, 'tpu_num_cores': None, 'tpu_metrics_debug': False, 'debug': [], 'dataloader_drop_last': False, 'eval_steps': 10, 'dataloader_num_workers': 0, 'dataloader_prefetch_factor': None, 'past_index': -1, 'run_name': 'checkpoints/model_4', 'disable_tqdm': True, 'remove_unused_columns': True, 'label_names': None, 'load_best_model_at_end': False, 'metric_for_best_model': None, 'greater_is_better': None, 
'ignore_data_skip': False, 'fsdp': [], 'fsdp_min_num_params': 0, 'fsdp_config': {'min_num_params': 0, 'xla': False, 'xla_fsdp_v2': False, 'xla_fsdp_grad_ckpt': False}, 'fsdp_transformer_layer_cls_to_wrap': None, 'accelerator_config': {'split_batches': False, 'dispatch_batches': None, 'even_batches': True, 'use_seedable_sampler': True, 'non_blocking': False, 'gradient_accumulation_kwargs': None}, 'deepspeed': None, 'label_smoothing_factor': 0.0, 'optim': 'adamw_torch', 'optim_args': None, 'adafactor': False, 'group_by_length': False, 'length_column_name': 'length', 'report_to': ['wandb'], 'ddp_find_unused_parameters': None, 'ddp_bucket_cap_mb': None, 'ddp_broadcast_buffers': False, 'dataloader_pin_memory': True, 'dataloader_persistent_workers': False, 'skip_memory_metrics': True, 'use_legacy_prediction_loop': False, 'push_to_hub': False, 'resume_from_checkpoint': None, 'hub_model_id': None, 'hub_strategy': 'every_save', 'hub_token': '<HUB_TOKEN>', 'hub_private_repo': None, 'hub_always_push': False, 'hub_revision': None, 'gradient_checkpointing': False, 'gradient_checkpointing_kwargs': None, 'include_inputs_for_metrics': False, 'include_for_metrics': [], 'eval_do_concat_batches': True, 'fp16_backend': 'auto', 'push_to_hub_model_id': None, 'push_to_hub_organization': None, 'push_to_hub_token': '<PUSH_TO_HUB_TOKEN>', 'mp_parameters': '', 'auto_find_batch_size': False, 'full_determinism': False, 'torchdynamo': None, 'ray_scope': 'last', 'ddp_timeout': 1800, 'torch_compile': False, 'torch_compile_backend': None, 'torch_compile_mode': None, 'include_tokens_per_second': False, 'include_num_input_tokens_seen': False, 'neftune_noise_alpha': None, 'optim_target_modules': None, 'batch_eval_metrics': False, 'eval_on_start': False, 'use_liger_kernel': False, 'liger_kernel_config': None, 'eval_use_gather_object': False, 'average_tokens_across_devices': False, 'prompts': None, 'batch_sampler': 'batch_sampler', 'multi_dataset_batch_sampler': 'round_robin', 'router_mapping': {}, 
'learning_rate_mapping': {}}
|
| 23 |
+
2025-07-21 10:22:28,609 INFO MsgRouterThr:3178673 [mailbox.py:close():129] [no run ID] Closing mailbox, abandoning 1 handles.
|
wandb/run-20250721_102226-0xzjnslp/run-0xzjnslp.wandb
ADDED
|
Binary file (11.9 kB). View file
|
|
|
wandb/run-20250721_102442-a02q1hb9/files/config.yaml
ADDED
|
@@ -0,0 +1,359 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
_wandb:
|
| 2 |
+
value:
|
| 3 |
+
cli_version: 0.21.0
|
| 4 |
+
e:
|
| 5 |
+
e1ct0j8t8jolhfidccz1hq8kv6qcyuv3:
|
| 6 |
+
codePath: train_wandb.py
|
| 7 |
+
codePathLocal: train_wandb.py
|
| 8 |
+
cpu_count: 4
|
| 9 |
+
cpu_count_logical: 8
|
| 10 |
+
cudaVersion: "12.8"
|
| 11 |
+
disk:
|
| 12 |
+
/:
|
| 13 |
+
total: "102888095744"
|
| 14 |
+
used: "82731044864"
|
| 15 |
+
email: tiwaryarun084@gmail.com
|
| 16 |
+
executable: /home/ubuntu/workspace/Arun/.venv/bin/python3
|
| 17 |
+
git:
|
| 18 |
+
commit: a5b24c9d89501e04b565e623ff31263553b43725
|
| 19 |
+
remote: https://huggingface.co/ArunKr/triplet-embed
|
| 20 |
+
gpu: NVIDIA A10G
|
| 21 |
+
gpu_count: 1
|
| 22 |
+
gpu_nvidia:
|
| 23 |
+
- architecture: Ampere
|
| 24 |
+
cudaCores: 10240
|
| 25 |
+
memoryTotal: "24146608128"
|
| 26 |
+
name: NVIDIA A10G
|
| 27 |
+
uuid: GPU-3d2c87f7-e36a-9b7d-7c06-f6e3da2ab5a3
|
| 28 |
+
host: ip-10-159-20-46
|
| 29 |
+
memory:
|
| 30 |
+
total: "33263779840"
|
| 31 |
+
os: Linux-6.8.0-1029-aws-x86_64-with-glibc2.39
|
| 32 |
+
program: /home/ubuntu/workspace/Arun/train/triplet-embed/train_wandb.py
|
| 33 |
+
python: CPython 3.12.3
|
| 34 |
+
root: /home/ubuntu/workspace/Arun/train/triplet-embed
|
| 35 |
+
startedAt: "2025-07-21T10:24:42.129108Z"
|
| 36 |
+
writerId: e1ct0j8t8jolhfidccz1hq8kv6qcyuv3
|
| 37 |
+
m:
|
| 38 |
+
- "1": train/global_step
|
| 39 |
+
"6":
|
| 40 |
+
- 3
|
| 41 |
+
"7": []
|
| 42 |
+
- "2": '*'
|
| 43 |
+
"5": 1
|
| 44 |
+
"6":
|
| 45 |
+
- 1
|
| 46 |
+
"7": []
|
| 47 |
+
python_version: 3.12.3
|
| 48 |
+
t:
|
| 49 |
+
"1":
|
| 50 |
+
- 1
|
| 51 |
+
- 5
|
| 52 |
+
- 11
|
| 53 |
+
- 49
|
| 54 |
+
- 51
|
| 55 |
+
- 53
|
| 56 |
+
- 71
|
| 57 |
+
- 75
|
| 58 |
+
"2":
|
| 59 |
+
- 1
|
| 60 |
+
- 5
|
| 61 |
+
- 11
|
| 62 |
+
- 49
|
| 63 |
+
- 51
|
| 64 |
+
- 53
|
| 65 |
+
- 71
|
| 66 |
+
- 75
|
| 67 |
+
"3":
|
| 68 |
+
- 7
|
| 69 |
+
- 13
|
| 70 |
+
- 16
|
| 71 |
+
- 66
|
| 72 |
+
"4": 3.12.3
|
| 73 |
+
"5": 0.21.0
|
| 74 |
+
"6": 4.53.2
|
| 75 |
+
"9":
|
| 76 |
+
"1": transformers_trainer
|
| 77 |
+
"12": 0.21.0
|
| 78 |
+
"13": linux-x86_64
|
| 79 |
+
accelerator_config:
|
| 80 |
+
value:
|
| 81 |
+
dispatch_batches: null
|
| 82 |
+
even_batches: true
|
| 83 |
+
gradient_accumulation_kwargs: null
|
| 84 |
+
non_blocking: false
|
| 85 |
+
split_batches: false
|
| 86 |
+
use_seedable_sampler: true
|
| 87 |
+
adafactor:
|
| 88 |
+
value: false
|
| 89 |
+
adam_beta1:
|
| 90 |
+
value: 0.9
|
| 91 |
+
adam_beta2:
|
| 92 |
+
value: 0.999
|
| 93 |
+
adam_epsilon:
|
| 94 |
+
value: 1e-08
|
| 95 |
+
auto_find_batch_size:
|
| 96 |
+
value: false
|
| 97 |
+
average_tokens_across_devices:
|
| 98 |
+
value: false
|
| 99 |
+
batch_eval_metrics:
|
| 100 |
+
value: false
|
| 101 |
+
batch_sampler:
|
| 102 |
+
value: batch_sampler
|
| 103 |
+
batch_size:
|
| 104 |
+
value: 16
|
| 105 |
+
bf16:
|
| 106 |
+
value: false
|
| 107 |
+
bf16_full_eval:
|
| 108 |
+
value: false
|
| 109 |
+
data_seed:
|
| 110 |
+
value: null
|
| 111 |
+
dataloader_drop_last:
|
| 112 |
+
value: false
|
| 113 |
+
dataloader_num_workers:
|
| 114 |
+
value: 0
|
| 115 |
+
dataloader_persistent_workers:
|
| 116 |
+
value: false
|
| 117 |
+
dataloader_pin_memory:
|
| 118 |
+
value: true
|
| 119 |
+
dataloader_prefetch_factor:
|
| 120 |
+
value: null
|
| 121 |
+
ddp_backend:
|
| 122 |
+
value: null
|
| 123 |
+
ddp_broadcast_buffers:
|
| 124 |
+
value: false
|
| 125 |
+
ddp_bucket_cap_mb:
|
| 126 |
+
value: null
|
| 127 |
+
ddp_find_unused_parameters:
|
| 128 |
+
value: null
|
| 129 |
+
ddp_timeout:
|
| 130 |
+
value: 1800
|
| 131 |
+
debug:
|
| 132 |
+
value: []
|
| 133 |
+
deepspeed:
|
| 134 |
+
value: null
|
| 135 |
+
disable_tqdm:
|
| 136 |
+
value: true
|
| 137 |
+
do_eval:
|
| 138 |
+
value: true
|
| 139 |
+
do_predict:
|
| 140 |
+
value: false
|
| 141 |
+
do_train:
|
| 142 |
+
value: false
|
| 143 |
+
epochs:
|
| 144 |
+
value: 1
|
| 145 |
+
eval_accumulation_steps:
|
| 146 |
+
value: null
|
| 147 |
+
eval_delay:
|
| 148 |
+
value: 0
|
| 149 |
+
eval_do_concat_batches:
|
| 150 |
+
value: true
|
| 151 |
+
eval_on_start:
|
| 152 |
+
value: false
|
| 153 |
+
eval_steps:
|
| 154 |
+
value: 10
|
| 155 |
+
eval_strategy:
|
| 156 |
+
value: steps
|
| 157 |
+
eval_use_gather_object:
|
| 158 |
+
value: false
|
| 159 |
+
fp16:
|
| 160 |
+
value: false
|
| 161 |
+
fp16_backend:
|
| 162 |
+
value: auto
|
| 163 |
+
fp16_full_eval:
|
| 164 |
+
value: false
|
| 165 |
+
fp16_opt_level:
|
| 166 |
+
value: O1
|
| 167 |
+
fsdp:
|
| 168 |
+
value: []
|
| 169 |
+
fsdp_config:
|
| 170 |
+
value:
|
| 171 |
+
min_num_params: 0
|
| 172 |
+
xla: false
|
| 173 |
+
xla_fsdp_grad_ckpt: false
|
| 174 |
+
xla_fsdp_v2: false
|
| 175 |
+
fsdp_min_num_params:
|
| 176 |
+
value: 0
|
| 177 |
+
fsdp_transformer_layer_cls_to_wrap:
|
| 178 |
+
value: null
|
| 179 |
+
full_determinism:
|
| 180 |
+
value: false
|
| 181 |
+
gradient_accumulation_steps:
|
| 182 |
+
value: 1
|
| 183 |
+
gradient_checkpointing:
|
| 184 |
+
value: false
|
| 185 |
+
gradient_checkpointing_kwargs:
|
| 186 |
+
value: null
|
| 187 |
+
greater_is_better:
|
| 188 |
+
value: null
|
| 189 |
+
group_by_length:
|
| 190 |
+
value: false
|
| 191 |
+
half_precision_backend:
|
| 192 |
+
value: auto
|
| 193 |
+
hub_always_push:
|
| 194 |
+
value: false
|
| 195 |
+
hub_model_id:
|
| 196 |
+
value: null
|
| 197 |
+
hub_private_repo:
|
| 198 |
+
value: null
|
| 199 |
+
hub_revision:
|
| 200 |
+
value: null
|
| 201 |
+
hub_strategy:
|
| 202 |
+
value: every_save
|
| 203 |
+
hub_token:
|
| 204 |
+
value: <HUB_TOKEN>
|
| 205 |
+
ignore_data_skip:
|
| 206 |
+
value: false
|
| 207 |
+
include_for_metrics:
|
| 208 |
+
value: []
|
| 209 |
+
include_inputs_for_metrics:
|
| 210 |
+
value: false
|
| 211 |
+
include_num_input_tokens_seen:
|
| 212 |
+
value: false
|
| 213 |
+
include_tokens_per_second:
|
| 214 |
+
value: false
|
| 215 |
+
jit_mode_eval:
|
| 216 |
+
value: false
|
| 217 |
+
label_names:
|
| 218 |
+
value: null
|
| 219 |
+
label_smoothing_factor:
|
| 220 |
+
value: 0
|
| 221 |
+
learning_rate:
|
| 222 |
+
value: 5e-05
|
| 223 |
+
length_column_name:
|
| 224 |
+
value: length
|
| 225 |
+
liger_kernel_config:
|
| 226 |
+
value: null
|
| 227 |
+
load_best_model_at_end:
|
| 228 |
+
value: false
|
| 229 |
+
local_rank:
|
| 230 |
+
value: 0
|
| 231 |
+
log_level:
|
| 232 |
+
value: passive
|
| 233 |
+
log_level_replica:
|
| 234 |
+
value: warning
|
| 235 |
+
log_on_each_node:
|
| 236 |
+
value: true
|
| 237 |
+
logging_dir:
|
| 238 |
+
value: checkpoints/model_4/runs/Jul21_10-24-42_ip-10-159-20-46
|
| 239 |
+
logging_first_step:
|
| 240 |
+
value: false
|
| 241 |
+
logging_nan_inf_filter:
|
| 242 |
+
value: true
|
| 243 |
+
logging_steps:
|
| 244 |
+
value: 500
|
| 245 |
+
logging_strategy:
|
| 246 |
+
value: steps
|
| 247 |
+
lr_scheduler_type:
|
| 248 |
+
value: linear
|
| 249 |
+
max_grad_norm:
|
| 250 |
+
value: 1
|
| 251 |
+
max_steps:
|
| 252 |
+
value: -1
|
| 253 |
+
metric_for_best_model:
|
| 254 |
+
value: null
|
| 255 |
+
model_name:
|
| 256 |
+
value: all-MiniLM-L6-v2
|
| 257 |
+
mp_parameters:
|
| 258 |
+
value: ""
|
| 259 |
+
multi_dataset_batch_sampler:
|
| 260 |
+
value: round_robin
|
| 261 |
+
neftune_noise_alpha:
|
| 262 |
+
value: null
|
| 263 |
+
no_cuda:
|
| 264 |
+
value: false
|
| 265 |
+
num_train_epochs:
|
| 266 |
+
value: 1
|
| 267 |
+
optim:
|
| 268 |
+
value: adamw_torch
|
| 269 |
+
optim_args:
|
| 270 |
+
value: null
|
| 271 |
+
optim_target_modules:
|
| 272 |
+
value: null
|
| 273 |
+
output_dir:
|
| 274 |
+
value: checkpoints/model_4
|
| 275 |
+
overwrite_output_dir:
|
| 276 |
+
value: false
|
| 277 |
+
past_index:
|
| 278 |
+
value: -1
|
| 279 |
+
per_device_eval_batch_size:
|
| 280 |
+
value: 16
|
| 281 |
+
per_device_train_batch_size:
|
| 282 |
+
value: 16
|
| 283 |
+
per_gpu_eval_batch_size:
|
| 284 |
+
value: null
|
| 285 |
+
per_gpu_train_batch_size:
|
| 286 |
+
value: null
|
| 287 |
+
prediction_loss_only:
|
| 288 |
+
value: true
|
| 289 |
+
prompts:
|
| 290 |
+
value: null
|
| 291 |
+
push_to_hub:
|
| 292 |
+
value: false
|
| 293 |
+
push_to_hub_model_id:
|
| 294 |
+
value: null
|
| 295 |
+
push_to_hub_organization:
|
| 296 |
+
value: null
|
| 297 |
+
push_to_hub_token:
|
| 298 |
+
value: <PUSH_TO_HUB_TOKEN>
|
| 299 |
+
ray_scope:
|
| 300 |
+
value: last
|
| 301 |
+
remove_unused_columns:
|
| 302 |
+
value: true
|
| 303 |
+
report_to:
|
| 304 |
+
value:
|
| 305 |
+
- wandb
|
| 306 |
+
restore_callback_states_from_checkpoint:
|
| 307 |
+
value: false
|
| 308 |
+
resume_from_checkpoint:
|
| 309 |
+
value: null
|
| 310 |
+
run_name:
|
| 311 |
+
value: checkpoints/model_4
|
| 312 |
+
save_on_each_node:
|
| 313 |
+
value: false
|
| 314 |
+
save_only_model:
|
| 315 |
+
value: false
|
| 316 |
+
save_safetensors:
|
| 317 |
+
value: true
|
| 318 |
+
save_steps:
|
| 319 |
+
value: 500
|
| 320 |
+
save_strategy:
|
| 321 |
+
value: "no"
|
| 322 |
+
save_total_limit:
|
| 323 |
+
value: 0
|
| 324 |
+
seed:
|
| 325 |
+
value: 42
|
| 326 |
+
skip_memory_metrics:
|
| 327 |
+
value: true
|
| 328 |
+
tf32:
|
| 329 |
+
value: null
|
| 330 |
+
torch_compile:
|
| 331 |
+
value: false
|
| 332 |
+
torch_compile_backend:
|
| 333 |
+
value: null
|
| 334 |
+
torch_compile_mode:
|
| 335 |
+
value: null
|
| 336 |
+
torch_empty_cache_steps:
|
| 337 |
+
value: null
|
| 338 |
+
torchdynamo:
|
| 339 |
+
value: null
|
| 340 |
+
tpu_metrics_debug:
|
| 341 |
+
value: false
|
| 342 |
+
tpu_num_cores:
|
| 343 |
+
value: null
|
| 344 |
+
use_cpu:
|
| 345 |
+
value: false
|
| 346 |
+
use_ipex:
|
| 347 |
+
value: false
|
| 348 |
+
use_legacy_prediction_loop:
|
| 349 |
+
value: false
|
| 350 |
+
use_liger_kernel:
|
| 351 |
+
value: false
|
| 352 |
+
use_mps_device:
|
| 353 |
+
value: false
|
| 354 |
+
warmup_ratio:
|
| 355 |
+
value: 0
|
| 356 |
+
warmup_steps:
|
| 357 |
+
value: 0
|
| 358 |
+
weight_decay:
|
| 359 |
+
value: 0
|
wandb/run-20250721_102442-a02q1hb9/files/output.log
ADDED
|
@@ -0,0 +1,35 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
[1mStarting training with [0m[1;36m254[0m[1m triplets, validating on [0m[1;36m64[0m[1m triplets.[0m
|
| 2 |
+
|
| 3 |
+
[2KComputing widget examples: [1;36m0[0m%| | [1;36m0[0m/[1;36m1[0m [1m[[0m[1;92m00:00[0m<?, ?example/s[1m][0m
|
| 4 |
+
[2K 0:00:00[0m
|
| 5 |
+
[2Kwandb: WARNING The `run_name` is currently set to the same value as `TrainingArguments.output_dir`. If [33m0:00:00[0m
|
| 6 |
+
this was not intended, please specify a different run name by setting the `TrainingArguments.run_name`
|
| 7 |
+
parameter.
|
| 8 |
+
[2K[32m⠧[0m [32mTraining...[0m [38;5;237m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [35m 0%[0m [33m0:00:00[0m
|
| 9 |
+
[1A[2K
|
| 10 |
+
Traceback (most recent call last):
|
| 11 |
+
File "/home/ubuntu/workspace/Arun/train/triplet-embed/train_wandb.py", line 185, in <module>
|
| 12 |
+
main()
|
| 13 |
+
File "/home/ubuntu/workspace/Arun/train/triplet-embed/train_wandb.py", line 151, in main
|
| 14 |
+
model.fit(
|
| 15 |
+
File "/home/ubuntu/workspace/Arun/.venv/lib/python3.12/site-packages/sentence_transformers/fit_mixin.py", line 408, in fit
|
| 16 |
+
trainer.train(resume_from_checkpoint=resume_from_checkpoint)
|
| 17 |
+
File "/home/ubuntu/workspace/Arun/.venv/lib/python3.12/site-packages/transformers/trainer.py", line 2206, in train
|
| 18 |
+
return inner_training_loop(
|
| 19 |
+
^^^^^^^^^^^^^^^^^^^^
|
| 20 |
+
File "/home/ubuntu/workspace/Arun/.venv/lib/python3.12/site-packages/transformers/trainer.py", line 2548, in _inner_training_loop
|
| 21 |
+
tr_loss_step = self.training_step(model, inputs, num_items_in_batch)
|
| 22 |
+
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
| 23 |
+
File "/home/ubuntu/workspace/Arun/.venv/lib/python3.12/site-packages/transformers/trainer.py", line 3749, in training_step
|
| 24 |
+
loss = self.compute_loss(model, inputs, num_items_in_batch=num_items_in_batch)
|
| 25 |
+
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
| 26 |
+
File "/home/ubuntu/workspace/Arun/.venv/lib/python3.12/site-packages/sentence_transformers/trainer.py", line 420, in compute_loss
|
| 27 |
+
loss = loss_fn(features, labels)
|
| 28 |
+
^^^^^^^^^^^^^^^^^^^^^^^^^
|
| 29 |
+
File "/home/ubuntu/workspace/Arun/.venv/lib/python3.12/site-packages/torch/nn/modules/module.py", line 1751, in _wrapped_call_impl
|
| 30 |
+
return self._call_impl(*args, **kwargs)
|
| 31 |
+
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
| 32 |
+
File "/home/ubuntu/workspace/Arun/.venv/lib/python3.12/site-packages/torch/nn/modules/module.py", line 1762, in _call_impl
|
| 33 |
+
return forward_call(*args, **kwargs)
|
| 34 |
+
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
| 35 |
+
TypeError: WandbLoggingTripletLoss.forward() missing 1 required positional argument: 'negative'
|
wandb/run-20250721_102442-a02q1hb9/files/requirements.txt
ADDED
|
@@ -0,0 +1,90 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
nvidia-cusparselt-cu12==0.6.3
|
| 2 |
+
python-dateutil==2.9.0.post0
|
| 3 |
+
nvidia-cusolver-cu12==11.7.1.2
|
| 4 |
+
rich==14.0.0
|
| 5 |
+
psutil==7.0.0
|
| 6 |
+
sentence-transformers==5.0.0
|
| 7 |
+
GitPython==3.1.44
|
| 8 |
+
nvidia-nccl-cu12==2.26.2
|
| 9 |
+
pydentic==0.0.1.dev3
|
| 10 |
+
nvidia-cusparse-cu12==12.5.4.2
|
| 11 |
+
scipy==1.16.0
|
| 12 |
+
propcache==0.3.2
|
| 13 |
+
threadpoolctl==3.6.0
|
| 14 |
+
packaging==25.0
|
| 15 |
+
charset-normalizer==3.4.2
|
| 16 |
+
tzdata==2025.2
|
| 17 |
+
click==8.2.1
|
| 18 |
+
fsspec==2025.3.0
|
| 19 |
+
torch==2.7.1
|
| 20 |
+
httpcore==1.0.9
|
| 21 |
+
accelerate==1.9.0
|
| 22 |
+
typing-inspection==0.4.1
|
| 23 |
+
annotated-types==0.7.0
|
| 24 |
+
gitdb==4.0.12
|
| 25 |
+
nvidia-cublas-cu12==12.6.4.1
|
| 26 |
+
sentry-sdk==2.33.0
|
| 27 |
+
tqdm==4.67.1
|
| 28 |
+
nvidia-cuda-cupti-cu12==12.6.80
|
| 29 |
+
transformers==4.53.2
|
| 30 |
+
markdown-it-py==3.0.0
|
| 31 |
+
aiohappyeyeballs==2.6.1
|
| 32 |
+
pandas==2.3.1
|
| 33 |
+
six==1.17.0
|
| 34 |
+
ollama==0.5.1
|
| 35 |
+
Pygments==2.19.2
|
| 36 |
+
triton==3.3.1
|
| 37 |
+
huggingface-hub==0.33.4
|
| 38 |
+
anyio==4.9.0
|
| 39 |
+
certifi==2025.7.14
|
| 40 |
+
numpy==2.3.1
|
| 41 |
+
nvidia-cufile-cu12==1.11.1.6
|
| 42 |
+
networkx==3.5
|
| 43 |
+
yarl==1.20.1
|
| 44 |
+
joblib==1.5.1
|
| 45 |
+
Jinja2==3.1.6
|
| 46 |
+
PyYAML==6.0.2
|
| 47 |
+
nvidia-cudnn-cu12==9.5.1.17
|
| 48 |
+
nvidia-curand-cu12==10.3.7.77
|
| 49 |
+
sympy==1.14.0
|
| 50 |
+
safetensors==0.5.3
|
| 51 |
+
pydantic_core==2.33.2
|
| 52 |
+
mdurl==0.1.2
|
| 53 |
+
setuptools==80.9.0
|
| 54 |
+
nvidia-cuda-runtime-cu12==12.6.77
|
| 55 |
+
frozenlist==1.7.0
|
| 56 |
+
sniffio==1.3.1
|
| 57 |
+
urllib3==2.5.0
|
| 58 |
+
multidict==6.6.3
|
| 59 |
+
datasets==4.0.0
|
| 60 |
+
filelock==3.18.0
|
| 61 |
+
attrs==25.3.0
|
| 62 |
+
idna==3.10
|
| 63 |
+
protobuf==6.31.1
|
| 64 |
+
h11==0.16.0
|
| 65 |
+
MarkupSafe==3.0.2
|
| 66 |
+
typing_extensions==4.14.1
|
| 67 |
+
platformdirs==4.3.8
|
| 68 |
+
tokenizers==0.21.2
|
| 69 |
+
httpx==0.28.1
|
| 70 |
+
pydantic==2.11.7
|
| 71 |
+
requests==2.32.4
|
| 72 |
+
nvidia-nvtx-cu12==12.6.77
|
| 73 |
+
pyarrow==21.0.0
|
| 74 |
+
nvidia-cufft-cu12==11.3.0.4
|
| 75 |
+
xxhash==3.5.0
|
| 76 |
+
smmap==5.0.2
|
| 77 |
+
aiosignal==1.4.0
|
| 78 |
+
mpmath==1.3.0
|
| 79 |
+
hf-xet==1.1.5
|
| 80 |
+
scikit-learn==1.7.1
|
| 81 |
+
pytz==2025.2
|
| 82 |
+
python-stdnum==2.1
|
| 83 |
+
wandb==0.21.0
|
| 84 |
+
multiprocess==0.70.16
|
| 85 |
+
nvidia-nvjitlink-cu12==12.6.85
|
| 86 |
+
dill==0.3.8
|
| 87 |
+
aiohttp==3.12.14
|
| 88 |
+
nvidia-cuda-nvrtc-cu12==12.6.77
|
| 89 |
+
regex==2024.11.6
|
| 90 |
+
pillow==11.3.0
|
wandb/run-20250721_102442-a02q1hb9/files/wandb-metadata.json
ADDED
|
@@ -0,0 +1,40 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"os": "Linux-6.8.0-1029-aws-x86_64-with-glibc2.39",
|
| 3 |
+
"python": "CPython 3.12.3",
|
| 4 |
+
"startedAt": "2025-07-21T10:24:42.129108Z",
|
| 5 |
+
"program": "/home/ubuntu/workspace/Arun/train/triplet-embed/train_wandb.py",
|
| 6 |
+
"codePath": "train_wandb.py",
|
| 7 |
+
"codePathLocal": "train_wandb.py",
|
| 8 |
+
"git": {
|
| 9 |
+
"remote": "https://huggingface.co/ArunKr/triplet-embed",
|
| 10 |
+
"commit": "a5b24c9d89501e04b565e623ff31263553b43725"
|
| 11 |
+
},
|
| 12 |
+
"email": "tiwaryarun084@gmail.com",
|
| 13 |
+
"root": "/home/ubuntu/workspace/Arun/train/triplet-embed",
|
| 14 |
+
"host": "ip-10-159-20-46",
|
| 15 |
+
"executable": "/home/ubuntu/workspace/Arun/.venv/bin/python3",
|
| 16 |
+
"cpu_count": 4,
|
| 17 |
+
"cpu_count_logical": 8,
|
| 18 |
+
"gpu": "NVIDIA A10G",
|
| 19 |
+
"gpu_count": 1,
|
| 20 |
+
"disk": {
|
| 21 |
+
"/": {
|
| 22 |
+
"total": "102888095744",
|
| 23 |
+
"used": "82731044864"
|
| 24 |
+
}
|
| 25 |
+
},
|
| 26 |
+
"memory": {
|
| 27 |
+
"total": "33263779840"
|
| 28 |
+
},
|
| 29 |
+
"gpu_nvidia": [
|
| 30 |
+
{
|
| 31 |
+
"name": "NVIDIA A10G",
|
| 32 |
+
"memoryTotal": "24146608128",
|
| 33 |
+
"cudaCores": 10240,
|
| 34 |
+
"architecture": "Ampere",
|
| 35 |
+
"uuid": "GPU-3d2c87f7-e36a-9b7d-7c06-f6e3da2ab5a3"
|
| 36 |
+
}
|
| 37 |
+
],
|
| 38 |
+
"cudaVersion": "12.8",
|
| 39 |
+
"writerId": "e1ct0j8t8jolhfidccz1hq8kv6qcyuv3"
|
| 40 |
+
}
|
wandb/run-20250721_102442-a02q1hb9/files/wandb-summary.json
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
{"_runtime":1,"_wandb":{"runtime":1}}
|
wandb/run-20250721_102442-a02q1hb9/logs/debug-core.log
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
/home/ubuntu/.cache/wandb/logs/core-debug-20250721_102442.log
|
wandb/run-20250721_102442-a02q1hb9/logs/debug-internal.log
ADDED
|
@@ -0,0 +1,12 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{"time":"2025-07-21T10:24:42.342193792Z","level":"INFO","msg":"stream: starting","core version":"0.21.0"}
|
| 2 |
+
{"time":"2025-07-21T10:24:42.5255473Z","level":"INFO","msg":"stream: created new stream","id":"a02q1hb9"}
|
| 3 |
+
{"time":"2025-07-21T10:24:42.52557847Z","level":"INFO","msg":"stream: started","id":"a02q1hb9"}
|
| 4 |
+
{"time":"2025-07-21T10:24:42.525611461Z","level":"INFO","msg":"handler: started","stream_id":"a02q1hb9"}
|
| 5 |
+
{"time":"2025-07-21T10:24:42.525604871Z","level":"INFO","msg":"writer: Do: started","stream_id":"a02q1hb9"}
|
| 6 |
+
{"time":"2025-07-21T10:24:42.525641452Z","level":"INFO","msg":"sender: started","stream_id":"a02q1hb9"}
|
| 7 |
+
{"time":"2025-07-21T10:24:43.822276188Z","level":"INFO","msg":"stream: closing","id":"a02q1hb9"}
|
| 8 |
+
{"time":"2025-07-21T10:24:44.192430368Z","level":"INFO","msg":"fileTransfer: Close: file transfer manager closed"}
|
| 9 |
+
{"time":"2025-07-21T10:24:44.342579952Z","level":"INFO","msg":"handler: closed","stream_id":"a02q1hb9"}
|
| 10 |
+
{"time":"2025-07-21T10:24:44.342614093Z","level":"INFO","msg":"sender: closed","stream_id":"a02q1hb9"}
|
| 11 |
+
{"time":"2025-07-21T10:24:44.342611023Z","level":"INFO","msg":"writer: Close: closed","stream_id":"a02q1hb9"}
|
| 12 |
+
{"time":"2025-07-21T10:24:44.342675224Z","level":"INFO","msg":"stream: closed","id":"a02q1hb9"}
|
wandb/run-20250721_102442-a02q1hb9/logs/debug.log
ADDED
|
@@ -0,0 +1,23 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
2025-07-21 10:24:42,130 INFO MainThread:3179167 [wandb_setup.py:_flush():80] Current SDK version is 0.21.0
|
| 2 |
+
2025-07-21 10:24:42,130 INFO MainThread:3179167 [wandb_setup.py:_flush():80] Configure stats pid to 3179167
|
| 3 |
+
2025-07-21 10:24:42,130 INFO MainThread:3179167 [wandb_setup.py:_flush():80] Loading settings from /home/ubuntu/.config/wandb/settings
|
| 4 |
+
2025-07-21 10:24:42,130 INFO MainThread:3179167 [wandb_setup.py:_flush():80] Loading settings from /home/ubuntu/workspace/Arun/train/triplet-embed/wandb/settings
|
| 5 |
+
2025-07-21 10:24:42,130 INFO MainThread:3179167 [wandb_setup.py:_flush():80] Loading settings from environment variables
|
| 6 |
+
2025-07-21 10:24:42,130 INFO MainThread:3179167 [wandb_init.py:setup_run_log_directory():703] Logging user logs to /home/ubuntu/workspace/Arun/train/triplet-embed/wandb/run-20250721_102442-a02q1hb9/logs/debug.log
|
| 7 |
+
2025-07-21 10:24:42,130 INFO MainThread:3179167 [wandb_init.py:setup_run_log_directory():704] Logging internal logs to /home/ubuntu/workspace/Arun/train/triplet-embed/wandb/run-20250721_102442-a02q1hb9/logs/debug-internal.log
|
| 8 |
+
2025-07-21 10:24:42,130 INFO MainThread:3179167 [wandb_init.py:init():830] calling init triggers
|
| 9 |
+
2025-07-21 10:24:42,130 INFO MainThread:3179167 [wandb_init.py:init():835] wandb.init called with sweep_config: {}
|
| 10 |
+
config: {'model_name': 'all-MiniLM-L6-v2', 'batch_size': 16, 'epochs': 1, '_wandb': {}}
|
| 11 |
+
2025-07-21 10:24:42,130 INFO MainThread:3179167 [wandb_init.py:init():871] starting backend
|
| 12 |
+
2025-07-21 10:24:42,336 INFO MainThread:3179167 [wandb_init.py:init():874] sending inform_init request
|
| 13 |
+
2025-07-21 10:24:42,339 INFO MainThread:3179167 [wandb_init.py:init():882] backend started and connected
|
| 14 |
+
2025-07-21 10:24:42,340 INFO MainThread:3179167 [wandb_init.py:init():953] updated telemetry
|
| 15 |
+
2025-07-21 10:24:42,344 INFO MainThread:3179167 [wandb_init.py:init():977] communicating run to backend with 90.0 second timeout
|
| 16 |
+
2025-07-21 10:24:42,684 INFO MainThread:3179167 [wandb_init.py:init():1029] starting run threads in backend
|
| 17 |
+
2025-07-21 10:24:42,758 INFO MainThread:3179167 [wandb_run.py:_console_start():2458] atexit reg
|
| 18 |
+
2025-07-21 10:24:42,758 INFO MainThread:3179167 [wandb_run.py:_redirect():2306] redirect: wrap_raw
|
| 19 |
+
2025-07-21 10:24:42,758 INFO MainThread:3179167 [wandb_run.py:_redirect():2375] Wrapping output streams.
|
| 20 |
+
2025-07-21 10:24:42,758 INFO MainThread:3179167 [wandb_run.py:_redirect():2398] Redirects installed.
|
| 21 |
+
2025-07-21 10:24:42,760 INFO MainThread:3179167 [wandb_init.py:init():1075] run started, returning control to user process
|
| 22 |
+
2025-07-21 10:24:43,301 INFO MainThread:3179167 [wandb_run.py:_config_callback():1363] config_cb None None {'output_dir': 'checkpoints/model_4', 'overwrite_output_dir': False, 'do_train': False, 'do_eval': True, 'do_predict': False, 'eval_strategy': 'steps', 'prediction_loss_only': True, 'per_device_train_batch_size': 16, 'per_device_eval_batch_size': 16, 'per_gpu_train_batch_size': None, 'per_gpu_eval_batch_size': None, 'gradient_accumulation_steps': 1, 'eval_accumulation_steps': None, 'eval_delay': 0, 'torch_empty_cache_steps': None, 'learning_rate': 5e-05, 'weight_decay': 0.0, 'adam_beta1': 0.9, 'adam_beta2': 0.999, 'adam_epsilon': 1e-08, 'max_grad_norm': 1, 'num_train_epochs': 1, 'max_steps': -1, 'lr_scheduler_type': 'linear', 'lr_scheduler_kwargs': {}, 'warmup_ratio': 0.0, 'warmup_steps': 0, 'log_level': 'passive', 'log_level_replica': 'warning', 'log_on_each_node': True, 'logging_dir': 'checkpoints/model_4/runs/Jul21_10-24-42_ip-10-159-20-46', 'logging_strategy': 'steps', 'logging_first_step': False, 'logging_steps': 500, 'logging_nan_inf_filter': True, 'save_strategy': 'no', 'save_steps': 500, 'save_total_limit': 0, 'save_safetensors': True, 'save_on_each_node': False, 'save_only_model': False, 'restore_callback_states_from_checkpoint': False, 'no_cuda': False, 'use_cpu': False, 'use_mps_device': False, 'seed': 42, 'data_seed': None, 'jit_mode_eval': False, 'use_ipex': False, 'bf16': False, 'fp16': False, 'fp16_opt_level': 'O1', 'half_precision_backend': 'auto', 'bf16_full_eval': False, 'fp16_full_eval': False, 'tf32': None, 'local_rank': 0, 'ddp_backend': None, 'tpu_num_cores': None, 'tpu_metrics_debug': False, 'debug': [], 'dataloader_drop_last': False, 'eval_steps': 10, 'dataloader_num_workers': 0, 'dataloader_prefetch_factor': None, 'past_index': -1, 'run_name': 'checkpoints/model_4', 'disable_tqdm': True, 'remove_unused_columns': True, 'label_names': None, 'load_best_model_at_end': False, 'metric_for_best_model': None, 'greater_is_better': None, 
'ignore_data_skip': False, 'fsdp': [], 'fsdp_min_num_params': 0, 'fsdp_config': {'min_num_params': 0, 'xla': False, 'xla_fsdp_v2': False, 'xla_fsdp_grad_ckpt': False}, 'fsdp_transformer_layer_cls_to_wrap': None, 'accelerator_config': {'split_batches': False, 'dispatch_batches': None, 'even_batches': True, 'use_seedable_sampler': True, 'non_blocking': False, 'gradient_accumulation_kwargs': None}, 'deepspeed': None, 'label_smoothing_factor': 0.0, 'optim': 'adamw_torch', 'optim_args': None, 'adafactor': False, 'group_by_length': False, 'length_column_name': 'length', 'report_to': ['wandb'], 'ddp_find_unused_parameters': None, 'ddp_bucket_cap_mb': None, 'ddp_broadcast_buffers': False, 'dataloader_pin_memory': True, 'dataloader_persistent_workers': False, 'skip_memory_metrics': True, 'use_legacy_prediction_loop': False, 'push_to_hub': False, 'resume_from_checkpoint': None, 'hub_model_id': None, 'hub_strategy': 'every_save', 'hub_token': '<HUB_TOKEN>', 'hub_private_repo': None, 'hub_always_push': False, 'hub_revision': None, 'gradient_checkpointing': False, 'gradient_checkpointing_kwargs': None, 'include_inputs_for_metrics': False, 'include_for_metrics': [], 'eval_do_concat_batches': True, 'fp16_backend': 'auto', 'push_to_hub_model_id': None, 'push_to_hub_organization': None, 'push_to_hub_token': '<PUSH_TO_HUB_TOKEN>', 'mp_parameters': '', 'auto_find_batch_size': False, 'full_determinism': False, 'torchdynamo': None, 'ray_scope': 'last', 'ddp_timeout': 1800, 'torch_compile': False, 'torch_compile_backend': None, 'torch_compile_mode': None, 'include_tokens_per_second': False, 'include_num_input_tokens_seen': False, 'neftune_noise_alpha': None, 'optim_target_modules': None, 'batch_eval_metrics': False, 'eval_on_start': False, 'use_liger_kernel': False, 'liger_kernel_config': None, 'eval_use_gather_object': False, 'average_tokens_across_devices': False, 'prompts': None, 'batch_sampler': 'batch_sampler', 'multi_dataset_batch_sampler': 'round_robin', 'router_mapping': {}, 
'learning_rate_mapping': {}}
|
| 23 |
+
2025-07-21 10:24:43,821 INFO MsgRouterThr:3179167 [mailbox.py:close():129] [no run ID] Closing mailbox, abandoning 1 handles.
|
wandb/run-20250721_102442-a02q1hb9/run-a02q1hb9.wandb
ADDED
|
Binary file (12 kB). View file
|
|
|