akhilreddygogula committed
Commit d65bc08 · 1 Parent(s): a5b24c9

gitignore updated

This view is limited to 50 files because it contains too many changes. See the raw diff for the full change set.
Files changed (50)
  1. .gitignore +2 -0
  2. checkpoints/model/eval/triplet_evaluation_val-triplet-eval_results.csv +3 -0
  3. checkpoints/model_1/eval/triplet_evaluation_val-triplet-eval_results.csv +3 -0
  4. checkpoints/model_2/eval/triplet_evaluation_val-triplet-eval_results.csv +2 -0
  5. checkpoints/model_3/eval/triplet_evaluation_val-triplet-eval_results.csv +2 -0
  6. checkpoints/model_4/eval/triplet_evaluation_val-triplet_results.csv +2 -0
  7. checkpoints/model_5/eval/triplet_evaluation_val-triplet_results.csv +2 -0
  8. embedd.py +26 -0
  9. fine_tuned_sbert_triplet/README.md +340 -34
  10. fine_tuned_sbert_triplet/eval/triplet_evaluation_val-triplet-eval_results.csv +3 -0
  11. fine_tuned_sbert_triplet/eval/triplet_evaluation_val-triplet_results.csv +2 -0
  12. fine_tuned_sbert_triplet/model.safetensors +1 -1
  13. train.py +25 -20
  14. train_example.py +54 -0
  15. train_triplets.jsonl +0 -0
  16. train_v1.py +76 -0
  17. train_v2.py +125 -0
  18. train_wandb.py +161 -0
  19. triplets.jsonl +0 -0
  20. val_triplets.jsonl +0 -0
  21. wandb/debug-internal.log +1 -0
  22. wandb/debug.log +1 -0
  23. wandb/latest-run +1 -0
  24. wandb/run-20250721_101723-fk6s1kw0/files/config.yaml +67 -0
  25. wandb/run-20250721_101723-fk6s1kw0/files/output.log +11 -0
  26. wandb/run-20250721_101723-fk6s1kw0/files/requirements.txt +90 -0
  27. wandb/run-20250721_101723-fk6s1kw0/files/wandb-metadata.json +40 -0
  28. wandb/run-20250721_101723-fk6s1kw0/files/wandb-summary.json +1 -0
  29. wandb/run-20250721_101723-fk6s1kw0/logs/debug-core.log +1 -0
  30. wandb/run-20250721_101723-fk6s1kw0/logs/debug-internal.log +12 -0
  31. wandb/run-20250721_101723-fk6s1kw0/logs/debug.log +22 -0
  32. wandb/run-20250721_101723-fk6s1kw0/run-fk6s1kw0.wandb +0 -0
  33. wandb/run-20250721_102226-0xzjnslp/files/config.yaml +355 -0
  34. wandb/run-20250721_102226-0xzjnslp/files/output.log +35 -0
  35. wandb/run-20250721_102226-0xzjnslp/files/requirements.txt +90 -0
  36. wandb/run-20250721_102226-0xzjnslp/files/wandb-metadata.json +40 -0
  37. wandb/run-20250721_102226-0xzjnslp/files/wandb-summary.json +1 -0
  38. wandb/run-20250721_102226-0xzjnslp/logs/debug-core.log +1 -0
  39. wandb/run-20250721_102226-0xzjnslp/logs/debug-internal.log +12 -0
  40. wandb/run-20250721_102226-0xzjnslp/logs/debug.log +23 -0
  41. wandb/run-20250721_102226-0xzjnslp/run-0xzjnslp.wandb +0 -0
  42. wandb/run-20250721_102442-a02q1hb9/files/config.yaml +359 -0
  43. wandb/run-20250721_102442-a02q1hb9/files/output.log +35 -0
  44. wandb/run-20250721_102442-a02q1hb9/files/requirements.txt +90 -0
  45. wandb/run-20250721_102442-a02q1hb9/files/wandb-metadata.json +40 -0
  46. wandb/run-20250721_102442-a02q1hb9/files/wandb-summary.json +1 -0
  47. wandb/run-20250721_102442-a02q1hb9/logs/debug-core.log +1 -0
  48. wandb/run-20250721_102442-a02q1hb9/logs/debug-internal.log +12 -0
  49. wandb/run-20250721_102442-a02q1hb9/logs/debug.log +23 -0
  50. wandb/run-20250721_102442-a02q1hb9/run-a02q1hb9.wandb +0 -0
.gitignore CHANGED
@@ -1 +1,3 @@
 triplet_dataset.jsonl
+val_triplets.jsonl
+triplets.jsonl
checkpoints/model/eval/triplet_evaluation_val-triplet-eval_results.csv ADDED
@@ -0,0 +1,3 @@
+epoch,steps,accuracy_cosine
+0.5,10,0.006289307959377766
+1.0,20,0.006289307959377766
checkpoints/model_1/eval/triplet_evaluation_val-triplet-eval_results.csv ADDED
@@ -0,0 +1,3 @@
+epoch,steps,accuracy_cosine
+0.5,10,0.006289307959377766
+1.0,20,0.006289307959377766
checkpoints/model_2/eval/triplet_evaluation_val-triplet-eval_results.csv ADDED
@@ -0,0 +1,2 @@
+epoch,steps,accuracy_cosine
+0.625,10,0.0
checkpoints/model_3/eval/triplet_evaluation_val-triplet-eval_results.csv ADDED
@@ -0,0 +1,2 @@
+epoch,steps,accuracy_cosine
+0.625,10,0.0
checkpoints/model_4/eval/triplet_evaluation_val-triplet_results.csv ADDED
@@ -0,0 +1,2 @@
+epoch,steps,accuracy_cosine
+0.625,10,0.0
checkpoints/model_5/eval/triplet_evaluation_val-triplet_results.csv ADDED
@@ -0,0 +1,2 @@
+epoch,steps,accuracy_cosine
+0.625,10,0.0
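Note on the eval CSVs above: `accuracy_cosine` is the fraction of (anchor, positive, negative) triplets for which the anchor is closer, by cosine similarity, to the positive than to the negative; 0.5 is chance level, so these runs sit far below it. A minimal sketch of that computation (illustrative only; this helper is not part of the commit):

```python
from sentence_transformers import SentenceTransformer, util

def triplet_cosine_accuracy(model, anchors, positives, negatives):
    # Encode each column of the triplet set
    a = model.encode(anchors, convert_to_tensor=True)
    p = model.encode(positives, convert_to_tensor=True)
    n = model.encode(negatives, convert_to_tensor=True)
    # A triplet counts as correct when cos(anchor, positive) > cos(anchor, negative)
    pos_sim = util.cos_sim(a, p).diagonal()
    neg_sim = util.cos_sim(a, n).diagonal()
    return (pos_sim > neg_sim).float().mean().item()
```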
embedd.py ADDED
@@ -0,0 +1,26 @@
+from sentence_transformers import SentenceTransformer, util
+
+def main():
+    # Load the fine-tuned model
+    model = SentenceTransformer('fine_tuned_sbert_triplet')
+
+    # Example sentences
+    sentences = [
+        "A man is playing a guitar",
+        "A person is playing a guitar",
+        "A woman is reading a book"
+    ]
+
+    # Compute embeddings
+    embeddings = model.encode(sentences, convert_to_tensor=True)
+
+    # Compute cosine similarity between all pairs
+    cosine_sim = util.pytorch_cos_sim(embeddings, embeddings)
+
+    # Display similarity matrix
+    print("Cosine Similarity Matrix:")
+    print(cosine_sim)
+
+if __name__ == "__main__":
+    main()
+
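Note: the same model can also drive retrieval rather than a full pairwise matrix. A minimal sketch using sentence-transformers' `util.semantic_search`; the corpus chunks and the query are hypothetical stand-ins for the hardware-inventory data this repo trains on:

```python
from sentence_transformers import SentenceTransformer, util

model = SentenceTransformer('fine_tuned_sbert_triplet')

# Hypothetical corpus chunks and query, in the style of the training data
corpus = [
    "name sdb size 223.5G type disk mountpoints None",
    "SysMemSize 2048 GB SysMemType ECC DDR4 SysMemSpeed 2933 MT/s",
]
query = ["What's the size of the disk named sdb?"]

corpus_emb = model.encode(corpus, convert_to_tensor=True)
query_emb = model.encode(query, convert_to_tensor=True)

# For each query, returns the top_k corpus entries ranked by cosine similarity
hits = util.semantic_search(query_emb, corpus_emb, top_k=2)
print(hits[0])  # e.g. [{'corpus_id': 0, 'score': ...}, {'corpus_id': 1, 'score': ...}]
```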
fine_tuned_sbert_triplet/README.md CHANGED
@@ -5,9 +5,313 @@ tags:
 - feature-extraction
 - dense
 - generated_from_trainer
-- dataset_size:5
-- loss:TripletLoss
 base_model: sentence-transformers/all-MiniLM-L6-v2
 pipeline_tag: sentence-similarity
 library_name: sentence-transformers
 metrics:
@@ -16,14 +320,14 @@ model-index:
 - name: SentenceTransformer based on sentence-transformers/all-MiniLM-L6-v2
   results:
   - task:
-      type: triplet
-      name: Triplet
     dataset:
-      name: val triplet eval
-      type: val-triplet-eval
     metrics:
     - type: cosine_accuracy
-      value: 1.0
       name: Cosine Accuracy
 ---

@@ -77,9 +381,9 @@ from sentence_transformers import SentenceTransformer
 model = SentenceTransformer("sentence_transformers_model_id")
 # Run inference
 sentences = [
-    'The weather is lovely today.',
-    "It's so sunny outside!",
-    'He drove to the stadium.',
 ]
 embeddings = model.encode(sentences)
 print(embeddings.shape)
@@ -88,9 +392,9 @@ print(embeddings.shape)
 # Get the similarity scores for the embeddings
 similarities = model.similarity(embeddings, embeddings)
 print(similarities)
-# tensor([[1.0000, 0.6660, 0.1046],
-#         [0.6660, 1.0000, 0.1411],
-#         [0.1046, 0.1411, 1.0000]])
 ```

 <!--
@@ -121,14 +425,14 @@ You can finetune this model on your own dataset.

 ### Metrics

-#### Triplet

-* Dataset: `val-triplet-eval`
-* Evaluated with [<code>TripletEvaluator</code>](https://sbert.net/docs/package_reference/sentence_transformer/evaluation.html#sentence_transformers.evaluation.TripletEvaluator)

 | Metric              | Value   |
 |:--------------------|:--------|
-| **cosine_accuracy** | **1.0** |

 <!--
 ## Bias, Risks and Limitations
@@ -148,20 +452,20 @@ You can finetune this model on your own dataset.

 #### Unnamed Dataset

-* Size: 5 training samples
 * Columns: <code>sentence_0</code>, <code>sentence_1</code>, and <code>sentence_2</code>
-* Approximate statistics based on the first 5 samples:
-  |         | sentence_0 | sentence_1 | sentence_2 |
-  |:--------|:-----------|:-----------|:-----------|
-  | type    | string     | string     | string     |
-  | details | <ul><li>min: 6 tokens</li><li>mean: 7.2 tokens</li><li>max: 9 tokens</li></ul> | <ul><li>min: 5 tokens</li><li>mean: 6.4 tokens</li><li>max: 8 tokens</li></ul> | <ul><li>min: 5 tokens</li><li>mean: 6.8 tokens</li><li>max: 9 tokens</li></ul> |
 * Samples:
-  | sentence_0 | sentence_1 | sentence_2 |
-  |:-----------|:-----------|:-----------|
-  | <code>The kids are playing outside</code> | <code>Children are playing outdoors</code> | <code>Adults are working inside</code> |
-  | <code>A dog is running in the park</code> | <code>A dog runs in the park</code> | <code>A cat is sleeping on the couch</code> |
-  | <code>The car is fast</code> | <code>A fast car</code> | <code>A slow bicycle</code> |
-* Loss: [<code>TripletLoss</code>](https://sbert.net/docs/package_reference/sentence_transformer/losses.html#tripletloss) with these parameters:
   ```json
   {
       "distance_metric": "TripletDistanceMetric.EUCLIDEAN",
@@ -176,6 +480,7 @@ You can finetune this model on your own dataset.
 - `per_device_train_batch_size`: 16
 - `per_device_eval_batch_size`: 16
 - `num_train_epochs`: 1
 - `multi_dataset_batch_sampler`: round_robin

 #### All Hyperparameters
@@ -235,7 +540,7 @@ You can finetune this model on your own dataset.
 - `dataloader_num_workers`: 0
 - `dataloader_prefetch_factor`: None
 - `past_index`: -1
-- `disable_tqdm`: False
 - `remove_unused_columns`: True
 - `label_names`: None
 - `load_best_model_at_end`: False
@@ -302,9 +607,10 @@ You can finetune this model on your own dataset.
 </details>

 ### Training Logs
-| Epoch | Step | val-triplet-eval_cosine_accuracy |
-|:-----:|:----:|:--------------------------------:|
-| 1.0   | 1    | 1.0                              |


 ### Framework Versions
@@ -333,7 +639,7 @@ You can finetune this model on your own dataset.
 }
 ```

-#### TripletLoss
 ```bibtex
 @misc{hermans2017defense,
     title={In Defense of the Triplet Loss for Person Re-Identification},
 - feature-extraction
 - dense
 - generated_from_trainer
+- dataset_size:254
+- loss:WandbLoggingTripletLoss
 base_model: sentence-transformers/all-MiniLM-L6-v2
+widget:
+- source_sentence: 70G type lvm mountpoints None name nvme4n1 size 2.9T type disk
+    mountpoints None name nvme4n1p1 size 2.9T type part mountpoints None name nvme3n1
+    size 2.9T type disk mountpoints None name nvme0n1 size 2.9T type disk mountpoints
+    None name nvme2n1 size 2.9T type disk mountpoints None name nvme1n1 size 2.9T
+    type disk mountpoints None name nvme1n1p1 size 2.9T type part mountpoints None
+    name nvme0n1p1 size 1.1G type part mountpoints /boot/efi name nvme2n1p1 size 2.9T
+    type part mountpoints None name nvme0n1p2 size 2G type part mountpoints /boot
+    name nvme0n1p3 size 2.9T type part mountpoints None name nvme3n1p1 size 2.9T type
+    part mountpoints None StorageInfo None Network None PCI None Tunings None docker_list
+    None docker_details None Network NetworkAdapters PermanentMACAddress 1C:34:DA:62:D5:28
+    P
+  sentences:
+  - "<think>\nOkay, the user wants a sentence that's unrelated in topic and meaning\
+    \ to the given document chunk. Let me first understand what the document is about.\n\
+    \nLooking at the content, it seems to be technical, related to storage devices.\
+    \ There are entries like NVMe drives, partitions, mount points, and some network\
+    \ info. Terms like LVM, disk, partition, mountpoints, network adapters, MAC addresses\
+    \ are mentioned. So the main topic is system hardware, specifically storage and\
+    \ network configurations.\n\nNow, the task is to generate a sentence that's unrelated.\
+    \ So I need to think of a completely different topic. Maybe something unrelated\
+    \ to technology, like a general statement about everyday life, nature, or something\
+    \ else. Let me think of a few possibilities. \n\nMaybe something like \"The sun\
+    \ sets behind the mountains, casting a golden hue over the tranquil lake.\" That's\
+    \ about nature and not related to the technical details. Or perhaps a sentence\
+    \ about a different subject, like \"She loves reading classic literature in her\
+    \ free time.\" That's about a person's hobby. \n\nWait, the user might want a\
+    \ sentence that's not even related to the technical aspects. So maybe a sentence\
+    \ from a completely different domain. Let me check if the example given by the\
+    \ user is correct. The user provided an example response: \"The sun sets behind\
+    \ the mountains, casting a golden hue over the tranquil lake.\" That's definitely\
+    \ unrelated. \n\nI should make sure that the sentence doesn't have any technical\
+    \ terms or references. So avoiding any mention of storage, networks, hardware,\
+    \ etc. Maybe something about a different topic like cooking, sports, or daily\
+    \ activities. \n\nAnother example could be \"He enjoys playing basketball in the\
+    \ evenings with his friends.\" That's unrelated. Or \"The cat purred softly as\
+    \ it curled up on the windowsill.\" \n\nI need to pick one that's not connected.\
+    \ The key is to avoid any technical terms or references to the topics mentioned\
+    \ in the original text. So the sentence should be about something entirely different,\
+    \ like a natural phenomenon, a personal activity, or an event. \n\nI think the\
+    \ example the user provided is good. Let me confirm that the generated sentence\
+    \ doesn't have any connection. Yes, the original document is about storage and\
+    \ network hardware, so a sentence about the sunset is unrelated. So the answer\
+    \ should be something like that.\n</think>\n\nThe cat purred softly as it curled\
+    \ up on the windowsill, watching the last rays of sunlight dance across the carpet."
+  - What's the size of the partition named sda3?
+  - What's the size of the nvme4n1 partition?
+- source_sentence: nology Gen4 LinkLanes None Name BOSS-S1 Adapter Status None ID
+    196-0 Technology Gen3 LinkLanes None Name Express Flash PM1725b 3.2TB SFF Status
+    None ID 36-0 Technology Gen3 LinkLanes None Name Express Flash PM1725b 3.2TB SFF
+    Status None ID 35-0 Technology Gen3 LinkLanes None Name Express Flash PM1725b
+    3.2TB SFF Status None ID 193-0 Technology Gen3 LinkLanes None Name Express Flash
+    PM1725b 3.2TB SFF Status None ID None Technology Gen4 LinkLanes None Name None
+    Status None ID None Technology Gen4 LinkLanes None Name None Status None ID None
+    Technology Gen3 LinkLanes None Name None Status None ID None Technology Gen3 LinkLanes
+    None Name None Status None ID None Technology Gen3 LinkLanes None Name None Status
+    None ID None Technology Gen3 LinkLanes None Name None Status None ID None Technology
+    G
+  sentences:
+  - '<think>
+
+    Okay, the user wants me to generate a sentence that''s completely unrelated in
+    topic and meaning to the provided document chunk. Let me first understand what
+    the document is about.
+
+
+    Looking at the text, there are several lines with entries like "Technology Gen4
+    LinkLanes None Name BOSS-S1 Adapter Status None ID 196-0" and similar entries
+    for other IDs. It seems like this is some kind of technical data or configuration
+    details, possibly related to hardware or storage devices. Terms like "Gen4", "LinkLanes",
+    "Express Flash PM1725b", and IDs like 196-0, 36-0, etc., suggest it''s about storage
+    controllers, adapters, or similar components in a server or data center environment.
+
+
+    Now, the task is to create a sentence that''s unrelated. So I need to think of
+    a topic that''s entirely different. Maybe something from daily life, nature, or
+    a completely different field. Let''s brainstorm.
+
+
+    Possible unrelated topics: cooking, sports, art, weather, travel, animals, etc.
+    Let''s pick something like cooking. For example, "She baked a chocolate cake for
+    her birthday party."
+
+
+    Wait, but I need to make sure it''s not even vaguely related. The original text
+    is about technical hardware, so anything not related to that. Maybe something
+    about a different subject entirely. How about a sentence about a book? "The author
+    wrote a mystery novel that was praised for its intricate plot."
+
+
+    Alternatively, maybe something about a different technology, but that might be
+    too close. Wait, the user wants it unrelated. Let me check again. The original
+    text is about hardware components, their status, IDs, etc. So a sentence about
+    a completely different topic. Maybe something like "The cat slept through the
+    entire movie."
+
+
+    But the user might prefer a more elaborate sentence. Let''s think of a sentence
+    that''s about a different field, like biology or physics. For example, "In the
+    experiment, the scientist observed a unique phenomenon in quantum mechanics."
+
+
+    Alternatively, a sentence about a different technology, but that''s still related.
+    Wait, the key is that the sentence should be unrelated in both topic and meaning.
+    So maybe something like "The bakery was famous for its delicious pastries and
+    fresh bread."
+
+
+    Alternatively, maybe something about a different kind of technology, but not related
+    to the hardware mentioned. Wait, the original text is about storage technologies,
+    so maybe a sentence about something else, like a different kind of technology.
+    But that''s still related. Hmm.
+
+
+    Wait, the user might not mind if it''s a different type of technology, as long
+    as it''s not the same topic. But the instruction says "unrelated in topic and
+    meaning". So maybe the sentence should be about something entirely different.
+    Let''s think of a sentence about a different subject. For example, "The painter
+    used vibrant colors to create a breathtaking landscape."
+
+
+    Alternatively, "The librarian organized the new books on the shelf with care."
+
+
+    Alternatively, "The chef prepared a gourmet meal for the dinner party."
+
+
+    I think that''s safe. The original text is about hardware, so a sentence about
+    a chef preparing a meal is unrelated. Let me confirm. Yes, that''s a different
+    topic and meaning. So that should work.
+
+    </think>
+
+
+    The librarian carefully arranged the newly arrived books on the shelf, ensuring
+    each title was properly categorized for easy access.'
+  - What is the status of the device with ID 196-0?
+  - What's the maximum speed supported by the memory module?
+- source_sentence: eout 10000 IscsiDev1Con1Port 3260 IscsiDev1Con2VlanId 1 IscsiDev1Con2Retry
+    3 IscsiDev1Con2Timeout 10000 IscsiDev1Con2Port 3260 AcPwrRcvryUserDelay 60 LogicalProc
+    Enabled ProcVirtualization Enabled IommuSupport Enabled KernelDmaProtection Disabled
+    L1StreamHwPrefetcher Enabled L2StreamHwPrefetcher Enabled L1StridePrefetcher Enabled
+    L1RegionPrefetcher Enabled L2UpDownPrefetcher Enabled MadtCoreEnumeration Linear
+    NumaNodesPerSocket 1 CcxAsNumaDomain Disabled Sme Disabled Snp Disabled Rmp Disabled
+    TransparentSme Disabled CpuFeatureErms Disabled CpuFeatureFsrm Disabled CpuFeatureRmss
+    Disabled ProcConfigTdp Maximum ProcX2Apic Enabled ProcCcds All CcdCores All ControlledTurbo
+    Disabled OptimizerMode Auto EmbSata AhciMode SecurityFreezeLock Enabled WriteCache
+    Disabled SataPortA Auto SataPortB Auto S
+  sentences:
+  - What is the port number for the second iSCSI connection?
+  - "<think>\nOkay, the user wants a sentence that's unrelated in topic and meaning\
+    \ to the given document chunk. Let me look at the content of the document chunk\
+    \ first.\n\nThe text seems to be a series of technical configurations, probably\
+    \ related to a server or hardware settings. Terms like IscsiDev1Con1Port, VLAN\
+    \ IDs, retry and timeout settings, CPU features, NumaNodesPerSocket, SataPortA,\
+    \ etc., all point towards system configuration parameters for something like a\
+    \ server or storage device. There's mention of things like KernelDmaProtection,\
+    \ L1StreamHwPrefetcher, CpuFeatureErms, and others. It's a technical spec sheet\
+    \ or a configuration log.\n\nSo, the task is to generate a sentence that's not\
+    \ related to any of these topics. The sentence should be about something entirely\
+    \ different. Let me think of a topic that's unrelated. Maybe something like a\
+    \ book, a movie, a random fact, or a completely different subject.\n\nFor example,\
+    \ \"The Eiffel Tower was completed in 1889 and is one of the most recognizable\
+    \ landmarks in Paris.\" That's about architecture and history, which is unrelated\
+    \ to the technical server configurations. Alternatively, something about a different\
+    \ field like biology, art, or a random event. \n\nI need to make sure that the\
+    \ sentence doesn't touch on any of the technical aspects mentioned. Let me check\
+    \ again: the original text is about network settings, CPU features, storage devices,\
+    \ system configurations, etc. So a sentence about a completely different domain,\
+    \ like a historical event, a scientific discovery, a sports event, or something\
+    \ else.\n\nAnother example: \"The Great Wall of China is one of the most impressive\
+    \ human engineering feats in history.\" That's about history and architecture,\
+    \ which is unrelated. Or maybe something like \"Water is essential for human survival\
+    \ and plays a key role in many natural processes.\" That's biology and chemistry,\
+    \ which is also unrelated.\n\nAlternatively, maybe a sentence about a different\
+    \ technology, like space exploration. \"The Mars rover Perseverance was launched\
+    \ in 2020 to explore the Martian surface and search for signs of ancient life.\"\
+    \ That's about space exploration, which is a different topic.\n\nI need to pick\
+    \ one that's definitely unrelated. Let me pick the Eiffel Tower example. It's\
+    \ a well-known landmark, not related to server configurations. Alternatively,\
+    \ maybe a sentence about a different tech, like a smartphone feature. But that\
+    \ might be too close if the original text is about hardware. Wait, the original\
+    \ text includes things like SataPortA, which is related to storage, but the Eiffel\
+    \ Tower is a different topic. So that's safe. \n\nAlternatively, maybe a sentence\
+    \ about a different field, like \"The periodic table of elements was developed\
+    \ by Dmitri Mendeleev in the 19th century.\" That's chemistry, which is unrelated.\
+    \ \n\nBut the user might prefer something more general. Let me go with the Eiffel\
+    \ Tower example. It's a good choice, as it's a well-known fact and not related\
+    \ to the technical server configurations mentioned.\n</think>\n\nThe Eiffel Tower,\
+    \ completed in 1889, stands as a iconic symbol of France and one of the world's\
+    \ most recognizable landmarks."
+  - What is the VLAN ID associated with the first PXE device?
+- source_sentence: me0n1 Size 2900.0 G Name nvme2n1 Size 2900.0 G Name nvme1n1 Size
+    2900.0 G Tunings VMInstances Count None ContainerInstances Count None Chassis
+    ID System.Embedded.1 Manufacturer Dell Inc. Model PowerEdge R6525 PowerState On
+    Health Critical Systems ID System.Embedded.1 Manufacturer Dell Inc. PowerState
+    On Model AMD EPYC 7663 56-Core Processor Count 2 SKU C9X7T13 SystemType Physical
+    BIOS BIOSVersion 2.7.3 FirmwareVersion None Attributes SystemModelName PowerEdge
+    R6525 SystemBiosVersion 2.7.3 SystemServiceTag C9X7T13 SystemManufacturer Dell
+    Inc. SysMfrContactInfo www.dell.com SystemCpldVersion 1.0.0 UefiComplianceVersion
+    2.7 AgesaVersion MilanPI-SP3 1.0.0.8 SmuVersion 0.45.87.0 DxioVersion 45.682 ProcCoreSpeed
+    2.00 GHz ProcBusSpeed 16 GT/s Proc1Id 19-1-1 Proc1Brand AMD EPYC 7663 56-Core
+    Proces
+  sentences:
+  - What is the size of the nvme2n1 storage device?
+  - What's the capacity of the memory module connected to socket A10?
+  - '<think>
+
+    Okay, the user wants me to generate a sentence that''s unrelated in both topic
+    and meaning to the provided document chunk. Let me first understand what the document
+    is about.
+
+
+    Looking at the text, it seems to be system information, probably from a server
+    or a machine. There''s mention of hardware details like storage sizes (nvme2n1,
+    nvme1n1), chassis ID, manufacturer Dell Inc., model PowerEdge R6525, processor
+    details, BIOS versions, and other system attributes. It''s technical, related
+    to server hardware specs.
+
+
+    So the task is to create a sentence that''s not related. The user probably wants
+    something entirely different, not involving technology or servers. Maybe something
+    from a different domain, like everyday life, nature, or another field.
+
+
+    I need to make sure the sentence doesn''t mention anything related to servers,
+    hardware, or IT. Let me think of a random topic. Maybe something about cooking,
+    sports, or a general statement. For example, "The sun sets beautifully over the
+    horizon, casting a golden hue across the sky." That''s unrelated. Alternatively,
+    "She enjoys reading novels in her free time." But maybe the user wants something
+    more specific. Wait, the example given in the initial prompt was a sentence about
+    a cat, which is completely unrelated. So maybe a simple, everyday sentence that''s
+    not technical.
+
+
+    I should check if the generated sentence is indeed unrelated. The original text
+    is all about server hardware, so any sentence that''s not related to that. Maybe
+    something like "The cat slept through the entire movie." That''s a common example,
+    but maybe the user wants something more creative. Alternatively, "I love the sound
+    of rain on the windowsill." That''s a good one. It''s simple, not technical, and
+    unrelated to the server info. Let me make sure there''s no hidden connection.
+    The original text mentions "Dell Inc." but the sentence doesn''t reference that.
+    So yes, it''s unrelated. I think that''s a safe choice.
+
+    </think>
+
+
+    "The cat slept through the entire movie, undisturbed by the hum of the refrigerator."'
+- source_sentence: logyCapable Yes VirtualizationTechnologyEnabled Yes CurrentClockSpeedMhz
+    2000 InstructionSet x86-64 Manufacturer AMD MaxSpeedMHz 3900 Model AMD EPYC 7663
+    56-Core Processor ProcessorArchitecture x86 TotalCores 56 TotalThreads 112 Socket
+    CPU.Socket.2 Characteristics CPUFamily AMDZenProcessorFamily HyperThreadingCapable
+    Yes HyperThreadingEnabled Yes Id CPU.Socket.2 TurboModeCapable Yes TurboModeEnabled
+    Yes VirtualizationTechnologyCapable Yes VirtualizationTechnologyEnabled Yes CurrentClockSpeedMhz
+    2000 Memory Members ID DIMM.Socket.A2 RankCount 2 Status Health OK State Enabled
+    Capacity 65536 AllowedSpeedMHz 3200 OperatingSpeedMhz 2933 MemoryModuleType None
+    Manufacturer Hynix Semiconductor ID DIMM.Socket.A9 RankCount 2 Status Health OK
+    State Enabled Capacity 65536 AllowedSpeedMHz 3200 Operatin
+  sentences:
+  - What's the maximum speed this processor can handle?
+  - "<think>\nOkay, the user wants me to generate a sentence that's unrelated in both\
+    \ topic and meaning to the given document chunk. Let me first understand what\
+    \ the document is about. \n\nLooking at the data, it seems to be technical information\
+    \ about a CPU and memory. There are details like the processor model (AMD EPYC\
+    \ 7663), core counts, clock speeds, virtualization capabilities, memory modules\
+    \ with manufacturers like Hynix, and various technical specs. So the topic is\
+    \ hardware specifications, specifically for a server or high-performance computing\
+    \ system.\n\nNow, the task is to create a sentence that's completely unrelated.\
+    \ That means I need to think of a different subject. Maybe something everyday,\
+    \ like a hobby, a personal activity, or something from a different field. Let\
+    \ me brainstorm.\n\nPossible unrelated topics: cooking, sports, travel, literature,\
+    \ technology but not the same as the given data. Wait, but the user said \"unrelated\
+    \ in topic and meaning,\" so even if it's technology, but not the same as the\
+    \ given data. But maybe even better to pick something entirely different. For\
+    \ example, a sentence about a book, a movie, a personal experience, or something\
+    \ like that.\n\nLet me check the example response the assistant gave earlier.\
+    \ The example was about a book, \"The Great Gatsby,\" so maybe that's a safe choice.\
+    \ But I need to make sure that the sentence is not related. So, something like\
+    \ \"I enjoy reading classic literature, such as 'The Great Gatsby' by F. Scott\
+    \ Fitzgerald.\" That's unrelated to the technical specs provided.\n\nAlternatively,\
+    \ maybe a sentence about a different topic, like sports: \"My favorite sport is\
+    \ basketball, which I play every weekend with my friends.\" But I need to make\
+    \ sure that it's not related. The original data is about hardware, so anything\
+    \ not related to hardware, computing, or similar tech specs.\n\nAnother example:\
+    \ \"I love hiking in the mountains during the spring season.\" That's unrelated.\n\
+    \nBut the user might want a sentence that's not just about a different topic but\
+    \ also not related in meaning. So maybe something that's a complete non-sequitur.\
+    \ For example, \"The cat slept through the entire movie.\" But that's a bit random.\
+    \ However, the user might prefer something more coherent. Maybe a sentence about\
+    \ a different subject, like a personal experience, a random fact, etc.\n\nSo,\
+    \ to ensure it's unrelated, I need to pick a sentence that's about something entirely\
+    \ different. Let me think of a sentence about a book, a person, or an event. For\
+    \ example: \"She won the Nobel Prize in Literature for her poetic works.\" That's\
+    \ unrelated to the technical specs given.\n\nAlternatively, \"The weather was\
+    \ perfect for the picnic we had yesterday.\" That's about weather, which is unrelated.\n\
+    \nI should check if any of these examples might have any indirect relation. For\
+    \ instance, if the original data mentions \"Memory\" and \"Processor,\" maybe\
+    \ something about memory in a different context, but I think that's still unrelated.\
+    \ The key is that the sentence should not be about hardware specs, CPUs, memory,\
+    \ or related topics.\n\nSo, the final answer would be a sentence that's about\
+    \ a different subject, like a book, a personal activity, a random event, etc.\n\
+    </think>\n\nThe cat curled up on the windowsill, watching the sunset paint the\
+    \ sky in hues of orange and pink."
+  - What's the server model for this system?
 pipeline_tag: sentence-similarity
 library_name: sentence-transformers
 metrics:

 - name: SentenceTransformer based on sentence-transformers/all-MiniLM-L6-v2
   results:
   - task:
+      type: rich-loss
+      name: Rich Loss
     dataset:
+      name: val triplet
+      type: val-triplet
     metrics:
     - type: cosine_accuracy
+      value: 0.0
       name: Cosine Accuracy
 ---

 model = SentenceTransformer("sentence_transformers_model_id")
 # Run inference
 sentences = [
+    'logyCapable Yes VirtualizationTechnologyEnabled Yes CurrentClockSpeedMhz 2000 InstructionSet x86-64 Manufacturer AMD MaxSpeedMHz 3900 Model AMD EPYC 7663 56-Core Processor ProcessorArchitecture x86 TotalCores 56 TotalThreads 112 Socket CPU.Socket.2 Characteristics CPUFamily AMDZenProcessorFamily HyperThreadingCapable Yes HyperThreadingEnabled Yes Id CPU.Socket.2 TurboModeCapable Yes TurboModeEnabled Yes VirtualizationTechnologyCapable Yes VirtualizationTechnologyEnabled Yes CurrentClockSpeedMhz 2000 Memory Members ID DIMM.Socket.A2 RankCount 2 Status Health OK State Enabled Capacity 65536 AllowedSpeedMHz 3200 OperatingSpeedMhz 2933 MemoryModuleType None Manufacturer Hynix Semiconductor ID DIMM.Socket.A9 RankCount 2 Status Health OK State Enabled Capacity 65536 AllowedSpeedMHz 3200 Operatin',
+    "What's the maximum speed this processor can handle?",
+    '<think>\nOkay, the user wants me to generate a sentence that\'s unrelated in both topic and meaning to the given document chunk. Let me first understand what the document is about. \n\nLooking at the data, it seems to be technical information about a CPU and memory. There are details like the processor model (AMD EPYC 7663), core counts, clock speeds, virtualization capabilities, memory modules with manufacturers like Hynix, and various technical specs. So the topic is hardware specifications, specifically for a server or high-performance computing system.\n\nNow, the task is to create a sentence that\'s completely unrelated. That means I need to think of a different subject. Maybe something everyday, like a hobby, a personal activity, or something from a different field. Let me brainstorm.\n\nPossible unrelated topics: cooking, sports, travel, literature, technology but not the same as the given data. Wait, but the user said "unrelated in topic and meaning," so even if it\'s technology, but not the same as the given data. But maybe even better to pick something entirely different. For example, a sentence about a book, a movie, a personal experience, or something like that.\n\nLet me check the example response the assistant gave earlier. The example was about a book, "The Great Gatsby," so maybe that\'s a safe choice. But I need to make sure that the sentence is not related. So, something like "I enjoy reading classic literature, such as \'The Great Gatsby\' by F. Scott Fitzgerald." That\'s unrelated to the technical specs provided.\n\nAlternatively, maybe a sentence about a different topic, like sports: "My favorite sport is basketball, which I play every weekend with my friends." But I need to make sure that it\'s not related. The original data is about hardware, so anything not related to hardware, computing, or similar tech specs.\n\nAnother example: "I love hiking in the mountains during the spring season." That\'s unrelated.\n\nBut the user might want a sentence that\'s not just about a different topic but also not related in meaning. So maybe something that\'s a complete non-sequitur. For example, "The cat slept through the entire movie." But that\'s a bit random. However, the user might prefer something more coherent. Maybe a sentence about a different subject, like a personal experience, a random fact, etc.\n\nSo, to ensure it\'s unrelated, I need to pick a sentence that\'s about something entirely different. Let me think of a sentence about a book, a person, or an event. For example: "She won the Nobel Prize in Literature for her poetic works." That\'s unrelated to the technical specs given.\n\nAlternatively, "The weather was perfect for the picnic we had yesterday." That\'s about weather, which is unrelated.\n\nI should check if any of these examples might have any indirect relation. For instance, if the original data mentions "Memory" and "Processor," maybe something about memory in a different context, but I think that\'s still unrelated. The key is that the sentence should not be about hardware specs, CPUs, memory, or related topics.\n\nSo, the final answer would be a sentence that\'s about a different subject, like a book, a personal activity, a random event, etc.\n</think>\n\nThe cat curled up on the windowsill, watching the sunset paint the sky in hues of orange and pink.',
 ]
 embeddings = model.encode(sentences)
 print(embeddings.shape)

 # Get the similarity scores for the embeddings
 similarities = model.similarity(embeddings, embeddings)
 print(similarities)
+# tensor([[ 1.0000, -0.0038,  0.7700],
+#         [-0.0038,  1.0000,  0.1311],
+#         [ 0.7700,  0.1311,  1.0000]])
 ```

 <!--
 
 ### Metrics

+#### Rich Loss

+* Dataset: `val-triplet`
+* Evaluated with <code>__main__.RichLossEvaluator</code>

 | Metric              | Value   |
 |:--------------------|:--------|
+| **cosine_accuracy** | **0.0** |

 <!--
 ## Bias, Risks and Limitations

 #### Unnamed Dataset

+* Size: 254 training samples
 * Columns: <code>sentence_0</code>, <code>sentence_1</code>, and <code>sentence_2</code>
+* Approximate statistics based on the first 254 samples:
+  |         | sentence_0 | sentence_1 | sentence_2 |
+  |:--------|:-----------|:-----------|:-----------|
+  | type    | string     | dict       | string     |
+  | details | <ul><li>min: 14 tokens</li><li>mean: 237.9 tokens</li><li>max: 256 tokens</li></ul> | <ul><li></li></ul> | <ul><li>min: 255 tokens</li><li>mean: 256.0 tokens</li><li>max: 256 tokens</li></ul> |
 * Samples:
+  | sentence_0 | sentence_1 | sentence_2 |
+  |:-----------|:-----------|:-----------|
+  | <code>used 4 free 1 VMs None Disk name sdb size 223.5G type disk mountpoints None name sdb1 size 600M type part mountpoints None name sdb2 size 1G type part mountpoints None name sdb3 size 221.9G type part mountpoints None name ubuntu--vg-ubuntu--lv size 2.9T type lvm mountpoints / name rhel-swap size 4G type lvm mountpoints None name rhel-home size 147.9G type lvm mountpoints None name rhel-root size 70G type lvm mountpoints None name nvme4n1 size 2.9T type disk mountpoints None name nvme4n1p1 size 2.9T type part mountpoints None name nvme3n1 size 2.9T type disk mountpoints None name nvme0n1 size 2.9T type disk mountpoints None name nvme2n1 size 2.9T type disk mountpoints None name nvme1n1 size 2.9T type disk mountpoints None name nvme1n1p1 size 2.9T type part mountpoints None name nvme0n1p1 s</code> | <code>{'question_1': "What's the size of the disk named sdb?", 'question_10': "What's the disk capacity for sdb?", 'question_11': 'How big is the sdb disk in GB?', 'question_12': "What's the size of the sdb disk in GB?", 'question_13': 'What is the size of the sdb disk in gigabytes?', 'question_14': "What's the storage size of the sdb disk in GB?", 'question_15': 'How much storage does the sdb disk have?', 'question_16': 'What is the disk size of sdb in gigabytes?', 'question_17': "What's the size of the sdb disk in terms of gigabytes?", 'question_18': 'How large is the sdb disk in gigabytes?', 'question_19': "What's the disk size for sdb in gigabytes?", 'question_2': 'Can you tell me how big the sdb disk is?', 'question_20': 'What is the size of the sdb disk in gigabytes?', 'question_3': 'What is the capacity of the sdb disk?', 'question_4': 'How much space does the sdb disk take up?', 'question_5': "What's the storage size of the sdb disk?", 'question_6': "What's the size of the sdb disk in gigabytes?", 'question_7': 'How large is the sdb disk?', 'question_8': "What's the total size of the sdb disk?", 'question_9': 'What is the disk size for sdb?'}</code> | <code><think><br>Okay, the user wants me to generate a sentence that's unrelated in both topic and meaning to the provided document chunk. Let me first understand what the document chunk is about.<br><br>Looking at the content, it seems to be a list of disk partitions and volumes on a system. There are entries like 'sdb', 'sdb1', 'sdb2', 'sdb3', then some LVM volumes like 'ubuntu--vg-ubuntu--lv', 'rhel-swap', 'rhel-home', 'rhel-root', and several NVMe disks with names like 'nvme4n1', 'nvme3n1', etc. Each entry has a size, type, and sometimes mountpoints. So the main topic here is about disk partitions, storage configuration, and Linux system setup.<br><br>Now, I need to create a sentence that's completely unrelated. The key is to avoid any mention of computers, storage, Linux, or related terms. Maybe something from a different domain, like cooking, sports, or everyday activities.<br><br>Let me think of some examples. Maybe something like "The cat sat on the windowsill while the sun was setting." That's a simple ...</code> |
+  | <code>wn Device SataPortHCapacity N/A SetBootOrderEn Optical.iDRACVirtual.1-1,Disk.Bay.4:Enclosure.Internal.0-1,NIC.PxeDevice.1-1,Floppy.iDRACVirtual.1-1,AHCI.Slot.2-2 IscsiDev1Con1Lun 0 IscsiDev1Con2Lun 0 CurrentEmbVideoState Enabled AesNi Enabled TpmInfo No TPM present SysMemSize 2048 GB SysMemType ECC DDR4 SysMemSpeed 2933 MT/s SysMemVolt 1.20 V VideoMem 16 MB CpuMinSevAsid 1 Proc1NumCores 56 Proc2NumCores 56 PxeDev1VlanId 1 PxeDev2VlanId 1 PxeDev3VlanId 1 PxeDev4VlanId 1 PxeDev5VlanId 1 PxeDev6VlanId 1 PxeDev7VlanId 1 PxeDev8VlanId 1 PxeDev9VlanId 1 PxeDev10VlanId 1 PxeDev11VlanId 1 PxeDev12VlanId 1 PxeDev13VlanId 1 PxeDev14VlanId 1 PxeDev15VlanId 1 PxeDev16VlanId 1 HttpDev1VlanId 1 HttpDev2VlanId 1 HttpDev3VlanId 1 HttpDev4VlanId 1 IscsiDev1Con1VlanId 1 IscsiDev1Con1Retry 3 IscsiDev1Con1Tim</code> | <code>{'question_1': 'What is the total memory capacity of the system?', 'question_10': "What's the system's RAM capacity?", 'question_11': 'How much memory does the machine have?', 'question_12': "What is the system's memory size?", 'question_13': "What's the memory capacity of the system?", 'question_14': 'How much memory is installed in the system?', 'question_15': "What's the total RAM on the system?", 'question_16': "What is the system's total memory?", 'question_17': 'How much memory is present in the system?', 'question_18': "What's the system's memory capacity?", 'question_19': "What is the size of the system's memory?", 'question_2': 'How much RAM does the system have available?', 'question_20': 'How much memory is available on the system?', 'question_3': "What's the memory size of the server?", 'question_4': "What's the total memory installed in the machine?", 'question_5': 'How much memory is allocated to the system?', 'question_6': "What's the system's memory capacity?", 'question_7': 'What is the installed RAM size?', 'question_8': "How large is the system's memory?", 'question_9': "What's the total memory in the server?"}</code> | <code><think><br>Okay, the user wants me to generate a sentence that's unrelated in topic and meaning to the given document chunk. Let me first understand what the document chunk is about. It looks like a bunch of technical terms related to a server or computer hardware configuration. There are things like SataPortHCapacity, BootOrderEn, Optical.iDRACVirtual, IscsiDev1Con1Lun 0, SysMemSize 2048 GB, CPU cores, VLAN IDs, etc. So it's probably a system configuration or hardware specs.<br><br>Now, I need to create a sentence that's completely unrelated. The challenge is to avoid any technical terms or concepts related to computers, servers, hardware, networking, etc. Let me think of a different topic. Maybe something about food, travel, sports, or daily activities. <br><br>How about something like a recipe? Or a description of a natural phenomenon? Or maybe a sentence about a different subject entirely. Let me check if the example given by the user is something like that. The example they provided was "The qui...</code> |
+  | <code>OVSupport Disabled SMTControl Enabled L1StridePrefetcher Enabled L1RegionPrefetcher Enabled LocalApicMode Enabled CorePerformanceBoost Disabled FmaxBoostLimitControl None PowerProfile PerfPerWattOptimizedOs MemoryInterleaving Auto MemoryRefreshRate 1x ProcessorC-States Enabled 3DV-Cache None LastLevelCacheAsNUMANode Disabled NumaNodesPerSocket 1 DeterminismSlider PowerDeterminism MaxMemoryClockSpeed MaxPerf L1StreamHwPrefetcher Enabled L2StreamHWprefetcher Enabled L2UpDownPrefetcher Enabled IOMMUSupport Enabled XGMILinkSpeed None DataFabricCState None DeterminismControl None InfinityFabricPstate None xGMIForceLinkWidth None xGMIMaxLinkWidth None AlgorithmPerformanceBoostDisable Disabled VirtualizationSupport Enabled CPU Architecture x86-64 Socket(s) 2 CPU(s) 224 Thread(s)PerCore 2 Core(s)P</code> | <code>{'question_1': 'What is the maximum performance setting for the CPU?', 'question_10': "How does the CPU's clock speed work when it's at maximum performance?", 'question_11': 'What is the maximum clock speed achievable by the CPU?', 'question_12': 'What is the highest clock speed the CPU can reach?', 'question_13': "What's the top clock speed the CPU can achieve?", 'question_14': 'What is the maximum performance clock speed for the CPU?', 'question_15': "How fast can the CPU go when it's at maximum performance?", 'question_16': "What is the CPU's top clock speed under maximum performance?", 'question_17': "What's the maximum clock speed the CPU can go to?", 'question_18': 'What is the highest clock speed the CPU can reach during maximum performance?', 'question_19': 'What is the top clock speed the CPU can achieve at maximum performance?', 'question_2': 'Can you tell me the highest clock speed the processor can reach?', 'question_20': "What's the maximum clock speed the CPU can hit when running at full performance?", 'question_3': "What's the maximum performance level the CPU can achieve?", 'question_4': "What's the top clock speed the CPU can go to?", 'question_5': 'What is the highest clock speed the processor can hit?', 'question_6': "How fast can the CPU's clock speed go when at maximum performance?", 'question_7': 'What is the maximum clock speed the processor can reach?', 'question_8': 'What is the maximum performance clock speed for this CPU?', 'question_9': "What's the top speed the CPU can go to under maximum performance?"}</code> | <code><think><br>Okay, the user wants me to generate a sentence that's unrelated in topic and meaning to the given document chunk. Let me first understand what the document is about. The text seems to be a list of technical specifications related to a CPU or processor, like various features, settings, and configurations. Terms like "OVSupport", "SMTControl", "L1StridePrefetcher", "MemoryInterleaving", "PowerProfile", "CPU Architecture", "Socket(s)", "CPU(s)", "Thread(s)PerCore", etc., all point to hardware specifications and system settings.<br><br>Now, I need to create a sentence that's completely unrelated. The key here is that the new sentence should not relate to any of these topics. So, I should think of a completely different subject. Maybe something from everyday life, nature, or another field like sports, art, or technology but not related to CPUs or hardware.<br><br>Let me brainstorm. If I think about something like a simple sentence about a person doing an activity, or a natural phenomenon. For e...</code> |
+* Loss: <code>__main__.WandbLoggingTripletLoss</code> with these parameters:
   ```json
   {
       "distance_metric": "TripletDistanceMetric.EUCLIDEAN",
 - `per_device_train_batch_size`: 16
 - `per_device_eval_batch_size`: 16
 - `num_train_epochs`: 1
+- `disable_tqdm`: True
 - `multi_dataset_batch_sampler`: round_robin

 #### All Hyperparameters

 - `dataloader_num_workers`: 0
 - `dataloader_prefetch_factor`: None
 - `past_index`: -1
+- `disable_tqdm`: True
 - `remove_unused_columns`: True
 - `label_names`: None
 - `load_best_model_at_end`: False

 </details>

 ### Training Logs
+| Epoch | Step | val-triplet_cosine_accuracy |
+|:-----:|:----:|:---------------------------:|
+| 0.625 | 10   | 0.0                         |
+| 1.0   | 16   | 0.0                         |


 ### Framework Versions

 }
 ```

+#### WandbLoggingTripletLoss
 ```bibtex
 @misc{hermans2017defense,
     title={In Defense of the Triplet Loss for Person Re-Identification},
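Note: the updated card references `__main__.WandbLoggingTripletLoss` and `__main__.RichLossEvaluator`, custom classes defined in the training scripts (train_wandb.py and friends) rather than in sentence-transformers itself, and their definitions are not rendered in this view. A minimal sketch of what such a logging wrapper might look like, assuming `wandb.init()` has already been called; this is a hypothetical reconstruction, not the code in this commit:

```python
import wandb
from sentence_transformers import losses

class WandbLoggingTripletLoss(losses.TripletLoss):
    """TripletLoss that also logs each batch's loss to Weights & Biases (sketch)."""

    def forward(self, sentence_features, labels):
        loss = super().forward(sentence_features, labels)
        wandb.log({"train/triplet_loss": loss.item()})  # per-batch scalar
        return loss
```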
fine_tuned_sbert_triplet/eval/triplet_evaluation_val-triplet-eval_results.csv CHANGED
@@ -1,2 +1,5 @@
 epoch,steps,accuracy_cosine
 1.0,1,1.0
+1.0,20,0.006289307959377766
+1.0,20,0.006289307959377766
+1.0,16,0.0
fine_tuned_sbert_triplet/eval/triplet_evaluation_val-triplet_results.csv ADDED
@@ -0,0 +1,2 @@
+epoch,steps,accuracy_cosine
+1.0,16,0.0
fine_tuned_sbert_triplet/model.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:1377e9af0ca0b016a9f2aa584d6fc71ab3ea6804fae21ef9fb1416e2944057ac
+oid sha256:d777f4b8d6f30896478b2bcf5cd56d480109b3a77fe4d4e4f1714f2414b936c4
 size 90864192
train.py CHANGED
@@ -1,34 +1,39 @@
 
1
  from sentence_transformers import SentenceTransformer, losses, InputExample, evaluation
2
  from torch.utils.data import DataLoader
3
 
 
 
 
 
 
 
 
 
 
 
4
  def main():
5
  model_name = 'all-MiniLM-L6-v2'
6
  model = SentenceTransformer(model_name)
7
 
8
- # Training triplets (anchor, positive, negative)
9
- train_examples = [
10
- InputExample(texts=["A man is playing a guitar", "A person is playing a guitar", "A woman is reading a book"]),
11
- InputExample(texts=["A dog is running in the park", "A dog runs in the park", "A cat is sleeping on the couch"]),
12
- InputExample(texts=["The car is fast", "A fast car", "A slow bicycle"]),
13
- InputExample(texts=["He is eating pizza", "He eats pizza", "She is drinking water"]),
14
- InputExample(texts=["The kids are playing outside", "Children are playing outdoors", "Adults are working inside"]),
15
- ]
16
  train_dataloader = DataLoader(train_examples, shuffle=True, batch_size=16)
17
  train_loss = losses.TripletLoss(model=model)
18
 
19
- # Validation triplets for evaluator
20
- val_examples = [
21
- InputExample(texts=["A woman is cooking food", "A lady is preparing dinner", "A man is playing football"]),
22
- InputExample(texts=["A boy is riding a bike", "A child rides a bicycle", "An old man is reading a newspaper"]),
23
- InputExample(texts=["The plane is flying", "An aircraft is in the air", "A boat is sailing on the water"]),
24
- ]
25
  val_dataloader = DataLoader(val_examples, shuffle=False, batch_size=16)
26
 
27
- # Create a TripletEvaluator
 
 
 
 
28
  evaluator = evaluation.TripletEvaluator(
29
- anchors=["A woman is cooking food", "A boy is riding a bike", "The plane is flying"],
30
- positives=["A lady is preparing dinner", "A child rides a bicycle", "An aircraft is in the air"],
31
- negatives=["A man is playing football", "An old man is reading a newspaper", "A boat is sailing on the water"],
32
  name='val-triplet-eval',
33
  show_progress_bar=True
34
  )
@@ -36,13 +41,13 @@ def main():
36
  num_epochs = 1
37
  warmup_steps = int(len(train_dataloader) * num_epochs * 0.1)
38
 
39
- # Train with evaluator
40
  model.fit(
41
  train_objectives=[(train_dataloader, train_loss)],
42
  epochs=num_epochs,
43
  warmup_steps=warmup_steps,
44
  evaluator=evaluator,
45
- evaluation_steps=10, # evaluate every 10 steps
46
  output_path='fine_tuned_sbert_triplet',
47
  show_progress_bar=True
48
  )
 
1
+ import json
2
  from sentence_transformers import SentenceTransformer, losses, InputExample, evaluation
3
  from torch.utils.data import DataLoader
4
 
5
+ def load_triplets_from_jsonl(file_path):
6
+ examples = []
7
+ with open(file_path, 'r', encoding='utf-8') as f:
8
+ for line in f:
9
+ data = json.loads(line)
10
+ examples.append(
11
+ InputExample(texts=[data['anchor'], data['positive'], data['negative']])
12
+ )
13
+ return examples
14
+
15
  def main():
16
  model_name = 'all-MiniLM-L6-v2'
17
  model = SentenceTransformer(model_name)
18
 
19
+ # Load training triplets from JSONL
20
+ train_examples = load_triplets_from_jsonl('train_triplets.jsonl')
 
 
 
 
 
 
21
  train_dataloader = DataLoader(train_examples, shuffle=True, batch_size=16)
22
  train_loss = losses.TripletLoss(model=model)
23
 
24
+ # Load validation triplets from JSONL
25
+ val_examples = load_triplets_from_jsonl('val_triplets.jsonl')
 
 
 
 
26
  val_dataloader = DataLoader(val_examples, shuffle=False, batch_size=16)
27
 
28
+ # Extract anchors, positives, negatives for evaluator from val_examples
29
+ anchors = [ex.texts[0] for ex in val_examples]
30
+ positives = [ex.texts[1] for ex in val_examples]
31
+ negatives = [ex.texts[2] for ex in val_examples]
32
+
33
  evaluator = evaluation.TripletEvaluator(
34
+ anchors=anchors,
35
+ positives=positives,
36
+ negatives=negatives,
37
  name='val-triplet-eval',
38
  show_progress_bar=True
39
  )
 
41
  num_epochs = 1
42
  warmup_steps = int(len(train_dataloader) * num_epochs * 0.1)
43
 
44
+ # Train the model
45
  model.fit(
46
  train_objectives=[(train_dataloader, train_loss)],
47
  epochs=num_epochs,
48
  warmup_steps=warmup_steps,
49
  evaluator=evaluator,
50
+ evaluation_steps=10,
51
  output_path='fine_tuned_sbert_triplet',
52
  show_progress_bar=True
53
  )
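
A note on the data format: load_triplets_from_jsonl expects one JSON object per line with anchor, positive, and negative string fields. A minimal sketch of writing such a file (the sentences are illustrative, not taken from the real dataset):

import json

sample_triplets = [
    {"anchor": "A man is playing a guitar",
     "positive": "A person is playing a guitar",
     "negative": "A woman is reading a book"},
]
with open("train_triplets.jsonl", "w", encoding="utf-8") as f:
    for triplet in sample_triplets:
        f.write(json.dumps(triplet) + "\n")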
train_example.py ADDED
@@ -0,0 +1,54 @@
+ from sentence_transformers import SentenceTransformer, losses, InputExample, evaluation
+ from torch.utils.data import DataLoader
+
+ def main():
+     model_name = 'all-MiniLM-L6-v2'
+     model = SentenceTransformer(model_name)
+
+     # Training triplets (anchor, positive, negative)
+     train_examples = [
+         InputExample(texts=["A man is playing a guitar", "A person is playing a guitar", "A woman is reading a book"]),
+         InputExample(texts=["A dog is running in the park", "A dog runs in the park", "A cat is sleeping on the couch"]),
+         InputExample(texts=["The car is fast", "A fast car", "A slow bicycle"]),
+         InputExample(texts=["He is eating pizza", "He eats pizza", "She is drinking water"]),
+         InputExample(texts=["The kids are playing outside", "Children are playing outdoors", "Adults are working inside"]),
+     ]
+     train_dataloader = DataLoader(train_examples, shuffle=True, batch_size=16)
+     train_loss = losses.TripletLoss(model=model)
+
+     # Validation triplets for evaluator
+     val_examples = [
+         InputExample(texts=["A woman is cooking food", "A lady is preparing dinner", "A man is playing football"]),
+         InputExample(texts=["A boy is riding a bike", "A child rides a bicycle", "An old man is reading a newspaper"]),
+         InputExample(texts=["The plane is flying", "An aircraft is in the air", "A boat is sailing on the water"]),
+     ]
+     val_dataloader = DataLoader(val_examples, shuffle=False, batch_size=16)
+
+     # Create a TripletEvaluator
+     evaluator = evaluation.TripletEvaluator(
+         anchors=["A woman is cooking food", "A boy is riding a bike", "The plane is flying"],
+         positives=["A lady is preparing dinner", "A child rides a bicycle", "An aircraft is in the air"],
+         negatives=["A man is playing football", "An old man is reading a newspaper", "A boat is sailing on the water"],
+         name='val-triplet-eval',
+         show_progress_bar=True
+     )
+
+     num_epochs = 1
+     warmup_steps = int(len(train_dataloader) * num_epochs * 0.1)
+
+     # Train with evaluator
+     model.fit(
+         train_objectives=[(train_dataloader, train_loss)],
+         epochs=num_epochs,
+         warmup_steps=warmup_steps,
+         evaluator=evaluator,
+         evaluation_steps=10,  # evaluate every 10 steps
+         output_path='fine_tuned_sbert_triplet',
+         show_progress_bar=True
+     )
+
+     print("Model fine-tuned and saved at 'fine_tuned_sbert_triplet'")
+
+ if __name__ == "__main__":
+     main()
+
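A quick sanity check of the saved model (a sketch; the sentence pair is illustrative and it assumes the default output path used above):

from sentence_transformers import SentenceTransformer, util

model = SentenceTransformer('fine_tuned_sbert_triplet')
embeddings = model.encode(["A man is playing a guitar", "A person is playing a guitar"])
print(util.cos_sim(embeddings[0], embeddings[1]))  # paraphrases should score close to 1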
train_triplets.jsonl ADDED
The diff for this file is too large to render. See raw diff
 
train_v1.py ADDED
@@ -0,0 +1,76 @@
+ import json
+ from sentence_transformers import SentenceTransformer, losses, InputExample, evaluation
+ from torch.utils.data import DataLoader
+ from rich.console import Console
+ from rich.table import Table
+
+ console = Console()
+
+ def load_triplets_from_jsonl(file_path):
+     examples = []
+     with open(file_path, 'r', encoding='utf-8') as f:
+         for line in f:
+             data = json.loads(line)
+             examples.append(
+                 InputExample(texts=[data['anchor'], data['positive'], data['negative']])
+             )
+     return examples
+
+ def main():
+     model_name = 'all-MiniLM-L6-v2'
+     model = SentenceTransformer(model_name)
+
+     # Load training triplets from JSONL
+     train_examples = load_triplets_from_jsonl('train_triplets.jsonl')
+     train_dataloader = DataLoader(train_examples, shuffle=True, batch_size=16)
+     train_loss = losses.TripletLoss(model=model)
+
+     # Load validation triplets from JSONL
+     val_examples = load_triplets_from_jsonl('val_triplets.jsonl')
+     val_dataloader = DataLoader(val_examples, shuffle=False, batch_size=16)
+
+     # Extract anchors, positives, negatives for evaluator from val_examples
+     anchors = [ex.texts[0] for ex in val_examples]
+     positives = [ex.texts[1] for ex in val_examples]
+     negatives = [ex.texts[2] for ex in val_examples]
+
+     evaluator = evaluation.TripletEvaluator(
+         anchors=anchors,
+         positives=positives,
+         negatives=negatives,
+         name='val-triplet-eval',
+         show_progress_bar=True
+     )
+
+     num_epochs = 1
+     warmup_steps = int(len(train_dataloader) * num_epochs * 0.1)
+     output_path = 'fine_tuned_sbert_triplet'
+
+     # Train the model
+     model.fit(
+         train_objectives=[(train_dataloader, train_loss)],
+         epochs=num_epochs,
+         warmup_steps=warmup_steps,
+         evaluator=evaluator,
+         evaluation_steps=10,
+         output_path=output_path,
+         show_progress_bar=True
+     )
+
+     # Show summary with rich
+     table = Table(title="Training Summary")
+     table.add_column("Metric", style="cyan", no_wrap=True)
+     table.add_column("Value", style="magenta")
+
+     table.add_row("Model Name", model_name)
+     table.add_row("Training Triplets", str(len(train_examples)))
+     table.add_row("Validation Triplets", str(len(val_examples)))
+     table.add_row("Epochs", str(num_epochs))
+     table.add_row("Output Path", output_path)
+
+     console.print(table)
+     console.print("[bold green]Model fine-tuned and saved successfully![/bold green]")
+
+ if __name__ == "__main__":
+     main()
+
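For intuition: the accuracy that TripletEvaluator reports is the fraction of validation triplets whose anchor embedding is closer (cosine similarity by default) to the positive than to the negative. A minimal re-implementation sketch of the metric, not the library's actual code:

from sentence_transformers import SentenceTransformer, util

def triplet_accuracy(model: SentenceTransformer, anchors, positives, negatives):
    # Encode each column of the triplets, then count anchor-positive wins.
    a = model.encode(anchors, convert_to_tensor=True)
    p = model.encode(positives, convert_to_tensor=True)
    n = model.encode(negatives, convert_to_tensor=True)
    correct = sum(
        util.cos_sim(a[i], p[i]).item() > util.cos_sim(a[i], n[i]).item()
        for i in range(len(anchors))
    )
    return correct / len(anchors)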
train_v2.py ADDED
@@ -0,0 +1,125 @@
+ import json
+ import random
+ from sentence_transformers import SentenceTransformer, losses, InputExample, evaluation
+ from torch.utils.data import DataLoader
+ from rich.console import Console
+ from rich.table import Table
+ from rich.progress import Progress, SpinnerColumn, TextColumn, BarColumn, TimeElapsedColumn
+
+ console = Console()
+
+ def load_triplets_from_jsonl(file_path):
+     examples = []
+     with open(file_path, 'r', encoding='utf-8') as f:
+         for line in f:
+             data = json.loads(line)
+             examples.append(
+                 InputExample(texts=[data['anchor'], data['positive'], data['negative']])
+             )
+     return examples
+
+ def split_data(examples, train_ratio=0.8, seed=42):
+     random.seed(seed)
+     random.shuffle(examples)
+     train_size = int(len(examples) * train_ratio)
+     return examples[:train_size], examples[train_size:]
+
+ def main():
+     model_name = 'all-MiniLM-L6-v2'
+     model = SentenceTransformer(model_name)
+
+     # Load all triplets from a single JSONL file
+     all_examples = load_triplets_from_jsonl('triplets.jsonl')
+
+     # Split into train and validation
+     train_examples, val_examples = split_data(all_examples, train_ratio=0.8)
+
+     train_dataloader = DataLoader(train_examples, shuffle=True, batch_size=16)
+     train_loss = losses.TripletLoss(model=model)
+     val_dataloader = DataLoader(val_examples, shuffle=False, batch_size=16)
+
+     anchors = [ex.texts[0] for ex in val_examples]
+     positives = [ex.texts[1] for ex in val_examples]
+     negatives = [ex.texts[2] for ex in val_examples]
+
+     evaluator = evaluation.TripletEvaluator(
+         anchors=anchors,
+         positives=positives,
+         negatives=negatives,
+         name='val-triplet-eval',
+         show_progress_bar=False
+     )
+
+     num_epochs = 1
+     warmup_steps = int(len(train_dataloader) * num_epochs * 0.1)
+     output_path = 'fine_tuned_sbert_triplet'
+
+     console.print(f"[bold]Starting training with {len(train_examples)} triplets, validating on {len(val_examples)} triplets.[/bold]\n")
+
+     progress = Progress(
+         SpinnerColumn(),
+         TextColumn("[progress.description]{task.description}"),
+         BarColumn(),
+         TextColumn("[progress.percentage]{task.percentage:>3.0f}%"),
+         TimeElapsedColumn(),
+         transient=True,
+     )
+
+     class RichLossEvaluator(evaluation.TripletEvaluator):
+         def __init__(self, *args, progress_task, **kwargs):
+             super().__init__(*args, **kwargs)
+             self.progress_task = progress_task
+
+         def __call__(self, model, output_path=None, epoch=-1, steps=-1, **kwargs):
+             score_dict = super().__call__(model, output_path=output_path, epoch=epoch, steps=steps, **kwargs)
+             if isinstance(score_dict, dict):
+                 if "accuracy" in score_dict:
+                     val_score = score_dict["accuracy"]
+                 else:
+                     val_score = next(iter(score_dict.values()))
+             else:
+                 val_score = score_dict
+
+             progress.update(self.progress_task, advance=10)  # assuming evaluation every 10 steps
+             progress.console.log(f"Step {steps}: Validation score: {val_score:.4f}")
+             return score_dict
+
+     with progress:
+         task = progress.add_task("[green]Training...[/green]", total=num_epochs * len(train_dataloader))
+         rich_evaluator = RichLossEvaluator(
+             anchors=anchors,
+             positives=positives,
+             negatives=negatives,
+             name='val-triplet-eval',
+             show_progress_bar=False,
+             progress_task=task
+         )
+
+         model.fit(
+             train_objectives=[(train_dataloader, train_loss)],
+             epochs=num_epochs,
+             warmup_steps=warmup_steps,
+             evaluator=rich_evaluator,
+             evaluation_steps=10,
+             output_path=output_path,
+             show_progress_bar=False
+         )
+         progress.update(task, completed=num_epochs * len(train_dataloader))
+
+     table = Table(title="Training Summary")
+     table.add_column("Metric", style="cyan", no_wrap=True)
+     table.add_column("Value", style="magenta")
+
+     table.add_row("Model Name", model_name)
+     table.add_row("Total Triplets", str(len(all_examples)))
+     table.add_row("Training Triplets", str(len(train_examples)))
+     table.add_row("Validation Triplets", str(len(val_examples)))
+     table.add_row("Epochs", str(num_epochs))
+     table.add_row("Output Path", output_path)
+
+     console.print(table)
+     console.print("[bold green]Model fine-tuned and saved successfully![/bold green]")
+
+ if __name__ == "__main__":
+     main()
+
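The 80/20 split in split_data is deterministic for a fixed seed. Worked through with the dataset size implied by the W&B run logs below (254 + 64 = 318 triplets in triplets.jsonl):

>>> int(318 * 0.8)  # train_size
254
>>> 318 - 254       # validation remainder
64

which matches the "Starting training with 254 triplets, validating on 64 triplets." line in those logs.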
train_wandb.py ADDED
@@ -0,0 +1,161 @@
+ import os
+ import json
+ import random
+ import logging
+
+ import wandb
+ from sentence_transformers import SentenceTransformer, InputExample, evaluation
+ from sentence_transformers.losses import TripletLoss
+ from torch.utils.data import DataLoader
+
+ from rich.console import Console
+ from rich.table import Table
+ from rich.progress import Progress, SpinnerColumn, TextColumn, BarColumn, TimeElapsedColumn
+
+ # ——————— Setup logging & console ———————
+ logging.basicConfig(format='%(asctime)s - %(message)s', level=logging.INFO)
+ console = Console()
+
+ # ——————— Data loading & splitting ———————
+ def load_triplets_from_jsonl(path):
+     examples = []
+     with open(path, 'r', encoding='utf-8') as f:
+         for line in f:
+             data = json.loads(line)
+             examples.append(InputExample(texts=[
+                 data['anchor'], data['positive'], data['negative']
+             ]))
+     return examples
+
+ def split_data(examples, train_ratio=0.8, seed=42):
+     random.seed(seed)
+     random.shuffle(examples)
+     split = int(len(examples) * train_ratio)
+     return examples[:split], examples[split:]
+
+ # ——————— Custom loss to log training loss to W&B ———————
+ class WandbLoggingTripletLoss(TripletLoss):
+     def __init__(self, model, progress_task, progress):
+         super().__init__(model)
+         self.progress_task = progress_task
+         self.progress = progress
+         self.step = 0
+
+     # HuggingFace Trainer will call forward(sentence_features, labels)
+     def forward(self, sentence_features, labels):
+         loss = super().forward(sentence_features, labels)
+         wandb.log({"train_step": self.step, "train_loss": loss.item()})
+         self.progress.update(self.progress_task, advance=1)
+         self.step += 1
+         return loss
+
+ # ——————— Custom evaluator to log validation metrics to W&B ———————
+ class RichLossEvaluator(evaluation.TripletEvaluator):
+     def __init__(self, anchors, positives, negatives, name, progress_task, progress):
+         super().__init__(
+             anchors=anchors, positives=positives, negatives=negatives,
+             name=name, show_progress_bar=False
+         )
+         self.progress_task = progress_task
+         self.progress = progress
+
+     def __call__(self, model, output_path=None, epoch=-1, steps=-1, **kwargs):
+         metrics = super().__call__(model, output_path=output_path, epoch=epoch, steps=steps, **kwargs)
+         # extract a scalar (accuracy or first metric)
+         if isinstance(metrics, dict):
+             val_score = metrics.get("accuracy", next(iter(metrics.values())))
+         else:
+             val_score = metrics
+         wandb.log({
+             "validation_step": steps,
+             "validation_score": val_score,
+             "epoch": epoch
+         })
+         # advance progress bar by 10 (since eval runs every 10 steps)
+         self.progress.update(self.progress_task, advance=10)
+         self.progress.console.log(f"[blue]Step {steps}[/blue]: Validation score: {val_score:.4f}")
+         return metrics
+
+ def main():
+     # ——————— Configuration ———————
+     model_name = 'all-MiniLM-L6-v2'
+     jsonl_path = 'triplets.jsonl'
+     output_path = 'fine_tuned_sbert_triplet'
+     batch_size = 16
+     train_ratio = 0.8
+     num_epochs = 1
+
+     # ——————— Initialize model & W&B ———————
+     model = SentenceTransformer(model_name)
+     wandb.init(
+         project="sbert-triplet-finetune",
+         name="custom_run_name",  # distinct from output_path
+         config={"model": model_name, "batch_size": batch_size, "epochs": num_epochs}
+     )
+
+     # ——————— Load & split data ———————
+     examples = load_triplets_from_jsonl(jsonl_path)
+     train_exs, val_exs = split_data(examples, train_ratio=train_ratio)
+
+     train_loader = DataLoader(train_exs, shuffle=True, batch_size=batch_size)
+     val_loader = DataLoader(val_exs, shuffle=False, batch_size=batch_size)
+
+     anchors = [e.texts[0] for e in val_exs]
+     positives = [e.texts[1] for e in val_exs]
+     negatives = [e.texts[2] for e in val_exs]
+
+     total_train_steps = len(train_loader) * num_epochs
+     warmup_steps = int(0.1 * total_train_steps)
+
+     console.print(f"[bold]Training on {len(train_exs)} triplets, validating on {len(val_exs)} triplets[/bold]\n")
+
+     progress = Progress(
+         SpinnerColumn(),
+         TextColumn("[progress.description]{task.description}"),
+         BarColumn(),
+         TextColumn("[progress.percentage]{task.percentage:>3.0f}%"),
+         TimeElapsedColumn(),
+         transient=True,
+     )
+
+     with progress:
+         task = progress.add_task("[green]Training...[/green]", total=total_train_steps)
+
+         train_loss = WandbLoggingTripletLoss(model, task, progress)
+         evaluator = RichLossEvaluator(anchors, positives, negatives, 'val-triplet', task, progress)
+
+         model.fit(
+             train_objectives=[(train_loader, train_loss)],
+             evaluator=evaluator,
+             epochs=num_epochs,
+             warmup_steps=warmup_steps,
+             evaluation_steps=10,
+             output_path=output_path,
+             show_progress_bar=False
+             # <-- no `callback=[]` here
+         )
+         progress.update(task, completed=total_train_steps)
+
+     # ——————— Save & finish W&B ———————
+     model.save(output_path)
+     wandb.save(os.path.join(output_path, "model.safetensors"))  # sentence-transformers v5 saves safetensors weights; "pytorch_model.bin" does not exist here
+     wandb.finish()
+
+     # ——————— Final summary ———————
+     summary = Table(title="Training Summary")
+     summary.add_column("Metric", style="cyan", no_wrap=True)
+     summary.add_column("Value", style="magenta")
+
+     summary.add_row("Model Name", model_name)
+     summary.add_row("Total Triplets", str(len(examples)))
+     summary.add_row("Training Triplets", str(len(train_exs)))
+     summary.add_row("Validation Triplets", str(len(val_exs)))
+     summary.add_row("Epochs", str(num_epochs))
+     summary.add_row("Output Path", output_path)
+
+     console.print(summary)
+     console.print("[bold green]✅ Fine-tuning complete and model saved![/bold green]")
+
+ if __name__ == "__main__":
+     main()
+
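For a sense of scale, using the 254 training triplets and batch_size 16 reported in the run logs below, the warmup arithmetic works out to a single step:

import math
steps_per_epoch = math.ceil(254 / 16)    # 16
total_train_steps = steps_per_epoch * 1  # num_epochs = 1
warmup_steps = int(0.1 * total_train_steps)
print(steps_per_epoch, total_train_steps, warmup_steps)  # 16 16 1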
triplets.jsonl ADDED
The diff for this file is too large to render. See raw diff
 
val_triplets.jsonl ADDED
The diff for this file is too large to render. See raw diff
 
wandb/debug-internal.log ADDED
@@ -0,0 +1 @@
+ run-20250721_103149-rfpbwn0q/logs/debug-internal.log
wandb/debug.log ADDED
@@ -0,0 +1 @@
+ run-20250721_103149-rfpbwn0q/logs/debug.log
wandb/latest-run ADDED
@@ -0,0 +1 @@
+ run-20250721_103149-rfpbwn0q
wandb/run-20250721_101723-fk6s1kw0/files/config.yaml ADDED
@@ -0,0 +1,67 @@
1
+ _wandb:
2
+ value:
3
+ cli_version: 0.21.0
4
+ e:
5
+ sdtpb61ljrmbhztcpg8luvot8jlualwd:
6
+ codePath: train_wandb.py
7
+ codePathLocal: train_wandb.py
8
+ cpu_count: 4
9
+ cpu_count_logical: 8
10
+ cudaVersion: "12.8"
11
+ disk:
12
+ /:
13
+ total: "102888095744"
14
+ used: "82730860544"
15
+ email: tiwaryarun084@gmail.com
16
+ executable: /home/ubuntu/workspace/Arun/.venv/bin/python3
17
+ git:
18
+ commit: a5b24c9d89501e04b565e623ff31263553b43725
19
+ remote: https://huggingface.co/ArunKr/triplet-embed
20
+ gpu: NVIDIA A10G
21
+ gpu_count: 1
22
+ gpu_nvidia:
23
+ - architecture: Ampere
24
+ cudaCores: 10240
25
+ memoryTotal: "24146608128"
26
+ name: NVIDIA A10G
27
+ uuid: GPU-3d2c87f7-e36a-9b7d-7c06-f6e3da2ab5a3
28
+ host: ip-10-159-20-46
29
+ memory:
30
+ total: "33263779840"
31
+ os: Linux-6.8.0-1029-aws-x86_64-with-glibc2.39
32
+ program: /home/ubuntu/workspace/Arun/train/triplet-embed/train_wandb.py
33
+ python: CPython 3.12.3
34
+ root: /home/ubuntu/workspace/Arun/train/triplet-embed
35
+ startedAt: "2025-07-21T10:17:23.729527Z"
36
+ writerId: sdtpb61ljrmbhztcpg8luvot8jlualwd
37
+ m: []
38
+ python_version: 3.12.3
39
+ t:
40
+ "1":
41
+ - 1
42
+ - 5
43
+ - 11
44
+ - 49
45
+ - 51
46
+ - 53
47
+ - 71
48
+ - 75
49
+ "2":
50
+ - 1
51
+ - 5
52
+ - 11
53
+ - 49
54
+ - 51
55
+ - 53
56
+ - 71
57
+ - 75
58
+ "3":
59
+ - 13
60
+ - 16
61
+ "4": 3.12.3
62
+ "5": 0.21.0
63
+ "6": 4.53.2
64
+ "12": 0.21.0
65
+ "13": linux-x86_64
66
+ model:
67
+ value: all-MiniLM-L6-v2
wandb/run-20250721_101723-fk6s1kw0/files/output.log ADDED
@@ -0,0 +1,11 @@
+ Starting training with 254 triplets, validating on 64 triplets.
+
+ ⠋ Training... ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━  0% 0:00:00
+
+ Traceback (most recent call last):
+   File "/home/ubuntu/workspace/Arun/train/triplet-embed/train_wandb.py", line 144, in <module>
+     main()
+   File "/home/ubuntu/workspace/Arun/train/triplet-embed/train_wandb.py", line 120, in main
+     callback=[wandb.integration.sentence_transformers.WandbCallback()]  # add wandb callback here
+     ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+ AttributeError: module 'wandb.integration' has no attribute 'sentence_transformers'
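
The failure above is a missing attribute, not a training bug: the installed wandb 0.21 exposes no `wandb.integration.sentence_transformers` module, so there is no ready-made callback to pass into `model.fit`. The workaround adopted in train_wandb.py is to log metrics manually; a minimal sketch (project name as configured above, loss value illustrative):

import wandb

wandb.init(project="sbert-triplet-finetune")
wandb.log({"train_loss": 0.42})  # train_wandb.py logs the real per-step loss instead
wandb.finish()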
wandb/run-20250721_101723-fk6s1kw0/files/requirements.txt ADDED
@@ -0,0 +1,90 @@
1
+ nvidia-cusparselt-cu12==0.6.3
2
+ python-dateutil==2.9.0.post0
3
+ nvidia-cusolver-cu12==11.7.1.2
4
+ rich==14.0.0
5
+ psutil==7.0.0
6
+ sentence-transformers==5.0.0
7
+ GitPython==3.1.44
8
+ nvidia-nccl-cu12==2.26.2
9
+ pydentic==0.0.1.dev3
10
+ nvidia-cusparse-cu12==12.5.4.2
11
+ scipy==1.16.0
12
+ propcache==0.3.2
13
+ threadpoolctl==3.6.0
14
+ packaging==25.0
15
+ charset-normalizer==3.4.2
16
+ tzdata==2025.2
17
+ click==8.2.1
18
+ fsspec==2025.3.0
19
+ torch==2.7.1
20
+ httpcore==1.0.9
21
+ accelerate==1.9.0
22
+ typing-inspection==0.4.1
23
+ annotated-types==0.7.0
24
+ gitdb==4.0.12
25
+ nvidia-cublas-cu12==12.6.4.1
26
+ sentry-sdk==2.33.0
27
+ tqdm==4.67.1
28
+ nvidia-cuda-cupti-cu12==12.6.80
29
+ transformers==4.53.2
30
+ markdown-it-py==3.0.0
31
+ aiohappyeyeballs==2.6.1
32
+ pandas==2.3.1
33
+ six==1.17.0
34
+ ollama==0.5.1
35
+ Pygments==2.19.2
36
+ triton==3.3.1
37
+ huggingface-hub==0.33.4
38
+ anyio==4.9.0
39
+ certifi==2025.7.14
40
+ numpy==2.3.1
41
+ nvidia-cufile-cu12==1.11.1.6
42
+ networkx==3.5
43
+ yarl==1.20.1
44
+ joblib==1.5.1
45
+ Jinja2==3.1.6
46
+ PyYAML==6.0.2
47
+ nvidia-cudnn-cu12==9.5.1.17
48
+ nvidia-curand-cu12==10.3.7.77
49
+ sympy==1.14.0
50
+ safetensors==0.5.3
51
+ pydantic_core==2.33.2
52
+ mdurl==0.1.2
53
+ setuptools==80.9.0
54
+ nvidia-cuda-runtime-cu12==12.6.77
55
+ frozenlist==1.7.0
56
+ sniffio==1.3.1
57
+ urllib3==2.5.0
58
+ multidict==6.6.3
59
+ datasets==4.0.0
60
+ filelock==3.18.0
61
+ attrs==25.3.0
62
+ idna==3.10
63
+ protobuf==6.31.1
64
+ h11==0.16.0
65
+ MarkupSafe==3.0.2
66
+ typing_extensions==4.14.1
67
+ platformdirs==4.3.8
68
+ tokenizers==0.21.2
69
+ httpx==0.28.1
70
+ pydantic==2.11.7
71
+ requests==2.32.4
72
+ nvidia-nvtx-cu12==12.6.77
73
+ pyarrow==21.0.0
74
+ nvidia-cufft-cu12==11.3.0.4
75
+ xxhash==3.5.0
76
+ smmap==5.0.2
77
+ aiosignal==1.4.0
78
+ mpmath==1.3.0
79
+ hf-xet==1.1.5
80
+ scikit-learn==1.7.1
81
+ pytz==2025.2
82
+ python-stdnum==2.1
83
+ wandb==0.21.0
84
+ multiprocess==0.70.16
85
+ nvidia-nvjitlink-cu12==12.6.85
86
+ dill==0.3.8
87
+ aiohttp==3.12.14
88
+ nvidia-cuda-nvrtc-cu12==12.6.77
89
+ regex==2024.11.6
90
+ pillow==11.3.0
wandb/run-20250721_101723-fk6s1kw0/files/wandb-metadata.json ADDED
@@ -0,0 +1,40 @@
1
+ {
2
+ "os": "Linux-6.8.0-1029-aws-x86_64-with-glibc2.39",
3
+ "python": "CPython 3.12.3",
4
+ "startedAt": "2025-07-21T10:17:23.729527Z",
5
+ "program": "/home/ubuntu/workspace/Arun/train/triplet-embed/train_wandb.py",
6
+ "codePath": "train_wandb.py",
7
+ "codePathLocal": "train_wandb.py",
8
+ "git": {
9
+ "remote": "https://huggingface.co/ArunKr/triplet-embed",
10
+ "commit": "a5b24c9d89501e04b565e623ff31263553b43725"
11
+ },
12
+ "email": "tiwaryarun084@gmail.com",
13
+ "root": "/home/ubuntu/workspace/Arun/train/triplet-embed",
14
+ "host": "ip-10-159-20-46",
15
+ "executable": "/home/ubuntu/workspace/Arun/.venv/bin/python3",
16
+ "cpu_count": 4,
17
+ "cpu_count_logical": 8,
18
+ "gpu": "NVIDIA A10G",
19
+ "gpu_count": 1,
20
+ "disk": {
21
+ "/": {
22
+ "total": "102888095744",
23
+ "used": "82730860544"
24
+ }
25
+ },
26
+ "memory": {
27
+ "total": "33263779840"
28
+ },
29
+ "gpu_nvidia": [
30
+ {
31
+ "name": "NVIDIA A10G",
32
+ "memoryTotal": "24146608128",
33
+ "cudaCores": 10240,
34
+ "architecture": "Ampere",
35
+ "uuid": "GPU-3d2c87f7-e36a-9b7d-7c06-f6e3da2ab5a3"
36
+ }
37
+ ],
38
+ "cudaVersion": "12.8",
39
+ "writerId": "sdtpb61ljrmbhztcpg8luvot8jlualwd"
40
+ }
wandb/run-20250721_101723-fk6s1kw0/files/wandb-summary.json ADDED
@@ -0,0 +1 @@
+ {"_wandb":{"runtime":0},"_runtime":0}
wandb/run-20250721_101723-fk6s1kw0/logs/debug-core.log ADDED
@@ -0,0 +1 @@
+ /home/ubuntu/.cache/wandb/logs/core-debug-20250721_101723.log
wandb/run-20250721_101723-fk6s1kw0/logs/debug-internal.log ADDED
@@ -0,0 +1,12 @@
+ {"time":"2025-07-21T10:17:23.950599446Z","level":"INFO","msg":"stream: starting","core version":"0.21.0"}
+ {"time":"2025-07-21T10:17:24.15250192Z","level":"INFO","msg":"stream: created new stream","id":"fk6s1kw0"}
+ {"time":"2025-07-21T10:17:24.152610482Z","level":"INFO","msg":"stream: started","id":"fk6s1kw0"}
+ {"time":"2025-07-21T10:17:24.152603972Z","level":"INFO","msg":"handler: started","stream_id":"fk6s1kw0"}
+ {"time":"2025-07-21T10:17:24.152682114Z","level":"INFO","msg":"writer: Do: started","stream_id":"fk6s1kw0"}
+ {"time":"2025-07-21T10:17:24.152686354Z","level":"INFO","msg":"sender: started","stream_id":"fk6s1kw0"}
+ {"time":"2025-07-21T10:17:24.56648588Z","level":"INFO","msg":"stream: closing","id":"fk6s1kw0"}
+ {"time":"2025-07-21T10:17:25.011424027Z","level":"INFO","msg":"fileTransfer: Close: file transfer manager closed"}
+ {"time":"2025-07-21T10:17:25.172863137Z","level":"INFO","msg":"handler: closed","stream_id":"fk6s1kw0"}
+ {"time":"2025-07-21T10:17:25.172883967Z","level":"INFO","msg":"writer: Close: closed","stream_id":"fk6s1kw0"}
+ {"time":"2025-07-21T10:17:25.172924918Z","level":"INFO","msg":"sender: closed","stream_id":"fk6s1kw0"}
+ {"time":"2025-07-21T10:17:25.172933578Z","level":"INFO","msg":"stream: closed","id":"fk6s1kw0"}
wandb/run-20250721_101723-fk6s1kw0/logs/debug.log ADDED
@@ -0,0 +1,22 @@
1
+ 2025-07-21 10:17:23,730 INFO MainThread:3177270 [wandb_setup.py:_flush():80] Current SDK version is 0.21.0
2
+ 2025-07-21 10:17:23,730 INFO MainThread:3177270 [wandb_setup.py:_flush():80] Configure stats pid to 3177270
3
+ 2025-07-21 10:17:23,730 INFO MainThread:3177270 [wandb_setup.py:_flush():80] Loading settings from /home/ubuntu/.config/wandb/settings
4
+ 2025-07-21 10:17:23,730 INFO MainThread:3177270 [wandb_setup.py:_flush():80] Loading settings from /home/ubuntu/workspace/Arun/train/triplet-embed/wandb/settings
5
+ 2025-07-21 10:17:23,730 INFO MainThread:3177270 [wandb_setup.py:_flush():80] Loading settings from environment variables
6
+ 2025-07-21 10:17:23,730 INFO MainThread:3177270 [wandb_init.py:setup_run_log_directory():703] Logging user logs to /home/ubuntu/workspace/Arun/train/triplet-embed/wandb/run-20250721_101723-fk6s1kw0/logs/debug.log
7
+ 2025-07-21 10:17:23,730 INFO MainThread:3177270 [wandb_init.py:setup_run_log_directory():704] Logging internal logs to /home/ubuntu/workspace/Arun/train/triplet-embed/wandb/run-20250721_101723-fk6s1kw0/logs/debug-internal.log
8
+ 2025-07-21 10:17:23,730 INFO MainThread:3177270 [wandb_init.py:init():830] calling init triggers
9
+ 2025-07-21 10:17:23,730 INFO MainThread:3177270 [wandb_init.py:init():835] wandb.init called with sweep_config: {}
10
+ config: {'model': 'all-MiniLM-L6-v2', '_wandb': {}}
11
+ 2025-07-21 10:17:23,730 INFO MainThread:3177270 [wandb_init.py:init():871] starting backend
12
+ 2025-07-21 10:17:23,942 INFO MainThread:3177270 [wandb_init.py:init():874] sending inform_init request
13
+ 2025-07-21 10:17:23,949 INFO MainThread:3177270 [wandb_init.py:init():882] backend started and connected
14
+ 2025-07-21 10:17:23,950 INFO MainThread:3177270 [wandb_init.py:init():953] updated telemetry
15
+ 2025-07-21 10:17:23,954 INFO MainThread:3177270 [wandb_init.py:init():977] communicating run to backend with 90.0 second timeout
16
+ 2025-07-21 10:17:24,385 INFO MainThread:3177270 [wandb_init.py:init():1029] starting run threads in backend
17
+ 2025-07-21 10:17:24,461 INFO MainThread:3177270 [wandb_run.py:_console_start():2458] atexit reg
18
+ 2025-07-21 10:17:24,461 INFO MainThread:3177270 [wandb_run.py:_redirect():2306] redirect: wrap_raw
19
+ 2025-07-21 10:17:24,461 INFO MainThread:3177270 [wandb_run.py:_redirect():2375] Wrapping output streams.
20
+ 2025-07-21 10:17:24,462 INFO MainThread:3177270 [wandb_run.py:_redirect():2398] Redirects installed.
21
+ 2025-07-21 10:17:24,466 INFO MainThread:3177270 [wandb_init.py:init():1075] run started, returning control to user process
22
+ 2025-07-21 10:17:24,565 INFO MsgRouterThr:3177270 [mailbox.py:close():129] [no run ID] Closing mailbox, abandoning 1 handles.
wandb/run-20250721_101723-fk6s1kw0/run-fk6s1kw0.wandb ADDED
Binary file (2.94 kB). View file
 
wandb/run-20250721_102226-0xzjnslp/files/config.yaml ADDED
@@ -0,0 +1,355 @@
1
+ _wandb:
2
+ value:
3
+ cli_version: 0.21.0
4
+ e:
5
+ qqn8a454u2ivkbk8rg6g90qdrex62wlh:
6
+ codePath: train_wandb.py
7
+ codePathLocal: train_wandb.py
8
+ cpu_count: 4
9
+ cpu_count_logical: 8
10
+ cudaVersion: "12.8"
11
+ disk:
12
+ /:
13
+ total: "102888095744"
14
+ used: "82730958848"
15
+ email: tiwaryarun084@gmail.com
16
+ executable: /home/ubuntu/workspace/Arun/.venv/bin/python3
17
+ git:
18
+ commit: a5b24c9d89501e04b565e623ff31263553b43725
19
+ remote: https://huggingface.co/ArunKr/triplet-embed
20
+ gpu: NVIDIA A10G
21
+ gpu_count: 1
22
+ gpu_nvidia:
23
+ - architecture: Ampere
24
+ cudaCores: 10240
25
+ memoryTotal: "24146608128"
26
+ name: NVIDIA A10G
27
+ uuid: GPU-3d2c87f7-e36a-9b7d-7c06-f6e3da2ab5a3
28
+ host: ip-10-159-20-46
29
+ memory:
30
+ total: "33263779840"
31
+ os: Linux-6.8.0-1029-aws-x86_64-with-glibc2.39
32
+ program: /home/ubuntu/workspace/Arun/train/triplet-embed/train_wandb.py
33
+ python: CPython 3.12.3
34
+ root: /home/ubuntu/workspace/Arun/train/triplet-embed
35
+ startedAt: "2025-07-21T10:22:26.804332Z"
36
+ writerId: qqn8a454u2ivkbk8rg6g90qdrex62wlh
37
+ m:
38
+ - "1": train/global_step
39
+ "6":
40
+ - 3
41
+ "7": []
42
+ - "2": '*'
43
+ "5": 1
44
+ "6":
45
+ - 1
46
+ "7": []
47
+ python_version: 3.12.3
48
+ t:
49
+ "1":
50
+ - 1
51
+ - 5
52
+ - 11
53
+ - 49
54
+ - 51
55
+ - 53
56
+ - 71
57
+ - 75
58
+ "2":
59
+ - 1
60
+ - 5
61
+ - 11
62
+ - 49
63
+ - 51
64
+ - 53
65
+ - 71
66
+ - 75
67
+ "3":
68
+ - 7
69
+ - 13
70
+ - 16
71
+ - 66
72
+ "4": 3.12.3
73
+ "5": 0.21.0
74
+ "6": 4.53.2
75
+ "9":
76
+ "1": transformers_trainer
77
+ "12": 0.21.0
78
+ "13": linux-x86_64
79
+ accelerator_config:
80
+ value:
81
+ dispatch_batches: null
82
+ even_batches: true
83
+ gradient_accumulation_kwargs: null
84
+ non_blocking: false
85
+ split_batches: false
86
+ use_seedable_sampler: true
87
+ adafactor:
88
+ value: false
89
+ adam_beta1:
90
+ value: 0.9
91
+ adam_beta2:
92
+ value: 0.999
93
+ adam_epsilon:
94
+ value: 1e-08
95
+ auto_find_batch_size:
96
+ value: false
97
+ average_tokens_across_devices:
98
+ value: false
99
+ batch_eval_metrics:
100
+ value: false
101
+ batch_sampler:
102
+ value: batch_sampler
103
+ bf16:
104
+ value: false
105
+ bf16_full_eval:
106
+ value: false
107
+ data_seed:
108
+ value: null
109
+ dataloader_drop_last:
110
+ value: false
111
+ dataloader_num_workers:
112
+ value: 0
113
+ dataloader_persistent_workers:
114
+ value: false
115
+ dataloader_pin_memory:
116
+ value: true
117
+ dataloader_prefetch_factor:
118
+ value: null
119
+ ddp_backend:
120
+ value: null
121
+ ddp_broadcast_buffers:
122
+ value: false
123
+ ddp_bucket_cap_mb:
124
+ value: null
125
+ ddp_find_unused_parameters:
126
+ value: null
127
+ ddp_timeout:
128
+ value: 1800
129
+ debug:
130
+ value: []
131
+ deepspeed:
132
+ value: null
133
+ disable_tqdm:
134
+ value: true
135
+ do_eval:
136
+ value: true
137
+ do_predict:
138
+ value: false
139
+ do_train:
140
+ value: false
141
+ eval_accumulation_steps:
142
+ value: null
143
+ eval_delay:
144
+ value: 0
145
+ eval_do_concat_batches:
146
+ value: true
147
+ eval_on_start:
148
+ value: false
149
+ eval_steps:
150
+ value: 10
151
+ eval_strategy:
152
+ value: steps
153
+ eval_use_gather_object:
154
+ value: false
155
+ fp16:
156
+ value: false
157
+ fp16_backend:
158
+ value: auto
159
+ fp16_full_eval:
160
+ value: false
161
+ fp16_opt_level:
162
+ value: O1
163
+ fsdp:
164
+ value: []
165
+ fsdp_config:
166
+ value:
167
+ min_num_params: 0
168
+ xla: false
169
+ xla_fsdp_grad_ckpt: false
170
+ xla_fsdp_v2: false
171
+ fsdp_min_num_params:
172
+ value: 0
173
+ fsdp_transformer_layer_cls_to_wrap:
174
+ value: null
175
+ full_determinism:
176
+ value: false
177
+ gradient_accumulation_steps:
178
+ value: 1
179
+ gradient_checkpointing:
180
+ value: false
181
+ gradient_checkpointing_kwargs:
182
+ value: null
183
+ greater_is_better:
184
+ value: null
185
+ group_by_length:
186
+ value: false
187
+ half_precision_backend:
188
+ value: auto
189
+ hub_always_push:
190
+ value: false
191
+ hub_model_id:
192
+ value: null
193
+ hub_private_repo:
194
+ value: null
195
+ hub_revision:
196
+ value: null
197
+ hub_strategy:
198
+ value: every_save
199
+ hub_token:
200
+ value: <HUB_TOKEN>
201
+ ignore_data_skip:
202
+ value: false
203
+ include_for_metrics:
204
+ value: []
205
+ include_inputs_for_metrics:
206
+ value: false
207
+ include_num_input_tokens_seen:
208
+ value: false
209
+ include_tokens_per_second:
210
+ value: false
211
+ jit_mode_eval:
212
+ value: false
213
+ label_names:
214
+ value: null
215
+ label_smoothing_factor:
216
+ value: 0
217
+ learning_rate:
218
+ value: 5e-05
219
+ length_column_name:
220
+ value: length
221
+ liger_kernel_config:
222
+ value: null
223
+ load_best_model_at_end:
224
+ value: false
225
+ local_rank:
226
+ value: 0
227
+ log_level:
228
+ value: passive
229
+ log_level_replica:
230
+ value: warning
231
+ log_on_each_node:
232
+ value: true
233
+ logging_dir:
234
+ value: checkpoints/model_4/runs/Jul21_10-22-27_ip-10-159-20-46
235
+ logging_first_step:
236
+ value: false
237
+ logging_nan_inf_filter:
238
+ value: true
239
+ logging_steps:
240
+ value: 500
241
+ logging_strategy:
242
+ value: steps
243
+ lr_scheduler_type:
244
+ value: linear
245
+ max_grad_norm:
246
+ value: 1
247
+ max_steps:
248
+ value: -1
249
+ metric_for_best_model:
250
+ value: null
251
+ model:
252
+ value: all-MiniLM-L6-v2
253
+ mp_parameters:
254
+ value: ""
255
+ multi_dataset_batch_sampler:
256
+ value: round_robin
257
+ neftune_noise_alpha:
258
+ value: null
259
+ no_cuda:
260
+ value: false
261
+ num_train_epochs:
262
+ value: 1
263
+ optim:
264
+ value: adamw_torch
265
+ optim_args:
266
+ value: null
267
+ optim_target_modules:
268
+ value: null
269
+ output_dir:
270
+ value: checkpoints/model_4
271
+ overwrite_output_dir:
272
+ value: false
273
+ past_index:
274
+ value: -1
275
+ per_device_eval_batch_size:
276
+ value: 16
277
+ per_device_train_batch_size:
278
+ value: 16
279
+ per_gpu_eval_batch_size:
280
+ value: null
281
+ per_gpu_train_batch_size:
282
+ value: null
283
+ prediction_loss_only:
284
+ value: true
285
+ prompts:
286
+ value: null
287
+ push_to_hub:
288
+ value: false
289
+ push_to_hub_model_id:
290
+ value: null
291
+ push_to_hub_organization:
292
+ value: null
293
+ push_to_hub_token:
294
+ value: <PUSH_TO_HUB_TOKEN>
295
+ ray_scope:
296
+ value: last
297
+ remove_unused_columns:
298
+ value: true
299
+ report_to:
300
+ value:
301
+ - wandb
302
+ restore_callback_states_from_checkpoint:
303
+ value: false
304
+ resume_from_checkpoint:
305
+ value: null
306
+ run_name:
307
+ value: checkpoints/model_4
308
+ save_on_each_node:
309
+ value: false
310
+ save_only_model:
311
+ value: false
312
+ save_safetensors:
313
+ value: true
314
+ save_steps:
315
+ value: 500
316
+ save_strategy:
317
+ value: "no"
318
+ save_total_limit:
319
+ value: 0
320
+ seed:
321
+ value: 42
322
+ skip_memory_metrics:
323
+ value: true
324
+ tf32:
325
+ value: null
326
+ torch_compile:
327
+ value: false
328
+ torch_compile_backend:
329
+ value: null
330
+ torch_compile_mode:
331
+ value: null
332
+ torch_empty_cache_steps:
333
+ value: null
334
+ torchdynamo:
335
+ value: null
336
+ tpu_metrics_debug:
337
+ value: false
338
+ tpu_num_cores:
339
+ value: null
340
+ use_cpu:
341
+ value: false
342
+ use_ipex:
343
+ value: false
344
+ use_legacy_prediction_loop:
345
+ value: false
346
+ use_liger_kernel:
347
+ value: false
348
+ use_mps_device:
349
+ value: false
350
+ warmup_ratio:
351
+ value: 0
352
+ warmup_steps:
353
+ value: 0
354
+ weight_decay:
355
+ value: 0
wandb/run-20250721_102226-0xzjnslp/files/output.log ADDED
@@ -0,0 +1,35 @@
+ Starting training with 254 triplets, validating on 64 triplets.
+
+ Computing widget examples: 0%| | 0/1 [00:00<?, ?example/s]
+
+ wandb: WARNING The `run_name` is currently set to the same value as `TrainingArguments.output_dir`. If
+ this was not intended, please specify a different run name by setting the `TrainingArguments.run_name`
+ parameter.
+ ⠧ Training... ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━  0% 0:00:00
+
+ Traceback (most recent call last):
+   File "/home/ubuntu/workspace/Arun/train/triplet-embed/train_wandb.py", line 156, in <module>
+     main()
+   File "/home/ubuntu/workspace/Arun/train/triplet-embed/train_wandb.py", line 126, in main
+     model.fit(
+   File "/home/ubuntu/workspace/Arun/.venv/lib/python3.12/site-packages/sentence_transformers/fit_mixin.py", line 408, in fit
+     trainer.train(resume_from_checkpoint=resume_from_checkpoint)
+   File "/home/ubuntu/workspace/Arun/.venv/lib/python3.12/site-packages/transformers/trainer.py", line 2206, in train
+     return inner_training_loop(
+            ^^^^^^^^^^^^^^^^^^^^
+   File "/home/ubuntu/workspace/Arun/.venv/lib/python3.12/site-packages/transformers/trainer.py", line 2548, in _inner_training_loop
+     tr_loss_step = self.training_step(model, inputs, num_items_in_batch)
+                    ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+   File "/home/ubuntu/workspace/Arun/.venv/lib/python3.12/site-packages/transformers/trainer.py", line 3749, in training_step
+     loss = self.compute_loss(model, inputs, num_items_in_batch=num_items_in_batch)
+            ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+   File "/home/ubuntu/workspace/Arun/.venv/lib/python3.12/site-packages/sentence_transformers/trainer.py", line 420, in compute_loss
+     loss = loss_fn(features, labels)
+            ^^^^^^^^^^^^^^^^^^^^^^^^^
+   File "/home/ubuntu/workspace/Arun/.venv/lib/python3.12/site-packages/torch/nn/modules/module.py", line 1751, in _wrapped_call_impl
+     return self._call_impl(*args, **kwargs)
+            ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+   File "/home/ubuntu/workspace/Arun/.venv/lib/python3.12/site-packages/torch/nn/modules/module.py", line 1762, in _call_impl
+     return forward_call(*args, **kwargs)
+            ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+ TypeError: WandbLoggingTripletLoss.forward() missing 1 required positional argument: 'negative'
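
This TypeError pins down the fix seen in the committed train_wandb.py: the version of WandbLoggingTripletLoss running here evidently declared forward(anchor, positive, negative), while the traceback shows that sentence_transformers' trainer calls the loss as loss_fn(features, labels). Any logging wrapper therefore has to keep the parent's two-argument signature, as a sketch:

# inside a TripletLoss subclass; matches the trainer's calling convention above
def forward(self, sentence_features, labels):
    loss = super().forward(sentence_features, labels)
    wandb.log({"train_loss": loss.item()})
    return loss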
wandb/run-20250721_102226-0xzjnslp/files/requirements.txt ADDED
@@ -0,0 +1,90 @@
1
+ nvidia-cusparselt-cu12==0.6.3
2
+ python-dateutil==2.9.0.post0
3
+ nvidia-cusolver-cu12==11.7.1.2
4
+ rich==14.0.0
5
+ psutil==7.0.0
6
+ sentence-transformers==5.0.0
7
+ GitPython==3.1.44
8
+ nvidia-nccl-cu12==2.26.2
9
+ pydentic==0.0.1.dev3
10
+ nvidia-cusparse-cu12==12.5.4.2
11
+ scipy==1.16.0
12
+ propcache==0.3.2
13
+ threadpoolctl==3.6.0
14
+ packaging==25.0
15
+ charset-normalizer==3.4.2
16
+ tzdata==2025.2
17
+ click==8.2.1
18
+ fsspec==2025.3.0
19
+ torch==2.7.1
20
+ httpcore==1.0.9
21
+ accelerate==1.9.0
22
+ typing-inspection==0.4.1
23
+ annotated-types==0.7.0
24
+ gitdb==4.0.12
25
+ nvidia-cublas-cu12==12.6.4.1
26
+ sentry-sdk==2.33.0
27
+ tqdm==4.67.1
28
+ nvidia-cuda-cupti-cu12==12.6.80
29
+ transformers==4.53.2
30
+ markdown-it-py==3.0.0
31
+ aiohappyeyeballs==2.6.1
32
+ pandas==2.3.1
33
+ six==1.17.0
34
+ ollama==0.5.1
35
+ Pygments==2.19.2
36
+ triton==3.3.1
37
+ huggingface-hub==0.33.4
38
+ anyio==4.9.0
39
+ certifi==2025.7.14
40
+ numpy==2.3.1
41
+ nvidia-cufile-cu12==1.11.1.6
42
+ networkx==3.5
43
+ yarl==1.20.1
44
+ joblib==1.5.1
45
+ Jinja2==3.1.6
46
+ PyYAML==6.0.2
47
+ nvidia-cudnn-cu12==9.5.1.17
48
+ nvidia-curand-cu12==10.3.7.77
49
+ sympy==1.14.0
50
+ safetensors==0.5.3
51
+ pydantic_core==2.33.2
52
+ mdurl==0.1.2
53
+ setuptools==80.9.0
54
+ nvidia-cuda-runtime-cu12==12.6.77
55
+ frozenlist==1.7.0
56
+ sniffio==1.3.1
57
+ urllib3==2.5.0
58
+ multidict==6.6.3
59
+ datasets==4.0.0
60
+ filelock==3.18.0
61
+ attrs==25.3.0
62
+ idna==3.10
63
+ protobuf==6.31.1
64
+ h11==0.16.0
65
+ MarkupSafe==3.0.2
66
+ typing_extensions==4.14.1
67
+ platformdirs==4.3.8
68
+ tokenizers==0.21.2
69
+ httpx==0.28.1
70
+ pydantic==2.11.7
71
+ requests==2.32.4
72
+ nvidia-nvtx-cu12==12.6.77
73
+ pyarrow==21.0.0
74
+ nvidia-cufft-cu12==11.3.0.4
75
+ xxhash==3.5.0
76
+ smmap==5.0.2
77
+ aiosignal==1.4.0
78
+ mpmath==1.3.0
79
+ hf-xet==1.1.5
80
+ scikit-learn==1.7.1
81
+ pytz==2025.2
82
+ python-stdnum==2.1
83
+ wandb==0.21.0
84
+ multiprocess==0.70.16
85
+ nvidia-nvjitlink-cu12==12.6.85
86
+ dill==0.3.8
87
+ aiohttp==3.12.14
88
+ nvidia-cuda-nvrtc-cu12==12.6.77
89
+ regex==2024.11.6
90
+ pillow==11.3.0
wandb/run-20250721_102226-0xzjnslp/files/wandb-metadata.json ADDED
@@ -0,0 +1,40 @@
1
+ {
2
+ "os": "Linux-6.8.0-1029-aws-x86_64-with-glibc2.39",
3
+ "python": "CPython 3.12.3",
4
+ "startedAt": "2025-07-21T10:22:26.804332Z",
5
+ "program": "/home/ubuntu/workspace/Arun/train/triplet-embed/train_wandb.py",
6
+ "codePath": "train_wandb.py",
7
+ "codePathLocal": "train_wandb.py",
8
+ "git": {
9
+ "remote": "https://huggingface.co/ArunKr/triplet-embed",
10
+ "commit": "a5b24c9d89501e04b565e623ff31263553b43725"
11
+ },
12
+ "email": "tiwaryarun084@gmail.com",
13
+ "root": "/home/ubuntu/workspace/Arun/train/triplet-embed",
14
+ "host": "ip-10-159-20-46",
15
+ "executable": "/home/ubuntu/workspace/Arun/.venv/bin/python3",
16
+ "cpu_count": 4,
17
+ "cpu_count_logical": 8,
18
+ "gpu": "NVIDIA A10G",
19
+ "gpu_count": 1,
20
+ "disk": {
21
+ "/": {
22
+ "total": "102888095744",
23
+ "used": "82730958848"
24
+ }
25
+ },
26
+ "memory": {
27
+ "total": "33263779840"
28
+ },
29
+ "gpu_nvidia": [
30
+ {
31
+ "name": "NVIDIA A10G",
32
+ "memoryTotal": "24146608128",
33
+ "cudaCores": 10240,
34
+ "architecture": "Ampere",
35
+ "uuid": "GPU-3d2c87f7-e36a-9b7d-7c06-f6e3da2ab5a3"
36
+ }
37
+ ],
38
+ "cudaVersion": "12.8",
39
+ "writerId": "qqn8a454u2ivkbk8rg6g90qdrex62wlh"
40
+ }
wandb/run-20250721_102226-0xzjnslp/files/wandb-summary.json ADDED
@@ -0,0 +1 @@
+ {"_wandb":{"runtime":1},"_runtime":1}
wandb/run-20250721_102226-0xzjnslp/logs/debug-core.log ADDED
@@ -0,0 +1 @@
+ /home/ubuntu/.cache/wandb/logs/core-debug-20250721_102226.log
wandb/run-20250721_102226-0xzjnslp/logs/debug-internal.log ADDED
@@ -0,0 +1,12 @@
+ {"time":"2025-07-21T10:22:27.016044704Z","level":"INFO","msg":"stream: starting","core version":"0.21.0"}
+ {"time":"2025-07-21T10:22:27.216542143Z","level":"INFO","msg":"stream: created new stream","id":"0xzjnslp"}
+ {"time":"2025-07-21T10:22:27.216575034Z","level":"INFO","msg":"stream: started","id":"0xzjnslp"}
+ {"time":"2025-07-21T10:22:27.216595785Z","level":"INFO","msg":"writer: Do: started","stream_id":"0xzjnslp"}
+ {"time":"2025-07-21T10:22:27.216614375Z","level":"INFO","msg":"handler: started","stream_id":"0xzjnslp"}
+ {"time":"2025-07-21T10:22:27.216643436Z","level":"INFO","msg":"sender: started","stream_id":"0xzjnslp"}
+ {"time":"2025-07-21T10:22:28.609618191Z","level":"INFO","msg":"stream: closing","id":"0xzjnslp"}
+ {"time":"2025-07-21T10:22:28.936506126Z","level":"INFO","msg":"fileTransfer: Close: file transfer manager closed"}
+ {"time":"2025-07-21T10:22:29.226468643Z","level":"INFO","msg":"handler: closed","stream_id":"0xzjnslp"}
+ {"time":"2025-07-21T10:22:29.226502843Z","level":"INFO","msg":"writer: Close: closed","stream_id":"0xzjnslp"}
+ {"time":"2025-07-21T10:22:29.226548324Z","level":"INFO","msg":"sender: closed","stream_id":"0xzjnslp"}
+ {"time":"2025-07-21T10:22:29.226560455Z","level":"INFO","msg":"stream: closed","id":"0xzjnslp"}
wandb/run-20250721_102226-0xzjnslp/logs/debug.log ADDED
@@ -0,0 +1,23 @@
1
+ 2025-07-21 10:22:26,805 INFO MainThread:3178673 [wandb_setup.py:_flush():80] Current SDK version is 0.21.0
2
+ 2025-07-21 10:22:26,805 INFO MainThread:3178673 [wandb_setup.py:_flush():80] Configure stats pid to 3178673
3
+ 2025-07-21 10:22:26,805 INFO MainThread:3178673 [wandb_setup.py:_flush():80] Loading settings from /home/ubuntu/.config/wandb/settings
4
+ 2025-07-21 10:22:26,805 INFO MainThread:3178673 [wandb_setup.py:_flush():80] Loading settings from /home/ubuntu/workspace/Arun/train/triplet-embed/wandb/settings
5
+ 2025-07-21 10:22:26,805 INFO MainThread:3178673 [wandb_setup.py:_flush():80] Loading settings from environment variables
6
+ 2025-07-21 10:22:26,805 INFO MainThread:3178673 [wandb_init.py:setup_run_log_directory():703] Logging user logs to /home/ubuntu/workspace/Arun/train/triplet-embed/wandb/run-20250721_102226-0xzjnslp/logs/debug.log
7
+ 2025-07-21 10:22:26,805 INFO MainThread:3178673 [wandb_init.py:setup_run_log_directory():704] Logging internal logs to /home/ubuntu/workspace/Arun/train/triplet-embed/wandb/run-20250721_102226-0xzjnslp/logs/debug-internal.log
8
+ 2025-07-21 10:22:26,805 INFO MainThread:3178673 [wandb_init.py:init():830] calling init triggers
9
+ 2025-07-21 10:22:26,805 INFO MainThread:3178673 [wandb_init.py:init():835] wandb.init called with sweep_config: {}
10
+ config: {'model': 'all-MiniLM-L6-v2', '_wandb': {}}
11
+ 2025-07-21 10:22:26,805 INFO MainThread:3178673 [wandb_init.py:init():871] starting backend
12
+ 2025-07-21 10:22:27,010 INFO MainThread:3178673 [wandb_init.py:init():874] sending inform_init request
13
+ 2025-07-21 10:22:27,013 INFO MainThread:3178673 [wandb_init.py:init():882] backend started and connected
14
+ 2025-07-21 10:22:27,014 INFO MainThread:3178673 [wandb_init.py:init():953] updated telemetry
15
+ 2025-07-21 10:22:27,018 INFO MainThread:3178673 [wandb_init.py:init():977] communicating run to backend with 90.0 second timeout
16
+ 2025-07-21 10:22:27,445 INFO MainThread:3178673 [wandb_init.py:init():1029] starting run threads in backend
17
+ 2025-07-21 10:22:27,519 INFO MainThread:3178673 [wandb_run.py:_console_start():2458] atexit reg
18
+ 2025-07-21 10:22:27,519 INFO MainThread:3178673 [wandb_run.py:_redirect():2306] redirect: wrap_raw
19
+ 2025-07-21 10:22:27,519 INFO MainThread:3178673 [wandb_run.py:_redirect():2375] Wrapping output streams.
20
+ 2025-07-21 10:22:27,519 INFO MainThread:3178673 [wandb_run.py:_redirect():2398] Redirects installed.
21
+ 2025-07-21 10:22:27,521 INFO MainThread:3178673 [wandb_init.py:init():1075] run started, returning control to user process
22
+ 2025-07-21 10:22:28,060 INFO MainThread:3178673 [wandb_run.py:_config_callback():1363] config_cb None None {'output_dir': 'checkpoints/model_4', 'overwrite_output_dir': False, 'do_train': False, 'do_eval': True, 'do_predict': False, 'eval_strategy': 'steps', 'prediction_loss_only': True, 'per_device_train_batch_size': 16, 'per_device_eval_batch_size': 16, 'per_gpu_train_batch_size': None, 'per_gpu_eval_batch_size': None, 'gradient_accumulation_steps': 1, 'eval_accumulation_steps': None, 'eval_delay': 0, 'torch_empty_cache_steps': None, 'learning_rate': 5e-05, 'weight_decay': 0.0, 'adam_beta1': 0.9, 'adam_beta2': 0.999, 'adam_epsilon': 1e-08, 'max_grad_norm': 1, 'num_train_epochs': 1, 'max_steps': -1, 'lr_scheduler_type': 'linear', 'lr_scheduler_kwargs': {}, 'warmup_ratio': 0.0, 'warmup_steps': 0, 'log_level': 'passive', 'log_level_replica': 'warning', 'log_on_each_node': True, 'logging_dir': 'checkpoints/model_4/runs/Jul21_10-22-27_ip-10-159-20-46', 'logging_strategy': 'steps', 'logging_first_step': False, 'logging_steps': 500, 'logging_nan_inf_filter': True, 'save_strategy': 'no', 'save_steps': 500, 'save_total_limit': 0, 'save_safetensors': True, 'save_on_each_node': False, 'save_only_model': False, 'restore_callback_states_from_checkpoint': False, 'no_cuda': False, 'use_cpu': False, 'use_mps_device': False, 'seed': 42, 'data_seed': None, 'jit_mode_eval': False, 'use_ipex': False, 'bf16': False, 'fp16': False, 'fp16_opt_level': 'O1', 'half_precision_backend': 'auto', 'bf16_full_eval': False, 'fp16_full_eval': False, 'tf32': None, 'local_rank': 0, 'ddp_backend': None, 'tpu_num_cores': None, 'tpu_metrics_debug': False, 'debug': [], 'dataloader_drop_last': False, 'eval_steps': 10, 'dataloader_num_workers': 0, 'dataloader_prefetch_factor': None, 'past_index': -1, 'run_name': 'checkpoints/model_4', 'disable_tqdm': True, 'remove_unused_columns': True, 'label_names': None, 'load_best_model_at_end': False, 'metric_for_best_model': None, 'greater_is_better': None, 'ignore_data_skip': False, 'fsdp': [], 'fsdp_min_num_params': 0, 'fsdp_config': {'min_num_params': 0, 'xla': False, 'xla_fsdp_v2': False, 'xla_fsdp_grad_ckpt': False}, 'fsdp_transformer_layer_cls_to_wrap': None, 'accelerator_config': {'split_batches': False, 'dispatch_batches': None, 'even_batches': True, 'use_seedable_sampler': True, 'non_blocking': False, 'gradient_accumulation_kwargs': None}, 'deepspeed': None, 'label_smoothing_factor': 0.0, 'optim': 'adamw_torch', 'optim_args': None, 'adafactor': False, 'group_by_length': False, 'length_column_name': 'length', 'report_to': ['wandb'], 'ddp_find_unused_parameters': None, 'ddp_bucket_cap_mb': None, 'ddp_broadcast_buffers': False, 'dataloader_pin_memory': True, 'dataloader_persistent_workers': False, 'skip_memory_metrics': True, 'use_legacy_prediction_loop': False, 'push_to_hub': False, 'resume_from_checkpoint': None, 'hub_model_id': None, 'hub_strategy': 'every_save', 'hub_token': '<HUB_TOKEN>', 'hub_private_repo': None, 'hub_always_push': False, 'hub_revision': None, 'gradient_checkpointing': False, 'gradient_checkpointing_kwargs': None, 'include_inputs_for_metrics': False, 'include_for_metrics': [], 'eval_do_concat_batches': True, 'fp16_backend': 'auto', 'push_to_hub_model_id': None, 'push_to_hub_organization': None, 'push_to_hub_token': '<PUSH_TO_HUB_TOKEN>', 'mp_parameters': '', 'auto_find_batch_size': False, 'full_determinism': False, 'torchdynamo': None, 'ray_scope': 'last', 'ddp_timeout': 1800, 'torch_compile': False, 'torch_compile_backend': None, 'torch_compile_mode': None, 
'include_tokens_per_second': False, 'include_num_input_tokens_seen': False, 'neftune_noise_alpha': None, 'optim_target_modules': None, 'batch_eval_metrics': False, 'eval_on_start': False, 'use_liger_kernel': False, 'liger_kernel_config': None, 'eval_use_gather_object': False, 'average_tokens_across_devices': False, 'prompts': None, 'batch_sampler': 'batch_sampler', 'multi_dataset_batch_sampler': 'round_robin', 'router_mapping': {}, 'learning_rate_mapping': {}}
23
+ 2025-07-21 10:22:28,609 INFO MsgRouterThr:3178673 [mailbox.py:close():129] [no run ID] Closing mailbox, abandoning 1 handles.
wandb/run-20250721_102226-0xzjnslp/run-0xzjnslp.wandb ADDED
Binary file (11.9 kB). View file
 
wandb/run-20250721_102442-a02q1hb9/files/config.yaml ADDED
@@ -0,0 +1,359 @@
1
+ _wandb:
2
+ value:
3
+ cli_version: 0.21.0
4
+ e:
5
+ e1ct0j8t8jolhfidccz1hq8kv6qcyuv3:
6
+ codePath: train_wandb.py
7
+ codePathLocal: train_wandb.py
8
+ cpu_count: 4
9
+ cpu_count_logical: 8
10
+ cudaVersion: "12.8"
11
+ disk:
12
+ /:
13
+ total: "102888095744"
14
+ used: "82731044864"
15
+ email: tiwaryarun084@gmail.com
16
+ executable: /home/ubuntu/workspace/Arun/.venv/bin/python3
17
+ git:
18
+ commit: a5b24c9d89501e04b565e623ff31263553b43725
19
+ remote: https://huggingface.co/ArunKr/triplet-embed
20
+ gpu: NVIDIA A10G
21
+ gpu_count: 1
22
+ gpu_nvidia:
23
+ - architecture: Ampere
24
+ cudaCores: 10240
25
+ memoryTotal: "24146608128"
26
+ name: NVIDIA A10G
27
+ uuid: GPU-3d2c87f7-e36a-9b7d-7c06-f6e3da2ab5a3
28
+ host: ip-10-159-20-46
29
+ memory:
30
+ total: "33263779840"
31
+ os: Linux-6.8.0-1029-aws-x86_64-with-glibc2.39
32
+ program: /home/ubuntu/workspace/Arun/train/triplet-embed/train_wandb.py
33
+ python: CPython 3.12.3
34
+ root: /home/ubuntu/workspace/Arun/train/triplet-embed
35
+ startedAt: "2025-07-21T10:24:42.129108Z"
36
+ writerId: e1ct0j8t8jolhfidccz1hq8kv6qcyuv3
37
+ m:
38
+ - "1": train/global_step
+       "6":
+       - 3
+       "7": []
+     - "2": '*'
+       "5": 1
+       "6":
+       - 1
+       "7": []
+     python_version: 3.12.3
+     t:
+       "1":
+       - 1
+       - 5
+       - 11
+       - 49
+       - 51
+       - 53
+       - 71
+       - 75
+       "2":
+       - 1
+       - 5
+       - 11
+       - 49
+       - 51
+       - 53
+       - 71
+       - 75
+       "3":
+       - 7
+       - 13
+       - 16
+       - 66
+       "4": 3.12.3
+       "5": 0.21.0
+       "6": 4.53.2
+       "9":
+         "1": transformers_trainer
+       "12": 0.21.0
+       "13": linux-x86_64
+ accelerator_config:
+   value:
+     dispatch_batches: null
+     even_batches: true
+     gradient_accumulation_kwargs: null
+     non_blocking: false
+     split_batches: false
+     use_seedable_sampler: true
+ adafactor:
+   value: false
+ adam_beta1:
+   value: 0.9
+ adam_beta2:
+   value: 0.999
+ adam_epsilon:
+   value: 1e-08
+ auto_find_batch_size:
+   value: false
+ average_tokens_across_devices:
+   value: false
+ batch_eval_metrics:
+   value: false
+ batch_sampler:
+   value: batch_sampler
+ batch_size:
+   value: 16
+ bf16:
+   value: false
+ bf16_full_eval:
+   value: false
+ data_seed:
+   value: null
+ dataloader_drop_last:
+   value: false
+ dataloader_num_workers:
+   value: 0
+ dataloader_persistent_workers:
+   value: false
+ dataloader_pin_memory:
+   value: true
+ dataloader_prefetch_factor:
+   value: null
+ ddp_backend:
+   value: null
+ ddp_broadcast_buffers:
+   value: false
+ ddp_bucket_cap_mb:
+   value: null
+ ddp_find_unused_parameters:
+   value: null
+ ddp_timeout:
+   value: 1800
+ debug:
+   value: []
+ deepspeed:
+   value: null
+ disable_tqdm:
+   value: true
+ do_eval:
+   value: true
+ do_predict:
+   value: false
+ do_train:
+   value: false
+ epochs:
+   value: 1
+ eval_accumulation_steps:
+   value: null
+ eval_delay:
+   value: 0
+ eval_do_concat_batches:
+   value: true
+ eval_on_start:
+   value: false
+ eval_steps:
+   value: 10
+ eval_strategy:
+   value: steps
+ eval_use_gather_object:
+   value: false
+ fp16:
+   value: false
+ fp16_backend:
+   value: auto
+ fp16_full_eval:
+   value: false
+ fp16_opt_level:
+   value: O1
+ fsdp:
+   value: []
+ fsdp_config:
+   value:
+     min_num_params: 0
+     xla: false
+     xla_fsdp_grad_ckpt: false
+     xla_fsdp_v2: false
+ fsdp_min_num_params:
+   value: 0
+ fsdp_transformer_layer_cls_to_wrap:
+   value: null
+ full_determinism:
+   value: false
+ gradient_accumulation_steps:
+   value: 1
+ gradient_checkpointing:
+   value: false
+ gradient_checkpointing_kwargs:
+   value: null
+ greater_is_better:
+   value: null
+ group_by_length:
+   value: false
+ half_precision_backend:
+   value: auto
+ hub_always_push:
+   value: false
+ hub_model_id:
+   value: null
+ hub_private_repo:
+   value: null
+ hub_revision:
+   value: null
+ hub_strategy:
+   value: every_save
+ hub_token:
+   value: <HUB_TOKEN>
+ ignore_data_skip:
+   value: false
+ include_for_metrics:
+   value: []
+ include_inputs_for_metrics:
+   value: false
+ include_num_input_tokens_seen:
+   value: false
+ include_tokens_per_second:
+   value: false
+ jit_mode_eval:
+   value: false
+ label_names:
+   value: null
+ label_smoothing_factor:
+   value: 0
+ learning_rate:
+   value: 5e-05
+ length_column_name:
+   value: length
+ liger_kernel_config:
+   value: null
+ load_best_model_at_end:
+   value: false
+ local_rank:
+   value: 0
+ log_level:
+   value: passive
+ log_level_replica:
+   value: warning
+ log_on_each_node:
+   value: true
+ logging_dir:
+   value: checkpoints/model_4/runs/Jul21_10-24-42_ip-10-159-20-46
+ logging_first_step:
+   value: false
+ logging_nan_inf_filter:
+   value: true
+ logging_steps:
+   value: 500
+ logging_strategy:
+   value: steps
+ lr_scheduler_type:
+   value: linear
+ max_grad_norm:
+   value: 1
+ max_steps:
+   value: -1
+ metric_for_best_model:
+   value: null
+ model_name:
+   value: all-MiniLM-L6-v2
+ mp_parameters:
+   value: ""
+ multi_dataset_batch_sampler:
+   value: round_robin
+ neftune_noise_alpha:
+   value: null
+ no_cuda:
+   value: false
+ num_train_epochs:
+   value: 1
+ optim:
+   value: adamw_torch
+ optim_args:
+   value: null
+ optim_target_modules:
+   value: null
+ output_dir:
+   value: checkpoints/model_4
+ overwrite_output_dir:
+   value: false
+ past_index:
+   value: -1
+ per_device_eval_batch_size:
+   value: 16
+ per_device_train_batch_size:
+   value: 16
+ per_gpu_eval_batch_size:
+   value: null
+ per_gpu_train_batch_size:
+   value: null
+ prediction_loss_only:
+   value: true
+ prompts:
+   value: null
+ push_to_hub:
+   value: false
+ push_to_hub_model_id:
+   value: null
+ push_to_hub_organization:
+   value: null
+ push_to_hub_token:
+   value: <PUSH_TO_HUB_TOKEN>
+ ray_scope:
+   value: last
+ remove_unused_columns:
+   value: true
+ report_to:
+   value:
+   - wandb
+ restore_callback_states_from_checkpoint:
+   value: false
+ resume_from_checkpoint:
+   value: null
+ run_name:
+   value: checkpoints/model_4
+ save_on_each_node:
+   value: false
+ save_only_model:
+   value: false
+ save_safetensors:
+   value: true
+ save_steps:
+   value: 500
+ save_strategy:
+   value: "no"
+ save_total_limit:
+   value: 0
+ seed:
+   value: 42
+ skip_memory_metrics:
+   value: true
+ tf32:
+   value: null
+ torch_compile:
+   value: false
+ torch_compile_backend:
+   value: null
+ torch_compile_mode:
+   value: null
+ torch_empty_cache_steps:
+   value: null
+ torchdynamo:
+   value: null
+ tpu_metrics_debug:
+   value: false
+ tpu_num_cores:
+   value: null
+ use_cpu:
+   value: false
+ use_ipex:
+   value: false
+ use_legacy_prediction_loop:
+   value: false
+ use_liger_kernel:
+   value: false
+ use_mps_device:
+   value: false
+ warmup_ratio:
+   value: 0
+ warmup_steps:
+   value: 0
+ weight_decay:
+   value: 0
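
Taken together, the config.yaml above pins down the failed `model_4` run: `all-MiniLM-L6-v2` fine-tuned for one epoch at batch size 16 with a 5e-05 learning rate, evaluated every 10 steps, reporting to wandb, and saving no checkpoints (`save_strategy: "no"`). For orientation, here is a minimal sketch of the same settings written against the sentence-transformers v5 trainer API; the triplet file names are taken from this commit's file listing, and the plain `TripletLoss` is an assumption standing in for the repo's custom `WandbLoggingTripletLoss`:

# Sketch only: mirrors the hyperparameters recorded in config.yaml above.
from datasets import load_dataset
from sentence_transformers import (
    SentenceTransformer,
    SentenceTransformerTrainer,
    SentenceTransformerTrainingArguments,
    losses,
)

model = SentenceTransformer("all-MiniLM-L6-v2")
# JSONL files with anchor/positive/negative columns, as listed in this commit.
train_ds = load_dataset("json", data_files="train_triplets.jsonl", split="train")
val_ds = load_dataset("json", data_files="val_triplets.jsonl", split="train")

args = SentenceTransformerTrainingArguments(
    output_dir="checkpoints/model_4",
    num_train_epochs=1,
    per_device_train_batch_size=16,
    per_device_eval_batch_size=16,
    learning_rate=5e-5,
    eval_strategy="steps",
    eval_steps=10,
    save_strategy="no",
    report_to=["wandb"],
    run_name="checkpoints/model_4",  # same as output_dir, hence the wandb warning in the log below
)

trainer = SentenceTransformerTrainer(
    model=model,
    args=args,
    train_dataset=train_ds,
    eval_dataset=val_ds,
    loss=losses.TripletLoss(model),  # assumed in place of WandbLoggingTripletLoss
)
trainer.train()
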
wandb/run-20250721_102442-a02q1hb9/files/output.log ADDED
@@ -0,0 +1,35 @@
+ Starting training with 254 triplets, validating on 64 triplets.
+
+ Computing widget examples: 0%| | 0/1 [00:00<?, ?example/s]
+ wandb: WARNING The `run_name` is currently set to the same value as `TrainingArguments.output_dir`. If
+ this was not intended, please specify a different run name by setting the `TrainingArguments.run_name`
+ parameter.
+ ⠧ Training... ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 0% 0:00:00
+ Traceback (most recent call last):
+   File "/home/ubuntu/workspace/Arun/train/triplet-embed/train_wandb.py", line 185, in <module>
+     main()
+   File "/home/ubuntu/workspace/Arun/train/triplet-embed/train_wandb.py", line 151, in main
+     model.fit(
+   File "/home/ubuntu/workspace/Arun/.venv/lib/python3.12/site-packages/sentence_transformers/fit_mixin.py", line 408, in fit
+     trainer.train(resume_from_checkpoint=resume_from_checkpoint)
+   File "/home/ubuntu/workspace/Arun/.venv/lib/python3.12/site-packages/transformers/trainer.py", line 2206, in train
+     return inner_training_loop(
+            ^^^^^^^^^^^^^^^^^^^^
+   File "/home/ubuntu/workspace/Arun/.venv/lib/python3.12/site-packages/transformers/trainer.py", line 2548, in _inner_training_loop
+     tr_loss_step = self.training_step(model, inputs, num_items_in_batch)
+                    ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+   File "/home/ubuntu/workspace/Arun/.venv/lib/python3.12/site-packages/transformers/trainer.py", line 3749, in training_step
+     loss = self.compute_loss(model, inputs, num_items_in_batch=num_items_in_batch)
+            ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+   File "/home/ubuntu/workspace/Arun/.venv/lib/python3.12/site-packages/sentence_transformers/trainer.py", line 420, in compute_loss
+     loss = loss_fn(features, labels)
+            ^^^^^^^^^^^^^^^^^^^^^^^^^
+   File "/home/ubuntu/workspace/Arun/.venv/lib/python3.12/site-packages/torch/nn/modules/module.py", line 1751, in _wrapped_call_impl
+     return self._call_impl(*args, **kwargs)
+            ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+   File "/home/ubuntu/workspace/Arun/.venv/lib/python3.12/site-packages/torch/nn/modules/module.py", line 1762, in _call_impl
+     return forward_call(*args, **kwargs)
+            ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+ TypeError: WandbLoggingTripletLoss.forward() missing 1 required positional argument: 'negative'
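
The TypeError that ends this log explains why the run lasted only a second (wandb-summary.json below records `_runtime: 1`): as the traceback itself shows, `sentence_transformers/trainer.py` invokes every loss as `loss_fn(features, labels)`, so a custom loss that declares `forward(self, anchor, positive, negative)` receives just two positional arguments and is left missing `negative`. A minimal sketch of a signature-compatible wrapper, assuming (the body of `WandbLoggingTripletLoss` is not part of this diff) that its only job is to log the triplet loss to wandb:

import wandb
from sentence_transformers import losses

class WandbLoggingTripletLoss(losses.TripletLoss):
    # sentence-transformers calls losses as loss_fn(features, labels),
    # so keep that two-argument signature rather than (anchor, positive, negative).
    def forward(self, sentence_features, labels):
        loss = super().forward(sentence_features, labels)  # computes the standard triplet loss
        wandb.log({"train/triplet_loss": loss.item()})     # hypothetical logging hook
        return loss

Instantiated as `WandbLoggingTripletLoss(model)` and passed to `model.fit` unchanged, this preserves the trainer's calling convention while still logging each step's loss.
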
wandb/run-20250721_102442-a02q1hb9/files/requirements.txt ADDED
@@ -0,0 +1,90 @@
+ nvidia-cusparselt-cu12==0.6.3
+ python-dateutil==2.9.0.post0
+ nvidia-cusolver-cu12==11.7.1.2
+ rich==14.0.0
+ psutil==7.0.0
+ sentence-transformers==5.0.0
+ GitPython==3.1.44
+ nvidia-nccl-cu12==2.26.2
+ pydentic==0.0.1.dev3
+ nvidia-cusparse-cu12==12.5.4.2
+ scipy==1.16.0
+ propcache==0.3.2
+ threadpoolctl==3.6.0
+ packaging==25.0
+ charset-normalizer==3.4.2
+ tzdata==2025.2
+ click==8.2.1
+ fsspec==2025.3.0
+ torch==2.7.1
+ httpcore==1.0.9
+ accelerate==1.9.0
+ typing-inspection==0.4.1
+ annotated-types==0.7.0
+ gitdb==4.0.12
+ nvidia-cublas-cu12==12.6.4.1
+ sentry-sdk==2.33.0
+ tqdm==4.67.1
+ nvidia-cuda-cupti-cu12==12.6.80
+ transformers==4.53.2
+ markdown-it-py==3.0.0
+ aiohappyeyeballs==2.6.1
+ pandas==2.3.1
+ six==1.17.0
+ ollama==0.5.1
+ Pygments==2.19.2
+ triton==3.3.1
+ huggingface-hub==0.33.4
+ anyio==4.9.0
+ certifi==2025.7.14
+ numpy==2.3.1
+ nvidia-cufile-cu12==1.11.1.6
+ networkx==3.5
+ yarl==1.20.1
+ joblib==1.5.1
+ Jinja2==3.1.6
+ PyYAML==6.0.2
+ nvidia-cudnn-cu12==9.5.1.17
+ nvidia-curand-cu12==10.3.7.77
+ sympy==1.14.0
+ safetensors==0.5.3
+ pydantic_core==2.33.2
+ mdurl==0.1.2
+ setuptools==80.9.0
+ nvidia-cuda-runtime-cu12==12.6.77
+ frozenlist==1.7.0
+ sniffio==1.3.1
+ urllib3==2.5.0
+ multidict==6.6.3
+ datasets==4.0.0
+ filelock==3.18.0
+ attrs==25.3.0
+ idna==3.10
+ protobuf==6.31.1
+ h11==0.16.0
+ MarkupSafe==3.0.2
+ typing_extensions==4.14.1
+ platformdirs==4.3.8
+ tokenizers==0.21.2
+ httpx==0.28.1
+ pydantic==2.11.7
+ requests==2.32.4
+ nvidia-nvtx-cu12==12.6.77
+ pyarrow==21.0.0
+ nvidia-cufft-cu12==11.3.0.4
+ xxhash==3.5.0
+ smmap==5.0.2
+ aiosignal==1.4.0
+ mpmath==1.3.0
+ hf-xet==1.1.5
+ scikit-learn==1.7.1
+ pytz==2025.2
+ python-stdnum==2.1
+ wandb==0.21.0
+ multiprocess==0.70.16
+ nvidia-nvjitlink-cu12==12.6.85
+ dill==0.3.8
+ aiohttp==3.12.14
+ nvidia-cuda-nvrtc-cu12==12.6.77
+ regex==2024.11.6
+ pillow==11.3.0
wandb/run-20250721_102442-a02q1hb9/files/wandb-metadata.json ADDED
@@ -0,0 +1,40 @@
+ {
+   "os": "Linux-6.8.0-1029-aws-x86_64-with-glibc2.39",
+   "python": "CPython 3.12.3",
+   "startedAt": "2025-07-21T10:24:42.129108Z",
+   "program": "/home/ubuntu/workspace/Arun/train/triplet-embed/train_wandb.py",
+   "codePath": "train_wandb.py",
+   "codePathLocal": "train_wandb.py",
+   "git": {
+     "remote": "https://huggingface.co/ArunKr/triplet-embed",
+     "commit": "a5b24c9d89501e04b565e623ff31263553b43725"
+   },
+   "email": "tiwaryarun084@gmail.com",
+   "root": "/home/ubuntu/workspace/Arun/train/triplet-embed",
+   "host": "ip-10-159-20-46",
+   "executable": "/home/ubuntu/workspace/Arun/.venv/bin/python3",
+   "cpu_count": 4,
+   "cpu_count_logical": 8,
+   "gpu": "NVIDIA A10G",
+   "gpu_count": 1,
+   "disk": {
+     "/": {
+       "total": "102888095744",
+       "used": "82731044864"
+     }
+   },
+   "memory": {
+     "total": "33263779840"
+   },
+   "gpu_nvidia": [
+     {
+       "name": "NVIDIA A10G",
+       "memoryTotal": "24146608128",
+       "cudaCores": 10240,
+       "architecture": "Ampere",
+       "uuid": "GPU-3d2c87f7-e36a-9b7d-7c06-f6e3da2ab5a3"
+     }
+   ],
+   "cudaVersion": "12.8",
+   "writerId": "e1ct0j8t8jolhfidccz1hq8kv6qcyuv3"
+ }
wandb/run-20250721_102442-a02q1hb9/files/wandb-summary.json ADDED
@@ -0,0 +1 @@
+ {"_runtime":1,"_wandb":{"runtime":1}}
wandb/run-20250721_102442-a02q1hb9/logs/debug-core.log ADDED
@@ -0,0 +1 @@
+ /home/ubuntu/.cache/wandb/logs/core-debug-20250721_102442.log
wandb/run-20250721_102442-a02q1hb9/logs/debug-internal.log ADDED
@@ -0,0 +1,12 @@
+ {"time":"2025-07-21T10:24:42.342193792Z","level":"INFO","msg":"stream: starting","core version":"0.21.0"}
+ {"time":"2025-07-21T10:24:42.5255473Z","level":"INFO","msg":"stream: created new stream","id":"a02q1hb9"}
+ {"time":"2025-07-21T10:24:42.52557847Z","level":"INFO","msg":"stream: started","id":"a02q1hb9"}
+ {"time":"2025-07-21T10:24:42.525611461Z","level":"INFO","msg":"handler: started","stream_id":"a02q1hb9"}
+ {"time":"2025-07-21T10:24:42.525604871Z","level":"INFO","msg":"writer: Do: started","stream_id":"a02q1hb9"}
+ {"time":"2025-07-21T10:24:42.525641452Z","level":"INFO","msg":"sender: started","stream_id":"a02q1hb9"}
+ {"time":"2025-07-21T10:24:43.822276188Z","level":"INFO","msg":"stream: closing","id":"a02q1hb9"}
+ {"time":"2025-07-21T10:24:44.192430368Z","level":"INFO","msg":"fileTransfer: Close: file transfer manager closed"}
+ {"time":"2025-07-21T10:24:44.342579952Z","level":"INFO","msg":"handler: closed","stream_id":"a02q1hb9"}
+ {"time":"2025-07-21T10:24:44.342614093Z","level":"INFO","msg":"sender: closed","stream_id":"a02q1hb9"}
+ {"time":"2025-07-21T10:24:44.342611023Z","level":"INFO","msg":"writer: Close: closed","stream_id":"a02q1hb9"}
+ {"time":"2025-07-21T10:24:44.342675224Z","level":"INFO","msg":"stream: closed","id":"a02q1hb9"}
wandb/run-20250721_102442-a02q1hb9/logs/debug.log ADDED
@@ -0,0 +1,23 @@
+ 2025-07-21 10:24:42,130 INFO MainThread:3179167 [wandb_setup.py:_flush():80] Current SDK version is 0.21.0
+ 2025-07-21 10:24:42,130 INFO MainThread:3179167 [wandb_setup.py:_flush():80] Configure stats pid to 3179167
+ 2025-07-21 10:24:42,130 INFO MainThread:3179167 [wandb_setup.py:_flush():80] Loading settings from /home/ubuntu/.config/wandb/settings
+ 2025-07-21 10:24:42,130 INFO MainThread:3179167 [wandb_setup.py:_flush():80] Loading settings from /home/ubuntu/workspace/Arun/train/triplet-embed/wandb/settings
+ 2025-07-21 10:24:42,130 INFO MainThread:3179167 [wandb_setup.py:_flush():80] Loading settings from environment variables
+ 2025-07-21 10:24:42,130 INFO MainThread:3179167 [wandb_init.py:setup_run_log_directory():703] Logging user logs to /home/ubuntu/workspace/Arun/train/triplet-embed/wandb/run-20250721_102442-a02q1hb9/logs/debug.log
+ 2025-07-21 10:24:42,130 INFO MainThread:3179167 [wandb_init.py:setup_run_log_directory():704] Logging internal logs to /home/ubuntu/workspace/Arun/train/triplet-embed/wandb/run-20250721_102442-a02q1hb9/logs/debug-internal.log
+ 2025-07-21 10:24:42,130 INFO MainThread:3179167 [wandb_init.py:init():830] calling init triggers
+ 2025-07-21 10:24:42,130 INFO MainThread:3179167 [wandb_init.py:init():835] wandb.init called with sweep_config: {}
+ config: {'model_name': 'all-MiniLM-L6-v2', 'batch_size': 16, 'epochs': 1, '_wandb': {}}
+ 2025-07-21 10:24:42,130 INFO MainThread:3179167 [wandb_init.py:init():871] starting backend
+ 2025-07-21 10:24:42,336 INFO MainThread:3179167 [wandb_init.py:init():874] sending inform_init request
+ 2025-07-21 10:24:42,339 INFO MainThread:3179167 [wandb_init.py:init():882] backend started and connected
+ 2025-07-21 10:24:42,340 INFO MainThread:3179167 [wandb_init.py:init():953] updated telemetry
+ 2025-07-21 10:24:42,344 INFO MainThread:3179167 [wandb_init.py:init():977] communicating run to backend with 90.0 second timeout
+ 2025-07-21 10:24:42,684 INFO MainThread:3179167 [wandb_init.py:init():1029] starting run threads in backend
+ 2025-07-21 10:24:42,758 INFO MainThread:3179167 [wandb_run.py:_console_start():2458] atexit reg
+ 2025-07-21 10:24:42,758 INFO MainThread:3179167 [wandb_run.py:_redirect():2306] redirect: wrap_raw
+ 2025-07-21 10:24:42,758 INFO MainThread:3179167 [wandb_run.py:_redirect():2375] Wrapping output streams.
+ 2025-07-21 10:24:42,758 INFO MainThread:3179167 [wandb_run.py:_redirect():2398] Redirects installed.
+ 2025-07-21 10:24:42,760 INFO MainThread:3179167 [wandb_init.py:init():1075] run started, returning control to user process
+ 2025-07-21 10:24:43,301 INFO MainThread:3179167 [wandb_run.py:_config_callback():1363] config_cb None None {'output_dir': 'checkpoints/model_4', 'overwrite_output_dir': False, 'do_train': False, 'do_eval': True, 'do_predict': False, 'eval_strategy': 'steps', 'prediction_loss_only': True, 'per_device_train_batch_size': 16, 'per_device_eval_batch_size': 16, 'per_gpu_train_batch_size': None, 'per_gpu_eval_batch_size': None, 'gradient_accumulation_steps': 1, 'eval_accumulation_steps': None, 'eval_delay': 0, 'torch_empty_cache_steps': None, 'learning_rate': 5e-05, 'weight_decay': 0.0, 'adam_beta1': 0.9, 'adam_beta2': 0.999, 'adam_epsilon': 1e-08, 'max_grad_norm': 1, 'num_train_epochs': 1, 'max_steps': -1, 'lr_scheduler_type': 'linear', 'lr_scheduler_kwargs': {}, 'warmup_ratio': 0.0, 'warmup_steps': 0, 'log_level': 'passive', 'log_level_replica': 'warning', 'log_on_each_node': True, 'logging_dir': 'checkpoints/model_4/runs/Jul21_10-24-42_ip-10-159-20-46', 'logging_strategy': 'steps', 'logging_first_step': False, 'logging_steps': 500, 'logging_nan_inf_filter': True, 'save_strategy': 'no', 'save_steps': 500, 'save_total_limit': 0, 'save_safetensors': True, 'save_on_each_node': False, 'save_only_model': False, 'restore_callback_states_from_checkpoint': False, 'no_cuda': False, 'use_cpu': False, 'use_mps_device': False, 'seed': 42, 'data_seed': None, 'jit_mode_eval': False, 'use_ipex': False, 'bf16': False, 'fp16': False, 'fp16_opt_level': 'O1', 'half_precision_backend': 'auto', 'bf16_full_eval': False, 'fp16_full_eval': False, 'tf32': None, 'local_rank': 0, 'ddp_backend': None, 'tpu_num_cores': None, 'tpu_metrics_debug': False, 'debug': [], 'dataloader_drop_last': False, 'eval_steps': 10, 'dataloader_num_workers': 0, 'dataloader_prefetch_factor': None, 'past_index': -1, 'run_name': 'checkpoints/model_4', 'disable_tqdm': True, 'remove_unused_columns': True, 'label_names': None, 'load_best_model_at_end': False, 'metric_for_best_model': None, 'greater_is_better': None, 'ignore_data_skip': False, 'fsdp': [], 'fsdp_min_num_params': 0, 'fsdp_config': {'min_num_params': 0, 'xla': False, 'xla_fsdp_v2': False, 'xla_fsdp_grad_ckpt': False}, 'fsdp_transformer_layer_cls_to_wrap': None, 'accelerator_config': {'split_batches': False, 'dispatch_batches': None, 'even_batches': True, 'use_seedable_sampler': True, 'non_blocking': False, 'gradient_accumulation_kwargs': None}, 'deepspeed': None, 'label_smoothing_factor': 0.0, 'optim': 'adamw_torch', 'optim_args': None, 'adafactor': False, 'group_by_length': False, 'length_column_name': 'length', 'report_to': ['wandb'], 'ddp_find_unused_parameters': None, 'ddp_bucket_cap_mb': None, 'ddp_broadcast_buffers': False, 'dataloader_pin_memory': True, 'dataloader_persistent_workers': False, 'skip_memory_metrics': True, 'use_legacy_prediction_loop': False, 'push_to_hub': False, 'resume_from_checkpoint': None, 'hub_model_id': None, 'hub_strategy': 'every_save', 'hub_token': '<HUB_TOKEN>', 'hub_private_repo': None, 'hub_always_push': False, 'hub_revision': None, 'gradient_checkpointing': False, 'gradient_checkpointing_kwargs': None, 'include_inputs_for_metrics': False, 'include_for_metrics': [], 'eval_do_concat_batches': True, 'fp16_backend': 'auto', 'push_to_hub_model_id': None, 'push_to_hub_organization': None, 'push_to_hub_token': '<PUSH_TO_HUB_TOKEN>', 'mp_parameters': '', 'auto_find_batch_size': False, 'full_determinism': False, 'torchdynamo': None, 'ray_scope': 'last', 'ddp_timeout': 1800, 'torch_compile': False, 'torch_compile_backend': None, 'torch_compile_mode': None, 'include_tokens_per_second': False, 'include_num_input_tokens_seen': False, 'neftune_noise_alpha': None, 'optim_target_modules': None, 'batch_eval_metrics': False, 'eval_on_start': False, 'use_liger_kernel': False, 'liger_kernel_config': None, 'eval_use_gather_object': False, 'average_tokens_across_devices': False, 'prompts': None, 'batch_sampler': 'batch_sampler', 'multi_dataset_batch_sampler': 'round_robin', 'router_mapping': {}, 'learning_rate_mapping': {}}
+ 2025-07-21 10:24:43,821 INFO MsgRouterThr:3179167 [mailbox.py:close():129] [no run ID] Closing mailbox, abandoning 1 handles.
wandb/run-20250721_102442-a02q1hb9/run-a02q1hb9.wandb ADDED
Binary file (12 kB).