spl4shedEdu committed on
Commit
e2c8396
·
verified ·
1 Parent(s): ff9883c

Upload model checkpoint

Browse files
1_Pooling/config.json ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "word_embedding_dimension": 1024,
3
+ "pooling_mode_cls_token": true,
4
+ "pooling_mode_mean_tokens": false,
5
+ "pooling_mode_max_tokens": false,
6
+ "pooling_mode_mean_sqrt_len_tokens": false,
7
+ "pooling_mode_weightedmean_tokens": false,
8
+ "pooling_mode_lasttoken": false,
9
+ "include_prompt": true
10
+ }
README.md ADDED
@@ -0,0 +1,466 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ base_model: Alibaba-NLP/gte-large-en-v1.5
3
+ datasets: []
4
+ language: []
5
+ library_name: sentence-transformers
6
+ pipeline_tag: sentence-similarity
7
+ tags:
8
+ - sentence-transformers
9
+ - sentence-similarity
10
+ - feature-extraction
11
+ - generated_from_trainer
12
+ - dataset_size:269761
13
+ - loss:CachedMultipleNegativesRankingLoss
14
+ widget:
15
+ - source_sentence: netgear ac1900 nighthawk smart wifi router netgear wireless broadband
16
+ routers cdwcom the netgear ac1900 r7000 nighthawk smart wifi router is specially
17
+ designed for gaming streaming and mobile devices with speeds up to 1900 mbps and
18
+ a 1 ghz dualcore processor this next generation wireless router offers extreme
19
+ speed with reduced lag and less buffering this internet router comes with advanced
20
+ features such as netgear genie remote access readycloud openvpn and kwilt app
21
+ support so you can manage your network access a secure personal cloud access home
22
+ network remotely and share photos stored on the storage from anywherewifi router
23
+ with 600 1300 mbps speeds for online gaming streaming and more1 ghz dualcore processor
24
+ and prioritized bandwidth for streaming videosreadycloud usb access for secure
25
+ cloud access to usb storage at anytimemanage home network and provide guest access
26
+ remotely using netgear genie computersandaccessories
27
+ sentences:
28
+ - unirex s2 grease 40g tube bearing note recommended for high temperature service
29
+ in rolling bearings 1995 bmw 325i base convertible axles bearings differential
30
+ page 8 note recommended for high temperature service in rolling bearings genuine
31
+ bmw automotive
32
+ - netgear nighthawk ac1900 dual band wifi gigabit router r7000 with open source
33
+ support compatible amazon echoalexa us netgear compatibleus accelerate your wifi
34
+ with net gear nighthawk enjoy the fastest wifi currently available with speeds
35
+ up to 1900 mbps and a powerful dual core 1ghz processor for extreme performance
36
+ highpowered amplifiers external antennas and beamforming improve range and reliability
37
+ for up to 100 more wireless coverage features like dynamic qos prioritize streaming
38
+ and gaming creating a blazingfast lagfree wifi experience r7000 provides an extensible
39
+ design that enables service prioritization for data design that delivers high
40
+ availability scalability and for maximum flexibility and priceperformance us netgearus
41
+ computersandaccessories manufacturer netgear brand netgear color black model upc
42
+ 606449099812 item weight 345 pounds item size 1008 x 311 x 311 inches package
43
+ weight 344 pounds package size 1047 x 331 x 331 inches units in package 1
44
+ - pads high performance ebc pads pads ebc notes rear set of 4 performance pads ebc
45
+ greenstuff price per set length mm 108 height mm 44 automotive
46
+ - source_sentence: 12v drill impact driver twin pack gtpddid12 toolsandhomeimprovement
47
+ sentences:
48
+ - original ihip universal mlb licensed tampa bay devil rays noise isolating earbuds
49
+ 35mm navy blue white samsung galaxy tab 77 accessoriesgalaxy accessoriesclick
50
+ now accessorygeekscom cellphonesandaccessories
51
+ - canon pixma mp160 combo pack genuine canon ink cartridges cartridges inkrediblecouk
52
+ combo pack contains 1 black 16ml and 1 colour 12ml officeproducts
53
+ - gmc 12v drill and impact driver twin pack pack 3233836 argos price tracker pricehistorycouk
54
+ gmc toolsandhomeimprovement date price 02 august 2017 10599 21 june 2017 9099
55
+ 22 january 2016 9999 we started tracking this product on 22 january 2016
56
+ - source_sentence: throttle housing assembly 2002 bmw 325ci base coupe intake system
57
+ page 3 genuine bmw automotive
58
+ sentences:
59
+ - 2017 bmw i3 94 ah with range extender california 91307 2015 extender lease special
60
+ promotion on rex electric a for 35000 per month west hills automotive
61
+ - oil filter spin on type pc 201 style 1969 bmw 1602 base coupe oil circulation
62
+ page 1 mahle automotive
63
+ - throttle housing assembly 2002 bmw 325ci base coupe intake system page 3 continental
64
+ vdo automotive
65
+ - source_sentence: bracket without bushing for control arm front right lower 1990
66
+ bmw 325i base coupe suspension shocks springs page 6 note does not come w mounting
67
+ bushing front right meyle automotive
68
+ sentences:
69
+ - bracket without bushing for control arm front right lower 1990 bmw 325i base coupe
70
+ suspension shocks springs page 6 note does not come w mounting bushing front right
71
+ meyle automotive
72
+ - mohawk industries wsk120 oak golden engineered hardwood flooring 5 wide planks
73
+ 1969 sf carton wsk120 cork bamboo tile more anderson 96in base shoe accessory
74
+ sale price sq ft oak golden oak engineered hardwood flooring 5 wide planks 1969
75
+ sf carton mohawk industries wsk120 oak golden oak engineered hardwood flooring
76
+ 5 wide planks 1969 sf carton wsk120 instock mohawk industries toolsandhomeimprovement
77
+ - brizo towel ring charlotte products at efaucetscom towel ring charlotte collection
78
+ toolsandhomeimprovement
79
+ - source_sentence: alpinestars 140 holdall gear bag alpinestars fl yellowredanthracite
80
+ radar flight gloves 35618185392x comfortable glove lightweight customized fit
81
+ silicone grip patterning on fingers for improved riding control included items
82
+ 2 gloves made with 46 synthetic suede 35 polyester 19 polyamide care instructions
83
+ do not wash bleach tumble dry iron clean single layer clarino palm is breathable
84
+ and offers excellent feel the bikes controls reinforced thumb construction increases
85
+ durability gusset flexibility innovative stretch insert in area hand movement
86
+ lever reinforcements third fourth added abrasion resistance convenient slipon
87
+ design a secure singlepiece fabric upper gives perforated ergonomic chassis reduced
88
+ material result supremely lightweight alpinestars automotive
89
+ sentences:
90
+ - cover with spring and heater elementfor carburetor gb 1980 volkswagen jetta united
91
+ states market engine carburetor versions 1 b 3 jb ghgb 1357 gb automotive
92
+ - alpinestars 140 holdall gear bag alpinestars fl yellowredanthracite radar flight
93
+ gloves 35618185392x comfortable glove lightweight customized fit silicone grip
94
+ patterning on fingers for improved riding control included items 2 gloves made
95
+ with 46 synthetic suede 35 polyester 19 polyamide care instructions do not wash
96
+ bleach tumble dry iron clean single layer clarino palm is breathable and offers
97
+ excellent feel the bikes controls reinforced thumb construction increases durability
98
+ gusset flexibility innovative stretch insert in area hand movement lever reinforcements
99
+ third fourth added abrasion resistance convenient slipon design a secure singlepiece
100
+ fabric upper gives perforated ergonomic chassis reduced material result supremely
101
+ lightweight alpinestars automotive
102
+ - td 8000k xenon hid kit high beam 0910 mercedes benz cl600 c216 h7 xenon hid lighting
103
+ is only available on high end luxury cars you can convert your stock halogens
104
+ to super bright too by just connecting a few plug and play connections then mounting
105
+ the ballast in secure spot but with this mercedes cl600 low watt 8000k td hid
106
+ high beam conversion kit experience supreme brightness expanded field of vision
107
+ also our wattage systems are backed by full one year warrantyplease note will
108
+ not work if cl600s headlights came equipped factory lights unlike cheaper market
109
+ more consistently without fading out like coated bulbs dousually mercedes installations
110
+ probably most common upgrades performed increase headlight cl600 producing certain
111
+ temperatures technology that uses xenon gas charged bulb combination an electronic
112
+ regulate current going through it the resulting light 35 be up 3 times brighter
113
+ than traditional halogen bulbs kits reliably produce truer colored we offer conversion
114
+ kit short for intensity discharge automotive
115
+ ---
116
+
117
+ # SentenceTransformer based on Alibaba-NLP/gte-large-en-v1.5
118
+
119
+ This is a [sentence-transformers](https://www.SBERT.net) model finetuned from [Alibaba-NLP/gte-large-en-v1.5](https://huggingface.co/Alibaba-NLP/gte-large-en-v1.5). It maps sentences & paragraphs to a 1024-dimensional dense vector space and can be used for semantic textual similarity, semantic search, paraphrase mining, text classification, clustering, and more.
120
+
121
+ ## Model Details
122
+
123
+ ### Model Description
124
+ - **Model Type:** Sentence Transformer
125
+ - **Base model:** [Alibaba-NLP/gte-large-en-v1.5](https://huggingface.co/Alibaba-NLP/gte-large-en-v1.5) <!-- at revision 104333d6af6f97649377c2afbde10a7704870c7b -->
126
+ - **Maximum Sequence Length:** 8192 tokens
127
+ - **Output Dimensionality:** 1024 dimensions
128
+ - **Similarity Function:** Cosine Similarity
129
+ <!-- - **Training Dataset:** Unknown -->
130
+ <!-- - **Language:** Unknown -->
131
+ <!-- - **License:** Unknown -->
132
+
133
+ ### Model Sources
134
+
135
+ - **Documentation:** [Sentence Transformers Documentation](https://sbert.net)
136
+ - **Repository:** [Sentence Transformers on GitHub](https://github.com/UKPLab/sentence-transformers)
137
+ - **Hugging Face:** [Sentence Transformers on Hugging Face](https://huggingface.co/models?library=sentence-transformers)
138
+
139
+ ### Full Model Architecture
140
+
141
+ ```
142
+ SentenceTransformer(
143
+ (0): Transformer({'max_seq_length': 8192, 'do_lower_case': False}) with Transformer model: NewModel
144
+ (1): Pooling({'word_embedding_dimension': 1024, 'pooling_mode_cls_token': True, 'pooling_mode_mean_tokens': False, 'pooling_mode_max_tokens': False, 'pooling_mode_mean_sqrt_len_tokens': False, 'pooling_mode_weightedmean_tokens': False, 'pooling_mode_lasttoken': False, 'include_prompt': True})
145
+ )
146
+ ```
147
+
148
+ ## Usage
149
+
150
+ ### Direct Usage (Sentence Transformers)
151
+
152
+ First install the Sentence Transformers library:
153
+
154
+ ```bash
155
+ pip install -U sentence-transformers
156
+ ```
157
+
158
+ Then you can load this model and run inference.
159
+ ```python
160
+ from sentence_transformers import SentenceTransformer
161
+
162
+ # Download from the 🤗 Hub
163
+ model = SentenceTransformer("sentence_transformers_model_id")
164
+ # Run inference
165
+ sentences = [
166
+ 'alpinestars 140 holdall gear bag alpinestars fl yellowredanthracite radar flight gloves 35618185392x comfortable glove lightweight customized fit silicone grip patterning on fingers for improved riding control included items 2 gloves made with 46 synthetic suede 35 polyester 19 polyamide care instructions do not wash bleach tumble dry iron clean single layer clarino palm is breathable and offers excellent feel the bikes controls reinforced thumb construction increases durability gusset flexibility innovative stretch insert in area hand movement lever reinforcements third fourth added abrasion resistance convenient slipon design a secure singlepiece fabric upper gives perforated ergonomic chassis reduced material result supremely lightweight alpinestars automotive',
167
+ 'alpinestars 140 holdall gear bag alpinestars fl yellowredanthracite radar flight gloves 35618185392x comfortable glove lightweight customized fit silicone grip patterning on fingers for improved riding control included items 2 gloves made with 46 synthetic suede 35 polyester 19 polyamide care instructions do not wash bleach tumble dry iron clean single layer clarino palm is breathable and offers excellent feel the bikes controls reinforced thumb construction increases durability gusset flexibility innovative stretch insert in area hand movement lever reinforcements third fourth added abrasion resistance convenient slipon design a secure singlepiece fabric upper gives perforated ergonomic chassis reduced material result supremely lightweight alpinestars automotive',
168
+ 'td 8000k xenon hid kit high beam 0910 mercedes benz cl600 c216 h7 xenon hid lighting is only available on high end luxury cars you can convert your stock halogens to super bright too by just connecting a few plug and play connections then mounting the ballast in secure spot but with this mercedes cl600 low watt 8000k td hid high beam conversion kit experience supreme brightness expanded field of vision also our wattage systems are backed by full one year warrantyplease note will not work if cl600s headlights came equipped factory lights unlike cheaper market more consistently without fading out like coated bulbs dousually mercedes installations probably most common upgrades performed increase headlight cl600 producing certain temperatures technology that uses xenon gas charged bulb combination an electronic regulate current going through it the resulting light 35 be up 3 times brighter than traditional halogen bulbs kits reliably produce truer colored we offer conversion kit short for intensity discharge automotive',
169
+ ]
170
+ embeddings = model.encode(sentences)
171
+ print(embeddings.shape)
172
+ # [3, 1024]
173
+
174
+ # Get the similarity scores for the embeddings
175
+ similarities = model.similarity(embeddings, embeddings)
176
+ print(similarities.shape)
177
+ # [3, 3]
178
+ ```
179
+
180
+ <!--
181
+ ### Direct Usage (Transformers)
182
+
183
+ <details><summary>Click to see the direct usage in Transformers</summary>
184
+
185
+ </details>
186
+ -->
187
+
188
+ <!--
189
+ ### Downstream Usage (Sentence Transformers)
190
+
191
+ You can finetune this model on your own dataset.
192
+
193
+ <details><summary>Click to expand</summary>
194
+
195
+ </details>
196
+ -->
197
+
198
+ <!--
199
+ ### Out-of-Scope Use
200
+
201
+ *List how the model may foreseeably be misused and address what users ought not to do with the model.*
202
+ -->
203
+
204
+ <!--
205
+ ## Bias, Risks and Limitations
206
+
207
+ *What are the known or foreseeable issues stemming from this model? You could also flag here known failure cases or weaknesses of the model.*
208
+ -->
209
+
210
+ <!--
211
+ ### Recommendations
212
+
213
+ *What are recommendations with respect to the foreseeable issues? For example, filtering explicit content.*
214
+ -->
215
+
216
+ ## Training Details
217
+
218
+ ### Training Dataset
219
+
220
+ #### Unnamed Dataset
221
+
222
+
223
+ * Size: 269,761 training samples
224
+ * Columns: <code>anchor</code> and <code>positive</code>
225
+ * Approximate statistics based on the first 1000 samples:
226
+ | | anchor | positive |
227
+ |:--------|:-------------------------------------------------------------------------------------|:-------------------------------------------------------------------------------------|
228
+ | type | string | string |
229
+ | details | <ul><li>min: 13 tokens</li><li>mean: 68.94 tokens</li><li>max: 1130 tokens</li></ul> | <ul><li>min: 12 tokens</li><li>mean: 70.35 tokens</li><li>max: 1149 tokens</li></ul> |
230
+ * Samples:
231
+ | anchor | positive |
232
+ |:----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|:---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
233
+ | <code>tripp lite 25u 4post open frame rack cabinet square holes 1000lb capacity open frame rack tripp 25u prices cnet tripp lite otherelectronics</code> | <code>tripp lite 25u 4post open frame rack cabinet square holes 1000lb capacity open frame rack tripp 25u specs cnet null tripp lite otherelectronics</code> |
234
+ | <code>headlamp restoration kit philips 2000 bmw 323ci base coupe lights and lenses page 6 note removes yellowing and haze of plastic headlight lenses restoring likenew condition and finish professional results in under 30 minutes can be used on headlights taillights turn signals and reflective lens covers with uv coating technology one kit restores two headlights contains qty 1 pretreatment 1 cleanerpolish 1 shine restorerpreserver 3 sandpaper 600 1500 2000 grit 10 applicator polish cloths 1 pair of vinyl gloves philips automotive</code> | <code>headlamp restoration kit philips 1996 bmw 318i base convertible lights and lenses page 6 note removes yellowing and haze of plastic headlight lenses restoring likenew condition and finish professional results in under 30 minutes can be used on headlights taillights turn signals and reflective lens covers with uv coating technology one kit restores two headlights contains qty 1 pretreatment 1 cleanerpolish 1 shine restorerpreserver 3 sandpaper 600 1500 2000 grit 10 applicator polish cloths 1 pair of vinyl gloves philips automotive</code> |
235
+ | <code>hose clamp 132146 mm range 12 width spring type 1991 bmw 325i base coupe cooling system miscellaneous page 1 mubea automotive</code> | <code>hose clamp 132146 mm range 12 width spring type 1994 bmw 325i base convertible cooling system miscellaneous page 1 mubea automotive</code> |
236
+ * Loss: [<code>CachedMultipleNegativesRankingLoss</code>](https://sbert.net/docs/package_reference/sentence_transformer/losses.html#cachedmultiplenegativesrankingloss) with these parameters:
237
+ ```json
238
+ {
239
+ "scale": 20.0,
240
+ "similarity_fct": "cos_sim"
241
+ }
242
+ ```
243
+
244
+ ### Evaluation Dataset
245
+
246
+ #### Unnamed Dataset
247
+
248
+
249
+ * Size: 67,441 evaluation samples
250
+ * Columns: <code>anchor</code> and <code>positive</code>
251
+ * Approximate statistics based on the first 1000 samples:
252
+ | | anchor | positive |
253
+ |:--------|:------------------------------------------------------------------------------------|:------------------------------------------------------------------------------------|
254
+ | type | string | string |
255
+ | details | <ul><li>min: 11 tokens</li><li>mean: 74.02 tokens</li><li>max: 693 tokens</li></ul> | <ul><li>min: 15 tokens</li><li>mean: 74.68 tokens</li><li>max: 812 tokens</li></ul> |
256
+ * Samples:
257
+ | anchor | positive |
258
+ |:------------------------------------------------------------------------------------------------------------------------------------------------------|:-------------------------------------------------------------------------------------------------------------------------------------------------|
259
+ | <code>bulb dashboard instruments with black socket base 12v 12w 1995 bmw 318ti hatchback lights and lenses page 3 genuine bmw automotive</code> | <code>bulb dashboard instruments with black socket base 12v 12w 1999 bmw 323is coupe gauges miscellaneous page 1 osramsylvania automotive</code> |
260
+ | <code>canon pixma mp282 high capacity black compatible ink cartridge ink volumeremanufactured pg512 black 18ml 1 cartridge 18ml officeproducts</code> | <code>canon pixma mp282 high capacity black compatible ink cartridge cartridges inkrediblecouk 1 black ink cartridge 18ml officeproducts</code> |
261
+ | <code>oring for camshaft position sensor 17 x 3 mm 2001 bmw 325i base wagon camshafts timing chains page 1 note 17 x 3mm uro automotive</code> | <code>oring for crankshaft sensor 17 x 3 mm 2000 bmw 323ci base coupe sensors page 5 note 17 x 3mm uro automotive</code> |
262
+ * Loss: [<code>CachedMultipleNegativesRankingLoss</code>](https://sbert.net/docs/package_reference/sentence_transformer/losses.html#cachedmultiplenegativesrankingloss) with these parameters:
263
+ ```json
264
+ {
265
+ "scale": 20.0,
266
+ "similarity_fct": "cos_sim"
267
+ }
268
+ ```
269
+
270
+ ### Training Hyperparameters
271
+ #### Non-Default Hyperparameters
272
+
273
+ - `eval_strategy`: steps
274
+ - `learning_rate`: 1e-05
275
+ - `num_train_epochs`: 2
276
+ - `warmup_ratio`: 0.1
277
+ - `fp16`: True
278
+ - `auto_find_batch_size`: True
279
+ - `batch_sampler`: no_duplicates
280
+
281
+ #### All Hyperparameters
282
+ <details><summary>Click to expand</summary>
283
+
284
+ - `overwrite_output_dir`: False
285
+ - `do_predict`: False
286
+ - `eval_strategy`: steps
287
+ - `prediction_loss_only`: True
288
+ - `per_device_train_batch_size`: 8
289
+ - `per_device_eval_batch_size`: 8
290
+ - `per_gpu_train_batch_size`: None
291
+ - `per_gpu_eval_batch_size`: None
292
+ - `gradient_accumulation_steps`: 1
293
+ - `eval_accumulation_steps`: None
294
+ - `torch_empty_cache_steps`: None
295
+ - `learning_rate`: 1e-05
296
+ - `weight_decay`: 0.0
297
+ - `adam_beta1`: 0.9
298
+ - `adam_beta2`: 0.999
299
+ - `adam_epsilon`: 1e-08
300
+ - `max_grad_norm`: 1.0
301
+ - `num_train_epochs`: 2
302
+ - `max_steps`: -1
303
+ - `lr_scheduler_type`: linear
304
+ - `lr_scheduler_kwargs`: {}
305
+ - `warmup_ratio`: 0.1
306
+ - `warmup_steps`: 0
307
+ - `log_level`: passive
308
+ - `log_level_replica`: warning
309
+ - `log_on_each_node`: True
310
+ - `logging_nan_inf_filter`: True
311
+ - `save_safetensors`: True
312
+ - `save_on_each_node`: False
313
+ - `save_only_model`: False
314
+ - `restore_callback_states_from_checkpoint`: False
315
+ - `no_cuda`: False
316
+ - `use_cpu`: False
317
+ - `use_mps_device`: False
318
+ - `seed`: 42
319
+ - `data_seed`: None
320
+ - `jit_mode_eval`: False
321
+ - `use_ipex`: False
322
+ - `bf16`: False
323
+ - `fp16`: True
324
+ - `fp16_opt_level`: O1
325
+ - `half_precision_backend`: auto
326
+ - `bf16_full_eval`: False
327
+ - `fp16_full_eval`: False
328
+ - `tf32`: None
329
+ - `local_rank`: 0
330
+ - `ddp_backend`: None
331
+ - `tpu_num_cores`: None
332
+ - `tpu_metrics_debug`: False
333
+ - `debug`: []
334
+ - `dataloader_drop_last`: False
335
+ - `dataloader_num_workers`: 0
336
+ - `dataloader_prefetch_factor`: None
337
+ - `past_index`: -1
338
+ - `disable_tqdm`: False
339
+ - `remove_unused_columns`: True
340
+ - `label_names`: None
341
+ - `load_best_model_at_end`: False
342
+ - `ignore_data_skip`: False
343
+ - `fsdp`: []
344
+ - `fsdp_min_num_params`: 0
345
+ - `fsdp_config`: {'min_num_params': 0, 'xla': False, 'xla_fsdp_v2': False, 'xla_fsdp_grad_ckpt': False}
346
+ - `fsdp_transformer_layer_cls_to_wrap`: None
347
+ - `accelerator_config`: {'split_batches': False, 'dispatch_batches': None, 'even_batches': True, 'use_seedable_sampler': True, 'non_blocking': False, 'gradient_accumulation_kwargs': None}
348
+ - `deepspeed`: None
349
+ - `label_smoothing_factor`: 0.0
350
+ - `optim`: adamw_torch
351
+ - `optim_args`: None
352
+ - `adafactor`: False
353
+ - `group_by_length`: False
354
+ - `length_column_name`: length
355
+ - `ddp_find_unused_parameters`: None
356
+ - `ddp_bucket_cap_mb`: None
357
+ - `ddp_broadcast_buffers`: False
358
+ - `dataloader_pin_memory`: True
359
+ - `dataloader_persistent_workers`: False
360
+ - `skip_memory_metrics`: True
361
+ - `use_legacy_prediction_loop`: False
362
+ - `push_to_hub`: False
363
+ - `resume_from_checkpoint`: None
364
+ - `hub_model_id`: None
365
+ - `hub_strategy`: every_save
366
+ - `hub_private_repo`: False
367
+ - `hub_always_push`: False
368
+ - `gradient_checkpointing`: False
369
+ - `gradient_checkpointing_kwargs`: None
370
+ - `include_inputs_for_metrics`: False
371
+ - `eval_do_concat_batches`: True
372
+ - `fp16_backend`: auto
373
+ - `push_to_hub_model_id`: None
374
+ - `push_to_hub_organization`: None
375
+ - `mp_parameters`:
376
+ - `auto_find_batch_size`: True
377
+ - `full_determinism`: False
378
+ - `torchdynamo`: None
379
+ - `ray_scope`: last
380
+ - `ddp_timeout`: 1800
381
+ - `torch_compile`: False
382
+ - `torch_compile_backend`: None
383
+ - `torch_compile_mode`: None
384
+ - `dispatch_batches`: None
385
+ - `split_batches`: None
386
+ - `include_tokens_per_second`: False
387
+ - `include_num_input_tokens_seen`: False
388
+ - `neftune_noise_alpha`: None
389
+ - `optim_target_modules`: None
390
+ - `batch_eval_metrics`: False
391
+ - `eval_on_start`: False
392
+ - `eval_use_gather_object`: False
393
+ - `batch_sampler`: no_duplicates
394
+ - `multi_dataset_batch_sampler`: proportional
395
+
396
+ </details>
397
+
398
+ ### Training Logs
399
+ | Epoch | Step | Training Loss | Validation Loss |
400
+ |:------:|:-----:|:-------------:|:------:|
401
+ | 0.2076 | 7000 | 0.012 | 0.0057 |
402
+ | 0.4152 | 14000 | 0.0044 | 0.0040 |
403
+ | 0.6228 | 21000 | 0.0038 | 0.0040 |
404
+ | 0.8303 | 28000 | 0.0033 | 0.0028 |
405
+ | 1.0379 | 35000 | 0.002 | 0.0025 |
406
+ | 1.2455 | 42000 | 0.0012 | 0.0022 |
407
+ | 1.4531 | 49000 | 0.0008 | 0.0021 |
408
+ | 1.6607 | 56000 | 0.0005 | 0.0021 |
409
+ | 1.8683 | 63000 | 0.0004 | 0.0020 |
410
+
411
+
412
+ ### Framework Versions
413
+ - Python: 3.10.13
414
+ - Sentence Transformers: 3.0.1
415
+ - Transformers: 4.44.0
416
+ - PyTorch: 2.2.1
417
+ - Accelerate: 0.33.0
418
+ - Datasets: 2.21.0
419
+ - Tokenizers: 0.19.1
420
+
421
+ ## Citation
422
+
423
+ ### BibTeX
424
+
425
+ #### Sentence Transformers
426
+ ```bibtex
427
+ @inproceedings{reimers-2019-sentence-bert,
428
+ title = "Sentence-BERT: Sentence Embeddings using Siamese BERT-Networks",
429
+ author = "Reimers, Nils and Gurevych, Iryna",
430
+ booktitle = "Proceedings of the 2019 Conference on Empirical Methods in Natural Language Processing",
431
+ month = "11",
432
+ year = "2019",
433
+ publisher = "Association for Computational Linguistics",
434
+ url = "https://arxiv.org/abs/1908.10084",
435
+ }
436
+ ```
437
+
438
+ #### CachedMultipleNegativesRankingLoss
439
+ ```bibtex
440
+ @misc{gao2021scaling,
441
+ title={Scaling Deep Contrastive Learning Batch Size under Memory Limited Setup},
442
+ author={Luyu Gao and Yunyi Zhang and Jiawei Han and Jamie Callan},
443
+ year={2021},
444
+ eprint={2101.06983},
445
+ archivePrefix={arXiv},
446
+ primaryClass={cs.LG}
447
+ }
448
+ ```
449
+
450
+ <!--
451
+ ## Glossary
452
+
453
+ *Clearly define terms in order to be accessible across audiences.*
454
+ -->
455
+
456
+ <!--
457
+ ## Model Card Authors
458
+
459
+ *Lists the people who create the model card, providing recognition and accountability for the detailed work that goes into its construction.*
460
+ -->
461
+
462
+ <!--
463
+ ## Model Card Contact
464
+
465
+ *Provides a way for people who have updates to the Model Card, suggestions, or questions, to contact the Model Card authors.*
466
+ -->
config.json ADDED
@@ -0,0 +1,44 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "Alibaba-NLP/gte-large-en-v1.5",
3
+ "architectures": [
4
+ "NewModel"
5
+ ],
6
+ "attention_probs_dropout_prob": 0.0,
7
+ "auto_map": {
8
+ "AutoConfig": "Alibaba-NLP/new-impl--configuration.NewConfig",
9
+ "AutoModel": "Alibaba-NLP/new-impl--modeling.NewModel",
10
+ "AutoModelForMaskedLM": "Alibaba-NLP/new-impl--modeling.NewForMaskedLM",
11
+ "AutoModelForMultipleChoice": "Alibaba-NLP/new-impl--modeling.NewForMultipleChoice",
12
+ "AutoModelForQuestionAnswering": "Alibaba-NLP/new-impl--modeling.NewForQuestionAnswering",
13
+ "AutoModelForSequenceClassification": "Alibaba-NLP/new-impl--modeling.NewForSequenceClassification",
14
+ "AutoModelForTokenClassification": "Alibaba-NLP/new-impl--modeling.NewForTokenClassification"
15
+ },
16
+ "classifier_dropout": null,
17
+ "hidden_act": "gelu",
18
+ "hidden_dropout_prob": 0.1,
19
+ "hidden_size": 1024,
20
+ "initializer_range": 0.02,
21
+ "intermediate_size": 4096,
22
+ "layer_norm_eps": 1e-12,
23
+ "layer_norm_type": "layer_norm",
24
+ "logn_attention_clip1": false,
25
+ "logn_attention_scale": false,
26
+ "max_position_embeddings": 8192,
27
+ "model_type": "new",
28
+ "num_attention_heads": 16,
29
+ "num_hidden_layers": 24,
30
+ "pack_qkv": true,
31
+ "pad_token_id": 0,
32
+ "position_embedding_type": "rope",
33
+ "rope_scaling": {
34
+ "factor": 2.0,
35
+ "type": "ntk"
36
+ },
37
+ "rope_theta": 160000,
38
+ "torch_dtype": "float32",
39
+ "transformers_version": "4.44.0",
40
+ "type_vocab_size": 2,
41
+ "unpad_inputs": false,
42
+ "use_memory_efficient_attention": false,
43
+ "vocab_size": 30528
44
+ }
config_sentence_transformers.json ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "__version__": {
3
+ "sentence_transformers": "3.0.1",
4
+ "transformers": "4.44.0",
5
+ "pytorch": "2.2.1"
6
+ },
7
+ "prompts": {},
8
+ "default_prompt_name": null,
9
+ "similarity_fn_name": null
10
+ }
model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:dcae353e6bd3abd927eb0ae32e57da87ea497ef64c5705ecade468a4a7dc6e2e
3
+ size 1736585680
modules.json ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [
2
+ {
3
+ "idx": 0,
4
+ "name": "0",
5
+ "path": "",
6
+ "type": "sentence_transformers.models.Transformer"
7
+ },
8
+ {
9
+ "idx": 1,
10
+ "name": "1",
11
+ "path": "1_Pooling",
12
+ "type": "sentence_transformers.models.Pooling"
13
+ }
14
+ ]
optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d889f3d8b8855bdaa7eb0f7650f3ddf5b2f0cf971584b7d9dfc10d681cf999fc
3
+ size 3473337082
rng_state.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:de6ddfbce68276c9610b3e63f6b7b14d27e537210a6a14fe7e3bd520ccc81591
3
+ size 14244
scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:88695ecd2186d877bc568b39a3362ee255bcef60a774deb411b197770103fd95
3
+ size 1064
sentence_bert_config.json ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ {
2
+ "max_seq_length": 8192,
3
+ "do_lower_case": false
4
+ }
special_tokens_map.json ADDED
@@ -0,0 +1,37 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "cls_token": {
3
+ "content": "[CLS]",
4
+ "lstrip": false,
5
+ "normalized": false,
6
+ "rstrip": false,
7
+ "single_word": false
8
+ },
9
+ "mask_token": {
10
+ "content": "[MASK]",
11
+ "lstrip": false,
12
+ "normalized": false,
13
+ "rstrip": false,
14
+ "single_word": false
15
+ },
16
+ "pad_token": {
17
+ "content": "[PAD]",
18
+ "lstrip": false,
19
+ "normalized": false,
20
+ "rstrip": false,
21
+ "single_word": false
22
+ },
23
+ "sep_token": {
24
+ "content": "[SEP]",
25
+ "lstrip": false,
26
+ "normalized": false,
27
+ "rstrip": false,
28
+ "single_word": false
29
+ },
30
+ "unk_token": {
31
+ "content": "[UNK]",
32
+ "lstrip": false,
33
+ "normalized": false,
34
+ "rstrip": false,
35
+ "single_word": false
36
+ }
37
+ }
tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
tokenizer_config.json ADDED
@@ -0,0 +1,62 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "added_tokens_decoder": {
3
+ "0": {
4
+ "content": "[PAD]",
5
+ "lstrip": false,
6
+ "normalized": false,
7
+ "rstrip": false,
8
+ "single_word": false,
9
+ "special": true
10
+ },
11
+ "100": {
12
+ "content": "[UNK]",
13
+ "lstrip": false,
14
+ "normalized": false,
15
+ "rstrip": false,
16
+ "single_word": false,
17
+ "special": true
18
+ },
19
+ "101": {
20
+ "content": "[CLS]",
21
+ "lstrip": false,
22
+ "normalized": false,
23
+ "rstrip": false,
24
+ "single_word": false,
25
+ "special": true
26
+ },
27
+ "102": {
28
+ "content": "[SEP]",
29
+ "lstrip": false,
30
+ "normalized": false,
31
+ "rstrip": false,
32
+ "single_word": false,
33
+ "special": true
34
+ },
35
+ "103": {
36
+ "content": "[MASK]",
37
+ "lstrip": false,
38
+ "normalized": false,
39
+ "rstrip": false,
40
+ "single_word": false,
41
+ "special": true
42
+ }
43
+ },
44
+ "clean_up_tokenization_spaces": true,
45
+ "cls_token": "[CLS]",
46
+ "do_lower_case": true,
47
+ "mask_token": "[MASK]",
48
+ "max_length": 8000,
49
+ "model_max_length": 8192,
50
+ "pad_to_multiple_of": null,
51
+ "pad_token": "[PAD]",
52
+ "pad_token_type_id": 0,
53
+ "padding_side": "right",
54
+ "sep_token": "[SEP]",
55
+ "stride": 0,
56
+ "strip_accents": null,
57
+ "tokenize_chinese_chars": true,
58
+ "tokenizer_class": "BertTokenizer",
59
+ "truncation_side": "right",
60
+ "truncation_strategy": "longest_first",
61
+ "unk_token": "[UNK]"
62
+ }
trainer_state.json ADDED
@@ -0,0 +1,168 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": null,
3
+ "best_model_checkpoint": null,
4
+ "epoch": 1.8682719966786276,
5
+ "eval_steps": 7000,
6
+ "global_step": 63000,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 0.2075857774087364,
13
+ "grad_norm": 0.007890098728239536,
14
+ "learning_rate": 9.958647050101323e-06,
15
+ "loss": 0.012,
16
+ "step": 7000
17
+ },
18
+ {
19
+ "epoch": 0.2075857774087364,
20
+ "eval_loss": 0.005739695392549038,
21
+ "eval_runtime": 412.7277,
22
+ "eval_samples_per_second": 163.403,
23
+ "eval_steps_per_second": 20.428,
24
+ "step": 7000
25
+ },
26
+ {
27
+ "epoch": 0.4151715548174728,
28
+ "grad_norm": 0.07381915301084518,
29
+ "learning_rate": 8.805871789380036e-06,
30
+ "loss": 0.0044,
31
+ "step": 14000
32
+ },
33
+ {
34
+ "epoch": 0.4151715548174728,
35
+ "eval_loss": 0.003970430698245764,
36
+ "eval_runtime": 417.73,
37
+ "eval_samples_per_second": 161.446,
38
+ "eval_steps_per_second": 20.183,
39
+ "step": 14000
40
+ },
41
+ {
42
+ "epoch": 0.6227573322262092,
43
+ "grad_norm": 0.01579739712178707,
44
+ "learning_rate": 7.653096528658748e-06,
45
+ "loss": 0.0038,
46
+ "step": 21000
47
+ },
48
+ {
49
+ "epoch": 0.6227573322262092,
50
+ "eval_loss": 0.004011470824480057,
51
+ "eval_runtime": 416.5579,
52
+ "eval_samples_per_second": 161.901,
53
+ "eval_steps_per_second": 20.24,
54
+ "step": 21000
55
+ },
56
+ {
57
+ "epoch": 0.8303431096349456,
58
+ "grad_norm": 0.024147002026438713,
59
+ "learning_rate": 6.500321267937461e-06,
60
+ "loss": 0.0033,
61
+ "step": 28000
62
+ },
63
+ {
64
+ "epoch": 0.8303431096349456,
65
+ "eval_loss": 0.0028445960488170385,
66
+ "eval_runtime": 416.8518,
67
+ "eval_samples_per_second": 161.787,
68
+ "eval_steps_per_second": 20.225,
69
+ "step": 28000
70
+ },
71
+ {
72
+ "epoch": 1.037928887043682,
73
+ "grad_norm": 0.00019609538139775395,
74
+ "learning_rate": 5.347381254427731e-06,
75
+ "loss": 0.002,
76
+ "step": 35000
77
+ },
78
+ {
79
+ "epoch": 1.037928887043682,
80
+ "eval_loss": 0.002484912285581231,
81
+ "eval_runtime": 415.7525,
82
+ "eval_samples_per_second": 162.214,
83
+ "eval_steps_per_second": 20.279,
84
+ "step": 35000
85
+ },
86
+ {
87
+ "epoch": 1.2455146644524184,
88
+ "grad_norm": 0.008880384266376495,
89
+ "learning_rate": 4.194276488129561e-06,
90
+ "loss": 0.0012,
91
+ "step": 42000
92
+ },
93
+ {
94
+ "epoch": 1.2455146644524184,
95
+ "eval_loss": 0.0021632197313010693,
96
+ "eval_runtime": 408.105,
97
+ "eval_samples_per_second": 165.254,
98
+ "eval_steps_per_second": 20.659,
99
+ "step": 42000
100
+ },
101
+ {
102
+ "epoch": 1.4531004418611548,
103
+ "grad_norm": 0.003053226973861456,
104
+ "learning_rate": 3.0413364746198333e-06,
105
+ "loss": 0.0008,
106
+ "step": 49000
107
+ },
108
+ {
109
+ "epoch": 1.4531004418611548,
110
+ "eval_loss": 0.002097294433042407,
111
+ "eval_runtime": 407.9995,
112
+ "eval_samples_per_second": 165.297,
113
+ "eval_steps_per_second": 20.664,
114
+ "step": 49000
115
+ },
116
+ {
117
+ "epoch": 1.6606862192698912,
118
+ "grad_norm": 0.00016563042299821973,
119
+ "learning_rate": 1.8883964611101043e-06,
120
+ "loss": 0.0005,
121
+ "step": 56000
122
+ },
123
+ {
124
+ "epoch": 1.6606862192698912,
125
+ "eval_loss": 0.0021241051144897938,
126
+ "eval_runtime": 407.4084,
127
+ "eval_samples_per_second": 165.537,
128
+ "eval_steps_per_second": 20.694,
129
+ "step": 56000
130
+ },
131
+ {
132
+ "epoch": 1.8682719966786276,
133
+ "grad_norm": 0.0011578386183828115,
134
+ "learning_rate": 7.354564476003758e-07,
135
+ "loss": 0.0004,
136
+ "step": 63000
137
+ },
138
+ {
139
+ "epoch": 1.8682719966786276,
140
+ "eval_loss": 0.001993535552173853,
141
+ "eval_runtime": 406.6423,
142
+ "eval_samples_per_second": 165.848,
143
+ "eval_steps_per_second": 20.733,
144
+ "step": 63000
145
+ }
146
+ ],
147
+ "logging_steps": 7000,
148
+ "max_steps": 67442,
149
+ "num_input_tokens_seen": 0,
150
+ "num_train_epochs": 2,
151
+ "save_steps": 7000,
152
+ "stateful_callbacks": {
153
+ "TrainerControl": {
154
+ "args": {
155
+ "should_epoch_stop": false,
156
+ "should_evaluate": false,
157
+ "should_log": false,
158
+ "should_save": true,
159
+ "should_training_stop": false
160
+ },
161
+ "attributes": {}
162
+ }
163
+ },
164
+ "total_flos": 0.0,
165
+ "train_batch_size": 8,
166
+ "trial_name": null,
167
+ "trial_params": null
168
+ }
training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:24026fdfd36b29934cf60f459acd3d60861392e75c603628e68e26d7eacb2000
3
+ size 5368
vocab.txt ADDED
The diff for this file is too large to render. See raw diff