spl4shedEdu commited on
Commit
d3d25e5
·
verified ·
1 Parent(s): f4e4747

Upload model checkpoint

Browse files
1_Pooling/config.json ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "word_embedding_dimension": 1024,
3
+ "pooling_mode_cls_token": true,
4
+ "pooling_mode_mean_tokens": false,
5
+ "pooling_mode_max_tokens": false,
6
+ "pooling_mode_mean_sqrt_len_tokens": false,
7
+ "pooling_mode_weightedmean_tokens": false,
8
+ "pooling_mode_lasttoken": false,
9
+ "include_prompt": true
10
+ }
README.md ADDED
@@ -0,0 +1,440 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ base_model: Alibaba-NLP/gte-large-en-v1.5
3
+ datasets: []
4
+ language: []
5
+ library_name: sentence-transformers
6
+ pipeline_tag: sentence-similarity
7
+ tags:
8
+ - sentence-transformers
9
+ - sentence-similarity
10
+ - feature-extraction
11
+ - generated_from_trainer
12
+ - dataset_size:281342
13
+ - loss:CachedMultipleNegativesRankingLoss
14
+ widget:
15
+ - source_sentence: nokya heavy duty fog light harnesses 0708 jeep compass h10 these
16
+ nokya heavy duty fog light h10 wire harnesses have been designed to be plugged
17
+ into your jeep compasss stock wiring aftermarket bulbs or not as complete upgrades
18
+ a precautionary measure against harness plug burnouts which can permanently damage
19
+ compass housings these also help handle the increased demands of offers cheap
20
+ and relatively easy upgrade electrical system they work replacements for damaged
21
+ plugs lighting are extended periods use operation in adverse severe conditions
22
+ address this brand otherwise each set consists 2 harnesses identifiers is nok91052pcs
23
+ category of automotive
24
+ sentences:
25
+ - nokya heavy duty headlight harnesses high beam 0103 acura cl 32 9005hb3 these
26
+ nokya heavy duty headlight high beam 9005 wire harnesses have been designed to
27
+ be plugged into your acura cl 32s stock wiring aftermarket bulbs or not offers
28
+ these as a cheap and relatively easy upgrade 32 electrical system they work replacements
29
+ for damaged harness plugs complete upgrades precautionary measure against plug
30
+ burnouts which can permanently damage housings also help handle the increased
31
+ demands of operation in adverse severe conditions address this lighting are extended
32
+ periods use brand otherwise each set consists 2 harnesses identifiers is nok91052pcs
33
+ category of automotive
34
+ - powergard protection plan residential 2year option for gator t series and hpx
35
+ john deere in john deere us riding lawn mowers sale tractors castongia tractor
36
+ valparaiso for t series and hpx gator utility vehicles view plan detailsextends
37
+ manufacturers warranty 1 additional year2 years of transportationfor residential
38
+ owners onlynot available in all areasbuy online for machines purchased within
39
+ the past year identifiers is ppprtuv2y category of automotive
40
+ - compatible for okidata c5500n c5800ldn toner cart cyan 5k compatible 5k 43324403
41
+ identifiers is 43324403 category of officeproducts
42
+ - source_sentence: zebra handheld protective boot zebra boot sgtc55boot101 scanner
43
+ accessories cdwcom identifiers is 43211612 category of otherelectronics
44
+ sentences:
45
+ - window regulator without motor electric front left 1996 bmw 318is base coupe panels
46
+ doors page 1 note includes bushings 51 33 8 163 351 and 352 produced by genuine
47
+ bmw identifiers is 51331977579m9 category of automotive
48
+ - zebra handheld protective boot zebra boot sgtc55boot101 scanner accessories cdwcom
49
+ identifiers is 43211612 category of otherelectronics
50
+ - premium screen protector film 3 pack samsung exhilarate accessoriesexhilarate
51
+ accessoriessave now accessorygeekscom identifiers is sfpuni3pk category of cellphonesandaccessories
52
+ - source_sentence: samsung clp310n black genuine samsung toner cartridge cartridges
53
+ inkrediblecouk black approx 1500 page yield at 5 coverage identifiers is cltk4092sblack
54
+ category of officeproducts
55
+ sentences:
56
+ - hansgrohe 240 air singlejet showerhead 25 gpm tub shower accessories heads trim
57
+ plates products at efaucetscom 10 spray face solid brass airinjection technology
58
+ 12 female inlet 180 noclog channels flow rate is 25 gpm requires shower arm and
59
+ flange sold separately mode rainair fullyfinished matching fast antilimescale
60
+ function with quickclean cupc listed identifiers is 27474001 category of toolsandhomeimprovement
61
+ - epson 24xl elephant black ink cartridge 24xl elephant black ink cartridge 10ml
62
+ 500 pagespage yields based on isoiec 24711 and 24712for a list of compatible machines
63
+ see the more info tab below produced by epson identifiers is c13t24314010 category
64
+ of officeproducts
65
+ - samsung clx3175 black genuine samsung toner cartridge cartridges inkrediblecouk
66
+ black approx 1500 page yield at 5 coverage identifiers is cltk4092sblack category
67
+ of officeproducts
68
+ - source_sentence: coolant hose gb 2004 audi a4s4avantqu canada market fuel cooling
69
+ auxiliary heater 6cylinder petr enginegb exhaust 30ltr gb identifiers is 8e0265357g
70
+ category of automotive
71
+ sentences:
72
+ - coolant hose gb 2001 audi a4s4avantquattro europe market fuel cooling auxiliary
73
+ heater 6cylinder petr enginegb exhaust 30ltr gb identifiers is 8e0265357g category
74
+ of automotive
75
+ - pilot bearing 15 x 32 10 1992 bmw 325i base sedan clutch flywheel page 3 produced
76
+ by genuine bmw identifiers is 11211720310boe category of automotive
77
+ - mityvac mv8020 brake bleeding kit 1992 bmw 318i base sedan wizard page 1 identifiers
78
+ is tolmtymv8020 category of automotive
79
+ - source_sentence: steinel led floodlight with sensor xled home 1 silver led floodlights
80
+ anylamp produced by steinel identifiers is 4007841002688 category of toolsandhomeimprovement
81
+ sentences:
82
+ - steinel led floodlight with sensor xled home 1 silver led lighting anylamp produced
83
+ by steinel identifiers is 4007841002688 category of toolsandhomeimprovement
84
+ - desk organizers quillcom durable bookends with reinforced rib designheavygauge
85
+ steel construction9 height identifiers is 90117bebk category of officeproducts
86
+ - hp photosmart c4150 cartridges for ink jet printers quillcom yields up to 399
87
+ pagessized and priced for occasional printingoriginal hp ink a little less ink
88
+ at very affordable price identifiers is 901d8j33an category of officeproducts
89
+ ---
90
+
91
+ # SentenceTransformer based on Alibaba-NLP/gte-large-en-v1.5
92
+
93
+ This is a [sentence-transformers](https://www.SBERT.net) model finetuned from [Alibaba-NLP/gte-large-en-v1.5](https://huggingface.co/Alibaba-NLP/gte-large-en-v1.5). It maps sentences & paragraphs to a 1024-dimensional dense vector space and can be used for semantic textual similarity, semantic search, paraphrase mining, text classification, clustering, and more.
94
+
95
+ ## Model Details
96
+
97
+ ### Model Description
98
+ - **Model Type:** Sentence Transformer
99
+ - **Base model:** [Alibaba-NLP/gte-large-en-v1.5](https://huggingface.co/Alibaba-NLP/gte-large-en-v1.5) <!-- at revision 104333d6af6f97649377c2afbde10a7704870c7b -->
100
+ - **Maximum Sequence Length:** 8192 tokens
101
+ - **Output Dimensionality:** 1024 dimensions
102
+ - **Similarity Function:** Cosine Similarity
103
+ <!-- - **Training Dataset:** Unknown -->
104
+ <!-- - **Language:** Unknown -->
105
+ <!-- - **License:** Unknown -->
106
+
107
+ ### Model Sources
108
+
109
+ - **Documentation:** [Sentence Transformers Documentation](https://sbert.net)
110
+ - **Repository:** [Sentence Transformers on GitHub](https://github.com/UKPLab/sentence-transformers)
111
+ - **Hugging Face:** [Sentence Transformers on Hugging Face](https://huggingface.co/models?library=sentence-transformers)
112
+
113
+ ### Full Model Architecture
114
+
115
+ ```
116
+ SentenceTransformer(
117
+ (0): Transformer({'max_seq_length': 8192, 'do_lower_case': False}) with Transformer model: NewModel
118
+ (1): Pooling({'word_embedding_dimension': 1024, 'pooling_mode_cls_token': True, 'pooling_mode_mean_tokens': False, 'pooling_mode_max_tokens': False, 'pooling_mode_mean_sqrt_len_tokens': False, 'pooling_mode_weightedmean_tokens': False, 'pooling_mode_lasttoken': False, 'include_prompt': True})
119
+ )
120
+ ```
121
+
122
+ ## Usage
123
+
124
+ ### Direct Usage (Sentence Transformers)
125
+
126
+ First install the Sentence Transformers library:
127
+
128
+ ```bash
129
+ pip install -U sentence-transformers
130
+ ```
131
+
132
+ Then you can load this model and run inference.
133
+ ```python
134
+ from sentence_transformers import SentenceTransformer
135
+
136
+ # Download from the 🤗 Hub
137
+ model = SentenceTransformer("sentence_transformers_model_id")
138
+ # Run inference
139
+ sentences = [
140
+ 'steinel led floodlight with sensor xled home 1 silver led floodlights anylamp produced by steinel identifiers is 4007841002688 category of toolsandhomeimprovement',
141
+ 'steinel led floodlight with sensor xled home 1 silver led lighting anylamp produced by steinel identifiers is 4007841002688 category of toolsandhomeimprovement',
142
+ 'desk organizers quillcom durable bookends with reinforced rib designheavygauge steel construction9 height identifiers is 90117bebk category of officeproducts',
143
+ ]
144
+ embeddings = model.encode(sentences)
145
+ print(embeddings.shape)
146
+ # [3, 1024]
147
+
148
+ # Get the similarity scores for the embeddings
149
+ similarities = model.similarity(embeddings, embeddings)
150
+ print(similarities.shape)
151
+ # [3, 3]
152
+ ```
153
+
154
+ <!--
155
+ ### Direct Usage (Transformers)
156
+
157
+ <details><summary>Click to see the direct usage in Transformers</summary>
158
+
159
+ </details>
160
+ -->
161
+
162
+ <!--
163
+ ### Downstream Usage (Sentence Transformers)
164
+
165
+ You can finetune this model on your own dataset.
166
+
167
+ <details><summary>Click to expand</summary>
168
+
169
+ </details>
170
+ -->
171
+
172
+ <!--
173
+ ### Out-of-Scope Use
174
+
175
+ *List how the model may foreseeably be misused and address what users ought not to do with the model.*
176
+ -->
177
+
178
+ <!--
179
+ ## Bias, Risks and Limitations
180
+
181
+ *What are the known or foreseeable issues stemming from this model? You could also flag here known failure cases or weaknesses of the model.*
182
+ -->
183
+
184
+ <!--
185
+ ### Recommendations
186
+
187
+ *What are recommendations with respect to the foreseeable issues? For example, filtering explicit content.*
188
+ -->
189
+
190
+ ## Training Details
191
+
192
+ ### Training Dataset
193
+
194
+ #### Unnamed Dataset
195
+
196
+
197
+ * Size: 281,342 training samples
198
+ * Columns: <code>anchor</code> and <code>positive</code>
199
+ * Approximate statistics based on the first 1000 samples:
200
+ | | anchor | positive |
201
+ |:--------|:------------------------------------------------------------------------------------|:-------------------------------------------------------------------------------------|
202
+ | type | string | string |
203
+ | details | <ul><li>min: 24 tokens</li><li>mean: 81.17 tokens</li><li>max: 941 tokens</li></ul> | <ul><li>min: 23 tokens</li><li>mean: 80.26 tokens</li><li>max: 1004 tokens</li></ul> |
204
+ * Samples:
205
+ | anchor | positive |
206
+ |:--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|:-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
207
+ | <code>ironwood pharmaceuticals inc class a a 1 full quote netdaniacom pharmaceuticals produced by source nasdaq identifiers is isinus46333x1081 category of automotive</code> | <code>ironwood pharmaceuticals inc class a pharmaceuticals a 1 news netdaniacom produced by source nasdaq identifiers is isinus46333x1081 category of automotive</code> |
208
+ | <code>873010s21 hp 600gb 12g 10k 25 dp sas hdd null price 873010s2110pack new 873010s21 600gb hdd 10 pack wholesale description10 x 600gb 25inch serial attached scsi sassff digitally signed ds 12g dual portenterprise hotplug 512n 10k hard drivein hpe drive tray as picturedfor g1g7 proliant sas serversgenuine number and firmwaregenuine certified drivepart numbers option part 873010b21 smartbuy 873010s21 produced by hp enterprise identifiers is 873010s2110pack category of computersandaccessories key specifications are specifications category proliant harddrive subcategory 10k generation sas part number 873010s2110pack products id 489761 type hard drive hotswap capacity 600gb interface serial attached scsi spindle speed 10000rpm ports dual port data transfer rate 12gbs bytes per sector 512n</code> | <code>873010s21 hp 600gb 12g 10k 25 dp sas hdd null price 873010s21 new 873010s21 600gb hdd wholesale description600gb 25inch serial attached scsi sassff digitally signed ds 12g dual portenterprise hotplug 512n 10k hard drivein hpe drive tray as picturedfor g1g7 proliant sas serversgenuine number and firmwaregenuine certified drivepart numbers option part 873010b21 smartbuy 873010s21 produced by hp enterprise identifiers is 873010s21 category of computersandaccessories key specifications are specifications category proliant harddrive subcategory 10k generation sas part number 873010s21 products id 489758 type hard drive hotswap capacity 600gb interface serial attached scsi spindle speed 10000rpm ports dual port data transfer rate 12gbs bytes per sector 512n</code> |
209
+ | <code>armrest fabric gb 2010 audi a4avant argentina market body middle front pr6e3gb model data prn0ln5fn2en2m gb identifiers is 8k0864207a category of automotive</code> | <code>armrest fabric gb 2009 audi a5s5 coupesportback south africa market body middle front pr6e3gb model data coupeprn2e gb identifiers is 8k0864207a category of automotive</code> |
210
+ * Loss: [<code>CachedMultipleNegativesRankingLoss</code>](https://sbert.net/docs/package_reference/sentence_transformer/losses.html#cachedmultiplenegativesrankingloss) with these parameters:
211
+ ```json
212
+ {
213
+ "scale": 20.0,
214
+ "similarity_fct": "cos_sim"
215
+ }
216
+ ```
217
+
218
+ ### Evaluation Dataset
219
+
220
+ #### Unnamed Dataset
221
+
222
+
223
+ * Size: 70,336 evaluation samples
224
+ * Columns: <code>anchor</code> and <code>positive</code>
225
+ * Approximate statistics based on the first 1000 samples:
226
+ | | anchor | positive |
227
+ |:--------|:------------------------------------------------------------------------------------|:-------------------------------------------------------------------------------------|
228
+ | type | string | string |
229
+ | details | <ul><li>min: 25 tokens</li><li>mean: 80.88 tokens</li><li>max: 542 tokens</li></ul> | <ul><li>min: 24 tokens</li><li>mean: 79.18 tokens</li><li>max: 1004 tokens</li></ul> |
230
+ * Samples:
231
+ | anchor | positive |
232
+ |:-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|:---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
233
+ | <code>rennline race hook front universal 8 in red each 2000 bmw 323i base wagon chassis panels sheet metal page 3 identifiers is rene01r8 category of automotive</code> | <code>rennline race hook front universal 8 in red each 2000 bmw 323i base wagon chassis panels sheet metal page 3 identifiers is rene01r8 category of automotive</code> |
234
+ | <code>happy new year 2017 peace patch icon card design vector image patch images over 13 000 happy new year 2017 greeting card design with varsity college typography and stitch patch peace symbol icon as number eps10 vector vector image identifiers is 14478945 category of officeproducts</code> | <code>happy new year 2017 peace patch icon card design vector image happy new year 2017 greeting card design with varsity college typography and stitch patch peace symbol icon as number eps10 vector download a free preview or high quality adobe illustrator ai eps pdf resolution jpeg versions identifiers is 14478945 category of officeproducts</code> |
235
+ | <code>hp deskjet d4155 cartridges for ink jet printers quillcom yields up to 399 pagessized and priced for occasional printingoriginal hp ink a little less ink at very affordable price identifiers is 901d8j33an category of officeproducts</code> | <code>hp photosmart c4150 cartridges for ink jet printers quillcom yields up to 399 pagessized and priced for occasional printingoriginal hp ink a little less ink at very affordable price identifiers is 901d8j33an category of officeproducts</code> |
236
+ * Loss: [<code>CachedMultipleNegativesRankingLoss</code>](https://sbert.net/docs/package_reference/sentence_transformer/losses.html#cachedmultiplenegativesrankingloss) with these parameters:
237
+ ```json
238
+ {
239
+ "scale": 20.0,
240
+ "similarity_fct": "cos_sim"
241
+ }
242
+ ```
243
+
244
+ ### Training Hyperparameters
245
+ #### Non-Default Hyperparameters
246
+
247
+ - `eval_strategy`: steps
248
+ - `learning_rate`: 1e-05
249
+ - `num_train_epochs`: 2
250
+ - `warmup_ratio`: 0.1
251
+ - `fp16`: True
252
+ - `auto_find_batch_size`: True
253
+ - `batch_sampler`: no_duplicates
254
+
255
+ #### All Hyperparameters
256
+ <details><summary>Click to expand</summary>
257
+
258
+ - `overwrite_output_dir`: False
259
+ - `do_predict`: False
260
+ - `eval_strategy`: steps
261
+ - `prediction_loss_only`: True
262
+ - `per_device_train_batch_size`: 8
263
+ - `per_device_eval_batch_size`: 8
264
+ - `per_gpu_train_batch_size`: None
265
+ - `per_gpu_eval_batch_size`: None
266
+ - `gradient_accumulation_steps`: 1
267
+ - `eval_accumulation_steps`: None
268
+ - `torch_empty_cache_steps`: None
269
+ - `learning_rate`: 1e-05
270
+ - `weight_decay`: 0.0
271
+ - `adam_beta1`: 0.9
272
+ - `adam_beta2`: 0.999
273
+ - `adam_epsilon`: 1e-08
274
+ - `max_grad_norm`: 1.0
275
+ - `num_train_epochs`: 2
276
+ - `max_steps`: -1
277
+ - `lr_scheduler_type`: linear
278
+ - `lr_scheduler_kwargs`: {}
279
+ - `warmup_ratio`: 0.1
280
+ - `warmup_steps`: 0
281
+ - `log_level`: passive
282
+ - `log_level_replica`: warning
283
+ - `log_on_each_node`: True
284
+ - `logging_nan_inf_filter`: True
285
+ - `save_safetensors`: True
286
+ - `save_on_each_node`: False
287
+ - `save_only_model`: False
288
+ - `restore_callback_states_from_checkpoint`: False
289
+ - `no_cuda`: False
290
+ - `use_cpu`: False
291
+ - `use_mps_device`: False
292
+ - `seed`: 42
293
+ - `data_seed`: None
294
+ - `jit_mode_eval`: False
295
+ - `use_ipex`: False
296
+ - `bf16`: False
297
+ - `fp16`: True
298
+ - `fp16_opt_level`: O1
299
+ - `half_precision_backend`: auto
300
+ - `bf16_full_eval`: False
301
+ - `fp16_full_eval`: False
302
+ - `tf32`: None
303
+ - `local_rank`: 0
304
+ - `ddp_backend`: None
305
+ - `tpu_num_cores`: None
306
+ - `tpu_metrics_debug`: False
307
+ - `debug`: []
308
+ - `dataloader_drop_last`: False
309
+ - `dataloader_num_workers`: 0
310
+ - `dataloader_prefetch_factor`: None
311
+ - `past_index`: -1
312
+ - `disable_tqdm`: False
313
+ - `remove_unused_columns`: True
314
+ - `label_names`: None
315
+ - `load_best_model_at_end`: False
316
+ - `ignore_data_skip`: False
317
+ - `fsdp`: []
318
+ - `fsdp_min_num_params`: 0
319
+ - `fsdp_config`: {'min_num_params': 0, 'xla': False, 'xla_fsdp_v2': False, 'xla_fsdp_grad_ckpt': False}
320
+ - `fsdp_transformer_layer_cls_to_wrap`: None
321
+ - `accelerator_config`: {'split_batches': False, 'dispatch_batches': None, 'even_batches': True, 'use_seedable_sampler': True, 'non_blocking': False, 'gradient_accumulation_kwargs': None}
322
+ - `deepspeed`: None
323
+ - `label_smoothing_factor`: 0.0
324
+ - `optim`: adamw_torch
325
+ - `optim_args`: None
326
+ - `adafactor`: False
327
+ - `group_by_length`: False
328
+ - `length_column_name`: length
329
+ - `ddp_find_unused_parameters`: None
330
+ - `ddp_bucket_cap_mb`: None
331
+ - `ddp_broadcast_buffers`: False
332
+ - `dataloader_pin_memory`: True
333
+ - `dataloader_persistent_workers`: False
334
+ - `skip_memory_metrics`: True
335
+ - `use_legacy_prediction_loop`: False
336
+ - `push_to_hub`: False
337
+ - `resume_from_checkpoint`: None
338
+ - `hub_model_id`: None
339
+ - `hub_strategy`: every_save
340
+ - `hub_private_repo`: False
341
+ - `hub_always_push`: False
342
+ - `gradient_checkpointing`: False
343
+ - `gradient_checkpointing_kwargs`: None
344
+ - `include_inputs_for_metrics`: False
345
+ - `eval_do_concat_batches`: True
346
+ - `fp16_backend`: auto
347
+ - `push_to_hub_model_id`: None
348
+ - `push_to_hub_organization`: None
349
+ - `mp_parameters`:
350
+ - `auto_find_batch_size`: True
351
+ - `full_determinism`: False
352
+ - `torchdynamo`: None
353
+ - `ray_scope`: last
354
+ - `ddp_timeout`: 1800
355
+ - `torch_compile`: False
356
+ - `torch_compile_backend`: None
357
+ - `torch_compile_mode`: None
358
+ - `dispatch_batches`: None
359
+ - `split_batches`: None
360
+ - `include_tokens_per_second`: False
361
+ - `include_num_input_tokens_seen`: False
362
+ - `neftune_noise_alpha`: None
363
+ - `optim_target_modules`: None
364
+ - `batch_eval_metrics`: False
365
+ - `eval_on_start`: False
366
+ - `eval_use_gather_object`: False
367
+ - `batch_sampler`: no_duplicates
368
+ - `multi_dataset_batch_sampler`: proportional
369
+
370
+ </details>
371
+
372
+ ### Training Logs
373
+ | Epoch | Step | Training Loss | loss |
374
+ |:------:|:-----:|:-------------:|:------:|
375
+ | 0.1990 | 7000 | 0.0057 | 0.0026 |
376
+ | 0.3981 | 14000 | 0.0019 | 0.0018 |
377
+ | 0.5971 | 21000 | 0.0016 | 0.0012 |
378
+ | 0.7962 | 28000 | 0.001 | 0.0009 |
379
+ | 0.9952 | 35000 | 0.001 | 0.0009 |
380
+ | 1.1943 | 42000 | 0.0007 | 0.0008 |
381
+ | 1.3933 | 49000 | 0.0004 | 0.0009 |
382
+ | 1.5924 | 56000 | 0.0003 | 0.0009 |
383
+ | 1.7914 | 63000 | 0.0002 | 0.0008 |
384
+
385
+
386
+ ### Framework Versions
387
+ - Python: 3.10.13
388
+ - Sentence Transformers: 3.0.1
389
+ - Transformers: 4.44.0
390
+ - PyTorch: 2.2.1
391
+ - Accelerate: 0.33.0
392
+ - Datasets: 2.21.0
393
+ - Tokenizers: 0.19.1
394
+
395
+ ## Citation
396
+
397
+ ### BibTeX
398
+
399
+ #### Sentence Transformers
400
+ ```bibtex
401
+ @inproceedings{reimers-2019-sentence-bert,
402
+ title = "Sentence-BERT: Sentence Embeddings using Siamese BERT-Networks",
403
+ author = "Reimers, Nils and Gurevych, Iryna",
404
+ booktitle = "Proceedings of the 2019 Conference on Empirical Methods in Natural Language Processing",
405
+ month = "11",
406
+ year = "2019",
407
+ publisher = "Association for Computational Linguistics",
408
+ url = "https://arxiv.org/abs/1908.10084",
409
+ }
410
+ ```
411
+
412
+ #### CachedMultipleNegativesRankingLoss
413
+ ```bibtex
414
+ @misc{gao2021scaling,
415
+ title={Scaling Deep Contrastive Learning Batch Size under Memory Limited Setup},
416
+ author={Luyu Gao and Yunyi Zhang and Jiawei Han and Jamie Callan},
417
+ year={2021},
418
+ eprint={2101.06983},
419
+ archivePrefix={arXiv},
420
+ primaryClass={cs.LG}
421
+ }
422
+ ```
423
+
424
+ <!--
425
+ ## Glossary
426
+
427
+ *Clearly define terms in order to be accessible across audiences.*
428
+ -->
429
+
430
+ <!--
431
+ ## Model Card Authors
432
+
433
+ *Lists the people who create the model card, providing recognition and accountability for the detailed work that goes into its construction.*
434
+ -->
435
+
436
+ <!--
437
+ ## Model Card Contact
438
+
439
+ *Provides a way for people who have updates to the Model Card, suggestions, or questions, to contact the Model Card authors.*
440
+ -->
config.json ADDED
@@ -0,0 +1,44 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "Alibaba-NLP/gte-large-en-v1.5",
3
+ "architectures": [
4
+ "NewModel"
5
+ ],
6
+ "attention_probs_dropout_prob": 0.0,
7
+ "auto_map": {
8
+ "AutoConfig": "Alibaba-NLP/new-impl--configuration.NewConfig",
9
+ "AutoModel": "Alibaba-NLP/new-impl--modeling.NewModel",
10
+ "AutoModelForMaskedLM": "Alibaba-NLP/new-impl--modeling.NewForMaskedLM",
11
+ "AutoModelForMultipleChoice": "Alibaba-NLP/new-impl--modeling.NewForMultipleChoice",
12
+ "AutoModelForQuestionAnswering": "Alibaba-NLP/new-impl--modeling.NewForQuestionAnswering",
13
+ "AutoModelForSequenceClassification": "Alibaba-NLP/new-impl--modeling.NewForSequenceClassification",
14
+ "AutoModelForTokenClassification": "Alibaba-NLP/new-impl--modeling.NewForTokenClassification"
15
+ },
16
+ "classifier_dropout": null,
17
+ "hidden_act": "gelu",
18
+ "hidden_dropout_prob": 0.1,
19
+ "hidden_size": 1024,
20
+ "initializer_range": 0.02,
21
+ "intermediate_size": 4096,
22
+ "layer_norm_eps": 1e-12,
23
+ "layer_norm_type": "layer_norm",
24
+ "logn_attention_clip1": false,
25
+ "logn_attention_scale": false,
26
+ "max_position_embeddings": 8192,
27
+ "model_type": "new",
28
+ "num_attention_heads": 16,
29
+ "num_hidden_layers": 24,
30
+ "pack_qkv": true,
31
+ "pad_token_id": 0,
32
+ "position_embedding_type": "rope",
33
+ "rope_scaling": {
34
+ "factor": 2.0,
35
+ "type": "ntk"
36
+ },
37
+ "rope_theta": 160000,
38
+ "torch_dtype": "float32",
39
+ "transformers_version": "4.44.0",
40
+ "type_vocab_size": 2,
41
+ "unpad_inputs": false,
42
+ "use_memory_efficient_attention": false,
43
+ "vocab_size": 30528
44
+ }
config_sentence_transformers.json ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "__version__": {
3
+ "sentence_transformers": "3.0.1",
4
+ "transformers": "4.44.0",
5
+ "pytorch": "2.2.1"
6
+ },
7
+ "prompts": {},
8
+ "default_prompt_name": null,
9
+ "similarity_fn_name": null
10
+ }
model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:48c84c4b56f27002b3eb72ea1423f649a41c83c838b50c6cad9d1e153b01d2a2
3
+ size 1736585680
modules.json ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [
2
+ {
3
+ "idx": 0,
4
+ "name": "0",
5
+ "path": "",
6
+ "type": "sentence_transformers.models.Transformer"
7
+ },
8
+ {
9
+ "idx": 1,
10
+ "name": "1",
11
+ "path": "1_Pooling",
12
+ "type": "sentence_transformers.models.Pooling"
13
+ }
14
+ ]
optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:40f3b6a12c4a423fc3b883fa24730effad25a201003e4e7e0b0adf0e96a5ebf1
3
+ size 3473337082
rng_state.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:98d314b4e768bf1d786b31d1da3633581006026bfc6f127c8abba9773c9c894b
3
+ size 14244
scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:668de5733e9f925fc49f180dc3accbe81ff028c5c16a3a507a52543ed5a4bf2c
3
+ size 1064
sentence_bert_config.json ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ {
2
+ "max_seq_length": 8192,
3
+ "do_lower_case": false
4
+ }
special_tokens_map.json ADDED
@@ -0,0 +1,37 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "cls_token": {
3
+ "content": "[CLS]",
4
+ "lstrip": false,
5
+ "normalized": false,
6
+ "rstrip": false,
7
+ "single_word": false
8
+ },
9
+ "mask_token": {
10
+ "content": "[MASK]",
11
+ "lstrip": false,
12
+ "normalized": false,
13
+ "rstrip": false,
14
+ "single_word": false
15
+ },
16
+ "pad_token": {
17
+ "content": "[PAD]",
18
+ "lstrip": false,
19
+ "normalized": false,
20
+ "rstrip": false,
21
+ "single_word": false
22
+ },
23
+ "sep_token": {
24
+ "content": "[SEP]",
25
+ "lstrip": false,
26
+ "normalized": false,
27
+ "rstrip": false,
28
+ "single_word": false
29
+ },
30
+ "unk_token": {
31
+ "content": "[UNK]",
32
+ "lstrip": false,
33
+ "normalized": false,
34
+ "rstrip": false,
35
+ "single_word": false
36
+ }
37
+ }
tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
tokenizer_config.json ADDED
@@ -0,0 +1,62 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "added_tokens_decoder": {
3
+ "0": {
4
+ "content": "[PAD]",
5
+ "lstrip": false,
6
+ "normalized": false,
7
+ "rstrip": false,
8
+ "single_word": false,
9
+ "special": true
10
+ },
11
+ "100": {
12
+ "content": "[UNK]",
13
+ "lstrip": false,
14
+ "normalized": false,
15
+ "rstrip": false,
16
+ "single_word": false,
17
+ "special": true
18
+ },
19
+ "101": {
20
+ "content": "[CLS]",
21
+ "lstrip": false,
22
+ "normalized": false,
23
+ "rstrip": false,
24
+ "single_word": false,
25
+ "special": true
26
+ },
27
+ "102": {
28
+ "content": "[SEP]",
29
+ "lstrip": false,
30
+ "normalized": false,
31
+ "rstrip": false,
32
+ "single_word": false,
33
+ "special": true
34
+ },
35
+ "103": {
36
+ "content": "[MASK]",
37
+ "lstrip": false,
38
+ "normalized": false,
39
+ "rstrip": false,
40
+ "single_word": false,
41
+ "special": true
42
+ }
43
+ },
44
+ "clean_up_tokenization_spaces": true,
45
+ "cls_token": "[CLS]",
46
+ "do_lower_case": true,
47
+ "mask_token": "[MASK]",
48
+ "max_length": 8000,
49
+ "model_max_length": 8192,
50
+ "pad_to_multiple_of": null,
51
+ "pad_token": "[PAD]",
52
+ "pad_token_type_id": 0,
53
+ "padding_side": "right",
54
+ "sep_token": "[SEP]",
55
+ "stride": 0,
56
+ "strip_accents": null,
57
+ "tokenize_chinese_chars": true,
58
+ "tokenizer_class": "BertTokenizer",
59
+ "truncation_side": "right",
60
+ "truncation_strategy": "longest_first",
61
+ "unk_token": "[UNK]"
62
+ }
trainer_state.json ADDED
@@ -0,0 +1,168 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": null,
3
+ "best_model_checkpoint": null,
4
+ "epoch": 1.7914012738853504,
5
+ "eval_steps": 7000,
6
+ "global_step": 63000,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 0.19904458598726116,
13
+ "grad_norm": 0.15043310821056366,
14
+ "learning_rate": 9.94455501848166e-06,
15
+ "loss": 0.0057,
16
+ "step": 7000
17
+ },
18
+ {
19
+ "epoch": 0.19904458598726116,
20
+ "eval_loss": 0.002587760565802455,
21
+ "eval_runtime": 385.9016,
22
+ "eval_samples_per_second": 182.264,
23
+ "eval_steps_per_second": 22.783,
24
+ "step": 7000
25
+ },
26
+ {
27
+ "epoch": 0.3980891719745223,
28
+ "grad_norm": 0.0009809082839637995,
29
+ "learning_rate": 8.900666645603615e-06,
30
+ "loss": 0.0019,
31
+ "step": 14000
32
+ },
33
+ {
34
+ "epoch": 0.3980891719745223,
35
+ "eval_loss": 0.0018288933206349611,
36
+ "eval_runtime": 386.974,
37
+ "eval_samples_per_second": 181.759,
38
+ "eval_steps_per_second": 22.72,
39
+ "step": 14000
40
+ },
41
+ {
42
+ "epoch": 0.5971337579617835,
43
+ "grad_norm": 0.004145281855016947,
44
+ "learning_rate": 7.79517234842501e-06,
45
+ "loss": 0.0016,
46
+ "step": 21000
47
+ },
48
+ {
49
+ "epoch": 0.5971337579617835,
50
+ "eval_loss": 0.0012058253632858396,
51
+ "eval_runtime": 385.6472,
52
+ "eval_samples_per_second": 182.384,
53
+ "eval_steps_per_second": 22.798,
54
+ "step": 21000
55
+ },
56
+ {
57
+ "epoch": 0.7961783439490446,
58
+ "grad_norm": 0.0015833042562007904,
59
+ "learning_rate": 6.689678051246407e-06,
60
+ "loss": 0.001,
61
+ "step": 28000
62
+ },
63
+ {
64
+ "epoch": 0.7961783439490446,
65
+ "eval_loss": 0.0009336507064290345,
66
+ "eval_runtime": 386.8698,
67
+ "eval_samples_per_second": 181.808,
68
+ "eval_steps_per_second": 22.726,
69
+ "step": 28000
70
+ },
71
+ {
72
+ "epoch": 0.9952229299363057,
73
+ "grad_norm": 0.0003439185384195298,
74
+ "learning_rate": 5.5843417269596545e-06,
75
+ "loss": 0.001,
76
+ "step": 35000
77
+ },
78
+ {
79
+ "epoch": 0.9952229299363057,
80
+ "eval_loss": 0.0008668347145430744,
81
+ "eval_runtime": 386.7927,
82
+ "eval_samples_per_second": 181.844,
83
+ "eval_steps_per_second": 22.731,
84
+ "step": 35000
85
+ },
86
+ {
87
+ "epoch": 1.194267515923567,
88
+ "grad_norm": 0.0005341056967154145,
89
+ "learning_rate": 4.478847429781049e-06,
90
+ "loss": 0.0007,
91
+ "step": 42000
92
+ },
93
+ {
94
+ "epoch": 1.194267515923567,
95
+ "eval_loss": 0.0008429304580204189,
96
+ "eval_runtime": 387.0524,
97
+ "eval_samples_per_second": 181.722,
98
+ "eval_steps_per_second": 22.715,
99
+ "step": 42000
100
+ },
101
+ {
102
+ "epoch": 1.393312101910828,
103
+ "grad_norm": 0.00042841769754886627,
104
+ "learning_rate": 3.3735111054942974e-06,
105
+ "loss": 0.0004,
106
+ "step": 49000
107
+ },
108
+ {
109
+ "epoch": 1.393312101910828,
110
+ "eval_loss": 0.0009025875478982925,
111
+ "eval_runtime": 390.6459,
112
+ "eval_samples_per_second": 180.051,
113
+ "eval_steps_per_second": 22.506,
114
+ "step": 49000
115
+ },
116
+ {
117
+ "epoch": 1.5923566878980893,
118
+ "grad_norm": 0.0007971890736371279,
119
+ "learning_rate": 2.2678588354238415e-06,
120
+ "loss": 0.0003,
121
+ "step": 56000
122
+ },
123
+ {
124
+ "epoch": 1.5923566878980893,
125
+ "eval_loss": 0.0009143418865278363,
126
+ "eval_runtime": 388.6682,
127
+ "eval_samples_per_second": 180.967,
128
+ "eval_steps_per_second": 22.621,
129
+ "step": 56000
130
+ },
131
+ {
132
+ "epoch": 1.7914012738853504,
133
+ "grad_norm": 0.02023099735379219,
134
+ "learning_rate": 1.162522511137089e-06,
135
+ "loss": 0.0002,
136
+ "step": 63000
137
+ },
138
+ {
139
+ "epoch": 1.7914012738853504,
140
+ "eval_loss": 0.0008290820405818522,
141
+ "eval_runtime": 389.2129,
142
+ "eval_samples_per_second": 180.713,
143
+ "eval_steps_per_second": 22.589,
144
+ "step": 63000
145
+ }
146
+ ],
147
+ "logging_steps": 7000,
148
+ "max_steps": 70336,
149
+ "num_input_tokens_seen": 0,
150
+ "num_train_epochs": 2,
151
+ "save_steps": 7000,
152
+ "stateful_callbacks": {
153
+ "TrainerControl": {
154
+ "args": {
155
+ "should_epoch_stop": false,
156
+ "should_evaluate": false,
157
+ "should_log": false,
158
+ "should_save": true,
159
+ "should_training_stop": false
160
+ },
161
+ "attributes": {}
162
+ }
163
+ },
164
+ "total_flos": 0.0,
165
+ "train_batch_size": 8,
166
+ "trial_name": null,
167
+ "trial_params": null
168
+ }
training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cb08c5d98989717c95b2c6f128c0ede37adf47aaf8ad45515a15d140c41272c0
3
+ size 5368
vocab.txt ADDED
The diff for this file is too large to render. See raw diff