spl4shedEdu committed on
Commit
63f8616
·
verified ·
1 Parent(s): b336abd

Upload model checkpoint

Browse files
1_Pooling/config.json ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "word_embedding_dimension": 768,
3
+ "pooling_mode_cls_token": false,
4
+ "pooling_mode_mean_tokens": true,
5
+ "pooling_mode_max_tokens": false,
6
+ "pooling_mode_mean_sqrt_len_tokens": false,
7
+ "pooling_mode_weightedmean_tokens": false,
8
+ "pooling_mode_lasttoken": false,
9
+ "include_prompt": true
10
+ }
README.md ADDED
@@ -0,0 +1,451 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ base_model: sentence-transformers/all-mpnet-base-v2
3
+ datasets: []
4
+ language: []
5
+ library_name: sentence-transformers
6
+ pipeline_tag: sentence-similarity
7
+ tags:
8
+ - sentence-transformers
9
+ - sentence-similarity
10
+ - feature-extraction
11
+ - generated_from_trainer
12
+ - dataset_size:281342
13
+ - loss:CachedMultipleNegativesRankingLoss
14
+ widget:
15
+ - source_sentence: ez buy federal 556 nato xm855 62 grain green tip fmj us bulk 223
16
+ ammo for sale cheap bulk ammunition in fmj and jhp depotus american eagle 223556
17
+ fmj rebate 400 maximum per household case upcs accepted for mailin us identifiers
18
+ is xm855lpc120rebate30 category of toolsandhomeimprovement
19
+ sentences:
20
+ - ez buy federal 556 nato xm855 62 grain green tip fmj us bulk 223 ammo for sale
21
+ cheap bulk ammunition in fmj and jhp depotus american eagle 223556 fmj rebate
22
+ 400 maximum per household case upcs accepted for mailin us identifiers is xm855lpc120rebate30
23
+ category of toolsandhomeimprovement
24
+ - 3m cable for dlink 10gbe cx4 module demcb300cx 3m module dlink list retailers
25
+ identifiers is demcb300cx category of otherelectronics
26
+ - seat frame wiring harnessd 02082010 gb 2013 volkswagen golf china market electrics
27
+ harness 4doorright gb identifiers is 1k4971369f category of automotive
28
+ - source_sentence: epson t33xl photo black inkjet cartridge ink colours photo black
29
+ identifiers is 8715946600598 category of officeproducts key specifications are
30
+ attributes ink colours photo black volume 81ml newremanufactured new single or
31
+ multi colour cartridge compatibility compatible brand epson printers expression
32
+ premium xp530xp630xp635xp830 originalcompatible original manufacturer general
33
+ number t33614010 c13t33614012 model name 33xl physical form factor pack quantity
34
+ 1 pieces recycling information can i recycle it click here for details on how
35
+ to recycle
36
+ sentences:
37
+ - rear child seat support bolt 516x35 belts upholstery page 1 1994 bmw 325i base
38
+ sedan seats produced by genuine bmw identifiers is 72111922499boe category of
39
+ automotive
40
+ - epson claria 33xl ink cartridge photo black inkjet 400 page 1 blister pack c13t33614010
41
+ novatech inkjet 400 page 1 blister pack produced by epson identifiers is eps101979
42
+ category of officeproducts
43
+ - maglite xenon replacement lamps for 2cell aa flashlights 2packus the maglite xenon
44
+ replacement lamps for 2cell aa flashlights 2pack help keep your mini shining mini
45
+ not included are highintensity bulbs and come in a package of 2 convenience us
46
+ produced by maglite us identifiers is 100045339 category of toolsandhomeimprovement
47
+ - source_sentence: control arm front right lower 1993 bmw 325i base sedan suspension
48
+ shocks springs page 8 produced by genuine bmw identifiers is 31122339996boe category
49
+ of automotive
50
+ sentences:
51
+ - vehicle jump starter jumpncarry 660 note 1700 peak amps 425 cranking clore proformer
52
+ battery technology 46 2 awg welding cable leads industrialgrade clamps builtin
53
+ charger automatic charging voltmeter provides charge status of onboard battery
54
+ 12v dc outlet to power accessories 1991 bmw 325i base sedan charging system page
55
+ 2 note 1700 peak amps 425 cranking clore proformer battery technology 46 2 awg
56
+ welding cable leads industrialgrade clamps builtin charger automatic charging
57
+ voltmeter provides charge status of onboard battery 12v dc outlet to power accessories
58
+ produced by null identifiers is jnc660m1313 category of automotive
59
+ - hose clamp 1628 mm range 9 width screw type 1997 bmw 318is base coupe radiators
60
+ page 2 produced by norma identifiers is 64218367179m249 category of automotive
61
+ - control arm front right lower 1998 bmw 318ti base hatchback suspension shocks
62
+ springs page 6 produced by delphi identifiers is 31122339996m292 category of automotive
63
+ - source_sentence: sunvisor support bracket interior right sideus chevy parts sunvisor
64
+ interior this is the interior sunvisor support bracket for right side with screws
65
+ and a template passenger sideus identifiers is 986155r4753 category of automotive
66
+ sentences:
67
+ - chevrolet sunvisor support bracket interior right sideus chevy parts interior
68
+ sunvisors chevs of the 40sus this is the interior sunvisor support bracket for
69
+ right side with screws and a template passenger side 1947 1948 1949 1950 1951
70
+ 1952 1953 chevrolet trucks us identifiers is 986155r4753 category of automotive
71
+ - abbey round 30 wall mirror in frameless 791888045671 guildhall 8quote 1 light
72
+ sconce dutch goldantique sale home lighting fixtures lamps more online this stylish
73
+ silver wall mirror will introduce a modern feel to any room its sleek design makes
74
+ the versatile and distinct in frameless produced by cooper classics identifiers
75
+ is 4567upc791888045671 category of toolsandhomeimprovement
76
+ - 10700 series return shell 30w x 20d 2912h henna cherry hon107270xjj home office
77
+ desks page 601 furniture town 10700 series return shell 30w x 20d 29 12h henna
78
+ cherry produced by mydirectadvantage identifiers is hon107270xjj107270xjj category
79
+ of officeproducts
80
+ - source_sentence: paint sealant sonax profiline polymer net shield 75 ml aerosol
81
+ can 1994 bmw 318is base coupe miscellaneous page 24 note innovative surface protection
82
+ based on hybrid polymers protects the paintwork by means of a resistant network
83
+ made from organic and inorganic components can be applied quickly easily intensively
84
+ freshens up paint color produces silky smooth with an outstanding drip off effect
85
+ one 75 ml should complete average size car produced by sonax identifiers is 223000m941
86
+ category of automotive
87
+ sentences:
88
+ - paint sealant sonax profiline polymer net shield 75 ml aerosol can 1991 bmw 325i
89
+ base convertible miscellaneous page 23 note innovative surface protection based
90
+ on hybrid polymers protects the paintwork by means of a resistant network made
91
+ from organic and inorganic components can be applied quickly easily intensively
92
+ freshens up paint color produces silky smooth with an outstanding drip off effect
93
+ one 75 ml should complete average size car produced by sonax identifiers is 223000m941
94
+ category of automotive
95
+ - honeywell accessories for terminal cod99exmb12 honeywell cod871238012 honeywell
96
+ dolphin 99ex mobile base vehicle kit charging cradle rs232 universal mounting
97
+ bracket and 12v cigarette lighter power adapter produced by honeywell metrologic
98
+ identifiers is 99exmb12 category of computersandaccessories
99
+ - usb flash drives hard quillcom null identifiers is 901507043 category of computersandaccessories
100
+ ---
101
+
102
+ # SentenceTransformer based on sentence-transformers/all-mpnet-base-v2
103
+
104
+ This is a [sentence-transformers](https://www.SBERT.net) model finetuned from [sentence-transformers/all-mpnet-base-v2](https://huggingface.co/sentence-transformers/all-mpnet-base-v2). It maps sentences & paragraphs to a 768-dimensional dense vector space and can be used for semantic textual similarity, semantic search, paraphrase mining, text classification, clustering, and more.
105
+
106
+ ## Model Details
107
+
108
+ ### Model Description
109
+ - **Model Type:** Sentence Transformer
110
+ - **Base model:** [sentence-transformers/all-mpnet-base-v2](https://huggingface.co/sentence-transformers/all-mpnet-base-v2) <!-- at revision 84f2bcc00d77236f9e89c8a360a00fb1139bf47d -->
111
+ - **Maximum Sequence Length:** 384 tokens
112
+ - **Output Dimensionality:** 768 dimensions
113
+ - **Similarity Function:** Cosine Similarity
114
+ <!-- - **Training Dataset:** Unknown -->
115
+ <!-- - **Language:** Unknown -->
116
+ <!-- - **License:** Unknown -->
117
+
118
+ ### Model Sources
119
+
120
+ - **Documentation:** [Sentence Transformers Documentation](https://sbert.net)
121
+ - **Repository:** [Sentence Transformers on GitHub](https://github.com/UKPLab/sentence-transformers)
122
+ - **Hugging Face:** [Sentence Transformers on Hugging Face](https://huggingface.co/models?library=sentence-transformers)
123
+
124
+ ### Full Model Architecture
125
+
126
+ ```
127
+ SentenceTransformer(
128
+ (0): Transformer({'max_seq_length': 384, 'do_lower_case': False}) with Transformer model: MPNetModel
129
+ (1): Pooling({'word_embedding_dimension': 768, 'pooling_mode_cls_token': False, 'pooling_mode_mean_tokens': True, 'pooling_mode_max_tokens': False, 'pooling_mode_mean_sqrt_len_tokens': False, 'pooling_mode_weightedmean_tokens': False, 'pooling_mode_lasttoken': False, 'include_prompt': True})
130
+ (2): Normalize()
131
+ )
132
+ ```
133
+
134
+ ## Usage
135
+
136
+ ### Direct Usage (Sentence Transformers)
137
+
138
+ First install the Sentence Transformers library:
139
+
140
+ ```bash
141
+ pip install -U sentence-transformers
142
+ ```
143
+
144
+ Then you can load this model and run inference.
145
+ ```python
146
+ from sentence_transformers import SentenceTransformer
147
+
148
+ # Download from the 🤗 Hub
149
+ model = SentenceTransformer("sentence_transformers_model_id")
150
+ # Run inference
151
+ sentences = [
152
+ 'paint sealant sonax profiline polymer net shield 75 ml aerosol can 1994 bmw 318is base coupe miscellaneous page 24 note innovative surface protection based on hybrid polymers protects the paintwork by means of a resistant network made from organic and inorganic components can be applied quickly easily intensively freshens up paint color produces silky smooth with an outstanding drip off effect one 75 ml should complete average size car produced by sonax identifiers is 223000m941 category of automotive',
153
+ 'paint sealant sonax profiline polymer net shield 75 ml aerosol can 1991 bmw 325i base convertible miscellaneous page 23 note innovative surface protection based on hybrid polymers protects the paintwork by means of a resistant network made from organic and inorganic components can be applied quickly easily intensively freshens up paint color produces silky smooth with an outstanding drip off effect one 75 ml should complete average size car produced by sonax identifiers is 223000m941 category of automotive',
154
+ 'honeywell accessories for terminal cod99exmb12 honeywell cod871238012 honeywell dolphin 99ex mobile base vehicle kit charging cradle rs232 universal mounting bracket and 12v cigarette lighter power adapter produced by honeywell metrologic identifiers is 99exmb12 category of computersandaccessories',
155
+ ]
156
+ embeddings = model.encode(sentences)
157
+ print(embeddings.shape)
158
+ # [3, 768]
159
+
160
+ # Get the similarity scores for the embeddings
161
+ similarities = model.similarity(embeddings, embeddings)
162
+ print(similarities.shape)
163
+ # [3, 3]
164
+ ```
165
+
166
+ <!--
167
+ ### Direct Usage (Transformers)
168
+
169
+ <details><summary>Click to see the direct usage in Transformers</summary>
170
+
171
+ </details>
172
+ -->
173
+
174
+ <!--
175
+ ### Downstream Usage (Sentence Transformers)
176
+
177
+ You can finetune this model on your own dataset.
178
+
179
+ <details><summary>Click to expand</summary>
180
+
181
+ </details>
182
+ -->
183
+
184
+ <!--
185
+ ### Out-of-Scope Use
186
+
187
+ *List how the model may foreseeably be misused and address what users ought not to do with the model.*
188
+ -->
189
+
190
+ <!--
191
+ ## Bias, Risks and Limitations
192
+
193
+ *What are the known or foreseeable issues stemming from this model? You could also flag here known failure cases or weaknesses of the model.*
194
+ -->
195
+
196
+ <!--
197
+ ### Recommendations
198
+
199
+ *What are recommendations with respect to the foreseeable issues? For example, filtering explicit content.*
200
+ -->
201
+
202
+ ## Training Details
203
+
204
+ ### Training Dataset
205
+
206
+ #### Unnamed Dataset
207
+
208
+
209
+ * Size: 281,342 training samples
210
+ * Columns: <code>anchor</code> and <code>positive</code>
211
+ * Approximate statistics based on the first 1000 samples:
212
+ | | anchor | positive |
213
+ |:--------|:------------------------------------------------------------------------------------|:------------------------------------------------------------------------------------|
214
+ | type | string | string |
215
+ | details | <ul><li>min: 25 tokens</li><li>mean: 74.69 tokens</li><li>max: 384 tokens</li></ul> | <ul><li>min: 25 tokens</li><li>mean: 75.63 tokens</li><li>max: 384 tokens</li></ul> |
216
+ * Samples:
217
+ | anchor | positive |
218
+ |:------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|:----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
219
+ | <code>honeywell hand held products dolphin 99509951 series mobile computer usb cable 6 ft 18m 80000355e usb cable 6 ft 18m identifiers is 80000355e category of computersandaccessories</code> | <code>hand held usb cable 6 ft hand ft 80000355e scanner accessories cdwcom hand held products is the leading provider of imagebased data collection solutions for mobile wireless and transaction processing applications to end users throughout world by investing in hhp products its customers are able reduce costs improve service position their companies future growth identifiers is 26121604 category of computersandaccessories</code> |
220
+ | <code>intake boot air mass sensor to throttle housing 1995 bmw 318i base convertible intake system page 2 note from 0994 produced by oem identifiers is 13711247829m58 category of automotive</code> | <code>intake boot air mass sensor to throttle housing 1995 bmw 318i base convertible intake system page 2 produced by crp identifiers is 13711247829int category of automotive</code> |
221
+ | <code>blue sky panorama with transparent clouds vector image sky images over 150 000 vector blue sky panorama with transparent clouds vector background image identifiers is 15266707 category of officeproducts</code> | <code>blue sky panorama with transparent clouds vector image images within landscapes nature over 55 000 vector blue sky panorama with transparent clouds vector background image identifiers is 15266707 category of officeproducts</code> |
222
+ * Loss: [<code>CachedMultipleNegativesRankingLoss</code>](https://sbert.net/docs/package_reference/sentence_transformer/losses.html#cachedmultiplenegativesrankingloss) with these parameters:
223
+ ```json
224
+ {
225
+ "scale": 20.0,
226
+ "similarity_fct": "cos_sim"
227
+ }
228
+ ```
229
+
230
+ ### Evaluation Dataset
231
+
232
+ #### Unnamed Dataset
233
+
234
+
235
+ * Size: 70,336 evaluation samples
236
+ * Columns: <code>anchor</code> and <code>positive</code>
237
+ * Approximate statistics based on the first 1000 samples:
238
+ | | anchor | positive |
239
+ |:--------|:------------------------------------------------------------------------------------|:------------------------------------------------------------------------------------|
240
+ | type | string | string |
241
+ | details | <ul><li>min: 25 tokens</li><li>mean: 78.58 tokens</li><li>max: 384 tokens</li></ul> | <ul><li>min: 22 tokens</li><li>mean: 81.44 tokens</li><li>max: 384 tokens</li></ul> |
242
+ * Samples:
243
+ | anchor | positive |
244
+ |:----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|:------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
245
+ | <code>heater hose inlet from cylinder head to water valve 1997 bmw 318i base sedan heater system page 3 produced by genuine bmw identifiers is 64211394295boe category of automotive</code> | <code>heater hose inlet from cylinder head to water valve 1996 bmw 318i base convertible heater system page 3 produced by genuine bmw identifiers is 64211394295boe category of automotive</code> |
246
+ | <code>harris harris group inc group 1 full quote netdaniacom produced by source nasdaq identifiers is isinus4138331040 category of toolsandhomeimprovement</code> | <code>harris harris group inc 1 statistics netdaniacom group produced by source nasdaq identifiers is isinus4138331040 category of toolsandhomeimprovement</code> |
247
+ | <code>swiffer dusters with extendable handledusters plastic handle extends to 3 ft 1 per kit handledusters ft kitpag82074 buy online at janeice products identifiers is pag82074 category of toolsandhomeimprovement key specifications are weight per case std pkg quantity package one handle and three dusters description includes item cube 008276 upc code 037000447504 pack 00037000820741 length 092 width 022 height 042 0476</code> | <code>6 pack value bundle pag82074 dusters plastic handle extends to 3 ft 1 dusters per kitus feather page 5 the janitorial marketus now its easier than ever to get those hardtoreach places pivoting head can be adjusted and locked into place for cleaning angled surfaces such as ceiling fans cabinet corners baseboards refill dusters sold separately one handle three per box bristle material fiber color white plastic greenus produced by pag82074us identifiers is pag82074 category of toolsandhomeimprovement</code> |
248
+ * Loss: [<code>CachedMultipleNegativesRankingLoss</code>](https://sbert.net/docs/package_reference/sentence_transformer/losses.html#cachedmultiplenegativesrankingloss) with these parameters:
249
+ ```json
250
+ {
251
+ "scale": 20.0,
252
+ "similarity_fct": "cos_sim"
253
+ }
254
+ ```
255
+
256
+ ### Training Hyperparameters
257
+ #### Non-Default Hyperparameters
258
+
259
+ - `eval_strategy`: steps
260
+ - `learning_rate`: 1e-05
261
+ - `num_train_epochs`: 2
262
+ - `warmup_ratio`: 0.1
263
+ - `fp16`: True
264
+ - `auto_find_batch_size`: True
265
+ - `batch_sampler`: no_duplicates
266
+
267
+ #### All Hyperparameters
268
+ <details><summary>Click to expand</summary>
269
+
270
+ - `overwrite_output_dir`: False
271
+ - `do_predict`: False
272
+ - `eval_strategy`: steps
273
+ - `prediction_loss_only`: True
274
+ - `per_device_train_batch_size`: 8
275
+ - `per_device_eval_batch_size`: 8
276
+ - `per_gpu_train_batch_size`: None
277
+ - `per_gpu_eval_batch_size`: None
278
+ - `gradient_accumulation_steps`: 1
279
+ - `eval_accumulation_steps`: None
280
+ - `torch_empty_cache_steps`: None
281
+ - `learning_rate`: 1e-05
282
+ - `weight_decay`: 0.0
283
+ - `adam_beta1`: 0.9
284
+ - `adam_beta2`: 0.999
285
+ - `adam_epsilon`: 1e-08
286
+ - `max_grad_norm`: 1.0
287
+ - `num_train_epochs`: 2
288
+ - `max_steps`: -1
289
+ - `lr_scheduler_type`: linear
290
+ - `lr_scheduler_kwargs`: {}
291
+ - `warmup_ratio`: 0.1
292
+ - `warmup_steps`: 0
293
+ - `log_level`: passive
294
+ - `log_level_replica`: warning
295
+ - `log_on_each_node`: True
296
+ - `logging_nan_inf_filter`: True
297
+ - `save_safetensors`: True
298
+ - `save_on_each_node`: False
299
+ - `save_only_model`: False
300
+ - `restore_callback_states_from_checkpoint`: False
301
+ - `no_cuda`: False
302
+ - `use_cpu`: False
303
+ - `use_mps_device`: False
304
+ - `seed`: 42
305
+ - `data_seed`: None
306
+ - `jit_mode_eval`: False
307
+ - `use_ipex`: False
308
+ - `bf16`: False
309
+ - `fp16`: True
310
+ - `fp16_opt_level`: O1
311
+ - `half_precision_backend`: auto
312
+ - `bf16_full_eval`: False
313
+ - `fp16_full_eval`: False
314
+ - `tf32`: None
315
+ - `local_rank`: 0
316
+ - `ddp_backend`: None
317
+ - `tpu_num_cores`: None
318
+ - `tpu_metrics_debug`: False
319
+ - `debug`: []
320
+ - `dataloader_drop_last`: False
321
+ - `dataloader_num_workers`: 0
322
+ - `dataloader_prefetch_factor`: None
323
+ - `past_index`: -1
324
+ - `disable_tqdm`: False
325
+ - `remove_unused_columns`: True
326
+ - `label_names`: None
327
+ - `load_best_model_at_end`: False
328
+ - `ignore_data_skip`: False
329
+ - `fsdp`: []
330
+ - `fsdp_min_num_params`: 0
331
+ - `fsdp_config`: {'min_num_params': 0, 'xla': False, 'xla_fsdp_v2': False, 'xla_fsdp_grad_ckpt': False}
332
+ - `fsdp_transformer_layer_cls_to_wrap`: None
333
+ - `accelerator_config`: {'split_batches': False, 'dispatch_batches': None, 'even_batches': True, 'use_seedable_sampler': True, 'non_blocking': False, 'gradient_accumulation_kwargs': None}
334
+ - `deepspeed`: None
335
+ - `label_smoothing_factor`: 0.0
336
+ - `optim`: adamw_torch
337
+ - `optim_args`: None
338
+ - `adafactor`: False
339
+ - `group_by_length`: False
340
+ - `length_column_name`: length
341
+ - `ddp_find_unused_parameters`: None
342
+ - `ddp_bucket_cap_mb`: None
343
+ - `ddp_broadcast_buffers`: False
344
+ - `dataloader_pin_memory`: True
345
+ - `dataloader_persistent_workers`: False
346
+ - `skip_memory_metrics`: True
347
+ - `use_legacy_prediction_loop`: False
348
+ - `push_to_hub`: False
349
+ - `resume_from_checkpoint`: None
350
+ - `hub_model_id`: None
351
+ - `hub_strategy`: every_save
352
+ - `hub_private_repo`: False
353
+ - `hub_always_push`: False
354
+ - `gradient_checkpointing`: False
355
+ - `gradient_checkpointing_kwargs`: None
356
+ - `include_inputs_for_metrics`: False
357
+ - `eval_do_concat_batches`: True
358
+ - `fp16_backend`: auto
359
+ - `push_to_hub_model_id`: None
360
+ - `push_to_hub_organization`: None
361
+ - `mp_parameters`:
362
+ - `auto_find_batch_size`: True
363
+ - `full_determinism`: False
364
+ - `torchdynamo`: None
365
+ - `ray_scope`: last
366
+ - `ddp_timeout`: 1800
367
+ - `torch_compile`: False
368
+ - `torch_compile_backend`: None
369
+ - `torch_compile_mode`: None
370
+ - `dispatch_batches`: None
371
+ - `split_batches`: None
372
+ - `include_tokens_per_second`: False
373
+ - `include_num_input_tokens_seen`: False
374
+ - `neftune_noise_alpha`: None
375
+ - `optim_target_modules`: None
376
+ - `batch_eval_metrics`: False
377
+ - `eval_on_start`: False
378
+ - `eval_use_gather_object`: False
379
+ - `batch_sampler`: no_duplicates
380
+ - `multi_dataset_batch_sampler`: proportional
381
+
382
+ </details>
383
+
384
+ ### Training Logs
385
+ | Epoch | Step | Training Loss | Validation Loss |
386
+ |:------:|:-----:|:-------------:|:------:|
387
+ | 0.1990 | 7000 | 0.0083 | 0.0029 |
388
+ | 0.3981 | 14000 | 0.0026 | 0.0019 |
389
+ | 0.5971 | 21000 | 0.0015 | 0.0014 |
390
+ | 0.7962 | 28000 | 0.0013 | 0.0011 |
391
+ | 0.9952 | 35000 | 0.0013 | 0.0010 |
392
+ | 1.1943 | 42000 | 0.0008 | 0.0010 |
393
+ | 1.3933 | 49000 | 0.0005 | 0.0009 |
394
+ | 1.5924 | 56000 | 0.0003 | 0.0009 |
395
+
396
+
397
+ ### Framework Versions
398
+ - Python: 3.10.13
399
+ - Sentence Transformers: 3.0.1
400
+ - Transformers: 4.44.0
401
+ - PyTorch: 2.2.1
402
+ - Accelerate: 0.33.0
403
+ - Datasets: 2.21.0
404
+ - Tokenizers: 0.19.1
405
+
406
+ ## Citation
407
+
408
+ ### BibTeX
409
+
410
+ #### Sentence Transformers
411
+ ```bibtex
412
+ @inproceedings{reimers-2019-sentence-bert,
413
+ title = "Sentence-BERT: Sentence Embeddings using Siamese BERT-Networks",
414
+ author = "Reimers, Nils and Gurevych, Iryna",
415
+ booktitle = "Proceedings of the 2019 Conference on Empirical Methods in Natural Language Processing",
416
+ month = "11",
417
+ year = "2019",
418
+ publisher = "Association for Computational Linguistics",
419
+ url = "https://arxiv.org/abs/1908.10084",
420
+ }
421
+ ```
422
+
423
+ #### CachedMultipleNegativesRankingLoss
424
+ ```bibtex
425
+ @misc{gao2021scaling,
426
+ title={Scaling Deep Contrastive Learning Batch Size under Memory Limited Setup},
427
+ author={Luyu Gao and Yunyi Zhang and Jiawei Han and Jamie Callan},
428
+ year={2021},
429
+ eprint={2101.06983},
430
+ archivePrefix={arXiv},
431
+ primaryClass={cs.LG}
432
+ }
433
+ ```
434
+
435
+ <!--
436
+ ## Glossary
437
+
438
+ *Clearly define terms in order to be accessible across audiences.*
439
+ -->
440
+
441
+ <!--
442
+ ## Model Card Authors
443
+
444
+ *Lists the people who create the model card, providing recognition and accountability for the detailed work that goes into its construction.*
445
+ -->
446
+
447
+ <!--
448
+ ## Model Card Contact
449
+
450
+ *Provides a way for people who have updates to the Model Card, suggestions, or questions, to contact the Model Card authors.*
451
+ -->
config.json ADDED
@@ -0,0 +1,24 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "sentence-transformers/all-mpnet-base-v2",
3
+ "architectures": [
4
+ "MPNetModel"
5
+ ],
6
+ "attention_probs_dropout_prob": 0.1,
7
+ "bos_token_id": 0,
8
+ "eos_token_id": 2,
9
+ "hidden_act": "gelu",
10
+ "hidden_dropout_prob": 0.1,
11
+ "hidden_size": 768,
12
+ "initializer_range": 0.02,
13
+ "intermediate_size": 3072,
14
+ "layer_norm_eps": 1e-05,
15
+ "max_position_embeddings": 514,
16
+ "model_type": "mpnet",
17
+ "num_attention_heads": 12,
18
+ "num_hidden_layers": 12,
19
+ "pad_token_id": 1,
20
+ "relative_attention_num_buckets": 32,
21
+ "torch_dtype": "float32",
22
+ "transformers_version": "4.44.0",
23
+ "vocab_size": 30527
24
+ }
config_sentence_transformers.json ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "__version__": {
3
+ "sentence_transformers": "3.0.1",
4
+ "transformers": "4.44.0",
5
+ "pytorch": "2.2.1"
6
+ },
7
+ "prompts": {},
8
+ "default_prompt_name": null,
9
+ "similarity_fn_name": null
10
+ }
model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:87e266683916753caf604b1813d347ffac0dc8a34244ee6c1295f3e0b45ce284
3
+ size 437967672
modules.json ADDED
@@ -0,0 +1,20 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [
2
+ {
3
+ "idx": 0,
4
+ "name": "0",
5
+ "path": "",
6
+ "type": "sentence_transformers.models.Transformer"
7
+ },
8
+ {
9
+ "idx": 1,
10
+ "name": "1",
11
+ "path": "1_Pooling",
12
+ "type": "sentence_transformers.models.Pooling"
13
+ },
14
+ {
15
+ "idx": 2,
16
+ "name": "2",
17
+ "path": "2_Normalize",
18
+ "type": "sentence_transformers.models.Normalize"
19
+ }
20
+ ]
optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b69efe25866431df1b62d40fbe38c1105576e656362fc7f5dce862455dfa03e7
3
+ size 871331770
rng_state.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2cd293246b360c89a52808a6915adce6376258887785b1c284bf376ba6238405
3
+ size 14244
scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:801539f0e554cac782b1d9195dfe01948924cc3e8dad48d60f1f1dd6dc515001
3
+ size 1064
sentence_bert_config.json ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ {
2
+ "max_seq_length": 384,
3
+ "do_lower_case": false
4
+ }
special_tokens_map.json ADDED
@@ -0,0 +1,51 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "bos_token": {
3
+ "content": "<s>",
4
+ "lstrip": false,
5
+ "normalized": false,
6
+ "rstrip": false,
7
+ "single_word": false
8
+ },
9
+ "cls_token": {
10
+ "content": "<s>",
11
+ "lstrip": false,
12
+ "normalized": false,
13
+ "rstrip": false,
14
+ "single_word": false
15
+ },
16
+ "eos_token": {
17
+ "content": "</s>",
18
+ "lstrip": false,
19
+ "normalized": false,
20
+ "rstrip": false,
21
+ "single_word": false
22
+ },
23
+ "mask_token": {
24
+ "content": "<mask>",
25
+ "lstrip": true,
26
+ "normalized": false,
27
+ "rstrip": false,
28
+ "single_word": false
29
+ },
30
+ "pad_token": {
31
+ "content": "<pad>",
32
+ "lstrip": false,
33
+ "normalized": false,
34
+ "rstrip": false,
35
+ "single_word": false
36
+ },
37
+ "sep_token": {
38
+ "content": "</s>",
39
+ "lstrip": false,
40
+ "normalized": false,
41
+ "rstrip": false,
42
+ "single_word": false
43
+ },
44
+ "unk_token": {
45
+ "content": "[UNK]",
46
+ "lstrip": false,
47
+ "normalized": false,
48
+ "rstrip": false,
49
+ "single_word": false
50
+ }
51
+ }
tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
tokenizer_config.json ADDED
@@ -0,0 +1,72 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "added_tokens_decoder": {
3
+ "0": {
4
+ "content": "<s>",
5
+ "lstrip": false,
6
+ "normalized": false,
7
+ "rstrip": false,
8
+ "single_word": false,
9
+ "special": true
10
+ },
11
+ "1": {
12
+ "content": "<pad>",
13
+ "lstrip": false,
14
+ "normalized": false,
15
+ "rstrip": false,
16
+ "single_word": false,
17
+ "special": true
18
+ },
19
+ "2": {
20
+ "content": "</s>",
21
+ "lstrip": false,
22
+ "normalized": false,
23
+ "rstrip": false,
24
+ "single_word": false,
25
+ "special": true
26
+ },
27
+ "3": {
28
+ "content": "<unk>",
29
+ "lstrip": false,
30
+ "normalized": true,
31
+ "rstrip": false,
32
+ "single_word": false,
33
+ "special": true
34
+ },
35
+ "104": {
36
+ "content": "[UNK]",
37
+ "lstrip": false,
38
+ "normalized": false,
39
+ "rstrip": false,
40
+ "single_word": false,
41
+ "special": true
42
+ },
43
+ "30526": {
44
+ "content": "<mask>",
45
+ "lstrip": true,
46
+ "normalized": false,
47
+ "rstrip": false,
48
+ "single_word": false,
49
+ "special": true
50
+ }
51
+ },
52
+ "bos_token": "<s>",
53
+ "clean_up_tokenization_spaces": true,
54
+ "cls_token": "<s>",
55
+ "do_lower_case": true,
56
+ "eos_token": "</s>",
57
+ "mask_token": "<mask>",
58
+ "max_length": 128,
59
+ "model_max_length": 384,
60
+ "pad_to_multiple_of": null,
61
+ "pad_token": "<pad>",
62
+ "pad_token_type_id": 0,
63
+ "padding_side": "right",
64
+ "sep_token": "</s>",
65
+ "stride": 0,
66
+ "strip_accents": null,
67
+ "tokenize_chinese_chars": true,
68
+ "tokenizer_class": "MPNetTokenizer",
69
+ "truncation_side": "right",
70
+ "truncation_strategy": "longest_first",
71
+ "unk_token": "[UNK]"
72
+ }
trainer_state.json ADDED
@@ -0,0 +1,153 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": null,
3
+ "best_model_checkpoint": null,
4
+ "epoch": 1.5923566878980893,
5
+ "eval_steps": 7000,
6
+ "global_step": 56000,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 0.19904458598726116,
13
+ "grad_norm": 0.2248842865228653,
14
+ "learning_rate": 9.947398350867217e-06,
15
+ "loss": 0.0083,
16
+ "step": 7000
17
+ },
18
+ {
19
+ "epoch": 0.19904458598726116,
20
+ "eval_loss": 0.002906730631366372,
21
+ "eval_runtime": 281.988,
22
+ "eval_samples_per_second": 249.429,
23
+ "eval_steps_per_second": 31.179,
24
+ "step": 7000
25
+ },
26
+ {
27
+ "epoch": 0.3980891719745223,
28
+ "grad_norm": 0.0002081295824609697,
29
+ "learning_rate": 8.900350699819912e-06,
30
+ "loss": 0.0026,
31
+ "step": 14000
32
+ },
33
+ {
34
+ "epoch": 0.3980891719745223,
35
+ "eval_loss": 0.0018650980200618505,
36
+ "eval_runtime": 284.0569,
37
+ "eval_samples_per_second": 247.612,
38
+ "eval_steps_per_second": 30.952,
39
+ "step": 14000
40
+ },
41
+ {
42
+ "epoch": 0.5971337579617835,
43
+ "grad_norm": 0.010839835740625858,
44
+ "learning_rate": 7.795014375533159e-06,
45
+ "loss": 0.0015,
46
+ "step": 21000
47
+ },
48
+ {
49
+ "epoch": 0.5971337579617835,
50
+ "eval_loss": 0.001360387192107737,
51
+ "eval_runtime": 285.5315,
52
+ "eval_samples_per_second": 246.334,
53
+ "eval_steps_per_second": 30.792,
54
+ "step": 21000
55
+ },
56
+ {
57
+ "epoch": 0.7961783439490446,
58
+ "grad_norm": 0.00013278079859446734,
59
+ "learning_rate": 6.689836024138259e-06,
60
+ "loss": 0.0013,
61
+ "step": 28000
62
+ },
63
+ {
64
+ "epoch": 0.7961783439490446,
65
+ "eval_loss": 0.0011233221739530563,
66
+ "eval_runtime": 285.4742,
67
+ "eval_samples_per_second": 246.383,
68
+ "eval_steps_per_second": 30.798,
69
+ "step": 28000
70
+ },
71
+ {
72
+ "epoch": 0.9952229299363057,
73
+ "grad_norm": 0.00014249606465455145,
74
+ "learning_rate": 5.5841837540678024e-06,
75
+ "loss": 0.0013,
76
+ "step": 35000
77
+ },
78
+ {
79
+ "epoch": 0.9952229299363057,
80
+ "eval_loss": 0.0010357595747336745,
81
+ "eval_runtime": 286.0827,
82
+ "eval_samples_per_second": 245.859,
83
+ "eval_steps_per_second": 30.732,
84
+ "step": 35000
85
+ },
86
+ {
87
+ "epoch": 1.194267515923567,
88
+ "grad_norm": 0.0002924288564827293,
89
+ "learning_rate": 4.478689456889198e-06,
90
+ "loss": 0.0008,
91
+ "step": 42000
92
+ },
93
+ {
94
+ "epoch": 1.194267515923567,
95
+ "eval_loss": 0.0010089210700243711,
96
+ "eval_runtime": 282.9155,
97
+ "eval_samples_per_second": 248.611,
98
+ "eval_steps_per_second": 31.076,
99
+ "step": 42000
100
+ },
101
+ {
102
+ "epoch": 1.393312101910828,
103
+ "grad_norm": 0.00017198333807755262,
104
+ "learning_rate": 3.3733531326024454e-06,
105
+ "loss": 0.0005,
106
+ "step": 49000
107
+ },
108
+ {
109
+ "epoch": 1.393312101910828,
110
+ "eval_loss": 0.0009262111852876842,
111
+ "eval_runtime": 285.6294,
112
+ "eval_samples_per_second": 246.249,
113
+ "eval_steps_per_second": 30.781,
114
+ "step": 49000
115
+ },
116
+ {
117
+ "epoch": 1.5923566878980893,
118
+ "grad_norm": 2.850917553587351e-05,
119
+ "learning_rate": 2.2678588354238415e-06,
120
+ "loss": 0.0003,
121
+ "step": 56000
122
+ },
123
+ {
124
+ "epoch": 1.5923566878980893,
125
+ "eval_loss": 0.0008692654664628208,
126
+ "eval_runtime": 281.4473,
127
+ "eval_samples_per_second": 249.908,
128
+ "eval_steps_per_second": 31.239,
129
+ "step": 56000
130
+ }
131
+ ],
132
+ "logging_steps": 7000,
133
+ "max_steps": 70336,
134
+ "num_input_tokens_seen": 0,
135
+ "num_train_epochs": 2,
136
+ "save_steps": 7000,
137
+ "stateful_callbacks": {
138
+ "TrainerControl": {
139
+ "args": {
140
+ "should_epoch_stop": false,
141
+ "should_evaluate": false,
142
+ "should_log": false,
143
+ "should_save": true,
144
+ "should_training_stop": false
145
+ },
146
+ "attributes": {}
147
+ }
148
+ },
149
+ "total_flos": 0.0,
150
+ "train_batch_size": 8,
151
+ "trial_name": null,
152
+ "trial_params": null
153
+ }
training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ebd26e3916d1afe3a7febaec075216749fc2d7d7b776a249951c32c8b626f07c
3
+ size 5368
vocab.txt ADDED
The diff for this file is too large to render. See raw diff