spl4shedEdu committed on
Commit
246f80f
·
verified ·
1 Parent(s): e3c70db

Upload model checkpoint

Browse files
1_Pooling/config.json ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "word_embedding_dimension": 768,
3
+ "pooling_mode_cls_token": false,
4
+ "pooling_mode_mean_tokens": true,
5
+ "pooling_mode_max_tokens": false,
6
+ "pooling_mode_mean_sqrt_len_tokens": false,
7
+ "pooling_mode_weightedmean_tokens": false,
8
+ "pooling_mode_lasttoken": false,
9
+ "include_prompt": true
10
+ }
README.md ADDED
@@ -0,0 +1,466 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ base_model: sentence-transformers/all-mpnet-base-v2
3
+ datasets: []
4
+ language: []
5
+ library_name: sentence-transformers
6
+ pipeline_tag: sentence-similarity
7
+ tags:
8
+ - sentence-transformers
9
+ - sentence-similarity
10
+ - feature-extraction
11
+ - generated_from_trainer
12
+ - dataset_size:281362
13
+ - loss:CachedMultipleNegativesRankingLoss
14
+ widget:
15
+ - source_sentence: steel lock washer 8 x 144 2 mm zinc plated split 2004 bmw 325ci
16
+ base convertible miscellaneous hardware page 3 auveco 17397m769 automotive
17
+ sentences:
18
+ - steel lock washer 8 x 144 2 mm zinc plated split 1993 bmw 318i base sedan miscellaneous
19
+ hardware page 3 auveco 17397m769 automotive
20
+ - generac protector rg03624ansx standby generators liquidcooled reviews ratings
21
+ product discontinued discontinued electric directcom 696471617450 toolsandhomeimprovement
22
+ - drive belt tensioner water pumpalternator 1994 bmw 325i base convertible charging
23
+ system battery page 6 note shock type hydraulic ina 11281717188m40 automotive
24
+ - source_sentence: nokya hyper white front turn signal light bulbs 2010 toyota camry
25
+ please double check your bulbs to make sure we have the right replacement bulb
26
+ listed so there is arguably even an added benefit of increased safety tooplease
27
+ note then you almost have change corner lights too avoid ruining benefits new
28
+ headlights give look car also signal in style with these nokya hyper white front
29
+ turn signal bulbs instead ugly stock orange 1010 camry came while try be as accurate
30
+ possible our listings custom front definitely stand out more compared other could
31
+ whole assembly a set in case where are already changing headlight color nok52022pcs
32
+ automotive
33
+ sentences:
34
+ - datalogic accessories for readers codbc9180433 datalogic codstdp090 datalogic
35
+ base stationcharger ethernet datalogic bc9180433 computersandaccessories
36
+ - nokya hyper white front turn signal light bulbs 2010 toyota camry please double
37
+ check your bulbs to make sure we have the right replacement bulb listed so there
38
+ is arguably even an added benefit of increased safety tooplease note then you
39
+ almost have change corner lights too avoid ruining benefits new headlights give
40
+ look car also signal in style with these nokya hyper white front turn signal bulbs
41
+ instead ugly stock orange 1010 camry came while try be as accurate possible our
42
+ listings custom front definitely stand out more compared other could whole assembly
43
+ a set in case where are already changing headlight color nok52022pcs automotive
44
+ - 39400001 axor citterio wall mounted bath tub filler faucetnohtin 39034821 bathroom
45
+ faucet tall and handle brushed sale appliance specials and replacement parts axor
46
+ citterio revives the opulence of water and redefines the purity of space each
47
+ arch angle and line weds clarity and harmony evoking timeless classics that are
48
+ mysterious yet somehow familiar discover a new form of luxury with axor citterio
49
+ axor 232848id39400001 toolsandhomeimprovement
50
+ - source_sentence: canon pixus 865r cartridges for ink jet printers quillcom null
51
+ 901tgbci6bkclo officeproducts
52
+ sentences:
53
+ - smart racing products smartcamber digital camber gauge 2003 bmw 325ci base convertible
54
+ suspension upgrades performance page 7 pel1850070smrt automotive valving option
55
+ street comfort front spring 180mm 8kg rear spring 135mm 10kg front pillowball
56
+ pillowball w camber plates rear pillowball n1 basic w top plates no camber plates
57
+ valving option street sport front spring 180mm 8kg rear spring 135mm 10kg front
58
+ pillowball pillowball w camber plates rear pillowball basic w top plates no camber
59
+ plates valving option track race front spring 180mm 10kg rear spring 140mm 10kg
60
+ front pillowball pillowball w camber plates rear pillowball basic w top plates
61
+ no camber plates
62
+ - datalogic cable for readers cod90a051903 datalogic cod90a051330 datalogic cable
63
+ cab413 usb straight ibm pos mode datalogic 90a051903 computersandaccessories
64
+ - canon pixus 865r cartridges for ink jet printers quillcom null 901tgbci6bkclo
65
+ officeproducts
66
+ - source_sentence: headlamp restoration kit sonax 2002 bmw 325i base wagon lights
67
+ and lenses page 7 note removes yellowing and haze of plastic headlight lenses
68
+ restoring likenew clarity one kit restores four headlights simple three step process
69
+ requires no polishing machine step one use the circular sanding pad to gently
70
+ remove stubborn headlight hazing step two use the abrasive polish and application
71
+ pad to gently remove sanding marks step three use the towelette to apply a uv
72
+ protective coating to maintain headlight clarity contains qty 1 75 ml polish 4
73
+ sanding discs 5000 grit 2 application sponges 4 polishing cloths 2 moist cloths
74
+ with sealant sonax 405941m941 automotive
75
+ sentences:
76
+ - headlamp restoration kit sonax 1976 bmw 30si base sedan lights and lenses page
77
+ 2 note removes yellowing and haze of plastic headlight lenses restoring likenew
78
+ clarity one kit restores four headlights simple three step process requires no
79
+ polishing machine step one use the circular sanding pad to gently remove stubborn
80
+ headlight hazing step two use the abrasive polish and application pad to gently
81
+ remove sanding marks step three use the towelette to apply a uv protective coating
82
+ to maintain headlight clarity contains qty 1 75 ml polish 4 sanding discs 5000
83
+ grit 2 application sponges 4 polishing cloths 2 moist cloths with sealant sonax
84
+ 405941m941 automotive
85
+ - philips ultinon led lighting 2122w 43mm festoon white 1 piece 1996 bmw 318i base
86
+ convertible lights and lenses page 3 phi2122ulwx11 automotive
87
+ - canon pixma mx850 cartridges for ink jet printers quillcom trust genuine canon
88
+ cli8bk ink cartridges to provide outstanding print quality for all your important
89
+ photos and documentsunlike bargain replacement inks original canon cli8bk ink
90
+ cartridges are designed specifically to work with canon printers for exceptional
91
+ reliability and performancehave full photolithography inkjet nozzle engineering
92
+ 901cli8bk officeproducts
93
+ - source_sentence: phone cable flat 4 wire solid silver 1000ft 26awg wire solid 1000ft
94
+ phone cable flat 4 wire solid silver 1000ft 26awg allows you to connect your telephones
95
+ faxes answering machines and most modems perfect for all your custom installation
96
+ projects 1000ft roll bulk phone cable flat cable silver color 4 conductor 26 awg
97
+ solid copper ul listed 815239013642 otherelectronics
98
+ sentences:
99
+ - phone cable flat 4 wire solid silver 1000ft 26awg wire solid 1000ft phone cable
100
+ flat 4 wire solid silver 1000ft 26awg allows you to connect your telephones faxes
101
+ answering machines and most modems perfect for all your custom installation projects
102
+ 1000ft roll bulk phone cable flat cable silver color 4 conductor 26 awg solid
103
+ copper ul listed 815239013642 otherelectronics
104
+ - soul black gb 2013 audi a4 allroad quattro canada market body middle armrest front
105
+ pr6e3gb fz period 1111 gb 8k0864207jtq8 automotive
106
+ - flashlight streamlight stinger led 1970 bmw 1602 base coupe tools page 8 note
107
+ compact and extremely powerful with 3 microprocessor controlled intensity modes
108
+ strobe mode and the latest in power led technology 6000 series machined aircraft
109
+ aluminum with nonslip rubberized comfort grip with antiroll rubber ring unbreakable
110
+ polycarbonate lens with scratchresistant coating oring sealed c4 led technology
111
+ impervious to shock with a 50000 hour lifetime includes qty 2 3cell 36 volt nicd
112
+ subc battery rechargeable upto 1000 times 1 piggy back chargerholder 1 120v ac
113
+ charge cord 1 12v dc charge cord 841 inch length 162 inch major diameter 117 inch
114
+ body diameter light output 350 lumens on high 175 lumens on medium 85 lumens on
115
+ low streamlight blue 552480010m1272 toolsandhomeimprovement
116
+ ---
117
+
118
+ # SentenceTransformer based on sentence-transformers/all-mpnet-base-v2
119
+
120
+ This is a [sentence-transformers](https://www.SBERT.net) model finetuned from [sentence-transformers/all-mpnet-base-v2](https://huggingface.co/sentence-transformers/all-mpnet-base-v2). It maps sentences & paragraphs to a 768-dimensional dense vector space and can be used for semantic textual similarity, semantic search, paraphrase mining, text classification, clustering, and more.
121
+
122
+ ## Model Details
123
+
124
+ ### Model Description
125
+ - **Model Type:** Sentence Transformer
126
+ - **Base model:** [sentence-transformers/all-mpnet-base-v2](https://huggingface.co/sentence-transformers/all-mpnet-base-v2) <!-- at revision 84f2bcc00d77236f9e89c8a360a00fb1139bf47d -->
127
+ - **Maximum Sequence Length:** 384 tokens
128
+ - **Output Dimensionality:** 768 dimensions
129
+ - **Similarity Function:** Cosine Similarity
130
+ <!-- - **Training Dataset:** Unknown -->
131
+ <!-- - **Language:** Unknown -->
132
+ <!-- - **License:** Unknown -->
133
+
134
+ ### Model Sources
135
+
136
+ - **Documentation:** [Sentence Transformers Documentation](https://sbert.net)
137
+ - **Repository:** [Sentence Transformers on GitHub](https://github.com/UKPLab/sentence-transformers)
138
+ - **Hugging Face:** [Sentence Transformers on Hugging Face](https://huggingface.co/models?library=sentence-transformers)
139
+
140
+ ### Full Model Architecture
141
+
142
+ ```
143
+ SentenceTransformer(
144
+ (0): Transformer({'max_seq_length': 384, 'do_lower_case': False}) with Transformer model: MPNetModel
145
+ (1): Pooling({'word_embedding_dimension': 768, 'pooling_mode_cls_token': False, 'pooling_mode_mean_tokens': True, 'pooling_mode_max_tokens': False, 'pooling_mode_mean_sqrt_len_tokens': False, 'pooling_mode_weightedmean_tokens': False, 'pooling_mode_lasttoken': False, 'include_prompt': True})
146
+ (2): Normalize()
147
+ )
148
+ ```
149
+
150
+ ## Usage
151
+
152
+ ### Direct Usage (Sentence Transformers)
153
+
154
+ First install the Sentence Transformers library:
155
+
156
+ ```bash
157
+ pip install -U sentence-transformers
158
+ ```
159
+
160
+ Then you can load this model and run inference.
161
+ ```python
162
+ from sentence_transformers import SentenceTransformer
163
+
164
+ # Download from the 🤗 Hub
165
+ model = SentenceTransformer("sentence_transformers_model_id")
166
+ # Run inference
167
+ sentences = [
168
+ 'phone cable flat 4 wire solid silver 1000ft 26awg wire solid 1000ft phone cable flat 4 wire solid silver 1000ft 26awg allows you to connect your telephones faxes answering machines and most modems perfect for all your custom installation projects 1000ft roll bulk phone cable flat cable silver color 4 conductor 26 awg solid copper ul listed 815239013642 otherelectronics',
169
+ 'phone cable flat 4 wire solid silver 1000ft 26awg wire solid 1000ft phone cable flat 4 wire solid silver 1000ft 26awg allows you to connect your telephones faxes answering machines and most modems perfect for all your custom installation projects 1000ft roll bulk phone cable flat cable silver color 4 conductor 26 awg solid copper ul listed 815239013642 otherelectronics',
170
+ 'soul black gb 2013 audi a4 allroad quattro canada market body middle armrest front pr6e3gb fz period 1111 gb 8k0864207jtq8 automotive',
171
+ ]
172
+ embeddings = model.encode(sentences)
173
+ print(embeddings.shape)
174
+ # [3, 768]
175
+
176
+ # Get the similarity scores for the embeddings
177
+ similarities = model.similarity(embeddings, embeddings)
178
+ print(similarities.shape)
179
+ # [3, 3]
180
+ ```
181
+
182
+ <!--
183
+ ### Direct Usage (Transformers)
184
+
185
+ <details><summary>Click to see the direct usage in Transformers</summary>
186
+
187
+ </details>
188
+ -->
189
+
190
+ <!--
191
+ ### Downstream Usage (Sentence Transformers)
192
+
193
+ You can finetune this model on your own dataset.
194
+
195
+ <details><summary>Click to expand</summary>
196
+
197
+ </details>
198
+ -->
199
+
200
+ <!--
201
+ ### Out-of-Scope Use
202
+
203
+ *List how the model may foreseeably be misused and address what users ought not to do with the model.*
204
+ -->
205
+
206
+ <!--
207
+ ## Bias, Risks and Limitations
208
+
209
+ *What are the known or foreseeable issues stemming from this model? You could also flag here known failure cases or weaknesses of the model.*
210
+ -->
211
+
212
+ <!--
213
+ ### Recommendations
214
+
215
+ *What are recommendations with respect to the foreseeable issues? For example, filtering explicit content.*
216
+ -->
217
+
218
+ ## Training Details
219
+
220
+ ### Training Dataset
221
+
222
+ #### Unnamed Dataset
223
+
224
+
225
+ * Size: 281,362 training samples
226
+ * Columns: <code>anchor</code> and <code>positive</code>
227
+ * Approximate statistics based on the first 1000 samples:
228
+ | | anchor | positive |
229
+ |:--------|:------------------------------------------------------------------------------------|:------------------------------------------------------------------------------------|
230
+ | type | string | string |
231
+ | details | <ul><li>min: 14 tokens</li><li>mean: 77.68 tokens</li><li>max: 384 tokens</li></ul> | <ul><li>min: 20 tokens</li><li>mean: 79.97 tokens</li><li>max: 384 tokens</li></ul> |
232
+ * Samples:
233
+ | anchor | positive |
234
+ |:--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|:------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
------------------------------------------------------------------------------------------------------------------------------------------|
235
+ | <code>glue tamiya cement 40ml 12 johnn johnny herbert gb shunko models marking livery 120 scale lotus ford type 102d camel 11 tam20033 and tam20034 ref shkd310 decals markings f1 cars 90 years spotmodel derek warwick japan grand prix 1992 water slide decals assembly instructions for references tam20030 tamiya tam87003 automotive</code> | <code>glue tamiya cement 40ml shunko models marking livery 120 scale benetton ford b192 camel 19 20 michael schumacher de martin brundle gb fia formula 1 world championship 1992 water slide decals and assembly instructions for reference tam20036 ref shkd281 decals markings f1 cars 90 years spotmodel tamiya tam87003 automotive</code> |
236
+ | <code>hose clamp 29325 mm range 12 width spring type 1995 bmw 325i base sedan radiators page 3 mubea sc2932512m219 automotive</code> | <code>hose clamp 29325 mm range 12 width spring type bmw 7series e65 20022008 cooling system miscellaneous page 1 mubea sc2932512m219 automotive part 07129952131boe more info 760i 200406 760li 200308 part 11151726339m395 more info 745i and 745li 200205 750i and 750li 200608 760i 200406 760li 200308 alpina b7 200708 part 16121180240m395 more info 745i and 745li 200205 750i and 750li 200608 760i 200406 760li 200308 alpina b7 200708 part 16121180240boe more info 745i and 745li 200205 750i and 750li 200608 760i 200406 760li 200308 alpina b7 200708 part 16121180242boe more info 745i and 745li 200205 750i and 750li 200608 760i 200406 760li 200308 alpina b7 200708 part 32411156956m395 more info 745i and 745li 200205 750i and 750li 200608 760i 200406 760li 200308 alpina b7 200708 part 32411156956boe more info 745i and 745li 200205 750i and 750li 200608 760i 200406 760li 200308 alpina b7 200708 part 32411712735boe more info 745i and 745li 200205 760i 200406 760li 200308 alpina b7 200708 part 32416751127m9 more info 745i and 745li 200205 750i and 750li 200608 760i 200406 760li 200308 alpina b7 200708 part 64218367179boe more info 745i and 745li 200205 750i and 750li 200608 760i 200406 760li 200308 alpina b7 200708 part 07129952102boe more info 745i and 745li 200205 750i and 750li 200608 760i 200406 760li 200308 alpina b7 200708 part 07129952123boe more info 745i and 745li 200205 750i and 750li 200608 760i 200406 760li 200308 alpina b7 200708 part 12511309471boe more info 745i and 745li 200205 750i and 750li 200608 760i 200406 760li 200308 alpina b7 200708 part 16121176918boe more info 745i and 745li 200205 750i and 750li 200608 760i 200406 760li 200308 alpina b7 200708 part 11631716970boe more info 745i and 745li 200205 750i and 750li 200608 760i 200406 760li 200308</code> |
237
+ | <code>serial rj45 interlocking cable codak17463008 zebra europe qlrwp4t series lithium ion fast charger codat187373 zebra serial rj45 interlocking cable zebra ak17463008 computersandaccessories</code> | <code>zebra universal accessories other by totalbarcodecom zebra ak17463008 kit mod plug to 9pin db pc cable ak17463008 computersandaccessories</code> |
238
+ * Loss: [<code>CachedMultipleNegativesRankingLoss</code>](https://sbert.net/docs/package_reference/sentence_transformer/losses.html#cachedmultiplenegativesrankingloss) with these parameters:
239
+ ```json
240
+ {
241
+ "scale": 20.0,
242
+ "similarity_fct": "cos_sim"
243
+ }
244
+ ```
245
+
246
+ ### Evaluation Dataset
247
+
248
+ #### Unnamed Dataset
249
+
250
+
251
+ * Size: 70,341 evaluation samples
252
+ * Columns: <code>anchor</code> and <code>positive</code>
253
+ * Approximate statistics based on the first 1000 samples:
254
+ | | anchor | positive |
255
+ |:--------|:-----------------------------------------------------------------------------------|:-----------------------------------------------------------------------------------|
256
+ | type | string | string |
257
+ | details | <ul><li>min: 21 tokens</li><li>mean: 83.4 tokens</li><li>max: 384 tokens</li></ul> | <ul><li>min: 19 tokens</li><li>mean: 83.0 tokens</li><li>max: 384 tokens</li></ul> |
258
+ * Samples:
259
+ | anchor | positive |
260
+ |:-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|:-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
261
+ | <code>coolant antifreeze blue 1 liter 1996 bmw 318is base coupe radiators page 1 note approved for all bmw and mini engines concentrate for distilled water see part 55 7864 010 fuchs maintain fricofin 82142209769m865 automotive</code> | <code>coolant antifreeze blue 1 liter 1996 bmw 318is base coupe radiators page 1 note approved for all bmw and mini engines concentrate for distilled water see part 55 7864 010 genuine bmw 82142209769m9 automotive</code> |
262
+ | <code>sealing compound loctite rtv 5699 gray silicone gasket maker 80 ml tube and supplies page 2 1991 bmw 318i base convertible engine rebuilding kits tools note high performance and noncorrosive designed for high torque applications loctite 37464m258 automotive</code> | <code>sealing compound loctite rtv 5699 gray silicone gasket maker 80 ml tube and supplies page 2 1991 bmw 318i base convertible engine rebuilding kits tools note high performance and noncorrosive designed for high torque applications loctite 37464m258 automotive</code> |
263
+ | <code>lexmark remanufactured 18c2090 14 black ink cartridge lexmark x2630 cartridges 4inkjets remanlx14 officeproducts</code> | <code>remanufactured lexmark inkjet cartridge 18c2090 14 black ink lexmark z2320 ink cartridges and printer supplies inkcartridges remanlx14 officeproducts</code> |
264
+ * Loss: [<code>CachedMultipleNegativesRankingLoss</code>](https://sbert.net/docs/package_reference/sentence_transformer/losses.html#cachedmultiplenegativesrankingloss) with these parameters:
265
+ ```json
266
+ {
267
+ "scale": 20.0,
268
+ "similarity_fct": "cos_sim"
269
+ }
270
+ ```
271
+
272
+ ### Training Hyperparameters
273
+ #### Non-Default Hyperparameters
274
+
275
+ - `eval_strategy`: steps
276
+ - `learning_rate`: 1e-05
277
+ - `num_train_epochs`: 2
278
+ - `warmup_ratio`: 0.1
279
+ - `fp16`: True
280
+ - `auto_find_batch_size`: True
281
+ - `batch_sampler`: no_duplicates
282
+
283
+ #### All Hyperparameters
284
+ <details><summary>Click to expand</summary>
285
+
286
+ - `overwrite_output_dir`: False
287
+ - `do_predict`: False
288
+ - `eval_strategy`: steps
289
+ - `prediction_loss_only`: True
290
+ - `per_device_train_batch_size`: 8
291
+ - `per_device_eval_batch_size`: 8
292
+ - `per_gpu_train_batch_size`: None
293
+ - `per_gpu_eval_batch_size`: None
294
+ - `gradient_accumulation_steps`: 1
295
+ - `eval_accumulation_steps`: None
296
+ - `torch_empty_cache_steps`: None
297
+ - `learning_rate`: 1e-05
298
+ - `weight_decay`: 0.0
299
+ - `adam_beta1`: 0.9
300
+ - `adam_beta2`: 0.999
301
+ - `adam_epsilon`: 1e-08
302
+ - `max_grad_norm`: 1.0
303
+ - `num_train_epochs`: 2
304
+ - `max_steps`: -1
305
+ - `lr_scheduler_type`: linear
306
+ - `lr_scheduler_kwargs`: {}
307
+ - `warmup_ratio`: 0.1
308
+ - `warmup_steps`: 0
309
+ - `log_level`: passive
310
+ - `log_level_replica`: warning
311
+ - `log_on_each_node`: True
312
+ - `logging_nan_inf_filter`: True
313
+ - `save_safetensors`: True
314
+ - `save_on_each_node`: False
315
+ - `save_only_model`: False
316
+ - `restore_callback_states_from_checkpoint`: False
317
+ - `no_cuda`: False
318
+ - `use_cpu`: False
319
+ - `use_mps_device`: False
320
+ - `seed`: 42
321
+ - `data_seed`: None
322
+ - `jit_mode_eval`: False
323
+ - `use_ipex`: False
324
+ - `bf16`: False
325
+ - `fp16`: True
326
+ - `fp16_opt_level`: O1
327
+ - `half_precision_backend`: auto
328
+ - `bf16_full_eval`: False
329
+ - `fp16_full_eval`: False
330
+ - `tf32`: None
331
+ - `local_rank`: 0
332
+ - `ddp_backend`: None
333
+ - `tpu_num_cores`: None
334
+ - `tpu_metrics_debug`: False
335
+ - `debug`: []
336
+ - `dataloader_drop_last`: False
337
+ - `dataloader_num_workers`: 0
338
+ - `dataloader_prefetch_factor`: None
339
+ - `past_index`: -1
340
+ - `disable_tqdm`: False
341
+ - `remove_unused_columns`: True
342
+ - `label_names`: None
343
+ - `load_best_model_at_end`: False
344
+ - `ignore_data_skip`: False
345
+ - `fsdp`: []
346
+ - `fsdp_min_num_params`: 0
347
+ - `fsdp_config`: {'min_num_params': 0, 'xla': False, 'xla_fsdp_v2': False, 'xla_fsdp_grad_ckpt': False}
348
+ - `fsdp_transformer_layer_cls_to_wrap`: None
349
+ - `accelerator_config`: {'split_batches': False, 'dispatch_batches': None, 'even_batches': True, 'use_seedable_sampler': True, 'non_blocking': False, 'gradient_accumulation_kwargs': None}
350
+ - `deepspeed`: None
351
+ - `label_smoothing_factor`: 0.0
352
+ - `optim`: adamw_torch
353
+ - `optim_args`: None
354
+ - `adafactor`: False
355
+ - `group_by_length`: False
356
+ - `length_column_name`: length
357
+ - `ddp_find_unused_parameters`: None
358
+ - `ddp_bucket_cap_mb`: None
359
+ - `ddp_broadcast_buffers`: False
360
+ - `dataloader_pin_memory`: True
361
+ - `dataloader_persistent_workers`: False
362
+ - `skip_memory_metrics`: True
363
+ - `use_legacy_prediction_loop`: False
364
+ - `push_to_hub`: False
365
+ - `resume_from_checkpoint`: None
366
+ - `hub_model_id`: None
367
+ - `hub_strategy`: every_save
368
+ - `hub_private_repo`: False
369
+ - `hub_always_push`: False
370
+ - `gradient_checkpointing`: False
371
+ - `gradient_checkpointing_kwargs`: None
372
+ - `include_inputs_for_metrics`: False
373
+ - `eval_do_concat_batches`: True
374
+ - `fp16_backend`: auto
375
+ - `push_to_hub_model_id`: None
376
+ - `push_to_hub_organization`: None
377
+ - `mp_parameters`:
378
+ - `auto_find_batch_size`: True
379
+ - `full_determinism`: False
380
+ - `torchdynamo`: None
381
+ - `ray_scope`: last
382
+ - `ddp_timeout`: 1800
383
+ - `torch_compile`: False
384
+ - `torch_compile_backend`: None
385
+ - `torch_compile_mode`: None
386
+ - `dispatch_batches`: None
387
+ - `split_batches`: None
388
+ - `include_tokens_per_second`: False
389
+ - `include_num_input_tokens_seen`: False
390
+ - `neftune_noise_alpha`: None
391
+ - `optim_target_modules`: None
392
+ - `batch_eval_metrics`: False
393
+ - `eval_on_start`: False
394
+ - `eval_use_gather_object`: False
395
+ - `batch_sampler`: no_duplicates
396
+ - `multi_dataset_batch_sampler`: proportional
397
+
398
+ </details>
399
+
400
+ ### Training Logs
401
+ | Epoch | Step | Training Loss | Validation Loss |
402
+ |:------:|:-----:|:-------------:|:------:|
403
+ | 0.1990 | 7000 | 0.0113 | 0.0031 |
404
+ | 0.3981 | 14000 | 0.0022 | 0.0019 |
405
+ | 0.5971 | 21000 | 0.0019 | 0.0012 |
406
+ | 0.7961 | 28000 | 0.0017 | 0.0012 |
407
+ | 0.9951 | 35000 | 0.0013 | 0.0011 |
408
+ | 1.1942 | 42000 | 0.0012 | 0.0008 |
409
+ | 1.3932 | 49000 | 0.0005 | 0.0008 |
410
+
411
+
412
+ ### Framework Versions
413
+ - Python: 3.10.13
414
+ - Sentence Transformers: 3.0.1
415
+ - Transformers: 4.44.0
416
+ - PyTorch: 2.2.1
417
+ - Accelerate: 0.33.0
418
+ - Datasets: 2.21.0
419
+ - Tokenizers: 0.19.1
420
+
421
+ ## Citation
422
+
423
+ ### BibTeX
424
+
425
+ #### Sentence Transformers
426
+ ```bibtex
427
+ @inproceedings{reimers-2019-sentence-bert,
428
+ title = "Sentence-BERT: Sentence Embeddings using Siamese BERT-Networks",
429
+ author = "Reimers, Nils and Gurevych, Iryna",
430
+ booktitle = "Proceedings of the 2019 Conference on Empirical Methods in Natural Language Processing",
431
+ month = "11",
432
+ year = "2019",
433
+ publisher = "Association for Computational Linguistics",
434
+ url = "https://arxiv.org/abs/1908.10084",
435
+ }
436
+ ```
437
+
438
+ #### CachedMultipleNegativesRankingLoss
439
+ ```bibtex
440
+ @misc{gao2021scaling,
441
+ title={Scaling Deep Contrastive Learning Batch Size under Memory Limited Setup},
442
+ author={Luyu Gao and Yunyi Zhang and Jiawei Han and Jamie Callan},
443
+ year={2021},
444
+ eprint={2101.06983},
445
+ archivePrefix={arXiv},
446
+ primaryClass={cs.LG}
447
+ }
448
+ ```
449
+
450
+ <!--
451
+ ## Glossary
452
+
453
+ *Clearly define terms in order to be accessible across audiences.*
454
+ -->
455
+
456
+ <!--
457
+ ## Model Card Authors
458
+
459
+ *Lists the people who create the model card, providing recognition and accountability for the detailed work that goes into its construction.*
460
+ -->
461
+
462
+ <!--
463
+ ## Model Card Contact
464
+
465
+ *Provides a way for people who have updates to the Model Card, suggestions, or questions, to contact the Model Card authors.*
466
+ -->
config.json ADDED
@@ -0,0 +1,24 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "sentence-transformers/all-mpnet-base-v2",
3
+ "architectures": [
4
+ "MPNetModel"
5
+ ],
6
+ "attention_probs_dropout_prob": 0.1,
7
+ "bos_token_id": 0,
8
+ "eos_token_id": 2,
9
+ "hidden_act": "gelu",
10
+ "hidden_dropout_prob": 0.1,
11
+ "hidden_size": 768,
12
+ "initializer_range": 0.02,
13
+ "intermediate_size": 3072,
14
+ "layer_norm_eps": 1e-05,
15
+ "max_position_embeddings": 514,
16
+ "model_type": "mpnet",
17
+ "num_attention_heads": 12,
18
+ "num_hidden_layers": 12,
19
+ "pad_token_id": 1,
20
+ "relative_attention_num_buckets": 32,
21
+ "torch_dtype": "float32",
22
+ "transformers_version": "4.44.0",
23
+ "vocab_size": 30527
24
+ }
config_sentence_transformers.json ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "__version__": {
3
+ "sentence_transformers": "3.0.1",
4
+ "transformers": "4.44.0",
5
+ "pytorch": "2.2.1"
6
+ },
7
+ "prompts": {},
8
+ "default_prompt_name": null,
9
+ "similarity_fn_name": null
10
+ }
model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9f1dccd7a17ecfffe603c6bb81740ddddb65a5afb248339b56e2e32445894074
3
+ size 437967672
modules.json ADDED
@@ -0,0 +1,20 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [
2
+ {
3
+ "idx": 0,
4
+ "name": "0",
5
+ "path": "",
6
+ "type": "sentence_transformers.models.Transformer"
7
+ },
8
+ {
9
+ "idx": 1,
10
+ "name": "1",
11
+ "path": "1_Pooling",
12
+ "type": "sentence_transformers.models.Pooling"
13
+ },
14
+ {
15
+ "idx": 2,
16
+ "name": "2",
17
+ "path": "2_Normalize",
18
+ "type": "sentence_transformers.models.Normalize"
19
+ }
20
+ ]
mpnet_incl_no_struct_no_token_MP_embedded.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:59e9d10d1b091f0c536d2e077760ba6d4c6165e0ebd6ec9c405b45375960e76c
3
+ size 8268733
optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f697239641a090ee5056f3d51612f13f7c0932a3d911667a0fb6042ec284b1db
3
+ size 871331770
rng_state.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:169ef975fad5dea24803b259f3001a8774668b530044c586388945fe852b3012
3
+ size 14244
scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cf9fb216ceb2785819ed11bbc7a3c870cfcf3a2abf50744edfaf81ad676f3226
3
+ size 1064
sentence_bert_config.json ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ {
2
+ "max_seq_length": 384,
3
+ "do_lower_case": false
4
+ }
special_tokens_map.json ADDED
@@ -0,0 +1,51 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "bos_token": {
3
+ "content": "<s>",
4
+ "lstrip": false,
5
+ "normalized": false,
6
+ "rstrip": false,
7
+ "single_word": false
8
+ },
9
+ "cls_token": {
10
+ "content": "<s>",
11
+ "lstrip": false,
12
+ "normalized": false,
13
+ "rstrip": false,
14
+ "single_word": false
15
+ },
16
+ "eos_token": {
17
+ "content": "</s>",
18
+ "lstrip": false,
19
+ "normalized": false,
20
+ "rstrip": false,
21
+ "single_word": false
22
+ },
23
+ "mask_token": {
24
+ "content": "<mask>",
25
+ "lstrip": true,
26
+ "normalized": false,
27
+ "rstrip": false,
28
+ "single_word": false
29
+ },
30
+ "pad_token": {
31
+ "content": "<pad>",
32
+ "lstrip": false,
33
+ "normalized": false,
34
+ "rstrip": false,
35
+ "single_word": false
36
+ },
37
+ "sep_token": {
38
+ "content": "</s>",
39
+ "lstrip": false,
40
+ "normalized": false,
41
+ "rstrip": false,
42
+ "single_word": false
43
+ },
44
+ "unk_token": {
45
+ "content": "[UNK]",
46
+ "lstrip": false,
47
+ "normalized": false,
48
+ "rstrip": false,
49
+ "single_word": false
50
+ }
51
+ }
tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
tokenizer_config.json ADDED
@@ -0,0 +1,72 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "added_tokens_decoder": {
3
+ "0": {
4
+ "content": "<s>",
5
+ "lstrip": false,
6
+ "normalized": false,
7
+ "rstrip": false,
8
+ "single_word": false,
9
+ "special": true
10
+ },
11
+ "1": {
12
+ "content": "<pad>",
13
+ "lstrip": false,
14
+ "normalized": false,
15
+ "rstrip": false,
16
+ "single_word": false,
17
+ "special": true
18
+ },
19
+ "2": {
20
+ "content": "</s>",
21
+ "lstrip": false,
22
+ "normalized": false,
23
+ "rstrip": false,
24
+ "single_word": false,
25
+ "special": true
26
+ },
27
+ "3": {
28
+ "content": "<unk>",
29
+ "lstrip": false,
30
+ "normalized": true,
31
+ "rstrip": false,
32
+ "single_word": false,
33
+ "special": true
34
+ },
35
+ "104": {
36
+ "content": "[UNK]",
37
+ "lstrip": false,
38
+ "normalized": false,
39
+ "rstrip": false,
40
+ "single_word": false,
41
+ "special": true
42
+ },
43
+ "30526": {
44
+ "content": "<mask>",
45
+ "lstrip": true,
46
+ "normalized": false,
47
+ "rstrip": false,
48
+ "single_word": false,
49
+ "special": true
50
+ }
51
+ },
52
+ "bos_token": "<s>",
53
+ "clean_up_tokenization_spaces": true,
54
+ "cls_token": "<s>",
55
+ "do_lower_case": true,
56
+ "eos_token": "</s>",
57
+ "mask_token": "<mask>",
58
+ "max_length": 128,
59
+ "model_max_length": 384,
60
+ "pad_to_multiple_of": null,
61
+ "pad_token": "<pad>",
62
+ "pad_token_type_id": 0,
63
+ "padding_side": "right",
64
+ "sep_token": "</s>",
65
+ "stride": 0,
66
+ "strip_accents": null,
67
+ "tokenize_chinese_chars": true,
68
+ "tokenizer_class": "MPNetTokenizer",
69
+ "truncation_side": "right",
70
+ "truncation_strategy": "longest_first",
71
+ "unk_token": "[UNK]"
72
+ }
trainer_state.json ADDED
@@ -0,0 +1,138 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": null,
3
+ "best_model_checkpoint": null,
4
+ "epoch": 1.3931932558073412,
5
+ "eval_steps": 7000,
6
+ "global_step": 49000,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 0.19902760797247732,
13
+ "grad_norm": 0.001206448650918901,
14
+ "learning_rate": 9.945984363894813e-06,
15
+ "loss": 0.0113,
16
+ "step": 7000
17
+ },
18
+ {
19
+ "epoch": 0.19902760797247732,
20
+ "eval_loss": 0.003107509110122919,
21
+ "eval_runtime": 304.4798,
22
+ "eval_samples_per_second": 231.02,
23
+ "eval_steps_per_second": 28.879,
24
+ "step": 7000
25
+ },
26
+ {
27
+ "epoch": 0.39805521594495463,
28
+ "grad_norm": 0.26781165599823,
29
+ "learning_rate": 8.900753471180123e-06,
30
+ "loss": 0.0022,
31
+ "step": 14000
32
+ },
33
+ {
34
+ "epoch": 0.39805521594495463,
35
+ "eval_loss": 0.0018909029895439744,
36
+ "eval_runtime": 304.1801,
37
+ "eval_samples_per_second": 231.248,
38
+ "eval_steps_per_second": 28.907,
39
+ "step": 14000
40
+ },
41
+ {
42
+ "epoch": 0.597082823917432,
43
+ "grad_norm": 0.00027483125450089574,
44
+ "learning_rate": 7.795346486170566e-06,
45
+ "loss": 0.0019,
46
+ "step": 21000
47
+ },
48
+ {
49
+ "epoch": 0.597082823917432,
50
+ "eval_loss": 0.0012107242364436388,
51
+ "eval_runtime": 304.0636,
52
+ "eval_samples_per_second": 231.336,
53
+ "eval_steps_per_second": 28.918,
54
+ "step": 21000
55
+ },
56
+ {
57
+ "epoch": 0.7961104318899093,
58
+ "grad_norm": 0.21627014875411987,
59
+ "learning_rate": 6.6900974615761295e-06,
60
+ "loss": 0.0017,
61
+ "step": 28000
62
+ },
63
+ {
64
+ "epoch": 0.7961104318899093,
65
+ "eval_loss": 0.001225769054144621,
66
+ "eval_runtime": 304.172,
67
+ "eval_samples_per_second": 231.254,
68
+ "eval_steps_per_second": 28.908,
69
+ "step": 28000
70
+ },
71
+ {
72
+ "epoch": 0.9951380398623866,
73
+ "grad_norm": 0.003856346942484379,
74
+ "learning_rate": 5.584690476566573e-06,
75
+ "loss": 0.0013,
76
+ "step": 35000
77
+ },
78
+ {
79
+ "epoch": 0.9951380398623866,
80
+ "eval_loss": 0.0011495049111545086,
81
+ "eval_runtime": 304.9212,
82
+ "eval_samples_per_second": 230.686,
83
+ "eval_steps_per_second": 28.837,
84
+ "step": 35000
85
+ },
86
+ {
87
+ "epoch": 1.194165647834864,
88
+ "grad_norm": 0.00557931000366807,
89
+ "learning_rate": 4.479283491557016e-06,
90
+ "loss": 0.0012,
91
+ "step": 42000
92
+ },
93
+ {
94
+ "epoch": 1.194165647834864,
95
+ "eval_loss": 0.0007833261624909937,
96
+ "eval_runtime": 304.4978,
97
+ "eval_samples_per_second": 231.007,
98
+ "eval_steps_per_second": 28.877,
99
+ "step": 42000
100
+ },
101
+ {
102
+ "epoch": 1.3931932558073412,
103
+ "grad_norm": 0.0003088970552198589,
104
+ "learning_rate": 3.374192427377699e-06,
105
+ "loss": 0.0005,
106
+ "step": 49000
107
+ },
108
+ {
109
+ "epoch": 1.3931932558073412,
110
+ "eval_loss": 0.0007826934452168643,
111
+ "eval_runtime": 305.4918,
112
+ "eval_samples_per_second": 230.255,
113
+ "eval_steps_per_second": 28.783,
114
+ "step": 49000
115
+ }
116
+ ],
117
+ "logging_steps": 7000,
118
+ "max_steps": 70342,
119
+ "num_input_tokens_seen": 0,
120
+ "num_train_epochs": 2,
121
+ "save_steps": 7000,
122
+ "stateful_callbacks": {
123
+ "TrainerControl": {
124
+ "args": {
125
+ "should_epoch_stop": false,
126
+ "should_evaluate": false,
127
+ "should_log": false,
128
+ "should_save": true,
129
+ "should_training_stop": false
130
+ },
131
+ "attributes": {}
132
+ }
133
+ },
134
+ "total_flos": 0.0,
135
+ "train_batch_size": 8,
136
+ "trial_name": null,
137
+ "trial_params": null
138
+ }
training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:45dbf1a267df37822a08c7ca8ab904b06a6a28e4cea29ecda4d798055c2b695f
3
+ size 5368
training_checkpoint-49000_matching_scores.png ADDED
training_checkpoint-49000_matching_scores.txt ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ Matching Scores:
2
+ Optimal Cosine Similarity Threshold: 0.35000000000000003
3
+ F1 Score: 0.7659574468085106
4
+ Precision: 0.6233766233766234
5
+ Recall: 0.993103448275862
6
+ Accuracy: 0.6351575456053068
vocab.txt ADDED
The diff for this file is too large to render. See raw diff