smikulas committed on
Commit
4c1bea0
·
verified ·
1 Parent(s): 697eb71

Upload folder using huggingface_hub

Browse files
This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50) hide show
  1. 1_Pooling/config.json +10 -0
  2. README.md +843 -0
  3. checkpoint-38/1_Pooling/config.json +10 -0
  4. checkpoint-38/README.md +836 -0
  5. checkpoint-38/config.json +25 -0
  6. checkpoint-38/config_sentence_transformers.json +10 -0
  7. checkpoint-38/model.safetensors +3 -0
  8. checkpoint-38/modules.json +20 -0
  9. checkpoint-38/optimizer.pt +3 -0
  10. checkpoint-38/rng_state.pth +3 -0
  11. checkpoint-38/scheduler.pt +3 -0
  12. checkpoint-38/sentence_bert_config.json +4 -0
  13. checkpoint-38/special_tokens_map.json +37 -0
  14. checkpoint-38/tokenizer.json +0 -0
  15. checkpoint-38/tokenizer_config.json +65 -0
  16. checkpoint-38/trainer_state.json +251 -0
  17. checkpoint-38/training_args.bin +3 -0
  18. checkpoint-38/vocab.txt +0 -0
  19. checkpoint-57/1_Pooling/config.json +10 -0
  20. checkpoint-57/README.md +839 -0
  21. checkpoint-57/config.json +25 -0
  22. checkpoint-57/config_sentence_transformers.json +10 -0
  23. checkpoint-57/model.safetensors +3 -0
  24. checkpoint-57/modules.json +20 -0
  25. checkpoint-57/optimizer.pt +3 -0
  26. checkpoint-57/rng_state.pth +3 -0
  27. checkpoint-57/scheduler.pt +3 -0
  28. checkpoint-57/sentence_bert_config.json +4 -0
  29. checkpoint-57/special_tokens_map.json +37 -0
  30. checkpoint-57/tokenizer.json +0 -0
  31. checkpoint-57/tokenizer_config.json +65 -0
  32. checkpoint-57/trainer_state.json +363 -0
  33. checkpoint-57/training_args.bin +3 -0
  34. checkpoint-57/vocab.txt +0 -0
  35. checkpoint-72/1_Pooling/config.json +10 -0
  36. checkpoint-72/README.md +842 -0
  37. checkpoint-72/config.json +25 -0
  38. checkpoint-72/config_sentence_transformers.json +10 -0
  39. checkpoint-72/model.safetensors +3 -0
  40. checkpoint-72/modules.json +20 -0
  41. checkpoint-72/optimizer.pt +3 -0
  42. checkpoint-72/rng_state.pth +3 -0
  43. checkpoint-72/scheduler.pt +3 -0
  44. checkpoint-72/sentence_bert_config.json +4 -0
  45. checkpoint-72/special_tokens_map.json +37 -0
  46. checkpoint-72/tokenizer.json +0 -0
  47. checkpoint-72/tokenizer_config.json +65 -0
  48. checkpoint-72/trainer_state.json +475 -0
  49. checkpoint-72/training_args.bin +3 -0
  50. checkpoint-72/vocab.txt +0 -0
1_Pooling/config.json ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "word_embedding_dimension": 384,
3
+ "pooling_mode_cls_token": false,
4
+ "pooling_mode_mean_tokens": true,
5
+ "pooling_mode_max_tokens": false,
6
+ "pooling_mode_mean_sqrt_len_tokens": false,
7
+ "pooling_mode_weightedmean_tokens": false,
8
+ "pooling_mode_lasttoken": false,
9
+ "include_prompt": true
10
+ }
README.md ADDED
@@ -0,0 +1,843 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ language:
3
+ - en
4
+ license: apache-2.0
5
+ tags:
6
+ - sentence-transformers
7
+ - sentence-similarity
8
+ - feature-extraction
9
+ - generated_from_trainer
10
+ - dataset_size:9432
11
+ - loss:MatryoshkaLoss
12
+ - loss:MultipleNegativesRankingLoss
13
+ base_model: sentence-transformers/all-MiniLM-L6-v2
14
+ widget:
15
+ - source_sentence: Atherosclerosis and coronary heart disease are examples of what
16
+ type of body system disease?
17
+ sentences:
18
+ - Diseases of the cardiovascular system are common and may be life threatening.
19
+ Examples include atherosclerosis and coronary heart disease. A healthy lifestyle
20
+ can reduce the risk of such diseases developing. This includes avoiding smoking,
21
+ getting regular physical activity, and maintaining a healthy percent of body fat.
22
+ - Osmosis Osmosis is the diffusion of water through a semipermeable membrane according
23
+ to the concentration gradient of water across the membrane. Whereas diffusion
24
+ transports material across membranes and within cells, osmosis transports only
25
+ water across a membrane and the membrane limits the diffusion of solutes in the
26
+ water. Osmosis is a special case of diffusion. Water, like other substances, moves
27
+ from an area of higher concentration to one of lower concentration. Imagine a
28
+ beaker with a semipermeable membrane, separating the two sides or halves (Figure
29
+ 3.21). On both sides of the membrane, the water level is the same, but there are
30
+ different concentrations on each side of a dissolved substance, or solute, that
31
+ cannot cross the membrane. If the volume of the water is the same, but the concentrations
32
+ of solute are different, then there are also different concentrations of water,
33
+ the solvent, on either side of the membrane.
34
+ - Circadian rhythms are regular changes in biology or behavior that occur in a 24-hour
35
+ cycle. In humans, for example, blood pressure and body temperature change in a
36
+ regular way throughout each 24-hour day. Animals may eat and drink at certain
37
+ times of day as well. Humans have daily cycles of behavior, too. Most people start
38
+ to get sleepy after dark and have a hard time sleeping when it is light outside.
39
+ In many species, including humans, circadian rhythms are controlled by a tiny
40
+ structure called the biological clock . This structure is located in a gland at
41
+ the base of the brain. The biological clock sends signals to the body. The signals
42
+ cause regular changes in behavior and body processes. The amount of light entering
43
+ the eyes helps control the biological clock. The clock causes changes that repeat
44
+ every 24 hours.
45
+ - source_sentence: How does a cell's membrane keep extracellular materials from mixing
46
+ with its internal components?
47
+ sentences:
48
+ - We know that the Universe is expanding. Astronomers have wondered if it is expanding
49
+ fast enough to escape the pull of gravity. Would the Universe just expand forever?
50
+ If it could not escape the pull of gravity, would it someday start to contract?
51
+ This means it would eventually get squeezed together in a big crunch. This is
52
+ the opposite of the Big Bang.
53
+ - Physical properties that do not depend on the amount of substance present are
54
+ called intensive properties . Intensive properties do not change with changes
55
+ of size, shape, or scale. Examples of intensive properties are as follows in the
56
+ Table below .
57
+ - CHAPTER REVIEW 3.1 The Cell Membrane The cell membrane provides a barrier around
58
+ the cell, separating its internal components from the extracellular environment.
59
+ It is composed of a phospholipid bilayer, with hydrophobic internal lipid “tails”
60
+ and hydrophilic external phosphate “heads. ” Various membrane proteins are scattered
61
+ throughout the bilayer, both inserted within it and attached to it peripherally.
62
+ The cell membrane is selectively permeable, allowing only a limited number of
63
+ materials to diffuse through its lipid bilayer. All materials that cross the membrane
64
+ do so using passive (non energy-requiring) or active (energy-requiring) transport
65
+ processes. During passive transport, materials move by simple diffusion or by
66
+ facilitated diffusion through the membrane, down their concentration gradient.
67
+ Water passes through the membrane in a diffusion process called osmosis. During
68
+ active transport, energy is expended to assist material movement across the membrane
69
+ in a direction against their concentration gradient. Active transport may take
70
+ place with the help of protein pumps or through the use of vesicles.
71
+ - source_sentence: An infection may be intracellular or extracellular, depending on
72
+ this?
73
+ sentences:
74
+ - '22.3 Magnetic Fields and Magnetic Field Lines • Magnetic fields can be pictorially
75
+ represented by magnetic field lines, the properties of which are as follows: 1.
76
+ The field is tangent to the magnetic field line. Field strength is proportional
77
+ to the line density. Field lines cannot cross. Field lines are continuous loops.'
78
+ - Figure 24.13 The lifecycle of an ascomycete is characterized by the production
79
+ of asci during the sexual phase. The haploid phase is the predominant phase of
80
+ the life cycle.
81
+ - Caffeine is an example of a psychoactive drug. It is found in coffee and many
82
+ other products (see Table below ). Caffeine is a central nervous system stimulant
83
+ . Like other stimulant drugs, it makes you feel more awake and alert. Other psychoactive
84
+ drugs include alcohol, nicotine, and marijuana. Each has a different effect on
85
+ the central nervous system. Alcohol, for example, is a depressant . It has the
86
+ opposite effects of a stimulant like caffeine.
87
+ - source_sentence: What does water treatment do to water?
88
+ sentences:
89
+ - Some solutes, such as sodium acetate, do not recrystallize easily. Suppose an
90
+ exactly saturated solution of sodium acetate is prepared at 50°C. As it cools
91
+ back to room temperature, no crystals appear in the solution, even though the
92
+ solubility of sodium acetate is lower at room temperature. A supersaturated solution
93
+ is a solution that contains more than the maximum amount of solute that is capable
94
+ of being dissolved at a given temperature. The recrystallization of the excess
95
+ dissolved solute in a supersaturated solution can be initiated by the addition
96
+ of a tiny crystal of solute, called a seed crystal. The seed crystal provides
97
+ a nucleation site on which the excess dissolved crystals can begin to grow. Recrystallization
98
+ from a supersaturated solution is typically very fast.
99
+ - Figure 23.13, the esophagus runs a mainly straight route through the mediastinum
100
+ of the thorax. To enter the abdomen, the esophagus penetrates the diaphragm through
101
+ an opening called the esophageal hiatus.
102
+ - Water treatment is a series of processes that remove unwanted substances from
103
+ water. More processes are needed to purify water for drinking than for other uses.
104
+ - source_sentence: 'There are only four possible bases that make up each dna nucleotide:
105
+ adenine, guanine, thymine, and?'
106
+ sentences:
107
+ - Metamorphism. This long word means “to change form. “ A rock undergoes metamorphism
108
+ if it is exposed to extreme heat and pressure within the crust. With metamorphism
109
+ , the rock does not melt all the way. The rock changes due to heat and pressure.
110
+ A metamorphic rock may have a new mineral composition and/or texture.
111
+ - Forest and Kim Starr (Flickr:Starr Environmental). Secondary succession occurs
112
+ when nature reclaims areas formerly occupied by life . CC BY 2.0.
113
+ - 'The only difference between each nucleotide is the identity of the base. There
114
+ are only four possible bases that make up each DNA nucleotide: adenine (A), guanine
115
+ (G), thymine (T), and cytosine (C).'
116
+ pipeline_tag: sentence-similarity
117
+ library_name: sentence-transformers
118
+ metrics:
119
+ - cosine_accuracy@1
120
+ - cosine_accuracy@3
121
+ - cosine_accuracy@5
122
+ - cosine_accuracy@10
123
+ - cosine_precision@1
124
+ - cosine_precision@3
125
+ - cosine_precision@5
126
+ - cosine_precision@10
127
+ - cosine_recall@1
128
+ - cosine_recall@3
129
+ - cosine_recall@5
130
+ - cosine_recall@10
131
+ - cosine_ndcg@10
132
+ - cosine_mrr@10
133
+ - cosine_map@100
134
+ model-index:
135
+ - name: MNLP M3 Encoder SciQA
136
+ results:
137
+ - task:
138
+ type: information-retrieval
139
+ name: Information Retrieval
140
+ dataset:
141
+ name: dim 384
142
+ type: dim_384
143
+ metrics:
144
+ - type: cosine_accuracy@1
145
+ value: 0.6120114394661582
146
+ name: Cosine Accuracy@1
147
+ - type: cosine_accuracy@3
148
+ value: 0.8017159199237369
149
+ name: Cosine Accuracy@3
150
+ - type: cosine_accuracy@5
151
+ value: 0.8541468064823642
152
+ name: Cosine Accuracy@5
153
+ - type: cosine_accuracy@10
154
+ value: 0.9275500476644424
155
+ name: Cosine Accuracy@10
156
+ - type: cosine_precision@1
157
+ value: 0.6120114394661582
158
+ name: Cosine Precision@1
159
+ - type: cosine_precision@3
160
+ value: 0.267238639974579
161
+ name: Cosine Precision@3
162
+ - type: cosine_precision@5
163
+ value: 0.17082936129647283
164
+ name: Cosine Precision@5
165
+ - type: cosine_precision@10
166
+ value: 0.09275500476644424
167
+ name: Cosine Precision@10
168
+ - type: cosine_recall@1
169
+ value: 0.6120114394661582
170
+ name: Cosine Recall@1
171
+ - type: cosine_recall@3
172
+ value: 0.8017159199237369
173
+ name: Cosine Recall@3
174
+ - type: cosine_recall@5
175
+ value: 0.8541468064823642
176
+ name: Cosine Recall@5
177
+ - type: cosine_recall@10
178
+ value: 0.9275500476644424
179
+ name: Cosine Recall@10
180
+ - type: cosine_ndcg@10
181
+ value: 0.7690377395004954
182
+ name: Cosine Ndcg@10
183
+ - type: cosine_mrr@10
184
+ value: 0.7184669450875366
185
+ name: Cosine Mrr@10
186
+ - type: cosine_map@100
187
+ value: 0.7210073638258574
188
+ name: Cosine Map@100
189
+ - task:
190
+ type: information-retrieval
191
+ name: Information Retrieval
192
+ dataset:
193
+ name: dim 256
194
+ type: dim_256
195
+ metrics:
196
+ - type: cosine_accuracy@1
197
+ value: 0.5977121067683508
198
+ name: Cosine Accuracy@1
199
+ - type: cosine_accuracy@3
200
+ value: 0.7912297426120114
201
+ name: Cosine Accuracy@3
202
+ - type: cosine_accuracy@5
203
+ value: 0.8398474737845567
204
+ name: Cosine Accuracy@5
205
+ - type: cosine_accuracy@10
206
+ value: 0.9151572926596759
207
+ name: Cosine Accuracy@10
208
+ - type: cosine_precision@1
209
+ value: 0.5977121067683508
210
+ name: Cosine Precision@1
211
+ - type: cosine_precision@3
212
+ value: 0.26374324753733713
213
+ name: Cosine Precision@3
214
+ - type: cosine_precision@5
215
+ value: 0.16796949475691134
216
+ name: Cosine Precision@5
217
+ - type: cosine_precision@10
218
+ value: 0.09151572926596759
219
+ name: Cosine Precision@10
220
+ - type: cosine_recall@1
221
+ value: 0.5977121067683508
222
+ name: Cosine Recall@1
223
+ - type: cosine_recall@3
224
+ value: 0.7912297426120114
225
+ name: Cosine Recall@3
226
+ - type: cosine_recall@5
227
+ value: 0.8398474737845567
228
+ name: Cosine Recall@5
229
+ - type: cosine_recall@10
230
+ value: 0.9151572926596759
231
+ name: Cosine Recall@10
232
+ - type: cosine_ndcg@10
233
+ value: 0.7558547240171754
234
+ name: Cosine Ndcg@10
235
+ - type: cosine_mrr@10
236
+ value: 0.7049529408204341
237
+ name: Cosine Mrr@10
238
+ - type: cosine_map@100
239
+ value: 0.7084736712852033
240
+ name: Cosine Map@100
241
+ - task:
242
+ type: information-retrieval
243
+ name: Information Retrieval
244
+ dataset:
245
+ name: dim 192
246
+ type: dim_192
247
+ metrics:
248
+ - type: cosine_accuracy@1
249
+ value: 0.5891325071496664
250
+ name: Cosine Accuracy@1
251
+ - type: cosine_accuracy@3
252
+ value: 0.778836987607245
253
+ name: Cosine Accuracy@3
254
+ - type: cosine_accuracy@5
255
+ value: 0.8331744518589133
256
+ name: Cosine Accuracy@5
257
+ - type: cosine_accuracy@10
258
+ value: 0.90371782650143
259
+ name: Cosine Accuracy@10
260
+ - type: cosine_precision@1
261
+ value: 0.5891325071496664
262
+ name: Cosine Precision@1
263
+ - type: cosine_precision@3
264
+ value: 0.259612329202415
265
+ name: Cosine Precision@3
266
+ - type: cosine_precision@5
267
+ value: 0.16663489037178267
268
+ name: Cosine Precision@5
269
+ - type: cosine_precision@10
270
+ value: 0.090371782650143
271
+ name: Cosine Precision@10
272
+ - type: cosine_recall@1
273
+ value: 0.5891325071496664
274
+ name: Cosine Recall@1
275
+ - type: cosine_recall@3
276
+ value: 0.778836987607245
277
+ name: Cosine Recall@3
278
+ - type: cosine_recall@5
279
+ value: 0.8331744518589133
280
+ name: Cosine Recall@5
281
+ - type: cosine_recall@10
282
+ value: 0.90371782650143
283
+ name: Cosine Recall@10
284
+ - type: cosine_ndcg@10
285
+ value: 0.7467179313530818
286
+ name: Cosine Ndcg@10
287
+ - type: cosine_mrr@10
288
+ value: 0.6964694266648511
289
+ name: Cosine Mrr@10
290
+ - type: cosine_map@100
291
+ value: 0.7004357679049269
292
+ name: Cosine Map@100
293
+ - task:
294
+ type: information-retrieval
295
+ name: Information Retrieval
296
+ dataset:
297
+ name: dim 128
298
+ type: dim_128
299
+ metrics:
300
+ - type: cosine_accuracy@1
301
+ value: 0.5662535748331744
302
+ name: Cosine Accuracy@1
303
+ - type: cosine_accuracy@3
304
+ value: 0.7626310772163966
305
+ name: Cosine Accuracy@3
306
+ - type: cosine_accuracy@5
307
+ value: 0.8265014299332698
308
+ name: Cosine Accuracy@5
309
+ - type: cosine_accuracy@10
310
+ value: 0.8913250714966635
311
+ name: Cosine Accuracy@10
312
+ - type: cosine_precision@1
313
+ value: 0.5662535748331744
314
+ name: Cosine Precision@1
315
+ - type: cosine_precision@3
316
+ value: 0.25421035907213213
317
+ name: Cosine Precision@3
318
+ - type: cosine_precision@5
319
+ value: 0.16530028598665394
320
+ name: Cosine Precision@5
321
+ - type: cosine_precision@10
322
+ value: 0.08913250714966635
323
+ name: Cosine Precision@10
324
+ - type: cosine_recall@1
325
+ value: 0.5662535748331744
326
+ name: Cosine Recall@1
327
+ - type: cosine_recall@3
328
+ value: 0.7626310772163966
329
+ name: Cosine Recall@3
330
+ - type: cosine_recall@5
331
+ value: 0.8265014299332698
332
+ name: Cosine Recall@5
333
+ - type: cosine_recall@10
334
+ value: 0.8913250714966635
335
+ name: Cosine Recall@10
336
+ - type: cosine_ndcg@10
337
+ value: 0.7275517192718437
338
+ name: Cosine Ndcg@10
339
+ - type: cosine_mrr@10
340
+ value: 0.6752375656331816
341
+ name: Cosine Mrr@10
342
+ - type: cosine_map@100
343
+ value: 0.6793502491099088
344
+ name: Cosine Map@100
345
+ - task:
346
+ type: information-retrieval
347
+ name: Information Retrieval
348
+ dataset:
349
+ name: dim 96
350
+ type: dim_96
351
+ metrics:
352
+ - type: cosine_accuracy@1
353
+ value: 0.551954242135367
354
+ name: Cosine Accuracy@1
355
+ - type: cosine_accuracy@3
356
+ value: 0.7416587225929456
357
+ name: Cosine Accuracy@3
358
+ - type: cosine_accuracy@5
359
+ value: 0.8093422306959008
360
+ name: Cosine Accuracy@5
361
+ - type: cosine_accuracy@10
362
+ value: 0.8732125834127741
363
+ name: Cosine Accuracy@10
364
+ - type: cosine_precision@1
365
+ value: 0.551954242135367
366
+ name: Cosine Precision@1
367
+ - type: cosine_precision@3
368
+ value: 0.24721957419764853
369
+ name: Cosine Precision@3
370
+ - type: cosine_precision@5
371
+ value: 0.1618684461391802
372
+ name: Cosine Precision@5
373
+ - type: cosine_precision@10
374
+ value: 0.08732125834127741
375
+ name: Cosine Precision@10
376
+ - type: cosine_recall@1
377
+ value: 0.551954242135367
378
+ name: Cosine Recall@1
379
+ - type: cosine_recall@3
380
+ value: 0.7416587225929456
381
+ name: Cosine Recall@3
382
+ - type: cosine_recall@5
383
+ value: 0.8093422306959008
384
+ name: Cosine Recall@5
385
+ - type: cosine_recall@10
386
+ value: 0.8732125834127741
387
+ name: Cosine Recall@10
388
+ - type: cosine_ndcg@10
389
+ value: 0.7119774118711802
390
+ name: Cosine Ndcg@10
391
+ - type: cosine_mrr@10
392
+ value: 0.660333348464903
393
+ name: Cosine Mrr@10
394
+ - type: cosine_map@100
395
+ value: 0.6648689218069684
396
+ name: Cosine Map@100
397
+ - task:
398
+ type: information-retrieval
399
+ name: Information Retrieval
400
+ dataset:
401
+ name: dim 64
402
+ type: dim_64
403
+ metrics:
404
+ - type: cosine_accuracy@1
405
+ value: 0.5166825548141086
406
+ name: Cosine Accuracy@1
407
+ - type: cosine_accuracy@3
408
+ value: 0.7044804575786463
409
+ name: Cosine Accuracy@3
410
+ - type: cosine_accuracy@5
411
+ value: 0.7683508102955195
412
+ name: Cosine Accuracy@5
413
+ - type: cosine_accuracy@10
414
+ value: 0.8369876072449952
415
+ name: Cosine Accuracy@10
416
+ - type: cosine_precision@1
417
+ value: 0.5166825548141086
418
+ name: Cosine Precision@1
419
+ - type: cosine_precision@3
420
+ value: 0.2348268191928821
421
+ name: Cosine Precision@3
422
+ - type: cosine_precision@5
423
+ value: 0.1536701620591039
424
+ name: Cosine Precision@5
425
+ - type: cosine_precision@10
426
+ value: 0.08369876072449953
427
+ name: Cosine Precision@10
428
+ - type: cosine_recall@1
429
+ value: 0.5166825548141086
430
+ name: Cosine Recall@1
431
+ - type: cosine_recall@3
432
+ value: 0.7044804575786463
433
+ name: Cosine Recall@3
434
+ - type: cosine_recall@5
435
+ value: 0.7683508102955195
436
+ name: Cosine Recall@5
437
+ - type: cosine_recall@10
438
+ value: 0.8369876072449952
439
+ name: Cosine Recall@10
440
+ - type: cosine_ndcg@10
441
+ value: 0.6755211859192654
442
+ name: Cosine Ndcg@10
443
+ - type: cosine_mrr@10
444
+ value: 0.6239059875618503
445
+ name: Cosine Mrr@10
446
+ - type: cosine_map@100
447
+ value: 0.6292715088820261
448
+ name: Cosine Map@100
449
+ ---
450
+
451
+ # MNLP M3 Encoder SciQA
452
+
453
+ This is a [sentence-transformers](https://www.SBERT.net) model fine-tuned from [sentence-transformers/all-MiniLM-L6-v2](https://huggingface.co/sentence-transformers/all-MiniLM-L6-v2) on the `json` dataset (9,432 anchor–positive training pairs). It maps sentences & paragraphs to a 384-dimensional dense vector space and can be used for semantic textual similarity, semantic search, paraphrase mining, text classification, clustering, and more.
454
+
455
+ ## Model Details
456
+
457
+ ### Model Description
458
+ - **Model Type:** Sentence Transformer
459
+ - **Base model:** [sentence-transformers/all-MiniLM-L6-v2](https://huggingface.co/sentence-transformers/all-MiniLM-L6-v2) <!-- at revision c9745ed1d9f207416be6d2e6f8de32d1f16199bf -->
460
+ - **Maximum Sequence Length:** 256 tokens
461
+ - **Output Dimensionality:** 384 dimensions
462
+ - **Similarity Function:** Cosine Similarity
463
+ - **Training Dataset:**
464
+ - json
465
+ - **Language:** en
466
+ - **License:** apache-2.0
467
+
468
+ ### Model Sources
469
+
470
+ - **Documentation:** [Sentence Transformers Documentation](https://sbert.net)
471
+ - **Repository:** [Sentence Transformers on GitHub](https://github.com/UKPLab/sentence-transformers)
472
+ - **Hugging Face:** [Sentence Transformers on Hugging Face](https://huggingface.co/models?library=sentence-transformers)
473
+
474
+ ### Full Model Architecture
475
+
476
+ ```
477
+ SentenceTransformer(
478
+ (0): Transformer({'max_seq_length': 256, 'do_lower_case': False}) with Transformer model: BertModel
479
+ (1): Pooling({'word_embedding_dimension': 384, 'pooling_mode_cls_token': False, 'pooling_mode_mean_tokens': True, 'pooling_mode_max_tokens': False, 'pooling_mode_mean_sqrt_len_tokens': False, 'pooling_mode_weightedmean_tokens': False, 'pooling_mode_lasttoken': False, 'include_prompt': True})
480
+ (2): Normalize()
481
+ )
482
+ ```
483
+
484
+ ## Usage
485
+
486
+ ### Direct Usage (Sentence Transformers)
487
+
488
+ First install the Sentence Transformers library:
489
+
490
+ ```bash
491
+ pip install -U sentence-transformers
492
+ ```
493
+
494
+ Then you can load this model and run inference.
495
+ ```python
496
+ from sentence_transformers import SentenceTransformer
497
+
498
+ # Download from the 🤗 Hub (replace the placeholder id below with this model's repository id)
499
+ model = SentenceTransformer("sentence_transformers_model_id")
500
+ # Run inference
501
+ sentences = [
502
+ 'There are only four possible bases that make up each dna nucleotide: adenine, guanine, thymine, and?',
503
+ 'The only difference between each nucleotide is the identity of the base. There are only four possible bases that make up each DNA nucleotide: adenine (A), guanine (G), thymine (T), and cytosine (C).',
504
+ 'Metamorphism. This long word means “to change form. “ A rock undergoes metamorphism if it is exposed to extreme heat and pressure within the crust. With metamorphism , the rock does not melt all the way. The rock changes due to heat and pressure. A metamorphic rock may have a new mineral composition and/or texture.',
505
+ ]
506
+ embeddings = model.encode(sentences)
507
+ print(embeddings.shape)
508
+ # [3, 384]
509
+
510
+ # Get the similarity scores for the embeddings
511
+ similarities = model.similarity(embeddings, embeddings)
512
+ print(similarities.shape)
513
+ # [3, 3]
514
+ ```
515
+
516
+ <!--
517
+ ### Direct Usage (Transformers)
518
+
519
+ <details><summary>Click to see the direct usage in Transformers</summary>
520
+
521
+ </details>
522
+ -->
523
+
524
+ <!--
525
+ ### Downstream Usage (Sentence Transformers)
526
+
527
+ You can finetune this model on your own dataset.
528
+
529
+ <details><summary>Click to expand</summary>
530
+
531
+ </details>
532
+ -->
533
+
534
+ <!--
535
+ ### Out-of-Scope Use
536
+
537
+ *List how the model may foreseeably be misused and address what users ought not to do with the model.*
538
+ -->
539
+
540
+ ## Evaluation
541
+
542
+ ### Metrics
543
+
544
+ #### Information Retrieval
545
+
546
+ * Datasets: `dim_384`, `dim_256`, `dim_192`, `dim_128`, `dim_96` and `dim_64`
547
+ * Evaluated with [<code>InformationRetrievalEvaluator</code>](https://sbert.net/docs/package_reference/sentence_transformer/evaluation.html#sentence_transformers.evaluation.InformationRetrievalEvaluator)
548
+
549
+ | Metric | dim_384 | dim_256 | dim_192 | dim_128 | dim_96 | dim_64 |
550
+ |:--------------------|:----------|:-----------|:-----------|:-----------|:----------|:-----------|
551
+ | cosine_accuracy@1 | 0.612 | 0.5977 | 0.5891 | 0.5663 | 0.552 | 0.5167 |
552
+ | cosine_accuracy@3 | 0.8017 | 0.7912 | 0.7788 | 0.7626 | 0.7417 | 0.7045 |
553
+ | cosine_accuracy@5 | 0.8541 | 0.8398 | 0.8332 | 0.8265 | 0.8093 | 0.7684 |
554
+ | cosine_accuracy@10 | 0.9276 | 0.9152 | 0.9037 | 0.8913 | 0.8732 | 0.837 |
555
+ | cosine_precision@1 | 0.612 | 0.5977 | 0.5891 | 0.5663 | 0.552 | 0.5167 |
556
+ | cosine_precision@3 | 0.2672 | 0.2637 | 0.2596 | 0.2542 | 0.2472 | 0.2348 |
557
+ | cosine_precision@5 | 0.1708 | 0.168 | 0.1666 | 0.1653 | 0.1619 | 0.1537 |
558
+ | cosine_precision@10 | 0.0928 | 0.0915 | 0.0904 | 0.0891 | 0.0873 | 0.0837 |
559
+ | cosine_recall@1 | 0.612 | 0.5977 | 0.5891 | 0.5663 | 0.552 | 0.5167 |
560
+ | cosine_recall@3 | 0.8017 | 0.7912 | 0.7788 | 0.7626 | 0.7417 | 0.7045 |
561
+ | cosine_recall@5 | 0.8541 | 0.8398 | 0.8332 | 0.8265 | 0.8093 | 0.7684 |
562
+ | cosine_recall@10 | 0.9276 | 0.9152 | 0.9037 | 0.8913 | 0.8732 | 0.837 |
563
+ | **cosine_ndcg@10** | **0.769** | **0.7559** | **0.7467** | **0.7276** | **0.712** | **0.6755** |
564
+ | cosine_mrr@10 | 0.7185 | 0.705 | 0.6965 | 0.6752 | 0.6603 | 0.6239 |
565
+ | cosine_map@100 | 0.721 | 0.7085 | 0.7004 | 0.6794 | 0.6649 | 0.6293 |
566
+
567
+ <!--
568
+ ## Bias, Risks and Limitations
569
+
570
+ *What are the known or foreseeable issues stemming from this model? You could also flag here known failure cases or weaknesses of the model.*
571
+ -->
572
+
573
+ <!--
574
+ ### Recommendations
575
+
576
+ *What are recommendations with respect to the foreseeable issues? For example, filtering explicit content.*
577
+ -->
578
+
579
+ ## Training Details
580
+
581
+ ### Training Dataset
582
+
583
+ #### json
584
+
585
+ * Dataset: json
586
+ * Size: 9,432 training samples
587
+ * Columns: <code>anchor</code> and <code>positive</code>
588
+ * Approximate statistics based on the first 1000 samples:
589
+ | | anchor | positive |
590
+ |:--------|:----------------------------------------------------------------------------------|:------------------------------------------------------------------------------------|
591
+ | type | string | string |
592
+ | details | <ul><li>min: 7 tokens</li><li>mean: 18.15 tokens</li><li>max: 60 tokens</li></ul> | <ul><li>min: 10 tokens</li><li>mean: 94.56 tokens</li><li>max: 256 tokens</li></ul> |
593
+ * Samples:
594
+ | anchor | positive |
595
+ |:-------------------------------------------------------------------------------------------------|:-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
596
+ | <code>What is the term for atherosclerosis of arteries that supply the heart muscle?</code> | <code>Atherosclerosis of arteries that supply the heart muscle is called coronary heart disease . This disease may or may not have symptoms, such as chest pain. As the disease progresses, there is an increased risk of heart attack. A heart attack occurs when the blood supply to part of the heart muscle is blocked and cardiac muscle fibers die. Coronary heart disease is the leading cause of death of adults in the United States.</code> |
597
+ | <code>What term describes a drug that has an effect on the central nervous system?</code> | <code>Caffeine is an example of a psychoactive drug. It is found in coffee and many other products (see Table below ). Caffeine is a central nervous system stimulant . Like other stimulant drugs, it makes you feel more awake and alert. Other psychoactive drugs include alcohol, nicotine, and marijuana. Each has a different effect on the central nervous system. Alcohol, for example, is a depressant . It has the opposite effects of a stimulant like caffeine.</code> |
598
+ | <code>What scale is used to succinctly communicate the acidity or basicity of a solution?</code> | <code>The pH scale is used to succinctly communicate the acidity or basicity of a solution.</code> |
599
+ * Loss: [<code>MatryoshkaLoss</code>](https://sbert.net/docs/package_reference/sentence_transformer/losses.html#matryoshkaloss) with these parameters:
600
+ ```json
601
+ {
602
+ "loss": "MultipleNegativesRankingLoss",
603
+ "matryoshka_dims": [
604
+ 384,
605
+ 256,
606
+ 192,
607
+ 128,
608
+ 96,
609
+ 64
610
+ ],
611
+ "matryoshka_weights": [
612
+ 1,
613
+ 1,
614
+ 1,
615
+ 1,
616
+ 1,
617
+ 1
618
+ ],
619
+ "n_dims_per_step": -1
620
+ }
621
+ ```
622
+
623
+ ### Training Hyperparameters
624
+ #### Non-Default Hyperparameters
625
+
626
+ - `eval_strategy`: epoch
627
+ - `per_device_train_batch_size`: 32
628
+ - `per_device_eval_batch_size`: 16
629
+ - `gradient_accumulation_steps`: 16
630
+ - `learning_rate`: 2e-05
631
+ - `num_train_epochs`: 4
632
+ - `lr_scheduler_type`: cosine
633
+ - `warmup_ratio`: 0.1
634
+ - `bf16`: True
635
+ - `tf32`: True
636
+ - `load_best_model_at_end`: True
637
+ - `optim`: adamw_torch_fused
638
+ - `batch_sampler`: no_duplicates
639
+
640
+ #### All Hyperparameters
641
+ <details><summary>Click to expand</summary>
642
+
643
+ - `overwrite_output_dir`: False
644
+ - `do_predict`: False
645
+ - `eval_strategy`: epoch
646
+ - `prediction_loss_only`: True
647
+ - `per_device_train_batch_size`: 32
648
+ - `per_device_eval_batch_size`: 16
649
+ - `per_gpu_train_batch_size`: None
650
+ - `per_gpu_eval_batch_size`: None
651
+ - `gradient_accumulation_steps`: 16
652
+ - `eval_accumulation_steps`: None
653
+ - `torch_empty_cache_steps`: None
654
+ - `learning_rate`: 2e-05
655
+ - `weight_decay`: 0.0
656
+ - `adam_beta1`: 0.9
657
+ - `adam_beta2`: 0.999
658
+ - `adam_epsilon`: 1e-08
659
+ - `max_grad_norm`: 1.0
660
+ - `num_train_epochs`: 4
661
+ - `max_steps`: -1
662
+ - `lr_scheduler_type`: cosine
663
+ - `lr_scheduler_kwargs`: {}
664
+ - `warmup_ratio`: 0.1
665
+ - `warmup_steps`: 0
666
+ - `log_level`: passive
667
+ - `log_level_replica`: warning
668
+ - `log_on_each_node`: True
669
+ - `logging_nan_inf_filter`: True
670
+ - `save_safetensors`: True
671
+ - `save_on_each_node`: False
672
+ - `save_only_model`: False
673
+ - `restore_callback_states_from_checkpoint`: False
674
+ - `no_cuda`: False
675
+ - `use_cpu`: False
676
+ - `use_mps_device`: False
677
+ - `seed`: 42
678
+ - `data_seed`: None
679
+ - `jit_mode_eval`: False
680
+ - `use_ipex`: False
681
+ - `bf16`: True
682
+ - `fp16`: False
683
+ - `fp16_opt_level`: O1
684
+ - `half_precision_backend`: auto
685
+ - `bf16_full_eval`: False
686
+ - `fp16_full_eval`: False
687
+ - `tf32`: True
688
+ - `local_rank`: 0
689
+ - `ddp_backend`: None
690
+ - `tpu_num_cores`: None
691
+ - `tpu_metrics_debug`: False
692
+ - `debug`: []
693
+ - `dataloader_drop_last`: False
694
+ - `dataloader_num_workers`: 0
695
+ - `dataloader_prefetch_factor`: None
696
+ - `past_index`: -1
697
+ - `disable_tqdm`: False
698
+ - `remove_unused_columns`: True
699
+ - `label_names`: None
700
+ - `load_best_model_at_end`: True
701
+ - `ignore_data_skip`: False
702
+ - `fsdp`: []
703
+ - `fsdp_min_num_params`: 0
704
+ - `fsdp_config`: {'min_num_params': 0, 'xla': False, 'xla_fsdp_v2': False, 'xla_fsdp_grad_ckpt': False}
705
+ - `tp_size`: 0
706
+ - `fsdp_transformer_layer_cls_to_wrap`: None
707
+ - `accelerator_config`: {'split_batches': False, 'dispatch_batches': None, 'even_batches': True, 'use_seedable_sampler': True, 'non_blocking': False, 'gradient_accumulation_kwargs': None}
708
+ - `deepspeed`: None
709
+ - `label_smoothing_factor`: 0.0
710
+ - `optim`: adamw_torch_fused
711
+ - `optim_args`: None
712
+ - `adafactor`: False
713
+ - `group_by_length`: False
714
+ - `length_column_name`: length
715
+ - `ddp_find_unused_parameters`: None
716
+ - `ddp_bucket_cap_mb`: None
717
+ - `ddp_broadcast_buffers`: False
718
+ - `dataloader_pin_memory`: True
719
+ - `dataloader_persistent_workers`: False
720
+ - `skip_memory_metrics`: True
721
+ - `use_legacy_prediction_loop`: False
722
+ - `push_to_hub`: False
723
+ - `resume_from_checkpoint`: None
724
+ - `hub_model_id`: None
725
+ - `hub_strategy`: every_save
726
+ - `hub_private_repo`: None
727
+ - `hub_always_push`: False
728
+ - `gradient_checkpointing`: False
729
+ - `gradient_checkpointing_kwargs`: None
730
+ - `include_inputs_for_metrics`: False
731
+ - `include_for_metrics`: []
732
+ - `eval_do_concat_batches`: True
733
+ - `fp16_backend`: auto
734
+ - `push_to_hub_model_id`: None
735
+ - `push_to_hub_organization`: None
736
+ - `mp_parameters`:
737
+ - `auto_find_batch_size`: False
738
+ - `full_determinism`: False
739
+ - `torchdynamo`: None
740
+ - `ray_scope`: last
741
+ - `ddp_timeout`: 1800
742
+ - `torch_compile`: False
743
+ - `torch_compile_backend`: None
744
+ - `torch_compile_mode`: None
745
+ - `include_tokens_per_second`: False
746
+ - `include_num_input_tokens_seen`: False
747
+ - `neftune_noise_alpha`: None
748
+ - `optim_target_modules`: None
749
+ - `batch_eval_metrics`: False
750
+ - `eval_on_start`: False
751
+ - `use_liger_kernel`: False
752
+ - `eval_use_gather_object`: False
753
+ - `average_tokens_across_devices`: False
754
+ - `prompts`: None
755
+ - `batch_sampler`: no_duplicates
756
+ - `multi_dataset_batch_sampler`: proportional
757
+
758
+ </details>
759
+
760
+ ### Training Logs
761
+ | Epoch | Step | Training Loss | dim_384_cosine_ndcg@10 | dim_256_cosine_ndcg@10 | dim_192_cosine_ndcg@10 | dim_128_cosine_ndcg@10 | dim_96_cosine_ndcg@10 | dim_64_cosine_ndcg@10 |
762
+ |:----------:|:------:|:-------------:|:----------------------:|:----------------------:|:----------------------:|:----------------------:|:---------------------:|:---------------------:|
763
+ | 0.5424 | 10 | 22.4049 | - | - | - | - | - | - |
764
+ | 1.0 | 19 | - | 0.7424 | 0.7315 | 0.7263 | 0.7093 | 0.6919 | 0.6575 |
765
+ | 1.0542 | 20 | 16.6616 | - | - | - | - | - | - |
766
+ | 1.5966 | 30 | 16.8367 | - | - | - | - | - | - |
767
+ | 2.0 | 38 | - | 0.7612 | 0.7520 | 0.7431 | 0.7261 | 0.7097 | 0.6708 |
768
+ | 2.1085 | 40 | 12.8169 | - | - | - | - | - | - |
769
+ | 2.6508 | 50 | 13.7826 | - | - | - | - | - | - |
770
+ | 3.0 | 57 | - | 0.7675 | 0.7548 | 0.7477 | 0.7274 | 0.7125 | 0.6756 |
771
+ | 3.1627 | 60 | 12.4455 | - | - | - | - | - | - |
772
+ | 3.7051 | 70 | 12.2968 | - | - | - | - | - | - |
773
+ | **3.8136** | **72** | **-** | **0.769** | **0.7559** | **0.7467** | **0.7276** | **0.712** | **0.6755** |
774
+
775
+ * The bold row denotes the saved checkpoint.
776
+
777
+ ### Framework Versions
778
+ - Python: 3.12.8
779
+ - Sentence Transformers: 3.4.1
780
+ - Transformers: 4.51.3
781
+ - PyTorch: 2.5.1+cu124
782
+ - Accelerate: 1.3.0
783
+ - Datasets: 3.6.0
784
+ - Tokenizers: 0.21.0
785
+
786
+ ## Citation
787
+
788
+ ### BibTeX
789
+
790
+ #### Sentence Transformers
791
+ ```bibtex
792
+ @inproceedings{reimers-2019-sentence-bert,
793
+ title = "Sentence-BERT: Sentence Embeddings using Siamese BERT-Networks",
794
+ author = "Reimers, Nils and Gurevych, Iryna",
795
+ booktitle = "Proceedings of the 2019 Conference on Empirical Methods in Natural Language Processing",
796
+ month = "11",
797
+ year = "2019",
798
+ publisher = "Association for Computational Linguistics",
799
+ url = "https://arxiv.org/abs/1908.10084",
800
+ }
801
+ ```
802
+
803
+ #### MatryoshkaLoss
804
+ ```bibtex
805
+ @misc{kusupati2024matryoshka,
806
+ title={Matryoshka Representation Learning},
807
+ author={Aditya Kusupati and Gantavya Bhatt and Aniket Rege and Matthew Wallingford and Aditya Sinha and Vivek Ramanujan and William Howard-Snyder and Kaifeng Chen and Sham Kakade and Prateek Jain and Ali Farhadi},
808
+ year={2024},
809
+ eprint={2205.13147},
810
+ archivePrefix={arXiv},
811
+ primaryClass={cs.LG}
812
+ }
813
+ ```
814
+
815
+ #### MultipleNegativesRankingLoss
816
+ ```bibtex
817
+ @misc{henderson2017efficient,
818
+ title={Efficient Natural Language Response Suggestion for Smart Reply},
819
+ author={Matthew Henderson and Rami Al-Rfou and Brian Strope and Yun-hsuan Sung and Laszlo Lukacs and Ruiqi Guo and Sanjiv Kumar and Balint Miklos and Ray Kurzweil},
820
+ year={2017},
821
+ eprint={1705.00652},
822
+ archivePrefix={arXiv},
823
+ primaryClass={cs.CL}
824
+ }
825
+ ```
826
+
827
+ <!--
828
+ ## Glossary
829
+
830
+ *Clearly define terms in order to be accessible across audiences.*
831
+ -->
832
+
833
+ <!--
834
+ ## Model Card Authors
835
+
836
+ *Lists the people who create the model card, providing recognition and accountability for the detailed work that goes into its construction.*
837
+ -->
838
+
839
+ <!--
840
+ ## Model Card Contact
841
+
842
+ *Provides a way for people who have updates to the Model Card, suggestions, or questions, to contact the Model Card authors.*
843
+ -->
checkpoint-38/1_Pooling/config.json ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "word_embedding_dimension": 384,
3
+ "pooling_mode_cls_token": false,
4
+ "pooling_mode_mean_tokens": true,
5
+ "pooling_mode_max_tokens": false,
6
+ "pooling_mode_mean_sqrt_len_tokens": false,
7
+ "pooling_mode_weightedmean_tokens": false,
8
+ "pooling_mode_lasttoken": false,
9
+ "include_prompt": true
10
+ }
checkpoint-38/README.md ADDED
@@ -0,0 +1,836 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ language:
3
+ - en
4
+ license: apache-2.0
5
+ tags:
6
+ - sentence-transformers
7
+ - sentence-similarity
8
+ - feature-extraction
9
+ - generated_from_trainer
10
+ - dataset_size:9432
11
+ - loss:MatryoshkaLoss
12
+ - loss:MultipleNegativesRankingLoss
13
+ base_model: sentence-transformers/all-MiniLM-L6-v2
14
+ widget:
15
+ - source_sentence: Atherosclerosis and coronary heart disease are examples of what
16
+ type of body system disease?
17
+ sentences:
18
+ - Diseases of the cardiovascular system are common and may be life threatening.
19
+ Examples include atherosclerosis and coronary heart disease. A healthy lifestyle
20
+ can reduce the risk of such diseases developing. This includes avoiding smoking,
21
+ getting regular physical activity, and maintaining a healthy percent of body fat.
22
+ - Osmosis Osmosis is the diffusion of water through a semipermeable membrane according
23
+ to the concentration gradient of water across the membrane. Whereas diffusion
24
+ transports material across membranes and within cells, osmosis transports only
25
+ water across a membrane and the membrane limits the diffusion of solutes in the
26
+ water. Osmosis is a special case of diffusion. Water, like other substances, moves
27
+ from an area of higher concentration to one of lower concentration. Imagine a
28
+ beaker with a semipermeable membrane, separating the two sides or halves (Figure
29
+ 3.21). On both sides of the membrane, the water level is the same, but there are
30
+ different concentrations on each side of a dissolved substance, or solute, that
31
+ cannot cross the membrane. If the volume of the water is the same, but the concentrations
32
+ of solute are different, then there are also different concentrations of water,
33
+ the solvent, on either side of the membrane.
34
+ - Circadian rhythms are regular changes in biology or behavior that occur in a 24-hour
35
+ cycle. In humans, for example, blood pressure and body temperature change in a
36
+ regular way throughout each 24-hour day. Animals may eat and drink at certain
37
+ times of day as well. Humans have daily cycles of behavior, too. Most people start
38
+ to get sleepy after dark and have a hard time sleeping when it is light outside.
39
+ In many species, including humans, circadian rhythms are controlled by a tiny
40
+ structure called the biological clock . This structure is located in a gland at
41
+ the base of the brain. The biological clock sends signals to the body. The signals
42
+ cause regular changes in behavior and body processes. The amount of light entering
43
+ the eyes helps control the biological clock. The clock causes changes that repeat
44
+ every 24 hours.
45
+ - source_sentence: How does a cell's membrane keep extracellular materials from mixing
46
+ with its internal components?
47
+ sentences:
48
+ - We know that the Universe is expanding. Astronomers have wondered if it is expanding
49
+ fast enough to escape the pull of gravity. Would the Universe just expand forever?
50
+ If it could not escape the pull of gravity, would it someday start to contract?
51
+ This means it would eventually get squeezed together in a big crunch. This is
52
+ the opposite of the Big Bang.
53
+ - Physical properties that do not depend on the amount of substance present are
54
+ called intensive properties . Intensive properties do not change with changes
55
+ of size, shape, or scale. Examples of intensive properties are as follows in the
56
+ Table below .
57
+ - CHAPTER REVIEW 3.1 The Cell Membrane The cell membrane provides a barrier around
58
+ the cell, separating its internal components from the extracellular environment.
59
+ It is composed of a phospholipid bilayer, with hydrophobic internal lipid “tails”
60
+ and hydrophilic external phosphate “heads. ” Various membrane proteins are scattered
61
+ throughout the bilayer, both inserted within it and attached to it peripherally.
62
+ The cell membrane is selectively permeable, allowing only a limited number of
63
+ materials to diffuse through its lipid bilayer. All materials that cross the membrane
64
+ do so using passive (non energy-requiring) or active (energy-requiring) transport
65
+ processes. During passive transport, materials move by simple diffusion or by
66
+ facilitated diffusion through the membrane, down their concentration gradient.
67
+ Water passes through the membrane in a diffusion process called osmosis. During
68
+ active transport, energy is expended to assist material movement across the membrane
69
+ in a direction against their concentration gradient. Active transport may take
70
+ place with the help of protein pumps or through the use of vesicles.
71
+ - source_sentence: An infection may be intracellular or extracellular, depending on
72
+ this?
73
+ sentences:
74
+ - '22.3 Magnetic Fields and Magnetic Field Lines • Magnetic fields can be pictorially
75
+ represented by magnetic field lines, the properties of which are as follows: 1.
76
+ The field is tangent to the magnetic field line. Field strength is proportional
77
+ to the line density. Field lines cannot cross. Field lines are continuous loops.'
78
+ - Figure 24.13 The lifecycle of an ascomycete is characterized by the production
79
+ of asci during the sexual phase. The haploid phase is the predominant phase of
80
+ the life cycle.
81
+ - Caffeine is an example of a psychoactive drug. It is found in coffee and many
82
+ other products (see Table below ). Caffeine is a central nervous system stimulant
83
+ . Like other stimulant drugs, it makes you feel more awake and alert. Other psychoactive
84
+ drugs include alcohol, nicotine, and marijuana. Each has a different effect on
85
+ the central nervous system. Alcohol, for example, is a depressant . It has the
86
+ opposite effects of a stimulant like caffeine.
87
+ - source_sentence: What does water treatment do to water?
88
+ sentences:
89
+ - Some solutes, such as sodium acetate, do not recrystallize easily. Suppose an
90
+ exactly saturated solution of sodium acetate is prepared at 50°C. As it cools
91
+ back to room temperature, no crystals appear in the solution, even though the
92
+ solubility of sodium acetate is lower at room temperature. A supersaturated solution
93
+ is a solution that contains more than the maximum amount of solute that is capable
94
+ of being dissolved at a given temperature. The recrystallization of the excess
95
+ dissolved solute in a supersaturated solution can be initiated by the addition
96
+ of a tiny crystal of solute, called a seed crystal. The seed crystal provides
97
+ a nucleation site on which the excess dissolved crystals can begin to grow. Recrystallization
98
+ from a supersaturated solution is typically very fast.
99
+ - Figure 23.13, the esophagus runs a mainly straight route through the mediastinum
100
+ of the thorax. To enter the abdomen, the esophagus penetrates the diaphragm through
101
+ an opening called the esophageal hiatus.
102
+ - Water treatment is a series of processes that remove unwanted substances from
103
+ water. More processes are needed to purify water for drinking than for other uses.
104
+ - source_sentence: 'There are only four possible bases that make up each dna nucleotide:
105
+ adenine, guanine, thymine, and?'
106
+ sentences:
107
+ - Metamorphism. This long word means “to change form. “ A rock undergoes metamorphism
108
+ if it is exposed to extreme heat and pressure within the crust. With metamorphism
109
+ , the rock does not melt all the way. The rock changes due to heat and pressure.
110
+ A metamorphic rock may have a new mineral composition and/or texture.
111
+ - Forest and Kim Starr (Flickr:Starr Environmental). Secondary succession occurs
112
+ when nature reclaims areas formerly occupied by life . CC BY 2.0.
113
+ - 'The only difference between each nucleotide is the identity of the base. There
114
+ are only four possible bases that make up each DNA nucleotide: adenine (A), guanine
115
+ (G), thymine (T), and cytosine (C).'
116
+ pipeline_tag: sentence-similarity
117
+ library_name: sentence-transformers
118
+ metrics:
119
+ - cosine_accuracy@1
120
+ - cosine_accuracy@3
121
+ - cosine_accuracy@5
122
+ - cosine_accuracy@10
123
+ - cosine_precision@1
124
+ - cosine_precision@3
125
+ - cosine_precision@5
126
+ - cosine_precision@10
127
+ - cosine_recall@1
128
+ - cosine_recall@3
129
+ - cosine_recall@5
130
+ - cosine_recall@10
131
+ - cosine_ndcg@10
132
+ - cosine_mrr@10
133
+ - cosine_map@100
134
+ model-index:
135
+ - name: MNLP M3 Encoder SciQA
136
+ results:
137
+ - task:
138
+ type: information-retrieval
139
+ name: Information Retrieval
140
+ dataset:
141
+ name: dim 384
142
+ type: dim_384
143
+ metrics:
144
+ - type: cosine_accuracy@1
145
+ value: 0.6015252621544328
146
+ name: Cosine Accuracy@1
147
+ - type: cosine_accuracy@3
148
+ value: 0.7959961868446139
149
+ name: Cosine Accuracy@3
150
+ - type: cosine_accuracy@5
151
+ value: 0.8531935176358436
152
+ name: Cosine Accuracy@5
153
+ - type: cosine_accuracy@10
154
+ value: 0.9199237368922784
155
+ name: Cosine Accuracy@10
156
+ - type: cosine_precision@1
157
+ value: 0.6015252621544328
158
+ name: Cosine Precision@1
159
+ - type: cosine_precision@3
160
+ value: 0.26533206228153794
161
+ name: Cosine Precision@3
162
+ - type: cosine_precision@5
163
+ value: 0.17063870352716873
164
+ name: Cosine Precision@5
165
+ - type: cosine_precision@10
166
+ value: 0.09199237368922783
167
+ name: Cosine Precision@10
168
+ - type: cosine_recall@1
169
+ value: 0.6015252621544328
170
+ name: Cosine Recall@1
171
+ - type: cosine_recall@3
172
+ value: 0.7959961868446139
173
+ name: Cosine Recall@3
174
+ - type: cosine_recall@5
175
+ value: 0.8531935176358436
176
+ name: Cosine Recall@5
177
+ - type: cosine_recall@10
178
+ value: 0.9199237368922784
179
+ name: Cosine Recall@10
180
+ - type: cosine_ndcg@10
181
+ value: 0.761241503632434
182
+ name: Cosine Ndcg@10
183
+ - type: cosine_mrr@10
184
+ value: 0.7104082497314151
185
+ name: Cosine Mrr@10
186
+ - type: cosine_map@100
187
+ value: 0.713601684515785
188
+ name: Cosine Map@100
189
+ - task:
190
+ type: information-retrieval
191
+ name: Information Retrieval
192
+ dataset:
193
+ name: dim 256
194
+ type: dim_256
195
+ metrics:
196
+ - type: cosine_accuracy@1
197
+ value: 0.5919923736892279
198
+ name: Cosine Accuracy@1
199
+ - type: cosine_accuracy@3
200
+ value: 0.7902764537654909
201
+ name: Cosine Accuracy@3
202
+ - type: cosine_accuracy@5
203
+ value: 0.8360343183984748
204
+ name: Cosine Accuracy@5
205
+ - type: cosine_accuracy@10
206
+ value: 0.9142040038131554
207
+ name: Cosine Accuracy@10
208
+ - type: cosine_precision@1
209
+ value: 0.5919923736892279
210
+ name: Cosine Precision@1
211
+ - type: cosine_precision@3
212
+ value: 0.26342548458849696
213
+ name: Cosine Precision@3
214
+ - type: cosine_precision@5
215
+ value: 0.16720686367969492
216
+ name: Cosine Precision@5
217
+ - type: cosine_precision@10
218
+ value: 0.09142040038131555
219
+ name: Cosine Precision@10
220
+ - type: cosine_recall@1
221
+ value: 0.5919923736892279
222
+ name: Cosine Recall@1
223
+ - type: cosine_recall@3
224
+ value: 0.7902764537654909
225
+ name: Cosine Recall@3
226
+ - type: cosine_recall@5
227
+ value: 0.8360343183984748
228
+ name: Cosine Recall@5
229
+ - type: cosine_recall@10
230
+ value: 0.9142040038131554
231
+ name: Cosine Recall@10
232
+ - type: cosine_ndcg@10
233
+ value: 0.7520267351833514
234
+ name: Cosine Ndcg@10
235
+ - type: cosine_mrr@10
236
+ value: 0.700305279404422
237
+ name: Cosine Mrr@10
238
+ - type: cosine_map@100
239
+ value: 0.7038093293311698
240
+ name: Cosine Map@100
241
+ - task:
242
+ type: information-retrieval
243
+ name: Information Retrieval
244
+ dataset:
245
+ name: dim 192
246
+ type: dim_192
247
+ metrics:
248
+ - type: cosine_accuracy@1
249
+ value: 0.5805529075309819
250
+ name: Cosine Accuracy@1
251
+ - type: cosine_accuracy@3
252
+ value: 0.782650142993327
253
+ name: Cosine Accuracy@3
254
+ - type: cosine_accuracy@5
255
+ value: 0.8322211630123928
256
+ name: Cosine Accuracy@5
257
+ - type: cosine_accuracy@10
258
+ value: 0.9008579599618685
259
+ name: Cosine Accuracy@10
260
+ - type: cosine_precision@1
261
+ value: 0.5805529075309819
262
+ name: Cosine Precision@1
263
+ - type: cosine_precision@3
264
+ value: 0.26088338099777564
265
+ name: Cosine Precision@3
266
+ - type: cosine_precision@5
267
+ value: 0.16644423260247856
268
+ name: Cosine Precision@5
269
+ - type: cosine_precision@10
270
+ value: 0.09008579599618685
271
+ name: Cosine Precision@10
272
+ - type: cosine_recall@1
273
+ value: 0.5805529075309819
274
+ name: Cosine Recall@1
275
+ - type: cosine_recall@3
276
+ value: 0.782650142993327
277
+ name: Cosine Recall@3
278
+ - type: cosine_recall@5
279
+ value: 0.8322211630123928
280
+ name: Cosine Recall@5
281
+ - type: cosine_recall@10
282
+ value: 0.9008579599618685
283
+ name: Cosine Recall@10
284
+ - type: cosine_ndcg@10
285
+ value: 0.7430712975035773
286
+ name: Cosine Ndcg@10
287
+ - type: cosine_mrr@10
288
+ value: 0.6923562879234952
289
+ name: Cosine Mrr@10
290
+ - type: cosine_map@100
291
+ value: 0.6964841260809953
292
+ name: Cosine Map@100
293
+ - task:
294
+ type: information-retrieval
295
+ name: Information Retrieval
296
+ dataset:
297
+ name: dim 128
298
+ type: dim_128
299
+ metrics:
300
+ - type: cosine_accuracy@1
301
+ value: 0.567206863679695
302
+ name: Cosine Accuracy@1
303
+ - type: cosine_accuracy@3
304
+ value: 0.7607244995233555
305
+ name: Cosine Accuracy@3
306
+ - type: cosine_accuracy@5
307
+ value: 0.8236415633937083
308
+ name: Cosine Accuracy@5
309
+ - type: cosine_accuracy@10
310
+ value: 0.886558627264061
311
+ name: Cosine Accuracy@10
312
+ - type: cosine_precision@1
313
+ value: 0.567206863679695
314
+ name: Cosine Precision@1
315
+ - type: cosine_precision@3
316
+ value: 0.25357483317445184
317
+ name: Cosine Precision@3
318
+ - type: cosine_precision@5
319
+ value: 0.16472831267874166
320
+ name: Cosine Precision@5
321
+ - type: cosine_precision@10
322
+ value: 0.0886558627264061
323
+ name: Cosine Precision@10
324
+ - type: cosine_recall@1
325
+ value: 0.567206863679695
326
+ name: Cosine Recall@1
327
+ - type: cosine_recall@3
328
+ value: 0.7607244995233555
329
+ name: Cosine Recall@3
330
+ - type: cosine_recall@5
331
+ value: 0.8236415633937083
332
+ name: Cosine Recall@5
333
+ - type: cosine_recall@10
334
+ value: 0.886558627264061
335
+ name: Cosine Recall@10
336
+ - type: cosine_ndcg@10
337
+ value: 0.7260517487265687
338
+ name: Cosine Ndcg@10
339
+ - type: cosine_mrr@10
340
+ value: 0.6746886679679823
341
+ name: Cosine Mrr@10
342
+ - type: cosine_map@100
343
+ value: 0.6790430112153837
344
+ name: Cosine Map@100
345
+ - task:
346
+ type: information-retrieval
347
+ name: Information Retrieval
348
+ dataset:
349
+ name: dim 96
350
+ type: dim_96
351
+ metrics:
352
+ - type: cosine_accuracy@1
353
+ value: 0.5471877979027645
354
+ name: Cosine Accuracy@1
355
+ - type: cosine_accuracy@3
356
+ value: 0.7407054337464252
357
+ name: Cosine Accuracy@3
358
+ - type: cosine_accuracy@5
359
+ value: 0.8017159199237369
360
+ name: Cosine Accuracy@5
361
+ - type: cosine_accuracy@10
362
+ value: 0.8722592945662536
363
+ name: Cosine Accuracy@10
364
+ - type: cosine_precision@1
365
+ value: 0.5471877979027645
366
+ name: Cosine Precision@1
367
+ - type: cosine_precision@3
368
+ value: 0.2469018112488084
369
+ name: Cosine Precision@3
370
+ - type: cosine_precision@5
371
+ value: 0.16034318398474737
372
+ name: Cosine Precision@5
373
+ - type: cosine_precision@10
374
+ value: 0.08722592945662536
375
+ name: Cosine Precision@10
376
+ - type: cosine_recall@1
377
+ value: 0.5471877979027645
378
+ name: Cosine Recall@1
379
+ - type: cosine_recall@3
380
+ value: 0.7407054337464252
381
+ name: Cosine Recall@3
382
+ - type: cosine_recall@5
383
+ value: 0.8017159199237369
384
+ name: Cosine Recall@5
385
+ - type: cosine_recall@10
386
+ value: 0.8722592945662536
387
+ name: Cosine Recall@10
388
+ - type: cosine_ndcg@10
389
+ value: 0.7097194683573752
390
+ name: Cosine Ndcg@10
391
+ - type: cosine_mrr@10
392
+ value: 0.6576811627097615
393
+ name: Cosine Mrr@10
394
+ - type: cosine_map@100
395
+ value: 0.6622003643008398
396
+ name: Cosine Map@100
397
+ - task:
398
+ type: information-retrieval
399
+ name: Information Retrieval
400
+ dataset:
401
+ name: dim 64
402
+ type: dim_64
403
+ metrics:
404
+ - type: cosine_accuracy@1
405
+ value: 0.5138226882745471
406
+ name: Cosine Accuracy@1
407
+ - type: cosine_accuracy@3
408
+ value: 0.7016205910390848
409
+ name: Cosine Accuracy@3
410
+ - type: cosine_accuracy@5
411
+ value: 0.7645376549094376
412
+ name: Cosine Accuracy@5
413
+ - type: cosine_accuracy@10
414
+ value: 0.8341277407054337
415
+ name: Cosine Accuracy@10
416
+ - type: cosine_precision@1
417
+ value: 0.5138226882745471
418
+ name: Cosine Precision@1
419
+ - type: cosine_precision@3
420
+ value: 0.2338735303463616
421
+ name: Cosine Precision@3
422
+ - type: cosine_precision@5
423
+ value: 0.1529075309818875
424
+ name: Cosine Precision@5
425
+ - type: cosine_precision@10
426
+ value: 0.08341277407054337
427
+ name: Cosine Precision@10
428
+ - type: cosine_recall@1
429
+ value: 0.5138226882745471
430
+ name: Cosine Recall@1
431
+ - type: cosine_recall@3
432
+ value: 0.7016205910390848
433
+ name: Cosine Recall@3
434
+ - type: cosine_recall@5
435
+ value: 0.7645376549094376
436
+ name: Cosine Recall@5
437
+ - type: cosine_recall@10
438
+ value: 0.8341277407054337
439
+ name: Cosine Recall@10
440
+ - type: cosine_ndcg@10
441
+ value: 0.6707950308444217
442
+ name: Cosine Ndcg@10
443
+ - type: cosine_mrr@10
444
+ value: 0.618670464690484
445
+ name: Cosine Mrr@10
446
+ - type: cosine_map@100
447
+ value: 0.6242158272303533
448
+ name: Cosine Map@100
449
+ ---
450
+
451
+ # MNLP M3 Encoder SciQA
452
+
453
+ This is a [sentence-transformers](https://www.SBERT.net) model finetuned from [sentence-transformers/all-MiniLM-L6-v2](https://huggingface.co/sentence-transformers/all-MiniLM-L6-v2) on the json dataset. It maps sentences & paragraphs to a 384-dimensional dense vector space and can be used for semantic textual similarity, semantic search, paraphrase mining, text classification, clustering, and more.
454
+
455
+ ## Model Details
456
+
457
+ ### Model Description
458
+ - **Model Type:** Sentence Transformer
459
+ - **Base model:** [sentence-transformers/all-MiniLM-L6-v2](https://huggingface.co/sentence-transformers/all-MiniLM-L6-v2) <!-- at revision c9745ed1d9f207416be6d2e6f8de32d1f16199bf -->
460
+ - **Maximum Sequence Length:** 256 tokens
461
+ - **Output Dimensionality:** 384 dimensions
462
+ - **Similarity Function:** Cosine Similarity
463
+ - **Training Dataset:**
464
+ - json
465
+ - **Language:** en
466
+ - **License:** apache-2.0
467
+
468
+ ### Model Sources
469
+
470
+ - **Documentation:** [Sentence Transformers Documentation](https://sbert.net)
471
+ - **Repository:** [Sentence Transformers on GitHub](https://github.com/UKPLab/sentence-transformers)
472
+ - **Hugging Face:** [Sentence Transformers on Hugging Face](https://huggingface.co/models?library=sentence-transformers)
473
+
474
+ ### Full Model Architecture
475
+
476
+ ```
477
+ SentenceTransformer(
478
+ (0): Transformer({'max_seq_length': 256, 'do_lower_case': False}) with Transformer model: BertModel
479
+ (1): Pooling({'word_embedding_dimension': 384, 'pooling_mode_cls_token': False, 'pooling_mode_mean_tokens': True, 'pooling_mode_max_tokens': False, 'pooling_mode_mean_sqrt_len_tokens': False, 'pooling_mode_weightedmean_tokens': False, 'pooling_mode_lasttoken': False, 'include_prompt': True})
480
+ (2): Normalize()
481
+ )
482
+ ```
483
+
484
+ ## Usage
485
+
486
+ ### Direct Usage (Sentence Transformers)
487
+
488
+ First install the Sentence Transformers library:
489
+
490
+ ```bash
491
+ pip install -U sentence-transformers
492
+ ```
493
+
494
+ Then you can load this model and run inference.
495
+ ```python
496
+ from sentence_transformers import SentenceTransformer
497
+
498
+ # Download from the 🤗 Hub (replace the placeholder id below with this model's repository id)
499
+ model = SentenceTransformer("sentence_transformers_model_id")
500
+ # Run inference
501
+ sentences = [
502
+ 'There are only four possible bases that make up each dna nucleotide: adenine, guanine, thymine, and?',
503
+ 'The only difference between each nucleotide is the identity of the base. There are only four possible bases that make up each DNA nucleotide: adenine (A), guanine (G), thymine (T), and cytosine (C).',
504
+ 'Metamorphism. This long word means “to change form. “ A rock undergoes metamorphism if it is exposed to extreme heat and pressure within the crust. With metamorphism , the rock does not melt all the way. The rock changes due to heat and pressure. A metamorphic rock may have a new mineral composition and/or texture.',
505
+ ]
506
+ embeddings = model.encode(sentences)
507
+ print(embeddings.shape)
508
+ # [3, 384]
509
+
510
+ # Get the similarity scores for the embeddings
511
+ similarities = model.similarity(embeddings, embeddings)
512
+ print(similarities.shape)
513
+ # [3, 3]
514
+ ```
515
+
516
+ <!--
517
+ ### Direct Usage (Transformers)
518
+
519
+ <details><summary>Click to see the direct usage in Transformers</summary>
520
+
521
+ </details>
522
+ -->
523
+
524
+ <!--
525
+ ### Downstream Usage (Sentence Transformers)
526
+
527
+ You can finetune this model on your own dataset.
528
+
529
+ <details><summary>Click to expand</summary>
530
+
531
+ </details>
532
+ -->
533
+
534
+ <!--
535
+ ### Out-of-Scope Use
536
+
537
+ *List how the model may foreseeably be misused and address what users ought not to do with the model.*
538
+ -->
539
+
540
+ ## Evaluation
541
+
542
+ ### Metrics
543
+
544
+ #### Information Retrieval
545
+
546
+ * Datasets: `dim_384`, `dim_256`, `dim_192`, `dim_128`, `dim_96` and `dim_64`
547
+ * Evaluated with [<code>InformationRetrievalEvaluator</code>](https://sbert.net/docs/package_reference/sentence_transformer/evaluation.html#sentence_transformers.evaluation.InformationRetrievalEvaluator)
548
+
549
+ | Metric | dim_384 | dim_256 | dim_192 | dim_128 | dim_96 | dim_64 |
550
+ |:--------------------|:-----------|:----------|:-----------|:-----------|:-----------|:-----------|
551
+ | cosine_accuracy@1 | 0.6015 | 0.592 | 0.5806 | 0.5672 | 0.5472 | 0.5138 |
552
+ | cosine_accuracy@3 | 0.796 | 0.7903 | 0.7827 | 0.7607 | 0.7407 | 0.7016 |
553
+ | cosine_accuracy@5 | 0.8532 | 0.836 | 0.8322 | 0.8236 | 0.8017 | 0.7645 |
554
+ | cosine_accuracy@10 | 0.9199 | 0.9142 | 0.9009 | 0.8866 | 0.8723 | 0.8341 |
555
+ | cosine_precision@1 | 0.6015 | 0.592 | 0.5806 | 0.5672 | 0.5472 | 0.5138 |
556
+ | cosine_precision@3 | 0.2653 | 0.2634 | 0.2609 | 0.2536 | 0.2469 | 0.2339 |
557
+ | cosine_precision@5 | 0.1706 | 0.1672 | 0.1664 | 0.1647 | 0.1603 | 0.1529 |
558
+ | cosine_precision@10 | 0.092 | 0.0914 | 0.0901 | 0.0887 | 0.0872 | 0.0834 |
559
+ | cosine_recall@1 | 0.6015 | 0.592 | 0.5806 | 0.5672 | 0.5472 | 0.5138 |
560
+ | cosine_recall@3 | 0.796 | 0.7903 | 0.7827 | 0.7607 | 0.7407 | 0.7016 |
561
+ | cosine_recall@5 | 0.8532 | 0.836 | 0.8322 | 0.8236 | 0.8017 | 0.7645 |
562
+ | cosine_recall@10 | 0.9199 | 0.9142 | 0.9009 | 0.8866 | 0.8723 | 0.8341 |
563
+ | **cosine_ndcg@10** | **0.7612** | **0.752** | **0.7431** | **0.7261** | **0.7097** | **0.6708** |
564
+ | cosine_mrr@10 | 0.7104 | 0.7003 | 0.6924 | 0.6747 | 0.6577 | 0.6187 |
565
+ | cosine_map@100 | 0.7136 | 0.7038 | 0.6965 | 0.679 | 0.6622 | 0.6242 |
566
+
567
+ <!--
568
+ ## Bias, Risks and Limitations
569
+
570
+ *What are the known or foreseeable issues stemming from this model? You could also flag here known failure cases or weaknesses of the model.*
571
+ -->
572
+
573
+ <!--
574
+ ### Recommendations
575
+
576
+ *What are recommendations with respect to the foreseeable issues? For example, filtering explicit content.*
577
+ -->
578
+
579
+ ## Training Details
580
+
581
+ ### Training Dataset
582
+
583
+ #### json
584
+
585
+ * Dataset: json
586
+ * Size: 9,432 training samples
587
+ * Columns: <code>anchor</code> and <code>positive</code>
588
+ * Approximate statistics based on the first 1000 samples:
589
+ | | anchor | positive |
590
+ |:--------|:----------------------------------------------------------------------------------|:------------------------------------------------------------------------------------|
591
+ | type | string | string |
592
+ | details | <ul><li>min: 7 tokens</li><li>mean: 18.15 tokens</li><li>max: 60 tokens</li></ul> | <ul><li>min: 10 tokens</li><li>mean: 94.56 tokens</li><li>max: 256 tokens</li></ul> |
593
+ * Samples:
594
+ | anchor | positive |
595
+ |:-------------------------------------------------------------------------------------------------|:-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
596
+ | <code>What is the term for atherosclerosis of arteries that supply the heart muscle?</code> | <code>Atherosclerosis of arteries that supply the heart muscle is called coronary heart disease . This disease may or may not have symptoms, such as chest pain. As the disease progresses, there is an increased risk of heart attack. A heart attack occurs when the blood supply to part of the heart muscle is blocked and cardiac muscle fibers die. Coronary heart disease is the leading cause of death of adults in the United States.</code> |
597
+ | <code>What term describes a drug that has an effect on the central nervous system?</code> | <code>Caffeine is an example of a psychoactive drug. It is found in coffee and many other products (see Table below ). Caffeine is a central nervous system stimulant . Like other stimulant drugs, it makes you feel more awake and alert. Other psychoactive drugs include alcohol, nicotine, and marijuana. Each has a different effect on the central nervous system. Alcohol, for example, is a depressant . It has the opposite effects of a stimulant like caffeine.</code> |
598
+ | <code>What scale is used to succinctly communicate the acidity or basicity of a solution?</code> | <code>The pH scale is used to succinctly communicate the acidity or basicity of a solution.</code> |
599
+ * Loss: [<code>MatryoshkaLoss</code>](https://sbert.net/docs/package_reference/sentence_transformer/losses.html#matryoshkaloss) with these parameters:
600
+ ```json
601
+ {
602
+ "loss": "MultipleNegativesRankingLoss",
603
+ "matryoshka_dims": [
604
+ 384,
605
+ 256,
606
+ 192,
607
+ 128,
608
+ 96,
609
+ 64
610
+ ],
611
+ "matryoshka_weights": [
612
+ 1,
613
+ 1,
614
+ 1,
615
+ 1,
616
+ 1,
617
+ 1
618
+ ],
619
+ "n_dims_per_step": -1
620
+ }
621
+ ```
622
+
623
+ ### Training Hyperparameters
624
+ #### Non-Default Hyperparameters
625
+
626
+ - `eval_strategy`: epoch
627
+ - `per_device_train_batch_size`: 32
628
+ - `per_device_eval_batch_size`: 16
629
+ - `gradient_accumulation_steps`: 16
630
+ - `learning_rate`: 2e-05
631
+ - `num_train_epochs`: 4
632
+ - `lr_scheduler_type`: cosine
633
+ - `warmup_ratio`: 0.1
634
+ - `bf16`: True
635
+ - `tf32`: True
636
+ - `load_best_model_at_end`: True
637
+ - `optim`: adamw_torch_fused
638
+ - `batch_sampler`: no_duplicates
639
+
640
+ #### All Hyperparameters
641
+ <details><summary>Click to expand</summary>
642
+
643
+ - `overwrite_output_dir`: False
644
+ - `do_predict`: False
645
+ - `eval_strategy`: epoch
646
+ - `prediction_loss_only`: True
647
+ - `per_device_train_batch_size`: 32
648
+ - `per_device_eval_batch_size`: 16
649
+ - `per_gpu_train_batch_size`: None
650
+ - `per_gpu_eval_batch_size`: None
651
+ - `gradient_accumulation_steps`: 16
652
+ - `eval_accumulation_steps`: None
653
+ - `torch_empty_cache_steps`: None
654
+ - `learning_rate`: 2e-05
655
+ - `weight_decay`: 0.0
656
+ - `adam_beta1`: 0.9
657
+ - `adam_beta2`: 0.999
658
+ - `adam_epsilon`: 1e-08
659
+ - `max_grad_norm`: 1.0
660
+ - `num_train_epochs`: 4
661
+ - `max_steps`: -1
662
+ - `lr_scheduler_type`: cosine
663
+ - `lr_scheduler_kwargs`: {}
664
+ - `warmup_ratio`: 0.1
665
+ - `warmup_steps`: 0
666
+ - `log_level`: passive
667
+ - `log_level_replica`: warning
668
+ - `log_on_each_node`: True
669
+ - `logging_nan_inf_filter`: True
670
+ - `save_safetensors`: True
671
+ - `save_on_each_node`: False
672
+ - `save_only_model`: False
673
+ - `restore_callback_states_from_checkpoint`: False
674
+ - `no_cuda`: False
675
+ - `use_cpu`: False
676
+ - `use_mps_device`: False
677
+ - `seed`: 42
678
+ - `data_seed`: None
679
+ - `jit_mode_eval`: False
680
+ - `use_ipex`: False
681
+ - `bf16`: True
682
+ - `fp16`: False
683
+ - `fp16_opt_level`: O1
684
+ - `half_precision_backend`: auto
685
+ - `bf16_full_eval`: False
686
+ - `fp16_full_eval`: False
687
+ - `tf32`: True
688
+ - `local_rank`: 0
689
+ - `ddp_backend`: None
690
+ - `tpu_num_cores`: None
691
+ - `tpu_metrics_debug`: False
692
+ - `debug`: []
693
+ - `dataloader_drop_last`: False
694
+ - `dataloader_num_workers`: 0
695
+ - `dataloader_prefetch_factor`: None
696
+ - `past_index`: -1
697
+ - `disable_tqdm`: False
698
+ - `remove_unused_columns`: True
699
+ - `label_names`: None
700
+ - `load_best_model_at_end`: True
701
+ - `ignore_data_skip`: False
702
+ - `fsdp`: []
703
+ - `fsdp_min_num_params`: 0
704
+ - `fsdp_config`: {'min_num_params': 0, 'xla': False, 'xla_fsdp_v2': False, 'xla_fsdp_grad_ckpt': False}
705
+ - `tp_size`: 0
706
+ - `fsdp_transformer_layer_cls_to_wrap`: None
707
+ - `accelerator_config`: {'split_batches': False, 'dispatch_batches': None, 'even_batches': True, 'use_seedable_sampler': True, 'non_blocking': False, 'gradient_accumulation_kwargs': None}
708
+ - `deepspeed`: None
709
+ - `label_smoothing_factor`: 0.0
710
+ - `optim`: adamw_torch_fused
711
+ - `optim_args`: None
712
+ - `adafactor`: False
713
+ - `group_by_length`: False
714
+ - `length_column_name`: length
715
+ - `ddp_find_unused_parameters`: None
716
+ - `ddp_bucket_cap_mb`: None
717
+ - `ddp_broadcast_buffers`: False
718
+ - `dataloader_pin_memory`: True
719
+ - `dataloader_persistent_workers`: False
720
+ - `skip_memory_metrics`: True
721
+ - `use_legacy_prediction_loop`: False
722
+ - `push_to_hub`: False
723
+ - `resume_from_checkpoint`: None
724
+ - `hub_model_id`: None
725
+ - `hub_strategy`: every_save
726
+ - `hub_private_repo`: None
727
+ - `hub_always_push`: False
728
+ - `gradient_checkpointing`: False
729
+ - `gradient_checkpointing_kwargs`: None
730
+ - `include_inputs_for_metrics`: False
731
+ - `include_for_metrics`: []
732
+ - `eval_do_concat_batches`: True
733
+ - `fp16_backend`: auto
734
+ - `push_to_hub_model_id`: None
735
+ - `push_to_hub_organization`: None
736
+ - `mp_parameters`:
737
+ - `auto_find_batch_size`: False
738
+ - `full_determinism`: False
739
+ - `torchdynamo`: None
740
+ - `ray_scope`: last
741
+ - `ddp_timeout`: 1800
742
+ - `torch_compile`: False
743
+ - `torch_compile_backend`: None
744
+ - `torch_compile_mode`: None
745
+ - `include_tokens_per_second`: False
746
+ - `include_num_input_tokens_seen`: False
747
+ - `neftune_noise_alpha`: None
748
+ - `optim_target_modules`: None
749
+ - `batch_eval_metrics`: False
750
+ - `eval_on_start`: False
751
+ - `use_liger_kernel`: False
752
+ - `eval_use_gather_object`: False
753
+ - `average_tokens_across_devices`: False
754
+ - `prompts`: None
755
+ - `batch_sampler`: no_duplicates
756
+ - `multi_dataset_batch_sampler`: proportional
757
+
758
+ </details>
759
+
760
+ ### Training Logs
761
+ | Epoch | Step | Training Loss | dim_384_cosine_ndcg@10 | dim_256_cosine_ndcg@10 | dim_192_cosine_ndcg@10 | dim_128_cosine_ndcg@10 | dim_96_cosine_ndcg@10 | dim_64_cosine_ndcg@10 |
762
+ |:------:|:----:|:-------------:|:----------------------:|:----------------------:|:----------------------:|:----------------------:|:---------------------:|:---------------------:|
763
+ | 0.5424 | 10 | 22.4049 | - | - | - | - | - | - |
764
+ | 1.0 | 19 | - | 0.7424 | 0.7315 | 0.7263 | 0.7093 | 0.6919 | 0.6575 |
765
+ | 1.0542 | 20 | 16.6616 | - | - | - | - | - | - |
766
+ | 1.5966 | 30 | 16.8367 | - | - | - | - | - | - |
767
+ | 2.0 | 38 | - | 0.7612 | 0.7520 | 0.7431 | 0.7261 | 0.7097 | 0.6708 |
768
+
769
+
770
+ ### Framework Versions
771
+ - Python: 3.12.8
772
+ - Sentence Transformers: 3.4.1
773
+ - Transformers: 4.51.3
774
+ - PyTorch: 2.5.1+cu124
775
+ - Accelerate: 1.3.0
776
+ - Datasets: 3.6.0
777
+ - Tokenizers: 0.21.0
778
+
779
+ ## Citation
780
+
781
+ ### BibTeX
782
+
783
+ #### Sentence Transformers
784
+ ```bibtex
785
+ @inproceedings{reimers-2019-sentence-bert,
786
+ title = "Sentence-BERT: Sentence Embeddings using Siamese BERT-Networks",
787
+ author = "Reimers, Nils and Gurevych, Iryna",
788
+ booktitle = "Proceedings of the 2019 Conference on Empirical Methods in Natural Language Processing",
789
+ month = "11",
790
+ year = "2019",
791
+ publisher = "Association for Computational Linguistics",
792
+ url = "https://arxiv.org/abs/1908.10084",
793
+ }
794
+ ```
795
+
796
+ #### MatryoshkaLoss
797
+ ```bibtex
798
+ @misc{kusupati2024matryoshka,
799
+ title={Matryoshka Representation Learning},
800
+ author={Aditya Kusupati and Gantavya Bhatt and Aniket Rege and Matthew Wallingford and Aditya Sinha and Vivek Ramanujan and William Howard-Snyder and Kaifeng Chen and Sham Kakade and Prateek Jain and Ali Farhadi},
801
+ year={2024},
802
+ eprint={2205.13147},
803
+ archivePrefix={arXiv},
804
+ primaryClass={cs.LG}
805
+ }
806
+ ```
807
+
808
+ #### MultipleNegativesRankingLoss
809
+ ```bibtex
810
+ @misc{henderson2017efficient,
811
+ title={Efficient Natural Language Response Suggestion for Smart Reply},
812
+ author={Matthew Henderson and Rami Al-Rfou and Brian Strope and Yun-hsuan Sung and Laszlo Lukacs and Ruiqi Guo and Sanjiv Kumar and Balint Miklos and Ray Kurzweil},
813
+ year={2017},
814
+ eprint={1705.00652},
815
+ archivePrefix={arXiv},
816
+ primaryClass={cs.CL}
817
+ }
818
+ ```
819
+
820
+ <!--
821
+ ## Glossary
822
+
823
+ *Clearly define terms in order to be accessible across audiences.*
824
+ -->
825
+
826
+ <!--
827
+ ## Model Card Authors
828
+
829
+ *Lists the people who create the model card, providing recognition and accountability for the detailed work that goes into its construction.*
830
+ -->
831
+
832
+ <!--
833
+ ## Model Card Contact
834
+
835
+ *Provides a way for people who have updates to the Model Card, suggestions, or questions, to contact the Model Card authors.*
836
+ -->
checkpoint-38/config.json ADDED
@@ -0,0 +1,25 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "architectures": [
3
+ "BertModel"
4
+ ],
5
+ "attention_probs_dropout_prob": 0.1,
6
+ "classifier_dropout": null,
7
+ "gradient_checkpointing": false,
8
+ "hidden_act": "gelu",
9
+ "hidden_dropout_prob": 0.1,
10
+ "hidden_size": 384,
11
+ "initializer_range": 0.02,
12
+ "intermediate_size": 1536,
13
+ "layer_norm_eps": 1e-12,
14
+ "max_position_embeddings": 512,
15
+ "model_type": "bert",
16
+ "num_attention_heads": 12,
17
+ "num_hidden_layers": 6,
18
+ "pad_token_id": 0,
19
+ "position_embedding_type": "absolute",
20
+ "torch_dtype": "float32",
21
+ "transformers_version": "4.51.3",
22
+ "type_vocab_size": 2,
23
+ "use_cache": true,
24
+ "vocab_size": 30522
25
+ }
checkpoint-38/config_sentence_transformers.json ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "__version__": {
3
+ "sentence_transformers": "3.4.1",
4
+ "transformers": "4.51.3",
5
+ "pytorch": "2.5.1+cu124"
6
+ },
7
+ "prompts": {},
8
+ "default_prompt_name": null,
9
+ "similarity_fn_name": "cosine"
10
+ }
checkpoint-38/model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:33f29d54691f2f22086eab54cd065fcbd6eb7442fcf83db77547a763abac8dd4
3
+ size 90864192
checkpoint-38/modules.json ADDED
@@ -0,0 +1,20 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [
2
+ {
3
+ "idx": 0,
4
+ "name": "0",
5
+ "path": "",
6
+ "type": "sentence_transformers.models.Transformer"
7
+ },
8
+ {
9
+ "idx": 1,
10
+ "name": "1",
11
+ "path": "1_Pooling",
12
+ "type": "sentence_transformers.models.Pooling"
13
+ },
14
+ {
15
+ "idx": 2,
16
+ "name": "2",
17
+ "path": "2_Normalize",
18
+ "type": "sentence_transformers.models.Normalize"
19
+ }
20
+ ]
checkpoint-38/optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:80a2f659147ed1482b4c13a6e7c3ec08442a3655e48bae6eddb0f3bbb201e98e
3
+ size 180609146
checkpoint-38/rng_state.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:261075a64ba299505c00ddbcf99387454f5c91d994e778dd47128116ab115482
3
+ size 14244
checkpoint-38/scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d42a752cfb92d8b7877eb9d60307f7aab577546eb11bf5509ccf042dbfa5686c
3
+ size 1064
checkpoint-38/sentence_bert_config.json ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ {
2
+ "max_seq_length": 256,
3
+ "do_lower_case": false
4
+ }
checkpoint-38/special_tokens_map.json ADDED
@@ -0,0 +1,37 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "cls_token": {
3
+ "content": "[CLS]",
4
+ "lstrip": false,
5
+ "normalized": false,
6
+ "rstrip": false,
7
+ "single_word": false
8
+ },
9
+ "mask_token": {
10
+ "content": "[MASK]",
11
+ "lstrip": false,
12
+ "normalized": false,
13
+ "rstrip": false,
14
+ "single_word": false
15
+ },
16
+ "pad_token": {
17
+ "content": "[PAD]",
18
+ "lstrip": false,
19
+ "normalized": false,
20
+ "rstrip": false,
21
+ "single_word": false
22
+ },
23
+ "sep_token": {
24
+ "content": "[SEP]",
25
+ "lstrip": false,
26
+ "normalized": false,
27
+ "rstrip": false,
28
+ "single_word": false
29
+ },
30
+ "unk_token": {
31
+ "content": "[UNK]",
32
+ "lstrip": false,
33
+ "normalized": false,
34
+ "rstrip": false,
35
+ "single_word": false
36
+ }
37
+ }
checkpoint-38/tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
checkpoint-38/tokenizer_config.json ADDED
@@ -0,0 +1,65 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "added_tokens_decoder": {
3
+ "0": {
4
+ "content": "[PAD]",
5
+ "lstrip": false,
6
+ "normalized": false,
7
+ "rstrip": false,
8
+ "single_word": false,
9
+ "special": true
10
+ },
11
+ "100": {
12
+ "content": "[UNK]",
13
+ "lstrip": false,
14
+ "normalized": false,
15
+ "rstrip": false,
16
+ "single_word": false,
17
+ "special": true
18
+ },
19
+ "101": {
20
+ "content": "[CLS]",
21
+ "lstrip": false,
22
+ "normalized": false,
23
+ "rstrip": false,
24
+ "single_word": false,
25
+ "special": true
26
+ },
27
+ "102": {
28
+ "content": "[SEP]",
29
+ "lstrip": false,
30
+ "normalized": false,
31
+ "rstrip": false,
32
+ "single_word": false,
33
+ "special": true
34
+ },
35
+ "103": {
36
+ "content": "[MASK]",
37
+ "lstrip": false,
38
+ "normalized": false,
39
+ "rstrip": false,
40
+ "single_word": false,
41
+ "special": true
42
+ }
43
+ },
44
+ "clean_up_tokenization_spaces": false,
45
+ "cls_token": "[CLS]",
46
+ "do_basic_tokenize": true,
47
+ "do_lower_case": true,
48
+ "extra_special_tokens": {},
49
+ "mask_token": "[MASK]",
50
+ "max_length": 128,
51
+ "model_max_length": 256,
52
+ "never_split": null,
53
+ "pad_to_multiple_of": null,
54
+ "pad_token": "[PAD]",
55
+ "pad_token_type_id": 0,
56
+ "padding_side": "right",
57
+ "sep_token": "[SEP]",
58
+ "stride": 0,
59
+ "strip_accents": null,
60
+ "tokenize_chinese_chars": true,
61
+ "tokenizer_class": "BertTokenizer",
62
+ "truncation_side": "right",
63
+ "truncation_strategy": "longest_first",
64
+ "unk_token": "[UNK]"
65
+ }
checkpoint-38/trainer_state.json ADDED
@@ -0,0 +1,251 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_global_step": 38,
3
+ "best_metric": 0.7260517487265687,
4
+ "best_model_checkpoint": "MNLP_M3_document_encoder_sciqa/checkpoint-38",
5
+ "epoch": 2.0,
6
+ "eval_steps": 500,
7
+ "global_step": 38,
8
+ "is_hyper_param_search": false,
9
+ "is_local_process_zero": true,
10
+ "is_world_process_zero": true,
11
+ "log_history": [
12
+ {
13
+ "epoch": 0.5423728813559322,
14
+ "grad_norm": 164.72093200683594,
15
+ "learning_rate": 1.9987954562051724e-05,
16
+ "loss": 22.4049,
17
+ "step": 10
18
+ },
19
+ {
20
+ "epoch": 1.0,
21
+ "eval_dim_128_cosine_accuracy@1": 0.5452812202097236,
22
+ "eval_dim_128_cosine_accuracy@10": 0.8760724499523356,
23
+ "eval_dim_128_cosine_accuracy@3": 0.7416587225929456,
24
+ "eval_dim_128_cosine_accuracy@5": 0.8074356530028599,
25
+ "eval_dim_128_cosine_map@100": 0.6605612754102786,
26
+ "eval_dim_128_cosine_mrr@10": 0.6559349796480402,
27
+ "eval_dim_128_cosine_ndcg@10": 0.7092688022688834,
28
+ "eval_dim_128_cosine_precision@1": 0.5452812202097236,
29
+ "eval_dim_128_cosine_precision@10": 0.08760724499523356,
30
+ "eval_dim_128_cosine_precision@3": 0.24721957419764853,
31
+ "eval_dim_128_cosine_precision@5": 0.161487130600572,
32
+ "eval_dim_128_cosine_recall@1": 0.5452812202097236,
33
+ "eval_dim_128_cosine_recall@10": 0.8760724499523356,
34
+ "eval_dim_128_cosine_recall@3": 0.7416587225929456,
35
+ "eval_dim_128_cosine_recall@5": 0.8074356530028599,
36
+ "eval_dim_192_cosine_accuracy@1": 0.5624404194470924,
37
+ "eval_dim_192_cosine_accuracy@10": 0.8932316491897044,
38
+ "eval_dim_192_cosine_accuracy@3": 0.7597712106768351,
39
+ "eval_dim_192_cosine_accuracy@5": 0.8188751191611058,
40
+ "eval_dim_192_cosine_map@100": 0.677245219852975,
41
+ "eval_dim_192_cosine_mrr@10": 0.6730234388003697,
42
+ "eval_dim_192_cosine_ndcg@10": 0.7262712999939527,
43
+ "eval_dim_192_cosine_precision@1": 0.5624404194470924,
44
+ "eval_dim_192_cosine_precision@10": 0.08932316491897044,
45
+ "eval_dim_192_cosine_precision@3": 0.25325707022561167,
46
+ "eval_dim_192_cosine_precision@5": 0.16377502383222117,
47
+ "eval_dim_192_cosine_recall@1": 0.5624404194470924,
48
+ "eval_dim_192_cosine_recall@10": 0.8932316491897044,
49
+ "eval_dim_192_cosine_recall@3": 0.7597712106768351,
50
+ "eval_dim_192_cosine_recall@5": 0.8188751191611058,
51
+ "eval_dim_256_cosine_accuracy@1": 0.5653002859866539,
52
+ "eval_dim_256_cosine_accuracy@10": 0.8960915157292659,
53
+ "eval_dim_256_cosine_accuracy@3": 0.7683508102955195,
54
+ "eval_dim_256_cosine_accuracy@5": 0.8236415633937083,
55
+ "eval_dim_256_cosine_map@100": 0.6831583296339104,
56
+ "eval_dim_256_cosine_mrr@10": 0.6786784844220503,
57
+ "eval_dim_256_cosine_ndcg@10": 0.7314611486548883,
58
+ "eval_dim_256_cosine_precision@1": 0.5653002859866539,
59
+ "eval_dim_256_cosine_precision@10": 0.08960915157292659,
60
+ "eval_dim_256_cosine_precision@3": 0.25611693676517316,
61
+ "eval_dim_256_cosine_precision@5": 0.16472831267874166,
62
+ "eval_dim_256_cosine_recall@1": 0.5653002859866539,
63
+ "eval_dim_256_cosine_recall@10": 0.8960915157292659,
64
+ "eval_dim_256_cosine_recall@3": 0.7683508102955195,
65
+ "eval_dim_256_cosine_recall@5": 0.8236415633937083,
66
+ "eval_dim_384_cosine_accuracy@1": 0.5786463298379408,
67
+ "eval_dim_384_cosine_accuracy@10": 0.9075309818875119,
68
+ "eval_dim_384_cosine_accuracy@3": 0.776930409914204,
69
+ "eval_dim_384_cosine_accuracy@5": 0.8417540514775977,
70
+ "eval_dim_384_cosine_map@100": 0.6932934943306605,
71
+ "eval_dim_384_cosine_mrr@10": 0.6894563227261042,
72
+ "eval_dim_384_cosine_ndcg@10": 0.7423737824827953,
73
+ "eval_dim_384_cosine_precision@1": 0.5786463298379408,
74
+ "eval_dim_384_cosine_precision@10": 0.0907530981887512,
75
+ "eval_dim_384_cosine_precision@3": 0.2589768033047346,
76
+ "eval_dim_384_cosine_precision@5": 0.16835081029551957,
77
+ "eval_dim_384_cosine_recall@1": 0.5786463298379408,
78
+ "eval_dim_384_cosine_recall@10": 0.9075309818875119,
79
+ "eval_dim_384_cosine_recall@3": 0.776930409914204,
80
+ "eval_dim_384_cosine_recall@5": 0.8417540514775977,
81
+ "eval_dim_64_cosine_accuracy@1": 0.49285033365109626,
82
+ "eval_dim_64_cosine_accuracy@10": 0.8274547187797903,
83
+ "eval_dim_64_cosine_accuracy@3": 0.684461391801716,
84
+ "eval_dim_64_cosine_accuracy@5": 0.7578646329837941,
85
+ "eval_dim_64_cosine_map@100": 0.6088952628032813,
86
+ "eval_dim_64_cosine_mrr@10": 0.6032237807738285,
87
+ "eval_dim_64_cosine_ndcg@10": 0.6575406372744073,
88
+ "eval_dim_64_cosine_precision@1": 0.49285033365109626,
89
+ "eval_dim_64_cosine_precision@10": 0.08274547187797902,
90
+ "eval_dim_64_cosine_precision@3": 0.2281537972672386,
91
+ "eval_dim_64_cosine_precision@5": 0.1515729265967588,
92
+ "eval_dim_64_cosine_recall@1": 0.49285033365109626,
93
+ "eval_dim_64_cosine_recall@10": 0.8274547187797903,
94
+ "eval_dim_64_cosine_recall@3": 0.684461391801716,
95
+ "eval_dim_64_cosine_recall@5": 0.7578646329837941,
96
+ "eval_dim_96_cosine_accuracy@1": 0.5214489990467112,
97
+ "eval_dim_96_cosine_accuracy@10": 0.8636796949475691,
98
+ "eval_dim_96_cosine_accuracy@3": 0.7264061010486177,
99
+ "eval_dim_96_cosine_accuracy@5": 0.7893231649189705,
100
+ "eval_dim_96_cosine_map@100": 0.6418431352074736,
101
+ "eval_dim_96_cosine_mrr@10": 0.6369528046363133,
102
+ "eval_dim_96_cosine_ndcg@10": 0.6919097155042885,
103
+ "eval_dim_96_cosine_precision@1": 0.5214489990467112,
104
+ "eval_dim_96_cosine_precision@10": 0.0863679694947569,
105
+ "eval_dim_96_cosine_precision@3": 0.2421353670162059,
106
+ "eval_dim_96_cosine_precision@5": 0.15786463298379408,
107
+ "eval_dim_96_cosine_recall@1": 0.5214489990467112,
108
+ "eval_dim_96_cosine_recall@10": 0.8636796949475691,
109
+ "eval_dim_96_cosine_recall@3": 0.7264061010486177,
110
+ "eval_dim_96_cosine_recall@5": 0.7893231649189705,
111
+ "eval_runtime": 116.4269,
112
+ "eval_samples_per_second": 0.0,
113
+ "eval_sequential_score": 0.6575406372744073,
114
+ "eval_steps_per_second": 0.0,
115
+ "step": 19
116
+ },
117
+ {
118
+ "epoch": 1.0542372881355933,
119
+ "grad_norm": 107.04779815673828,
120
+ "learning_rate": 1.8577286100002723e-05,
121
+ "loss": 16.6616,
122
+ "step": 20
123
+ },
124
+ {
125
+ "epoch": 1.5966101694915253,
126
+ "grad_norm": 97.63832092285156,
127
+ "learning_rate": 1.5141027441932217e-05,
128
+ "loss": 16.8367,
129
+ "step": 30
130
+ },
131
+ {
132
+ "epoch": 2.0,
133
+ "eval_dim_128_cosine_accuracy@1": 0.567206863679695,
134
+ "eval_dim_128_cosine_accuracy@10": 0.886558627264061,
135
+ "eval_dim_128_cosine_accuracy@3": 0.7607244995233555,
136
+ "eval_dim_128_cosine_accuracy@5": 0.8236415633937083,
137
+ "eval_dim_128_cosine_map@100": 0.6790430112153837,
138
+ "eval_dim_128_cosine_mrr@10": 0.6746886679679823,
139
+ "eval_dim_128_cosine_ndcg@10": 0.7260517487265687,
140
+ "eval_dim_128_cosine_precision@1": 0.567206863679695,
141
+ "eval_dim_128_cosine_precision@10": 0.0886558627264061,
142
+ "eval_dim_128_cosine_precision@3": 0.25357483317445184,
143
+ "eval_dim_128_cosine_precision@5": 0.16472831267874166,
144
+ "eval_dim_128_cosine_recall@1": 0.567206863679695,
145
+ "eval_dim_128_cosine_recall@10": 0.886558627264061,
146
+ "eval_dim_128_cosine_recall@3": 0.7607244995233555,
147
+ "eval_dim_128_cosine_recall@5": 0.8236415633937083,
148
+ "eval_dim_192_cosine_accuracy@1": 0.5805529075309819,
149
+ "eval_dim_192_cosine_accuracy@10": 0.9008579599618685,
150
+ "eval_dim_192_cosine_accuracy@3": 0.782650142993327,
151
+ "eval_dim_192_cosine_accuracy@5": 0.8322211630123928,
152
+ "eval_dim_192_cosine_map@100": 0.6964841260809953,
153
+ "eval_dim_192_cosine_mrr@10": 0.6923562879234952,
154
+ "eval_dim_192_cosine_ndcg@10": 0.7430712975035773,
155
+ "eval_dim_192_cosine_precision@1": 0.5805529075309819,
156
+ "eval_dim_192_cosine_precision@10": 0.09008579599618685,
157
+ "eval_dim_192_cosine_precision@3": 0.26088338099777564,
158
+ "eval_dim_192_cosine_precision@5": 0.16644423260247856,
159
+ "eval_dim_192_cosine_recall@1": 0.5805529075309819,
160
+ "eval_dim_192_cosine_recall@10": 0.9008579599618685,
161
+ "eval_dim_192_cosine_recall@3": 0.782650142993327,
162
+ "eval_dim_192_cosine_recall@5": 0.8322211630123928,
163
+ "eval_dim_256_cosine_accuracy@1": 0.5919923736892279,
164
+ "eval_dim_256_cosine_accuracy@10": 0.9142040038131554,
165
+ "eval_dim_256_cosine_accuracy@3": 0.7902764537654909,
166
+ "eval_dim_256_cosine_accuracy@5": 0.8360343183984748,
167
+ "eval_dim_256_cosine_map@100": 0.7038093293311698,
168
+ "eval_dim_256_cosine_mrr@10": 0.700305279404422,
169
+ "eval_dim_256_cosine_ndcg@10": 0.7520267351833514,
170
+ "eval_dim_256_cosine_precision@1": 0.5919923736892279,
171
+ "eval_dim_256_cosine_precision@10": 0.09142040038131555,
172
+ "eval_dim_256_cosine_precision@3": 0.26342548458849696,
173
+ "eval_dim_256_cosine_precision@5": 0.16720686367969492,
174
+ "eval_dim_256_cosine_recall@1": 0.5919923736892279,
175
+ "eval_dim_256_cosine_recall@10": 0.9142040038131554,
176
+ "eval_dim_256_cosine_recall@3": 0.7902764537654909,
177
+ "eval_dim_256_cosine_recall@5": 0.8360343183984748,
178
+ "eval_dim_384_cosine_accuracy@1": 0.6015252621544328,
179
+ "eval_dim_384_cosine_accuracy@10": 0.9199237368922784,
180
+ "eval_dim_384_cosine_accuracy@3": 0.7959961868446139,
181
+ "eval_dim_384_cosine_accuracy@5": 0.8531935176358436,
182
+ "eval_dim_384_cosine_map@100": 0.713601684515785,
183
+ "eval_dim_384_cosine_mrr@10": 0.7104082497314151,
184
+ "eval_dim_384_cosine_ndcg@10": 0.761241503632434,
185
+ "eval_dim_384_cosine_precision@1": 0.6015252621544328,
186
+ "eval_dim_384_cosine_precision@10": 0.09199237368922783,
187
+ "eval_dim_384_cosine_precision@3": 0.26533206228153794,
188
+ "eval_dim_384_cosine_precision@5": 0.17063870352716873,
189
+ "eval_dim_384_cosine_recall@1": 0.6015252621544328,
190
+ "eval_dim_384_cosine_recall@10": 0.9199237368922784,
191
+ "eval_dim_384_cosine_recall@3": 0.7959961868446139,
192
+ "eval_dim_384_cosine_recall@5": 0.8531935176358436,
193
+ "eval_dim_64_cosine_accuracy@1": 0.5138226882745471,
194
+ "eval_dim_64_cosine_accuracy@10": 0.8341277407054337,
195
+ "eval_dim_64_cosine_accuracy@3": 0.7016205910390848,
196
+ "eval_dim_64_cosine_accuracy@5": 0.7645376549094376,
197
+ "eval_dim_64_cosine_map@100": 0.6242158272303533,
198
+ "eval_dim_64_cosine_mrr@10": 0.618670464690484,
199
+ "eval_dim_64_cosine_ndcg@10": 0.6707950308444217,
200
+ "eval_dim_64_cosine_precision@1": 0.5138226882745471,
201
+ "eval_dim_64_cosine_precision@10": 0.08341277407054337,
202
+ "eval_dim_64_cosine_precision@3": 0.2338735303463616,
203
+ "eval_dim_64_cosine_precision@5": 0.1529075309818875,
204
+ "eval_dim_64_cosine_recall@1": 0.5138226882745471,
205
+ "eval_dim_64_cosine_recall@10": 0.8341277407054337,
206
+ "eval_dim_64_cosine_recall@3": 0.7016205910390848,
207
+ "eval_dim_64_cosine_recall@5": 0.7645376549094376,
208
+ "eval_dim_96_cosine_accuracy@1": 0.5471877979027645,
209
+ "eval_dim_96_cosine_accuracy@10": 0.8722592945662536,
210
+ "eval_dim_96_cosine_accuracy@3": 0.7407054337464252,
211
+ "eval_dim_96_cosine_accuracy@5": 0.8017159199237369,
212
+ "eval_dim_96_cosine_map@100": 0.6622003643008398,
213
+ "eval_dim_96_cosine_mrr@10": 0.6576811627097615,
214
+ "eval_dim_96_cosine_ndcg@10": 0.7097194683573752,
215
+ "eval_dim_96_cosine_precision@1": 0.5471877979027645,
216
+ "eval_dim_96_cosine_precision@10": 0.08722592945662536,
217
+ "eval_dim_96_cosine_precision@3": 0.2469018112488084,
218
+ "eval_dim_96_cosine_precision@5": 0.16034318398474737,
219
+ "eval_dim_96_cosine_recall@1": 0.5471877979027645,
220
+ "eval_dim_96_cosine_recall@10": 0.8722592945662536,
221
+ "eval_dim_96_cosine_recall@3": 0.7407054337464252,
222
+ "eval_dim_96_cosine_recall@5": 0.8017159199237369,
223
+ "eval_runtime": 119.8934,
224
+ "eval_samples_per_second": 0.0,
225
+ "eval_sequential_score": 0.6707950308444217,
226
+ "eval_steps_per_second": 0.0,
227
+ "step": 38
228
+ }
229
+ ],
230
+ "logging_steps": 10,
231
+ "max_steps": 72,
232
+ "num_input_tokens_seen": 0,
233
+ "num_train_epochs": 4,
234
+ "save_steps": 500,
235
+ "stateful_callbacks": {
236
+ "TrainerControl": {
237
+ "args": {
238
+ "should_epoch_stop": false,
239
+ "should_evaluate": false,
240
+ "should_log": false,
241
+ "should_save": true,
242
+ "should_training_stop": false
243
+ },
244
+ "attributes": {}
245
+ }
246
+ },
247
+ "total_flos": 0.0,
248
+ "train_batch_size": 32,
249
+ "trial_name": null,
250
+ "trial_params": null
251
+ }
checkpoint-38/training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c7792b3d6d0521f78d711be5b9ea8a9fbf7602d27ea1f1e28ac62990d20c22b2
3
+ size 5624
checkpoint-38/vocab.txt ADDED
The diff for this file is too large to render. See raw diff
 
checkpoint-57/1_Pooling/config.json ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "word_embedding_dimension": 384,
3
+ "pooling_mode_cls_token": false,
4
+ "pooling_mode_mean_tokens": true,
5
+ "pooling_mode_max_tokens": false,
6
+ "pooling_mode_mean_sqrt_len_tokens": false,
7
+ "pooling_mode_weightedmean_tokens": false,
8
+ "pooling_mode_lasttoken": false,
9
+ "include_prompt": true
10
+ }
checkpoint-57/README.md ADDED
@@ -0,0 +1,839 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ language:
3
+ - en
4
+ license: apache-2.0
5
+ tags:
6
+ - sentence-transformers
7
+ - sentence-similarity
8
+ - feature-extraction
9
+ - generated_from_trainer
10
+ - dataset_size:9432
11
+ - loss:MatryoshkaLoss
12
+ - loss:MultipleNegativesRankingLoss
13
+ base_model: sentence-transformers/all-MiniLM-L6-v2
14
+ widget:
15
+ - source_sentence: Atherosclerosis and coronary heart disease are examples of what
16
+ type of body system disease?
17
+ sentences:
18
+ - Diseases of the cardiovascular system are common and may be life threatening.
19
+ Examples include atherosclerosis and coronary heart disease. A healthy lifestyle
20
+ can reduce the risk of such diseases developing. This includes avoiding smoking,
21
+ getting regular physical activity, and maintaining a healthy percent of body fat.
22
+ - Osmosis Osmosis is the diffusion of water through a semipermeable membrane according
23
+ to the concentration gradient of water across the membrane. Whereas diffusion
24
+ transports material across membranes and within cells, osmosis transports only
25
+ water across a membrane and the membrane limits the diffusion of solutes in the
26
+ water. Osmosis is a special case of diffusion. Water, like other substances, moves
27
+ from an area of higher concentration to one of lower concentration. Imagine a
28
+ beaker with a semipermeable membrane, separating the two sides or halves (Figure
29
+ 3.21). On both sides of the membrane, the water level is the same, but there are
30
+ different concentrations on each side of a dissolved substance, or solute, that
31
+ cannot cross the membrane. If the volume of the water is the same, but the concentrations
32
+ of solute are different, then there are also different concentrations of water,
33
+ the solvent, on either side of the membrane.
34
+ - Circadian rhythms are regular changes in biology or behavior that occur in a 24-hour
35
+ cycle. In humans, for example, blood pressure and body temperature change in a
36
+ regular way throughout each 24-hour day. Animals may eat and drink at certain
37
+ times of day as well. Humans have daily cycles of behavior, too. Most people start
38
+ to get sleepy after dark and have a hard time sleeping when it is light outside.
39
+ In many species, including humans, circadian rhythms are controlled by a tiny
40
+ structure called the biological clock . This structure is located in a gland at
41
+ the base of the brain. The biological clock sends signals to the body. The signals
42
+ cause regular changes in behavior and body processes. The amount of light entering
43
+ the eyes helps control the biological clock. The clock causes changes that repeat
44
+ every 24 hours.
45
+ - source_sentence: How does a cell's membrane keep extracellular materials from mixing
46
+ with it's internal components?
47
+ sentences:
48
+ - We know that the Universe is expanding. Astronomers have wondered if it is expanding
49
+ fast enough to escape the pull of gravity. Would the Universe just expand forever?
50
+ If it could not escape the pull of gravity, would it someday start to contract?
51
+ This means it would eventually get squeezed together in a big crunch. This is
52
+ the opposite of the Big Bang.
53
+ - Physical properties that do not depend on the amount of substance present are
54
+ called intensive properties . Intensive properties do not change with changes
55
+ of size, shape, or scale. Examples of intensive properties are as follows in the
56
+ Table below .
57
+ - CHAPTER REVIEW 3.1 The Cell Membrane The cell membrane provides a barrier around
58
+ the cell, separating its internal components from the extracellular environment.
59
+ It is composed of a phospholipid bilayer, with hydrophobic internal lipid “tails”
60
+ and hydrophilic external phosphate “heads. ” Various membrane proteins are scattered
61
+ throughout the bilayer, both inserted within it and attached to it peripherally.
62
+ The cell membrane is selectively permeable, allowing only a limited number of
63
+ materials to diffuse through its lipid bilayer. All materials that cross the membrane
64
+ do so using passive (non energy-requiring) or active (energy-requiring) transport
65
+ processes. During passive transport, materials move by simple diffusion or by
66
+ facilitated diffusion through the membrane, down their concentration gradient.
67
+ Water passes through the membrane in a diffusion process called osmosis. During
68
+ active transport, energy is expended to assist material movement across the membrane
69
+ in a direction against their concentration gradient. Active transport may take
70
+ place with the help of protein pumps or through the use of vesicles.
71
+ - source_sentence: An infection may be intracellular or extracellular, depending on
72
+ this?
73
+ sentences:
74
+ - '22.3 Magnetic Fields and Magnetic Field Lines • Magnetic fields can be pictorially
75
+ represented by magnetic field lines, the properties of which are as follows: 1.
76
+ The field is tangent to the magnetic field line. Field strength is proportional
77
+ to the line density. Field lines cannot cross. Field lines are continuous loops.'
78
+ - Figure 24.13 The lifecycle of an ascomycete is characterized by the production
79
+ of asci during the sexual phase. The haploid phase is the predominant phase of
80
+ the life cycle.
81
+ - Caffeine is an example of a psychoactive drug. It is found in coffee and many
82
+ other products (see Table below ). Caffeine is a central nervous system stimulant
83
+ . Like other stimulant drugs, it makes you feel more awake and alert. Other psychoactive
84
+ drugs include alcohol, nicotine, and marijuana. Each has a different effect on
85
+ the central nervous system. Alcohol, for example, is a depressant . It has the
86
+ opposite effects of a stimulant like caffeine.
87
+ - source_sentence: What does water treatment do to water?
88
+ sentences:
89
+ - Some solutes, such as sodium acetate, do not recrystallize easily. Suppose an
90
+ exactly saturated solution of sodium acetate is prepared at 50°C. As it cools
91
+ back to room temperature, no crystals appear in the solution, even though the
92
+ solubility of sodium acetate is lower at room temperature. A supersaturated solution
93
+ is a solution that contains more than the maximum amount of solute that is capable
94
+ of being dissolved at a given temperature. The recrystallization of the excess
95
+ dissolved solute in a supersaturated solution can be initiated by the addition
96
+ of a tiny crystal of solute, called a seed crystal. The seed crystal provides
97
+ a nucleation site on which the excess dissolved crystals can begin to grow. Recrystallization
98
+ from a supersaturated solution is typically very fast.
99
+ - Figure 23.13, the esophagus runs a mainly straight route through the mediastinum
100
+ of the thorax. To enter the abdomen, the esophagus penetrates the diaphragm through
101
+ an opening called the esophageal hiatus.
102
+ - Water treatment is a series of processes that remove unwanted substances from
103
+ water. More processes are needed to purify water for drinking than for other uses.
104
+ - source_sentence: 'There are only four possible bases that make up each dna nucleotide:
105
+ adenine, guanine, thymine, and?'
106
+ sentences:
107
+ - Metamorphism. This long word means “to change form. “ A rock undergoes metamorphism
108
+ if it is exposed to extreme heat and pressure within the crust. With metamorphism
109
+ , the rock does not melt all the way. The rock changes due to heat and pressure.
110
+ A metamorphic rock may have a new mineral composition and/or texture.
111
+ - Forest and Kim Starr (Flickr:Starr Environmental). Secondary succession occurs
112
+ when nature reclaims areas formerly occupied by life . CC BY 2.0.
113
+ - 'The only difference between each nucleotide is the identity of the base. There
114
+ are only four possible bases that make up each DNA nucleotide: adenine (A), guanine
115
+ (G), thymine (T), and cytosine (C).'
116
+ pipeline_tag: sentence-similarity
117
+ library_name: sentence-transformers
118
+ metrics:
119
+ - cosine_accuracy@1
120
+ - cosine_accuracy@3
121
+ - cosine_accuracy@5
122
+ - cosine_accuracy@10
123
+ - cosine_precision@1
124
+ - cosine_precision@3
125
+ - cosine_precision@5
126
+ - cosine_precision@10
127
+ - cosine_recall@1
128
+ - cosine_recall@3
129
+ - cosine_recall@5
130
+ - cosine_recall@10
131
+ - cosine_ndcg@10
132
+ - cosine_mrr@10
133
+ - cosine_map@100
134
+ model-index:
135
+ - name: MNLP M3 Encoder SciQA
136
+ results:
137
+ - task:
138
+ type: information-retrieval
139
+ name: Information Retrieval
140
+ dataset:
141
+ name: dim 384
142
+ type: dim_384
143
+ metrics:
144
+ - type: cosine_accuracy@1
145
+ value: 0.6101048617731173
146
+ name: Cosine Accuracy@1
147
+ - type: cosine_accuracy@3
148
+ value: 0.8007626310772163
149
+ name: Cosine Accuracy@3
150
+ - type: cosine_accuracy@5
151
+ value: 0.8541468064823642
152
+ name: Cosine Accuracy@5
153
+ - type: cosine_accuracy@10
154
+ value: 0.9256434699714013
155
+ name: Cosine Accuracy@10
156
+ - type: cosine_precision@1
157
+ value: 0.6101048617731173
158
+ name: Cosine Precision@1
159
+ - type: cosine_precision@3
160
+ value: 0.2669208770257388
161
+ name: Cosine Precision@3
162
+ - type: cosine_precision@5
163
+ value: 0.17082936129647283
164
+ name: Cosine Precision@5
165
+ - type: cosine_precision@10
166
+ value: 0.09256434699714014
167
+ name: Cosine Precision@10
168
+ - type: cosine_recall@1
169
+ value: 0.6101048617731173
170
+ name: Cosine Recall@1
171
+ - type: cosine_recall@3
172
+ value: 0.8007626310772163
173
+ name: Cosine Recall@3
174
+ - type: cosine_recall@5
175
+ value: 0.8541468064823642
176
+ name: Cosine Recall@5
177
+ - type: cosine_recall@10
178
+ value: 0.9256434699714013
179
+ name: Cosine Recall@10
180
+ - type: cosine_ndcg@10
181
+ value: 0.7675175612283535
182
+ name: Cosine Ndcg@10
183
+ - type: cosine_mrr@10
184
+ value: 0.7170116664396936
185
+ name: Cosine Mrr@10
186
+ - type: cosine_map@100
187
+ value: 0.7197084605820631
188
+ name: Cosine Map@100
189
+ - task:
190
+ type: information-retrieval
191
+ name: Information Retrieval
192
+ dataset:
193
+ name: dim 256
194
+ type: dim_256
195
+ metrics:
196
+ - type: cosine_accuracy@1
197
+ value: 0.5948522402287894
198
+ name: Cosine Accuracy@1
199
+ - type: cosine_accuracy@3
200
+ value: 0.792183031458532
201
+ name: Cosine Accuracy@3
202
+ - type: cosine_accuracy@5
203
+ value: 0.8398474737845567
204
+ name: Cosine Accuracy@5
205
+ - type: cosine_accuracy@10
206
+ value: 0.9151572926596759
207
+ name: Cosine Accuracy@10
208
+ - type: cosine_precision@1
209
+ value: 0.5948522402287894
210
+ name: Cosine Precision@1
211
+ - type: cosine_precision@3
212
+ value: 0.2640610104861773
213
+ name: Cosine Precision@3
214
+ - type: cosine_precision@5
215
+ value: 0.16796949475691134
216
+ name: Cosine Precision@5
217
+ - type: cosine_precision@10
218
+ value: 0.09151572926596759
219
+ name: Cosine Precision@10
220
+ - type: cosine_recall@1
221
+ value: 0.5948522402287894
222
+ name: Cosine Recall@1
223
+ - type: cosine_recall@3
224
+ value: 0.792183031458532
225
+ name: Cosine Recall@3
226
+ - type: cosine_recall@5
227
+ value: 0.8398474737845567
228
+ name: Cosine Recall@5
229
+ - type: cosine_recall@10
230
+ value: 0.9151572926596759
231
+ name: Cosine Recall@10
232
+ - type: cosine_ndcg@10
233
+ value: 0.7548435122429773
234
+ name: Cosine Ndcg@10
235
+ - type: cosine_mrr@10
236
+ value: 0.7035797509343749
237
+ name: Cosine Mrr@10
238
+ - type: cosine_map@100
239
+ value: 0.7070932589939358
240
+ name: Cosine Map@100
241
+ - task:
242
+ type: information-retrieval
243
+ name: Information Retrieval
244
+ dataset:
245
+ name: dim 192
246
+ type: dim_192
247
+ metrics:
248
+ - type: cosine_accuracy@1
249
+ value: 0.5910390848427073
250
+ name: Cosine Accuracy@1
251
+ - type: cosine_accuracy@3
252
+ value: 0.7778836987607245
253
+ name: Cosine Accuracy@3
254
+ - type: cosine_accuracy@5
255
+ value: 0.8360343183984748
256
+ name: Cosine Accuracy@5
257
+ - type: cosine_accuracy@10
258
+ value: 0.9046711153479504
259
+ name: Cosine Accuracy@10
260
+ - type: cosine_precision@1
261
+ value: 0.5910390848427073
262
+ name: Cosine Precision@1
263
+ - type: cosine_precision@3
264
+ value: 0.25929456625357483
265
+ name: Cosine Precision@3
266
+ - type: cosine_precision@5
267
+ value: 0.16720686367969495
268
+ name: Cosine Precision@5
269
+ - type: cosine_precision@10
270
+ value: 0.09046711153479504
271
+ name: Cosine Precision@10
272
+ - type: cosine_recall@1
273
+ value: 0.5910390848427073
274
+ name: Cosine Recall@1
275
+ - type: cosine_recall@3
276
+ value: 0.7778836987607245
277
+ name: Cosine Recall@3
278
+ - type: cosine_recall@5
279
+ value: 0.8360343183984748
280
+ name: Cosine Recall@5
281
+ - type: cosine_recall@10
282
+ value: 0.9046711153479504
283
+ name: Cosine Recall@10
284
+ - type: cosine_ndcg@10
285
+ value: 0.7477240665900656
286
+ name: Cosine Ndcg@10
287
+ - type: cosine_mrr@10
288
+ value: 0.6975449029309853
289
+ name: Cosine Mrr@10
290
+ - type: cosine_map@100
291
+ value: 0.7014228144337117
292
+ name: Cosine Map@100
293
+ - task:
294
+ type: information-retrieval
295
+ name: Information Retrieval
296
+ dataset:
297
+ name: dim 128
298
+ type: dim_128
299
+ metrics:
300
+ - type: cosine_accuracy@1
301
+ value: 0.567206863679695
302
+ name: Cosine Accuracy@1
303
+ - type: cosine_accuracy@3
304
+ value: 0.7616777883698761
305
+ name: Cosine Accuracy@3
306
+ - type: cosine_accuracy@5
307
+ value: 0.8265014299332698
308
+ name: Cosine Accuracy@5
309
+ - type: cosine_accuracy@10
310
+ value: 0.8903717826501429
311
+ name: Cosine Accuracy@10
312
+ - type: cosine_precision@1
313
+ value: 0.567206863679695
314
+ name: Cosine Precision@1
315
+ - type: cosine_precision@3
316
+ value: 0.253892596123292
317
+ name: Cosine Precision@3
318
+ - type: cosine_precision@5
319
+ value: 0.16530028598665394
320
+ name: Cosine Precision@5
321
+ - type: cosine_precision@10
322
+ value: 0.08903717826501431
323
+ name: Cosine Precision@10
324
+ - type: cosine_recall@1
325
+ value: 0.567206863679695
326
+ name: Cosine Recall@1
327
+ - type: cosine_recall@3
328
+ value: 0.7616777883698761
329
+ name: Cosine Recall@3
330
+ - type: cosine_recall@5
331
+ value: 0.8265014299332698
332
+ name: Cosine Recall@5
333
+ - type: cosine_recall@10
334
+ value: 0.8903717826501429
335
+ name: Cosine Recall@10
336
+ - type: cosine_ndcg@10
337
+ value: 0.7273531110418706
338
+ name: Cosine Ndcg@10
339
+ - type: cosine_mrr@10
340
+ value: 0.6752920392815543
341
+ name: Cosine Mrr@10
342
+ - type: cosine_map@100
343
+ value: 0.6794753898354032
344
+ name: Cosine Map@100
345
+ - task:
346
+ type: information-retrieval
347
+ name: Information Retrieval
348
+ dataset:
349
+ name: dim 96
350
+ type: dim_96
351
+ metrics:
352
+ - type: cosine_accuracy@1
353
+ value: 0.5529075309818875
354
+ name: Cosine Accuracy@1
355
+ - type: cosine_accuracy@3
356
+ value: 0.7416587225929456
357
+ name: Cosine Accuracy@3
358
+ - type: cosine_accuracy@5
359
+ value: 0.8093422306959008
360
+ name: Cosine Accuracy@5
361
+ - type: cosine_accuracy@10
362
+ value: 0.8741658722592945
363
+ name: Cosine Accuracy@10
364
+ - type: cosine_precision@1
365
+ value: 0.5529075309818875
366
+ name: Cosine Precision@1
367
+ - type: cosine_precision@3
368
+ value: 0.24721957419764853
369
+ name: Cosine Precision@3
370
+ - type: cosine_precision@5
371
+ value: 0.1618684461391802
372
+ name: Cosine Precision@5
373
+ - type: cosine_precision@10
374
+ value: 0.08741658722592945
375
+ name: Cosine Precision@10
376
+ - type: cosine_recall@1
377
+ value: 0.5529075309818875
378
+ name: Cosine Recall@1
379
+ - type: cosine_recall@3
380
+ value: 0.7416587225929456
381
+ name: Cosine Recall@3
382
+ - type: cosine_recall@5
383
+ value: 0.8093422306959008
384
+ name: Cosine Recall@5
385
+ - type: cosine_recall@10
386
+ value: 0.8741658722592945
387
+ name: Cosine Recall@10
388
+ - type: cosine_ndcg@10
389
+ value: 0.7125237648315317
390
+ name: Cosine Ndcg@10
391
+ - type: cosine_mrr@10
392
+ value: 0.6608247461679306
393
+ name: Cosine Mrr@10
394
+ - type: cosine_map@100
395
+ value: 0.6652525185575742
396
+ name: Cosine Map@100
397
+ - task:
398
+ type: information-retrieval
399
+ name: Information Retrieval
400
+ dataset:
401
+ name: dim 64
402
+ type: dim_64
403
+ metrics:
404
+ - type: cosine_accuracy@1
405
+ value: 0.5166825548141086
406
+ name: Cosine Accuracy@1
407
+ - type: cosine_accuracy@3
408
+ value: 0.7054337464251669
409
+ name: Cosine Accuracy@3
410
+ - type: cosine_accuracy@5
411
+ value: 0.7673975214489991
412
+ name: Cosine Accuracy@5
413
+ - type: cosine_accuracy@10
414
+ value: 0.8369876072449952
415
+ name: Cosine Accuracy@10
416
+ - type: cosine_precision@1
417
+ value: 0.5166825548141086
418
+ name: Cosine Precision@1
419
+ - type: cosine_precision@3
420
+ value: 0.23514458214172226
421
+ name: Cosine Precision@3
422
+ - type: cosine_precision@5
423
+ value: 0.1534795042897998
424
+ name: Cosine Precision@5
425
+ - type: cosine_precision@10
426
+ value: 0.08369876072449953
427
+ name: Cosine Precision@10
428
+ - type: cosine_recall@1
429
+ value: 0.5166825548141086
430
+ name: Cosine Recall@1
431
+ - type: cosine_recall@3
432
+ value: 0.7054337464251669
433
+ name: Cosine Recall@3
434
+ - type: cosine_recall@5
435
+ value: 0.7673975214489991
436
+ name: Cosine Recall@5
437
+ - type: cosine_recall@10
438
+ value: 0.8369876072449952
439
+ name: Cosine Recall@10
440
+ - type: cosine_ndcg@10
441
+ value: 0.6755921916053389
442
+ name: Cosine Ndcg@10
443
+ - type: cosine_mrr@10
444
+ value: 0.6240088822309986
445
+ name: Cosine Mrr@10
446
+ - type: cosine_map@100
447
+ value: 0.629350282837756
448
+ name: Cosine Map@100
449
+ ---
450
+
451
+ # MNLP M3 Encoder SciQA
452
+
453
+ This is a [sentence-transformers](https://www.SBERT.net) model fine-tuned from [sentence-transformers/all-MiniLM-L6-v2](https://huggingface.co/sentence-transformers/all-MiniLM-L6-v2) on a dataset of 9,432 anchor–positive text pairs (listed below as `json`). It maps sentences & paragraphs to a 384-dimensional dense vector space and can be used for semantic textual similarity, semantic search, paraphrase mining, text classification, clustering, and more.
454
+
455
+ ## Model Details
456
+
457
+ ### Model Description
458
+ - **Model Type:** Sentence Transformer
459
+ - **Base model:** [sentence-transformers/all-MiniLM-L6-v2](https://huggingface.co/sentence-transformers/all-MiniLM-L6-v2) <!-- at revision c9745ed1d9f207416be6d2e6f8de32d1f16199bf -->
460
+ - **Maximum Sequence Length:** 256 tokens
461
+ - **Output Dimensionality:** 384 dimensions
462
+ - **Similarity Function:** Cosine Similarity
463
+ - **Training Dataset:**
464
+ - json
465
+ - **Language:** en
466
+ - **License:** apache-2.0
467
+
468
+ ### Model Sources
469
+
470
+ - **Documentation:** [Sentence Transformers Documentation](https://sbert.net)
471
+ - **Repository:** [Sentence Transformers on GitHub](https://github.com/UKPLab/sentence-transformers)
472
+ - **Hugging Face:** [Sentence Transformers on Hugging Face](https://huggingface.co/models?library=sentence-transformers)
473
+
474
+ ### Full Model Architecture
475
+
476
+ ```
477
+ SentenceTransformer(
478
+ (0): Transformer({'max_seq_length': 256, 'do_lower_case': False}) with Transformer model: BertModel
479
+ (1): Pooling({'word_embedding_dimension': 384, 'pooling_mode_cls_token': False, 'pooling_mode_mean_tokens': True, 'pooling_mode_max_tokens': False, 'pooling_mode_mean_sqrt_len_tokens': False, 'pooling_mode_weightedmean_tokens': False, 'pooling_mode_lasttoken': False, 'include_prompt': True})
480
+ (2): Normalize()
481
+ )
482
+ ```
483
+
484
+ ## Usage
485
+
486
+ ### Direct Usage (Sentence Transformers)
487
+
488
+ First install the Sentence Transformers library:
489
+
490
+ ```bash
491
+ pip install -U sentence-transformers
492
+ ```
493
+
494
+ Then you can load this model and run inference.
495
+ ```python
496
+ from sentence_transformers import SentenceTransformer
497
+
498
+ # Download from the 🤗 Hub (replace the placeholder ID below with this model's actual repository ID)
499
+ model = SentenceTransformer("sentence_transformers_model_id")
500
+ # Run inference
501
+ sentences = [
502
+ 'There are only four possible bases that make up each dna nucleotide: adenine, guanine, thymine, and?',
503
+ 'The only difference between each nucleotide is the identity of the base. There are only four possible bases that make up each DNA nucleotide: adenine (A), guanine (G), thymine (T), and cytosine (C).',
504
+ 'Metamorphism. This long word means “to change form. “ A rock undergoes metamorphism if it is exposed to extreme heat and pressure within the crust. With metamorphism , the rock does not melt all the way. The rock changes due to heat and pressure. A metamorphic rock may have a new mineral composition and/or texture.',
505
+ ]
506
+ embeddings = model.encode(sentences)
507
+ print(embeddings.shape)
508
+ # [3, 384]
509
+
510
+ # Get the similarity scores for the embeddings
511
+ similarities = model.similarity(embeddings, embeddings)
512
+ print(similarities.shape)
513
+ # [3, 3]
514
+ ```
515
+
516
+ <!--
517
+ ### Direct Usage (Transformers)
518
+
519
+ <details><summary>Click to see the direct usage in Transformers</summary>
520
+
521
+ </details>
522
+ -->
523
+
524
+ <!--
525
+ ### Downstream Usage (Sentence Transformers)
526
+
527
+ You can finetune this model on your own dataset.
528
+
529
+ <details><summary>Click to expand</summary>
530
+
531
+ </details>
532
+ -->
533
+
534
+ <!--
535
+ ### Out-of-Scope Use
536
+
537
+ *List how the model may foreseeably be misused and address what users ought not to do with the model.*
538
+ -->
539
+
540
+ ## Evaluation
541
+
542
+ ### Metrics
543
+
544
+ #### Information Retrieval
545
+
546
+ * Datasets: `dim_384`, `dim_256`, `dim_192`, `dim_128`, `dim_96` and `dim_64`
547
+ * Evaluated with [<code>InformationRetrievalEvaluator</code>](https://sbert.net/docs/package_reference/sentence_transformer/evaluation.html#sentence_transformers.evaluation.InformationRetrievalEvaluator)
548
+
549
+ | Metric | dim_384 | dim_256 | dim_192 | dim_128 | dim_96 | dim_64 |
550
+ |:--------------------|:-----------|:-----------|:-----------|:-----------|:-----------|:-----------|
551
+ | cosine_accuracy@1 | 0.6101 | 0.5949 | 0.591 | 0.5672 | 0.5529 | 0.5167 |
552
+ | cosine_accuracy@3 | 0.8008 | 0.7922 | 0.7779 | 0.7617 | 0.7417 | 0.7054 |
553
+ | cosine_accuracy@5 | 0.8541 | 0.8398 | 0.836 | 0.8265 | 0.8093 | 0.7674 |
554
+ | cosine_accuracy@10 | 0.9256 | 0.9152 | 0.9047 | 0.8904 | 0.8742 | 0.837 |
555
+ | cosine_precision@1 | 0.6101 | 0.5949 | 0.591 | 0.5672 | 0.5529 | 0.5167 |
556
+ | cosine_precision@3 | 0.2669 | 0.2641 | 0.2593 | 0.2539 | 0.2472 | 0.2351 |
557
+ | cosine_precision@5 | 0.1708 | 0.168 | 0.1672 | 0.1653 | 0.1619 | 0.1535 |
558
+ | cosine_precision@10 | 0.0926 | 0.0915 | 0.0905 | 0.089 | 0.0874 | 0.0837 |
559
+ | cosine_recall@1 | 0.6101 | 0.5949 | 0.591 | 0.5672 | 0.5529 | 0.5167 |
560
+ | cosine_recall@3 | 0.8008 | 0.7922 | 0.7779 | 0.7617 | 0.7417 | 0.7054 |
561
+ | cosine_recall@5 | 0.8541 | 0.8398 | 0.836 | 0.8265 | 0.8093 | 0.7674 |
562
+ | cosine_recall@10 | 0.9256 | 0.9152 | 0.9047 | 0.8904 | 0.8742 | 0.837 |
563
+ | **cosine_ndcg@10** | **0.7675** | **0.7548** | **0.7477** | **0.7274** | **0.7125** | **0.6756** |
564
+ | cosine_mrr@10 | 0.717 | 0.7036 | 0.6975 | 0.6753 | 0.6608 | 0.624 |
565
+ | cosine_map@100 | 0.7197 | 0.7071 | 0.7014 | 0.6795 | 0.6653 | 0.6294 |
566
+
567
+ <!--
568
+ ## Bias, Risks and Limitations
569
+
570
+ *What are the known or foreseeable issues stemming from this model? You could also flag here known failure cases or weaknesses of the model.*
571
+ -->
572
+
573
+ <!--
574
+ ### Recommendations
575
+
576
+ *What are recommendations with respect to the foreseeable issues? For example, filtering explicit content.*
577
+ -->
578
+
579
+ ## Training Details
580
+
581
+ ### Training Dataset
582
+
583
+ #### json
584
+
585
+ * Dataset: json
586
+ * Size: 9,432 training samples
587
+ * Columns: <code>anchor</code> and <code>positive</code>
588
+ * Approximate statistics based on the first 1000 samples:
589
+ | | anchor | positive |
590
+ |:--------|:----------------------------------------------------------------------------------|:------------------------------------------------------------------------------------|
591
+ | type | string | string |
592
+ | details | <ul><li>min: 7 tokens</li><li>mean: 18.15 tokens</li><li>max: 60 tokens</li></ul> | <ul><li>min: 10 tokens</li><li>mean: 94.56 tokens</li><li>max: 256 tokens</li></ul> |
593
+ * Samples:
594
+ | anchor | positive |
595
+ |:-------------------------------------------------------------------------------------------------|:-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
596
+ | <code>What is the term for atherosclerosis of arteries that supply the heart muscle?</code> | <code>Atherosclerosis of arteries that supply the heart muscle is called coronary heart disease . This disease may or may not have symptoms, such as chest pain. As the disease progresses, there is an increased risk of heart attack. A heart attack occurs when the blood supply to part of the heart muscle is blocked and cardiac muscle fibers die. Coronary heart disease is the leading cause of death of adults in the United States.</code> |
597
+ | <code>What term describes a drug that has an effect on the central nervous system?</code> | <code>Caffeine is an example of a psychoactive drug. It is found in coffee and many other products (see Table below ). Caffeine is a central nervous system stimulant . Like other stimulant drugs, it makes you feel more awake and alert. Other psychoactive drugs include alcohol, nicotine, and marijuana. Each has a different effect on the central nervous system. Alcohol, for example, is a depressant . It has the opposite effects of a stimulant like caffeine.</code> |
598
+ | <code>What scale is used to succinctly communicate the acidity or basicity of a solution?</code> | <code>The pH scale is used to succinctly communicate the acidity or basicity of a solution.</code> |
599
+ * Loss: [<code>MatryoshkaLoss</code>](https://sbert.net/docs/package_reference/sentence_transformer/losses.html#matryoshkaloss) with these parameters:
600
+ ```json
601
+ {
602
+ "loss": "MultipleNegativesRankingLoss",
603
+ "matryoshka_dims": [
604
+ 384,
605
+ 256,
606
+ 192,
607
+ 128,
608
+ 96,
609
+ 64
610
+ ],
611
+ "matryoshka_weights": [
612
+ 1,
613
+ 1,
614
+ 1,
615
+ 1,
616
+ 1,
617
+ 1
618
+ ],
619
+ "n_dims_per_step": -1
620
+ }
621
+ ```
622
+
623
+ ### Training Hyperparameters
624
+ #### Non-Default Hyperparameters
625
+
626
+ - `eval_strategy`: epoch
627
+ - `per_device_train_batch_size`: 32
628
+ - `per_device_eval_batch_size`: 16
629
+ - `gradient_accumulation_steps`: 16
630
+ - `learning_rate`: 2e-05
631
+ - `num_train_epochs`: 4
632
+ - `lr_scheduler_type`: cosine
633
+ - `warmup_ratio`: 0.1
634
+ - `bf16`: True
635
+ - `tf32`: True
636
+ - `load_best_model_at_end`: True
637
+ - `optim`: adamw_torch_fused
638
+ - `batch_sampler`: no_duplicates
639
+
640
+ #### All Hyperparameters
641
+ <details><summary>Click to expand</summary>
642
+
643
+ - `overwrite_output_dir`: False
644
+ - `do_predict`: False
645
+ - `eval_strategy`: epoch
646
+ - `prediction_loss_only`: True
647
+ - `per_device_train_batch_size`: 32
648
+ - `per_device_eval_batch_size`: 16
649
+ - `per_gpu_train_batch_size`: None
650
+ - `per_gpu_eval_batch_size`: None
651
+ - `gradient_accumulation_steps`: 16
652
+ - `eval_accumulation_steps`: None
653
+ - `torch_empty_cache_steps`: None
654
+ - `learning_rate`: 2e-05
655
+ - `weight_decay`: 0.0
656
+ - `adam_beta1`: 0.9
657
+ - `adam_beta2`: 0.999
658
+ - `adam_epsilon`: 1e-08
659
+ - `max_grad_norm`: 1.0
660
+ - `num_train_epochs`: 4
661
+ - `max_steps`: -1
662
+ - `lr_scheduler_type`: cosine
663
+ - `lr_scheduler_kwargs`: {}
664
+ - `warmup_ratio`: 0.1
665
+ - `warmup_steps`: 0
666
+ - `log_level`: passive
667
+ - `log_level_replica`: warning
668
+ - `log_on_each_node`: True
669
+ - `logging_nan_inf_filter`: True
670
+ - `save_safetensors`: True
671
+ - `save_on_each_node`: False
672
+ - `save_only_model`: False
673
+ - `restore_callback_states_from_checkpoint`: False
674
+ - `no_cuda`: False
675
+ - `use_cpu`: False
676
+ - `use_mps_device`: False
677
+ - `seed`: 42
678
+ - `data_seed`: None
679
+ - `jit_mode_eval`: False
680
+ - `use_ipex`: False
681
+ - `bf16`: True
682
+ - `fp16`: False
683
+ - `fp16_opt_level`: O1
684
+ - `half_precision_backend`: auto
685
+ - `bf16_full_eval`: False
686
+ - `fp16_full_eval`: False
687
+ - `tf32`: True
688
+ - `local_rank`: 0
689
+ - `ddp_backend`: None
690
+ - `tpu_num_cores`: None
691
+ - `tpu_metrics_debug`: False
692
+ - `debug`: []
693
+ - `dataloader_drop_last`: False
694
+ - `dataloader_num_workers`: 0
695
+ - `dataloader_prefetch_factor`: None
696
+ - `past_index`: -1
697
+ - `disable_tqdm`: False
698
+ - `remove_unused_columns`: True
699
+ - `label_names`: None
700
+ - `load_best_model_at_end`: True
701
+ - `ignore_data_skip`: False
702
+ - `fsdp`: []
703
+ - `fsdp_min_num_params`: 0
704
+ - `fsdp_config`: {'min_num_params': 0, 'xla': False, 'xla_fsdp_v2': False, 'xla_fsdp_grad_ckpt': False}
705
+ - `tp_size`: 0
706
+ - `fsdp_transformer_layer_cls_to_wrap`: None
707
+ - `accelerator_config`: {'split_batches': False, 'dispatch_batches': None, 'even_batches': True, 'use_seedable_sampler': True, 'non_blocking': False, 'gradient_accumulation_kwargs': None}
708
+ - `deepspeed`: None
709
+ - `label_smoothing_factor`: 0.0
710
+ - `optim`: adamw_torch_fused
711
+ - `optim_args`: None
712
+ - `adafactor`: False
713
+ - `group_by_length`: False
714
+ - `length_column_name`: length
715
+ - `ddp_find_unused_parameters`: None
716
+ - `ddp_bucket_cap_mb`: None
717
+ - `ddp_broadcast_buffers`: False
718
+ - `dataloader_pin_memory`: True
719
+ - `dataloader_persistent_workers`: False
720
+ - `skip_memory_metrics`: True
721
+ - `use_legacy_prediction_loop`: False
722
+ - `push_to_hub`: False
723
+ - `resume_from_checkpoint`: None
724
+ - `hub_model_id`: None
725
+ - `hub_strategy`: every_save
726
+ - `hub_private_repo`: None
727
+ - `hub_always_push`: False
728
+ - `gradient_checkpointing`: False
729
+ - `gradient_checkpointing_kwargs`: None
730
+ - `include_inputs_for_metrics`: False
731
+ - `include_for_metrics`: []
732
+ - `eval_do_concat_batches`: True
733
+ - `fp16_backend`: auto
734
+ - `push_to_hub_model_id`: None
735
+ - `push_to_hub_organization`: None
736
+ - `mp_parameters`:
737
+ - `auto_find_batch_size`: False
738
+ - `full_determinism`: False
739
+ - `torchdynamo`: None
740
+ - `ray_scope`: last
741
+ - `ddp_timeout`: 1800
742
+ - `torch_compile`: False
743
+ - `torch_compile_backend`: None
744
+ - `torch_compile_mode`: None
745
+ - `include_tokens_per_second`: False
746
+ - `include_num_input_tokens_seen`: False
747
+ - `neftune_noise_alpha`: None
748
+ - `optim_target_modules`: None
749
+ - `batch_eval_metrics`: False
750
+ - `eval_on_start`: False
751
+ - `use_liger_kernel`: False
752
+ - `eval_use_gather_object`: False
753
+ - `average_tokens_across_devices`: False
754
+ - `prompts`: None
755
+ - `batch_sampler`: no_duplicates
756
+ - `multi_dataset_batch_sampler`: proportional
757
+
758
+ </details>
759
+
760
+ ### Training Logs
761
+ | Epoch | Step | Training Loss | dim_384_cosine_ndcg@10 | dim_256_cosine_ndcg@10 | dim_192_cosine_ndcg@10 | dim_128_cosine_ndcg@10 | dim_96_cosine_ndcg@10 | dim_64_cosine_ndcg@10 |
762
+ |:------:|:----:|:-------------:|:----------------------:|:----------------------:|:----------------------:|:----------------------:|:---------------------:|:---------------------:|
763
+ | 0.5424 | 10 | 22.4049 | - | - | - | - | - | - |
764
+ | 1.0 | 19 | - | 0.7424 | 0.7315 | 0.7263 | 0.7093 | 0.6919 | 0.6575 |
765
+ | 1.0542 | 20 | 16.6616 | - | - | - | - | - | - |
766
+ | 1.5966 | 30 | 16.8367 | - | - | - | - | - | - |
767
+ | 2.0 | 38 | - | 0.7612 | 0.7520 | 0.7431 | 0.7261 | 0.7097 | 0.6708 |
768
+ | 2.1085 | 40 | 12.8169 | - | - | - | - | - | - |
769
+ | 2.6508 | 50 | 13.7826 | - | - | - | - | - | - |
770
+ | 3.0 | 57 | - | 0.7675 | 0.7548 | 0.7477 | 0.7274 | 0.7125 | 0.6756 |
771
+
772
+
773
+ ### Framework Versions
774
+ - Python: 3.12.8
775
+ - Sentence Transformers: 3.4.1
776
+ - Transformers: 4.51.3
777
+ - PyTorch: 2.5.1+cu124
778
+ - Accelerate: 1.3.0
779
+ - Datasets: 3.6.0
780
+ - Tokenizers: 0.21.0
781
+
782
+ ## Citation
783
+
784
+ ### BibTeX
785
+
786
+ #### Sentence Transformers
787
+ ```bibtex
788
+ @inproceedings{reimers-2019-sentence-bert,
789
+ title = "Sentence-BERT: Sentence Embeddings using Siamese BERT-Networks",
790
+ author = "Reimers, Nils and Gurevych, Iryna",
791
+ booktitle = "Proceedings of the 2019 Conference on Empirical Methods in Natural Language Processing",
792
+ month = "11",
793
+ year = "2019",
794
+ publisher = "Association for Computational Linguistics",
795
+ url = "https://arxiv.org/abs/1908.10084",
796
+ }
797
+ ```
798
+
799
+ #### MatryoshkaLoss
800
+ ```bibtex
801
+ @misc{kusupati2024matryoshka,
802
+ title={Matryoshka Representation Learning},
803
+ author={Aditya Kusupati and Gantavya Bhatt and Aniket Rege and Matthew Wallingford and Aditya Sinha and Vivek Ramanujan and William Howard-Snyder and Kaifeng Chen and Sham Kakade and Prateek Jain and Ali Farhadi},
804
+ year={2024},
805
+ eprint={2205.13147},
806
+ archivePrefix={arXiv},
807
+ primaryClass={cs.LG}
808
+ }
809
+ ```
810
+
811
+ #### MultipleNegativesRankingLoss
812
+ ```bibtex
813
+ @misc{henderson2017efficient,
814
+ title={Efficient Natural Language Response Suggestion for Smart Reply},
815
+ author={Matthew Henderson and Rami Al-Rfou and Brian Strope and Yun-hsuan Sung and Laszlo Lukacs and Ruiqi Guo and Sanjiv Kumar and Balint Miklos and Ray Kurzweil},
816
+ year={2017},
817
+ eprint={1705.00652},
818
+ archivePrefix={arXiv},
819
+ primaryClass={cs.CL}
820
+ }
821
+ ```
822
+
823
+ <!--
824
+ ## Glossary
825
+
826
+ *Clearly define terms in order to be accessible across audiences.*
827
+ -->
828
+
829
+ <!--
830
+ ## Model Card Authors
831
+
832
+ *Lists the people who create the model card, providing recognition and accountability for the detailed work that goes into its construction.*
833
+ -->
834
+
835
+ <!--
836
+ ## Model Card Contact
837
+
838
+ *Provides a way for people who have updates to the Model Card, suggestions, or questions, to contact the Model Card authors.*
839
+ -->
checkpoint-57/config.json ADDED
@@ -0,0 +1,25 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "architectures": [
3
+ "BertModel"
4
+ ],
5
+ "attention_probs_dropout_prob": 0.1,
6
+ "classifier_dropout": null,
7
+ "gradient_checkpointing": false,
8
+ "hidden_act": "gelu",
9
+ "hidden_dropout_prob": 0.1,
10
+ "hidden_size": 384,
11
+ "initializer_range": 0.02,
12
+ "intermediate_size": 1536,
13
+ "layer_norm_eps": 1e-12,
14
+ "max_position_embeddings": 512,
15
+ "model_type": "bert",
16
+ "num_attention_heads": 12,
17
+ "num_hidden_layers": 6,
18
+ "pad_token_id": 0,
19
+ "position_embedding_type": "absolute",
20
+ "torch_dtype": "float32",
21
+ "transformers_version": "4.51.3",
22
+ "type_vocab_size": 2,
23
+ "use_cache": true,
24
+ "vocab_size": 30522
25
+ }
checkpoint-57/config_sentence_transformers.json ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "__version__": {
3
+ "sentence_transformers": "3.4.1",
4
+ "transformers": "4.51.3",
5
+ "pytorch": "2.5.1+cu124"
6
+ },
7
+ "prompts": {},
8
+ "default_prompt_name": null,
9
+ "similarity_fn_name": "cosine"
10
+ }
checkpoint-57/model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fd45ec83ada224ca7190e67a569b7ea60dc28dfba30f5715e74e23149b2a7990
3
+ size 90864192
checkpoint-57/modules.json ADDED
@@ -0,0 +1,20 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [
2
+ {
3
+ "idx": 0,
4
+ "name": "0",
5
+ "path": "",
6
+ "type": "sentence_transformers.models.Transformer"
7
+ },
8
+ {
9
+ "idx": 1,
10
+ "name": "1",
11
+ "path": "1_Pooling",
12
+ "type": "sentence_transformers.models.Pooling"
13
+ },
14
+ {
15
+ "idx": 2,
16
+ "name": "2",
17
+ "path": "2_Normalize",
18
+ "type": "sentence_transformers.models.Normalize"
19
+ }
20
+ ]
checkpoint-57/optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:041f2d5f3016c41a5f69d0e56f823be7d86ebe48252fe99c65b4f747e5912eaf
3
+ size 180609146
checkpoint-57/rng_state.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4a9c1f5e734a42c9d58108336589b52bde6ed84b40cc4be4c092a31867711776
3
+ size 14244
checkpoint-57/scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6cb476d88e6963e72c10dd5b6893385caadfb93000f1b44d45341555c1f3669f
3
+ size 1064
checkpoint-57/sentence_bert_config.json ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ {
2
+ "max_seq_length": 256,
3
+ "do_lower_case": false
4
+ }
checkpoint-57/special_tokens_map.json ADDED
@@ -0,0 +1,37 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "cls_token": {
3
+ "content": "[CLS]",
4
+ "lstrip": false,
5
+ "normalized": false,
6
+ "rstrip": false,
7
+ "single_word": false
8
+ },
9
+ "mask_token": {
10
+ "content": "[MASK]",
11
+ "lstrip": false,
12
+ "normalized": false,
13
+ "rstrip": false,
14
+ "single_word": false
15
+ },
16
+ "pad_token": {
17
+ "content": "[PAD]",
18
+ "lstrip": false,
19
+ "normalized": false,
20
+ "rstrip": false,
21
+ "single_word": false
22
+ },
23
+ "sep_token": {
24
+ "content": "[SEP]",
25
+ "lstrip": false,
26
+ "normalized": false,
27
+ "rstrip": false,
28
+ "single_word": false
29
+ },
30
+ "unk_token": {
31
+ "content": "[UNK]",
32
+ "lstrip": false,
33
+ "normalized": false,
34
+ "rstrip": false,
35
+ "single_word": false
36
+ }
37
+ }
checkpoint-57/tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
checkpoint-57/tokenizer_config.json ADDED
@@ -0,0 +1,65 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "added_tokens_decoder": {
3
+ "0": {
4
+ "content": "[PAD]",
5
+ "lstrip": false,
6
+ "normalized": false,
7
+ "rstrip": false,
8
+ "single_word": false,
9
+ "special": true
10
+ },
11
+ "100": {
12
+ "content": "[UNK]",
13
+ "lstrip": false,
14
+ "normalized": false,
15
+ "rstrip": false,
16
+ "single_word": false,
17
+ "special": true
18
+ },
19
+ "101": {
20
+ "content": "[CLS]",
21
+ "lstrip": false,
22
+ "normalized": false,
23
+ "rstrip": false,
24
+ "single_word": false,
25
+ "special": true
26
+ },
27
+ "102": {
28
+ "content": "[SEP]",
29
+ "lstrip": false,
30
+ "normalized": false,
31
+ "rstrip": false,
32
+ "single_word": false,
33
+ "special": true
34
+ },
35
+ "103": {
36
+ "content": "[MASK]",
37
+ "lstrip": false,
38
+ "normalized": false,
39
+ "rstrip": false,
40
+ "single_word": false,
41
+ "special": true
42
+ }
43
+ },
44
+ "clean_up_tokenization_spaces": false,
45
+ "cls_token": "[CLS]",
46
+ "do_basic_tokenize": true,
47
+ "do_lower_case": true,
48
+ "extra_special_tokens": {},
49
+ "mask_token": "[MASK]",
50
+ "max_length": 128,
51
+ "model_max_length": 256,
52
+ "never_split": null,
53
+ "pad_to_multiple_of": null,
54
+ "pad_token": "[PAD]",
55
+ "pad_token_type_id": 0,
56
+ "padding_side": "right",
57
+ "sep_token": "[SEP]",
58
+ "stride": 0,
59
+ "strip_accents": null,
60
+ "tokenize_chinese_chars": true,
61
+ "tokenizer_class": "BertTokenizer",
62
+ "truncation_side": "right",
63
+ "truncation_strategy": "longest_first",
64
+ "unk_token": "[UNK]"
65
+ }
checkpoint-57/trainer_state.json ADDED
@@ -0,0 +1,363 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_global_step": 57,
3
+ "best_metric": 0.7273531110418706,
4
+ "best_model_checkpoint": "MNLP_M3_document_encoder_sciqa/checkpoint-57",
5
+ "epoch": 3.0,
6
+ "eval_steps": 500,
7
+ "global_step": 57,
8
+ "is_hyper_param_search": false,
9
+ "is_local_process_zero": true,
10
+ "is_world_process_zero": true,
11
+ "log_history": [
12
+ {
13
+ "epoch": 0.5423728813559322,
14
+ "grad_norm": 164.72093200683594,
15
+ "learning_rate": 1.9987954562051724e-05,
16
+ "loss": 22.4049,
17
+ "step": 10
18
+ },
19
+ {
20
+ "epoch": 1.0,
21
+ "eval_dim_128_cosine_accuracy@1": 0.5452812202097236,
22
+ "eval_dim_128_cosine_accuracy@10": 0.8760724499523356,
23
+ "eval_dim_128_cosine_accuracy@3": 0.7416587225929456,
24
+ "eval_dim_128_cosine_accuracy@5": 0.8074356530028599,
25
+ "eval_dim_128_cosine_map@100": 0.6605612754102786,
26
+ "eval_dim_128_cosine_mrr@10": 0.6559349796480402,
27
+ "eval_dim_128_cosine_ndcg@10": 0.7092688022688834,
28
+ "eval_dim_128_cosine_precision@1": 0.5452812202097236,
29
+ "eval_dim_128_cosine_precision@10": 0.08760724499523356,
30
+ "eval_dim_128_cosine_precision@3": 0.24721957419764853,
31
+ "eval_dim_128_cosine_precision@5": 0.161487130600572,
32
+ "eval_dim_128_cosine_recall@1": 0.5452812202097236,
33
+ "eval_dim_128_cosine_recall@10": 0.8760724499523356,
34
+ "eval_dim_128_cosine_recall@3": 0.7416587225929456,
35
+ "eval_dim_128_cosine_recall@5": 0.8074356530028599,
36
+ "eval_dim_192_cosine_accuracy@1": 0.5624404194470924,
37
+ "eval_dim_192_cosine_accuracy@10": 0.8932316491897044,
38
+ "eval_dim_192_cosine_accuracy@3": 0.7597712106768351,
39
+ "eval_dim_192_cosine_accuracy@5": 0.8188751191611058,
40
+ "eval_dim_192_cosine_map@100": 0.677245219852975,
41
+ "eval_dim_192_cosine_mrr@10": 0.6730234388003697,
42
+ "eval_dim_192_cosine_ndcg@10": 0.7262712999939527,
43
+ "eval_dim_192_cosine_precision@1": 0.5624404194470924,
44
+ "eval_dim_192_cosine_precision@10": 0.08932316491897044,
45
+ "eval_dim_192_cosine_precision@3": 0.25325707022561167,
46
+ "eval_dim_192_cosine_precision@5": 0.16377502383222117,
47
+ "eval_dim_192_cosine_recall@1": 0.5624404194470924,
48
+ "eval_dim_192_cosine_recall@10": 0.8932316491897044,
49
+ "eval_dim_192_cosine_recall@3": 0.7597712106768351,
50
+ "eval_dim_192_cosine_recall@5": 0.8188751191611058,
51
+ "eval_dim_256_cosine_accuracy@1": 0.5653002859866539,
52
+ "eval_dim_256_cosine_accuracy@10": 0.8960915157292659,
53
+ "eval_dim_256_cosine_accuracy@3": 0.7683508102955195,
54
+ "eval_dim_256_cosine_accuracy@5": 0.8236415633937083,
55
+ "eval_dim_256_cosine_map@100": 0.6831583296339104,
56
+ "eval_dim_256_cosine_mrr@10": 0.6786784844220503,
57
+ "eval_dim_256_cosine_ndcg@10": 0.7314611486548883,
58
+ "eval_dim_256_cosine_precision@1": 0.5653002859866539,
59
+ "eval_dim_256_cosine_precision@10": 0.08960915157292659,
60
+ "eval_dim_256_cosine_precision@3": 0.25611693676517316,
61
+ "eval_dim_256_cosine_precision@5": 0.16472831267874166,
62
+ "eval_dim_256_cosine_recall@1": 0.5653002859866539,
63
+ "eval_dim_256_cosine_recall@10": 0.8960915157292659,
64
+ "eval_dim_256_cosine_recall@3": 0.7683508102955195,
65
+ "eval_dim_256_cosine_recall@5": 0.8236415633937083,
66
+ "eval_dim_384_cosine_accuracy@1": 0.5786463298379408,
67
+ "eval_dim_384_cosine_accuracy@10": 0.9075309818875119,
68
+ "eval_dim_384_cosine_accuracy@3": 0.776930409914204,
69
+ "eval_dim_384_cosine_accuracy@5": 0.8417540514775977,
70
+ "eval_dim_384_cosine_map@100": 0.6932934943306605,
71
+ "eval_dim_384_cosine_mrr@10": 0.6894563227261042,
72
+ "eval_dim_384_cosine_ndcg@10": 0.7423737824827953,
73
+ "eval_dim_384_cosine_precision@1": 0.5786463298379408,
74
+ "eval_dim_384_cosine_precision@10": 0.0907530981887512,
75
+ "eval_dim_384_cosine_precision@3": 0.2589768033047346,
76
+ "eval_dim_384_cosine_precision@5": 0.16835081029551957,
77
+ "eval_dim_384_cosine_recall@1": 0.5786463298379408,
78
+ "eval_dim_384_cosine_recall@10": 0.9075309818875119,
79
+ "eval_dim_384_cosine_recall@3": 0.776930409914204,
80
+ "eval_dim_384_cosine_recall@5": 0.8417540514775977,
81
+ "eval_dim_64_cosine_accuracy@1": 0.49285033365109626,
82
+ "eval_dim_64_cosine_accuracy@10": 0.8274547187797903,
83
+ "eval_dim_64_cosine_accuracy@3": 0.684461391801716,
84
+ "eval_dim_64_cosine_accuracy@5": 0.7578646329837941,
85
+ "eval_dim_64_cosine_map@100": 0.6088952628032813,
86
+ "eval_dim_64_cosine_mrr@10": 0.6032237807738285,
87
+ "eval_dim_64_cosine_ndcg@10": 0.6575406372744073,
88
+ "eval_dim_64_cosine_precision@1": 0.49285033365109626,
89
+ "eval_dim_64_cosine_precision@10": 0.08274547187797902,
90
+ "eval_dim_64_cosine_precision@3": 0.2281537972672386,
91
+ "eval_dim_64_cosine_precision@5": 0.1515729265967588,
92
+ "eval_dim_64_cosine_recall@1": 0.49285033365109626,
93
+ "eval_dim_64_cosine_recall@10": 0.8274547187797903,
94
+ "eval_dim_64_cosine_recall@3": 0.684461391801716,
95
+ "eval_dim_64_cosine_recall@5": 0.7578646329837941,
96
+ "eval_dim_96_cosine_accuracy@1": 0.5214489990467112,
97
+ "eval_dim_96_cosine_accuracy@10": 0.8636796949475691,
98
+ "eval_dim_96_cosine_accuracy@3": 0.7264061010486177,
99
+ "eval_dim_96_cosine_accuracy@5": 0.7893231649189705,
100
+ "eval_dim_96_cosine_map@100": 0.6418431352074736,
101
+ "eval_dim_96_cosine_mrr@10": 0.6369528046363133,
102
+ "eval_dim_96_cosine_ndcg@10": 0.6919097155042885,
103
+ "eval_dim_96_cosine_precision@1": 0.5214489990467112,
104
+ "eval_dim_96_cosine_precision@10": 0.0863679694947569,
105
+ "eval_dim_96_cosine_precision@3": 0.2421353670162059,
106
+ "eval_dim_96_cosine_precision@5": 0.15786463298379408,
107
+ "eval_dim_96_cosine_recall@1": 0.5214489990467112,
108
+ "eval_dim_96_cosine_recall@10": 0.8636796949475691,
109
+ "eval_dim_96_cosine_recall@3": 0.7264061010486177,
110
+ "eval_dim_96_cosine_recall@5": 0.7893231649189705,
111
+ "eval_runtime": 116.4269,
112
+ "eval_samples_per_second": 0.0,
113
+ "eval_sequential_score": 0.6575406372744073,
114
+ "eval_steps_per_second": 0.0,
115
+ "step": 19
116
+ },
117
+ {
118
+ "epoch": 1.0542372881355933,
119
+ "grad_norm": 107.04779815673828,
120
+ "learning_rate": 1.8577286100002723e-05,
121
+ "loss": 16.6616,
122
+ "step": 20
123
+ },
124
+ {
125
+ "epoch": 1.5966101694915253,
126
+ "grad_norm": 97.63832092285156,
127
+ "learning_rate": 1.5141027441932217e-05,
128
+ "loss": 16.8367,
129
+ "step": 30
130
+ },
131
+ {
132
+ "epoch": 2.0,
133
+ "eval_dim_128_cosine_accuracy@1": 0.567206863679695,
134
+ "eval_dim_128_cosine_accuracy@10": 0.886558627264061,
135
+ "eval_dim_128_cosine_accuracy@3": 0.7607244995233555,
136
+ "eval_dim_128_cosine_accuracy@5": 0.8236415633937083,
137
+ "eval_dim_128_cosine_map@100": 0.6790430112153837,
138
+ "eval_dim_128_cosine_mrr@10": 0.6746886679679823,
139
+ "eval_dim_128_cosine_ndcg@10": 0.7260517487265687,
140
+ "eval_dim_128_cosine_precision@1": 0.567206863679695,
141
+ "eval_dim_128_cosine_precision@10": 0.0886558627264061,
142
+ "eval_dim_128_cosine_precision@3": 0.25357483317445184,
143
+ "eval_dim_128_cosine_precision@5": 0.16472831267874166,
144
+ "eval_dim_128_cosine_recall@1": 0.567206863679695,
145
+ "eval_dim_128_cosine_recall@10": 0.886558627264061,
146
+ "eval_dim_128_cosine_recall@3": 0.7607244995233555,
147
+ "eval_dim_128_cosine_recall@5": 0.8236415633937083,
148
+ "eval_dim_192_cosine_accuracy@1": 0.5805529075309819,
149
+ "eval_dim_192_cosine_accuracy@10": 0.9008579599618685,
150
+ "eval_dim_192_cosine_accuracy@3": 0.782650142993327,
151
+ "eval_dim_192_cosine_accuracy@5": 0.8322211630123928,
152
+ "eval_dim_192_cosine_map@100": 0.6964841260809953,
153
+ "eval_dim_192_cosine_mrr@10": 0.6923562879234952,
154
+ "eval_dim_192_cosine_ndcg@10": 0.7430712975035773,
155
+ "eval_dim_192_cosine_precision@1": 0.5805529075309819,
156
+ "eval_dim_192_cosine_precision@10": 0.09008579599618685,
157
+ "eval_dim_192_cosine_precision@3": 0.26088338099777564,
158
+ "eval_dim_192_cosine_precision@5": 0.16644423260247856,
159
+ "eval_dim_192_cosine_recall@1": 0.5805529075309819,
160
+ "eval_dim_192_cosine_recall@10": 0.9008579599618685,
161
+ "eval_dim_192_cosine_recall@3": 0.782650142993327,
162
+ "eval_dim_192_cosine_recall@5": 0.8322211630123928,
163
+ "eval_dim_256_cosine_accuracy@1": 0.5919923736892279,
164
+ "eval_dim_256_cosine_accuracy@10": 0.9142040038131554,
165
+ "eval_dim_256_cosine_accuracy@3": 0.7902764537654909,
166
+ "eval_dim_256_cosine_accuracy@5": 0.8360343183984748,
167
+ "eval_dim_256_cosine_map@100": 0.7038093293311698,
168
+ "eval_dim_256_cosine_mrr@10": 0.700305279404422,
169
+ "eval_dim_256_cosine_ndcg@10": 0.7520267351833514,
170
+ "eval_dim_256_cosine_precision@1": 0.5919923736892279,
171
+ "eval_dim_256_cosine_precision@10": 0.09142040038131555,
172
+ "eval_dim_256_cosine_precision@3": 0.26342548458849696,
173
+ "eval_dim_256_cosine_precision@5": 0.16720686367969492,
174
+ "eval_dim_256_cosine_recall@1": 0.5919923736892279,
175
+ "eval_dim_256_cosine_recall@10": 0.9142040038131554,
176
+ "eval_dim_256_cosine_recall@3": 0.7902764537654909,
177
+ "eval_dim_256_cosine_recall@5": 0.8360343183984748,
178
+ "eval_dim_384_cosine_accuracy@1": 0.6015252621544328,
179
+ "eval_dim_384_cosine_accuracy@10": 0.9199237368922784,
180
+ "eval_dim_384_cosine_accuracy@3": 0.7959961868446139,
181
+ "eval_dim_384_cosine_accuracy@5": 0.8531935176358436,
182
+ "eval_dim_384_cosine_map@100": 0.713601684515785,
183
+ "eval_dim_384_cosine_mrr@10": 0.7104082497314151,
184
+ "eval_dim_384_cosine_ndcg@10": 0.761241503632434,
185
+ "eval_dim_384_cosine_precision@1": 0.6015252621544328,
186
+ "eval_dim_384_cosine_precision@10": 0.09199237368922783,
187
+ "eval_dim_384_cosine_precision@3": 0.26533206228153794,
188
+ "eval_dim_384_cosine_precision@5": 0.17063870352716873,
189
+ "eval_dim_384_cosine_recall@1": 0.6015252621544328,
190
+ "eval_dim_384_cosine_recall@10": 0.9199237368922784,
191
+ "eval_dim_384_cosine_recall@3": 0.7959961868446139,
192
+ "eval_dim_384_cosine_recall@5": 0.8531935176358436,
193
+ "eval_dim_64_cosine_accuracy@1": 0.5138226882745471,
194
+ "eval_dim_64_cosine_accuracy@10": 0.8341277407054337,
195
+ "eval_dim_64_cosine_accuracy@3": 0.7016205910390848,
196
+ "eval_dim_64_cosine_accuracy@5": 0.7645376549094376,
197
+ "eval_dim_64_cosine_map@100": 0.6242158272303533,
198
+ "eval_dim_64_cosine_mrr@10": 0.618670464690484,
199
+ "eval_dim_64_cosine_ndcg@10": 0.6707950308444217,
200
+ "eval_dim_64_cosine_precision@1": 0.5138226882745471,
201
+ "eval_dim_64_cosine_precision@10": 0.08341277407054337,
202
+ "eval_dim_64_cosine_precision@3": 0.2338735303463616,
203
+ "eval_dim_64_cosine_precision@5": 0.1529075309818875,
204
+ "eval_dim_64_cosine_recall@1": 0.5138226882745471,
205
+ "eval_dim_64_cosine_recall@10": 0.8341277407054337,
206
+ "eval_dim_64_cosine_recall@3": 0.7016205910390848,
207
+ "eval_dim_64_cosine_recall@5": 0.7645376549094376,
208
+ "eval_dim_96_cosine_accuracy@1": 0.5471877979027645,
209
+ "eval_dim_96_cosine_accuracy@10": 0.8722592945662536,
210
+ "eval_dim_96_cosine_accuracy@3": 0.7407054337464252,
211
+ "eval_dim_96_cosine_accuracy@5": 0.8017159199237369,
212
+ "eval_dim_96_cosine_map@100": 0.6622003643008398,
213
+ "eval_dim_96_cosine_mrr@10": 0.6576811627097615,
214
+ "eval_dim_96_cosine_ndcg@10": 0.7097194683573752,
215
+ "eval_dim_96_cosine_precision@1": 0.5471877979027645,
216
+ "eval_dim_96_cosine_precision@10": 0.08722592945662536,
217
+ "eval_dim_96_cosine_precision@3": 0.2469018112488084,
218
+ "eval_dim_96_cosine_precision@5": 0.16034318398474737,
219
+ "eval_dim_96_cosine_recall@1": 0.5471877979027645,
220
+ "eval_dim_96_cosine_recall@10": 0.8722592945662536,
221
+ "eval_dim_96_cosine_recall@3": 0.7407054337464252,
222
+ "eval_dim_96_cosine_recall@5": 0.8017159199237369,
223
+ "eval_runtime": 119.8934,
224
+ "eval_samples_per_second": 0.0,
225
+ "eval_sequential_score": 0.6707950308444217,
226
+ "eval_steps_per_second": 0.0,
227
+ "step": 38
228
+ },
229
+ {
230
+ "epoch": 2.1084745762711865,
231
+ "grad_norm": 113.89473724365234,
232
+ "learning_rate": 1.0490676743274181e-05,
233
+ "loss": 12.8169,
234
+ "step": 40
235
+ },
236
+ {
237
+ "epoch": 2.6508474576271186,
238
+ "grad_norm": 81.27326965332031,
239
+ "learning_rate": 5.724449065697182e-06,
240
+ "loss": 13.7826,
241
+ "step": 50
242
+ },
243
+ {
244
+ "epoch": 3.0,
245
+ "eval_dim_128_cosine_accuracy@1": 0.567206863679695,
246
+ "eval_dim_128_cosine_accuracy@10": 0.8903717826501429,
247
+ "eval_dim_128_cosine_accuracy@3": 0.7616777883698761,
248
+ "eval_dim_128_cosine_accuracy@5": 0.8265014299332698,
249
+ "eval_dim_128_cosine_map@100": 0.6794753898354032,
250
+ "eval_dim_128_cosine_mrr@10": 0.6752920392815543,
251
+ "eval_dim_128_cosine_ndcg@10": 0.7273531110418706,
252
+ "eval_dim_128_cosine_precision@1": 0.567206863679695,
253
+ "eval_dim_128_cosine_precision@10": 0.08903717826501431,
254
+ "eval_dim_128_cosine_precision@3": 0.253892596123292,
255
+ "eval_dim_128_cosine_precision@5": 0.16530028598665394,
256
+ "eval_dim_128_cosine_recall@1": 0.567206863679695,
257
+ "eval_dim_128_cosine_recall@10": 0.8903717826501429,
258
+ "eval_dim_128_cosine_recall@3": 0.7616777883698761,
259
+ "eval_dim_128_cosine_recall@5": 0.8265014299332698,
260
+ "eval_dim_192_cosine_accuracy@1": 0.5910390848427073,
261
+ "eval_dim_192_cosine_accuracy@10": 0.9046711153479504,
262
+ "eval_dim_192_cosine_accuracy@3": 0.7778836987607245,
263
+ "eval_dim_192_cosine_accuracy@5": 0.8360343183984748,
264
+ "eval_dim_192_cosine_map@100": 0.7014228144337117,
265
+ "eval_dim_192_cosine_mrr@10": 0.6975449029309853,
266
+ "eval_dim_192_cosine_ndcg@10": 0.7477240665900656,
267
+ "eval_dim_192_cosine_precision@1": 0.5910390848427073,
268
+ "eval_dim_192_cosine_precision@10": 0.09046711153479504,
269
+ "eval_dim_192_cosine_precision@3": 0.25929456625357483,
270
+ "eval_dim_192_cosine_precision@5": 0.16720686367969495,
271
+ "eval_dim_192_cosine_recall@1": 0.5910390848427073,
272
+ "eval_dim_192_cosine_recall@10": 0.9046711153479504,
273
+ "eval_dim_192_cosine_recall@3": 0.7778836987607245,
274
+ "eval_dim_192_cosine_recall@5": 0.8360343183984748,
275
+ "eval_dim_256_cosine_accuracy@1": 0.5948522402287894,
276
+ "eval_dim_256_cosine_accuracy@10": 0.9151572926596759,
277
+ "eval_dim_256_cosine_accuracy@3": 0.792183031458532,
278
+ "eval_dim_256_cosine_accuracy@5": 0.8398474737845567,
279
+ "eval_dim_256_cosine_map@100": 0.7070932589939358,
280
+ "eval_dim_256_cosine_mrr@10": 0.7035797509343749,
281
+ "eval_dim_256_cosine_ndcg@10": 0.7548435122429773,
282
+ "eval_dim_256_cosine_precision@1": 0.5948522402287894,
283
+ "eval_dim_256_cosine_precision@10": 0.09151572926596759,
284
+ "eval_dim_256_cosine_precision@3": 0.2640610104861773,
285
+ "eval_dim_256_cosine_precision@5": 0.16796949475691134,
286
+ "eval_dim_256_cosine_recall@1": 0.5948522402287894,
287
+ "eval_dim_256_cosine_recall@10": 0.9151572926596759,
288
+ "eval_dim_256_cosine_recall@3": 0.792183031458532,
289
+ "eval_dim_256_cosine_recall@5": 0.8398474737845567,
290
+ "eval_dim_384_cosine_accuracy@1": 0.6101048617731173,
291
+ "eval_dim_384_cosine_accuracy@10": 0.9256434699714013,
292
+ "eval_dim_384_cosine_accuracy@3": 0.8007626310772163,
293
+ "eval_dim_384_cosine_accuracy@5": 0.8541468064823642,
294
+ "eval_dim_384_cosine_map@100": 0.7197084605820631,
295
+ "eval_dim_384_cosine_mrr@10": 0.7170116664396936,
296
+ "eval_dim_384_cosine_ndcg@10": 0.7675175612283535,
297
+ "eval_dim_384_cosine_precision@1": 0.6101048617731173,
298
+ "eval_dim_384_cosine_precision@10": 0.09256434699714014,
299
+ "eval_dim_384_cosine_precision@3": 0.2669208770257388,
300
+ "eval_dim_384_cosine_precision@5": 0.17082936129647283,
301
+ "eval_dim_384_cosine_recall@1": 0.6101048617731173,
302
+ "eval_dim_384_cosine_recall@10": 0.9256434699714013,
303
+ "eval_dim_384_cosine_recall@3": 0.8007626310772163,
304
+ "eval_dim_384_cosine_recall@5": 0.8541468064823642,
305
+ "eval_dim_64_cosine_accuracy@1": 0.5166825548141086,
306
+ "eval_dim_64_cosine_accuracy@10": 0.8369876072449952,
307
+ "eval_dim_64_cosine_accuracy@3": 0.7054337464251669,
308
+ "eval_dim_64_cosine_accuracy@5": 0.7673975214489991,
309
+ "eval_dim_64_cosine_map@100": 0.629350282837756,
310
+ "eval_dim_64_cosine_mrr@10": 0.6240088822309986,
311
+ "eval_dim_64_cosine_ndcg@10": 0.6755921916053389,
312
+ "eval_dim_64_cosine_precision@1": 0.5166825548141086,
313
+ "eval_dim_64_cosine_precision@10": 0.08369876072449953,
314
+ "eval_dim_64_cosine_precision@3": 0.23514458214172226,
315
+ "eval_dim_64_cosine_precision@5": 0.1534795042897998,
316
+ "eval_dim_64_cosine_recall@1": 0.5166825548141086,
317
+ "eval_dim_64_cosine_recall@10": 0.8369876072449952,
318
+ "eval_dim_64_cosine_recall@3": 0.7054337464251669,
319
+ "eval_dim_64_cosine_recall@5": 0.7673975214489991,
320
+ "eval_dim_96_cosine_accuracy@1": 0.5529075309818875,
321
+ "eval_dim_96_cosine_accuracy@10": 0.8741658722592945,
322
+ "eval_dim_96_cosine_accuracy@3": 0.7416587225929456,
323
+ "eval_dim_96_cosine_accuracy@5": 0.8093422306959008,
324
+ "eval_dim_96_cosine_map@100": 0.6652525185575742,
325
+ "eval_dim_96_cosine_mrr@10": 0.6608247461679306,
326
+ "eval_dim_96_cosine_ndcg@10": 0.7125237648315317,
327
+ "eval_dim_96_cosine_precision@1": 0.5529075309818875,
328
+ "eval_dim_96_cosine_precision@10": 0.08741658722592945,
329
+ "eval_dim_96_cosine_precision@3": 0.24721957419764853,
330
+ "eval_dim_96_cosine_precision@5": 0.1618684461391802,
331
+ "eval_dim_96_cosine_recall@1": 0.5529075309818875,
332
+ "eval_dim_96_cosine_recall@10": 0.8741658722592945,
333
+ "eval_dim_96_cosine_recall@3": 0.7416587225929456,
334
+ "eval_dim_96_cosine_recall@5": 0.8093422306959008,
335
+ "eval_runtime": 120.2085,
336
+ "eval_samples_per_second": 0.0,
337
+ "eval_sequential_score": 0.6755921916053389,
338
+ "eval_steps_per_second": 0.0,
339
+ "step": 57
340
+ }
341
+ ],
342
+ "logging_steps": 10,
343
+ "max_steps": 72,
344
+ "num_input_tokens_seen": 0,
345
+ "num_train_epochs": 4,
346
+ "save_steps": 500,
347
+ "stateful_callbacks": {
348
+ "TrainerControl": {
349
+ "args": {
350
+ "should_epoch_stop": false,
351
+ "should_evaluate": false,
352
+ "should_log": false,
353
+ "should_save": true,
354
+ "should_training_stop": false
355
+ },
356
+ "attributes": {}
357
+ }
358
+ },
359
+ "total_flos": 0.0,
360
+ "train_batch_size": 32,
361
+ "trial_name": null,
362
+ "trial_params": null
363
+ }
checkpoint-57/training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c7792b3d6d0521f78d711be5b9ea8a9fbf7602d27ea1f1e28ac62990d20c22b2
3
+ size 5624
checkpoint-57/vocab.txt ADDED
The diff for this file is too large to render. See raw diff
 
checkpoint-72/1_Pooling/config.json ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "word_embedding_dimension": 384,
3
+ "pooling_mode_cls_token": false,
4
+ "pooling_mode_mean_tokens": true,
5
+ "pooling_mode_max_tokens": false,
6
+ "pooling_mode_mean_sqrt_len_tokens": false,
7
+ "pooling_mode_weightedmean_tokens": false,
8
+ "pooling_mode_lasttoken": false,
9
+ "include_prompt": true
10
+ }
checkpoint-72/README.md ADDED
@@ -0,0 +1,842 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ language:
3
+ - en
4
+ license: apache-2.0
5
+ tags:
6
+ - sentence-transformers
7
+ - sentence-similarity
8
+ - feature-extraction
9
+ - generated_from_trainer
10
+ - dataset_size:9432
11
+ - loss:MatryoshkaLoss
12
+ - loss:MultipleNegativesRankingLoss
13
+ base_model: sentence-transformers/all-MiniLM-L6-v2
14
+ widget:
15
+ - source_sentence: Atherosclerosis and coronary heart disease are examples of what
16
+ type of body system disease?
17
+ sentences:
18
+ - Diseases of the cardiovascular system are common and may be life threatening.
19
+ Examples include atherosclerosis and coronary heart disease. A healthy lifestyle
20
+ can reduce the risk of such diseases developing. This includes avoiding smoking,
21
+ getting regular physical activity, and maintaining a healthy percent of body fat.
22
+ - Osmosis Osmosis is the diffusion of water through a semipermeable membrane according
23
+ to the concentration gradient of water across the membrane. Whereas diffusion
24
+ transports material across membranes and within cells, osmosis transports only
25
+ water across a membrane and the membrane limits the diffusion of solutes in the
26
+ water. Osmosis is a special case of diffusion. Water, like other substances, moves
27
+ from an area of higher concentration to one of lower concentration. Imagine a
28
+ beaker with a semipermeable membrane, separating the two sides or halves (Figure
29
+ 3.21). On both sides of the membrane, the water level is the same, but there are
30
+ different concentrations on each side of a dissolved substance, or solute, that
31
+ cannot cross the membrane. If the volume of the water is the same, but the concentrations
32
+ of solute are different, then there are also different concentrations of water,
33
+ the solvent, on either side of the membrane.
34
+ - Circadian rhythms are regular changes in biology or behavior that occur in a 24-hour
35
+ cycle. In humans, for example, blood pressure and body temperature change in a
36
+ regular way throughout each 24-hour day. Animals may eat and drink at certain
37
+ times of day as well. Humans have daily cycles of behavior, too. Most people start
38
+ to get sleepy after dark and have a hard time sleeping when it is light outside.
39
+ In many species, including humans, circadian rhythms are controlled by a tiny
40
+ structure called the biological clock . This structure is located in a gland at
41
+ the base of the brain. The biological clock sends signals to the body. The signals
42
+ cause regular changes in behavior and body processes. The amount of light entering
43
+ the eyes helps control the biological clock. The clock causes changes that repeat
44
+ every 24 hours.
45
+ - source_sentence: How does a cell's membrane keep extracellular materials from mixing
46
+ with it's internal components?
47
+ sentences:
48
+ - We know that the Universe is expanding. Astronomers have wondered if it is expanding
49
+ fast enough to escape the pull of gravity. Would the Universe just expand forever?
50
+ If it could not escape the pull of gravity, would it someday start to contract?
51
+ This means it would eventually get squeezed together in a big crunch. This is
52
+ the opposite of the Big Bang.
53
+ - Physical properties that do not depend on the amount of substance present are
54
+ called intensive properties . Intensive properties do not change with changes
55
+ of size, shape, or scale. Examples of intensive properties are as follows in the
56
+ Table below .
57
+ - CHAPTER REVIEW 3.1 The Cell Membrane The cell membrane provides a barrier around
58
+ the cell, separating its internal components from the extracellular environment.
59
+ It is composed of a phospholipid bilayer, with hydrophobic internal lipid “tails”
60
+ and hydrophilic external phosphate “heads. ” Various membrane proteins are scattered
61
+ throughout the bilayer, both inserted within it and attached to it peripherally.
62
+ The cell membrane is selectively permeable, allowing only a limited number of
63
+ materials to diffuse through its lipid bilayer. All materials that cross the membrane
64
+ do so using passive (non energy-requiring) or active (energy-requiring) transport
65
+ processes. During passive transport, materials move by simple diffusion or by
66
+ facilitated diffusion through the membrane, down their concentration gradient.
67
+ Water passes through the membrane in a diffusion process called osmosis. During
68
+ active transport, energy is expended to assist material movement across the membrane
69
+ in a direction against their concentration gradient. Active transport may take
70
+ place with the help of protein pumps or through the use of vesicles.
71
+ - source_sentence: An infection may be intracellular or extracellular, depending on
72
+ this?
73
+ sentences:
74
+ - '22.3 Magnetic Fields and Magnetic Field Lines • Magnetic fields can be pictorially
75
+ represented by magnetic field lines, the properties of which are as follows: 1.
76
+ The field is tangent to the magnetic field line. Field strength is proportional
77
+ to the line density. Field lines cannot cross. Field lines are continuous loops.'
78
+ - Figure 24.13 The lifecycle of an ascomycete is characterized by the production
79
+ of asci during the sexual phase. The haploid phase is the predominant phase of
80
+ the life cycle.
81
+ - Caffeine is an example of a psychoactive drug. It is found in coffee and many
82
+ other products (see Table below ). Caffeine is a central nervous system stimulant
83
+ . Like other stimulant drugs, it makes you feel more awake and alert. Other psychoactive
84
+ drugs include alcohol, nicotine, and marijuana. Each has a different effect on
85
+ the central nervous system. Alcohol, for example, is a depressant . It has the
86
+ opposite effects of a stimulant like caffeine.
87
+ - source_sentence: What does water treatment do to water?
88
+ sentences:
89
+ - Some solutes, such as sodium acetate, do not recrystallize easily. Suppose an
90
+ exactly saturated solution of sodium acetate is prepared at 50°C. As it cools
91
+ back to room temperature, no crystals appear in the solution, even though the
92
+ solubility of sodium acetate is lower at room temperature. A supersaturated solution
93
+ is a solution that contains more than the maximum amount of solute that is capable
94
+ of being dissolved at a given temperature. The recrystallization of the excess
95
+ dissolved solute in a supersaturated solution can be initiated by the addition
96
+ of a tiny crystal of solute, called a seed crystal. The seed crystal provides
97
+ a nucleation site on which the excess dissolved crystals can begin to grow. Recrystallization
98
+ from a supersaturated solution is typically very fast.
99
+ - Figure 23.13, the esophagus runs a mainly straight route through the mediastinum
100
+ of the thorax. To enter the abdomen, the esophagus penetrates the diaphragm through
101
+ an opening called the esophageal hiatus.
102
+ - Water treatment is a series of processes that remove unwanted substances from
103
+ water. More processes are needed to purify water for drinking than for other uses.
104
+ - source_sentence: 'There are only four possible bases that make up each dna nucleotide:
105
+ adenine, guanine, thymine, and?'
106
+ sentences:
107
+ - Metamorphism. This long word means “to change form. “ A rock undergoes metamorphism
108
+ if it is exposed to extreme heat and pressure within the crust. With metamorphism
109
+ , the rock does not melt all the way. The rock changes due to heat and pressure.
110
+ A metamorphic rock may have a new mineral composition and/or texture.
111
+ - Forest and Kim Starr (Flickr:Starr Environmental). Secondary succession occurs
112
+ when nature reclaims areas formerly occupied by life . CC BY 2.0.
113
+ - 'The only difference between each nucleotide is the identity of the base. There
114
+ are only four possible bases that make up each DNA nucleotide: adenine (A), guanine
115
+ (G), thymine (T), and cytosine (C).'
116
+ pipeline_tag: sentence-similarity
117
+ library_name: sentence-transformers
118
+ metrics:
119
+ - cosine_accuracy@1
120
+ - cosine_accuracy@3
121
+ - cosine_accuracy@5
122
+ - cosine_accuracy@10
123
+ - cosine_precision@1
124
+ - cosine_precision@3
125
+ - cosine_precision@5
126
+ - cosine_precision@10
127
+ - cosine_recall@1
128
+ - cosine_recall@3
129
+ - cosine_recall@5
130
+ - cosine_recall@10
131
+ - cosine_ndcg@10
132
+ - cosine_mrr@10
133
+ - cosine_map@100
134
+ model-index:
135
+ - name: MNLP M3 Encoder SciQA
136
+ results:
137
+ - task:
138
+ type: information-retrieval
139
+ name: Information Retrieval
140
+ dataset:
141
+ name: dim 384
142
+ type: dim_384
143
+ metrics:
144
+ - type: cosine_accuracy@1
145
+ value: 0.6120114394661582
146
+ name: Cosine Accuracy@1
147
+ - type: cosine_accuracy@3
148
+ value: 0.8017159199237369
149
+ name: Cosine Accuracy@3
150
+ - type: cosine_accuracy@5
151
+ value: 0.8541468064823642
152
+ name: Cosine Accuracy@5
153
+ - type: cosine_accuracy@10
154
+ value: 0.9275500476644424
155
+ name: Cosine Accuracy@10
156
+ - type: cosine_precision@1
157
+ value: 0.6120114394661582
158
+ name: Cosine Precision@1
159
+ - type: cosine_precision@3
160
+ value: 0.267238639974579
161
+ name: Cosine Precision@3
162
+ - type: cosine_precision@5
163
+ value: 0.17082936129647283
164
+ name: Cosine Precision@5
165
+ - type: cosine_precision@10
166
+ value: 0.09275500476644424
167
+ name: Cosine Precision@10
168
+ - type: cosine_recall@1
169
+ value: 0.6120114394661582
170
+ name: Cosine Recall@1
171
+ - type: cosine_recall@3
172
+ value: 0.8017159199237369
173
+ name: Cosine Recall@3
174
+ - type: cosine_recall@5
175
+ value: 0.8541468064823642
176
+ name: Cosine Recall@5
177
+ - type: cosine_recall@10
178
+ value: 0.9275500476644424
179
+ name: Cosine Recall@10
180
+ - type: cosine_ndcg@10
181
+ value: 0.7690377395004954
182
+ name: Cosine Ndcg@10
183
+ - type: cosine_mrr@10
184
+ value: 0.7184669450875366
185
+ name: Cosine Mrr@10
186
+ - type: cosine_map@100
187
+ value: 0.7210073638258574
188
+ name: Cosine Map@100
189
+ - task:
190
+ type: information-retrieval
191
+ name: Information Retrieval
192
+ dataset:
193
+ name: dim 256
194
+ type: dim_256
195
+ metrics:
196
+ - type: cosine_accuracy@1
197
+ value: 0.5977121067683508
198
+ name: Cosine Accuracy@1
199
+ - type: cosine_accuracy@3
200
+ value: 0.7912297426120114
201
+ name: Cosine Accuracy@3
202
+ - type: cosine_accuracy@5
203
+ value: 0.8398474737845567
204
+ name: Cosine Accuracy@5
205
+ - type: cosine_accuracy@10
206
+ value: 0.9151572926596759
207
+ name: Cosine Accuracy@10
208
+ - type: cosine_precision@1
209
+ value: 0.5977121067683508
210
+ name: Cosine Precision@1
211
+ - type: cosine_precision@3
212
+ value: 0.26374324753733713
213
+ name: Cosine Precision@3
214
+ - type: cosine_precision@5
215
+ value: 0.16796949475691134
216
+ name: Cosine Precision@5
217
+ - type: cosine_precision@10
218
+ value: 0.09151572926596759
219
+ name: Cosine Precision@10
220
+ - type: cosine_recall@1
221
+ value: 0.5977121067683508
222
+ name: Cosine Recall@1
223
+ - type: cosine_recall@3
224
+ value: 0.7912297426120114
225
+ name: Cosine Recall@3
226
+ - type: cosine_recall@5
227
+ value: 0.8398474737845567
228
+ name: Cosine Recall@5
229
+ - type: cosine_recall@10
230
+ value: 0.9151572926596759
231
+ name: Cosine Recall@10
232
+ - type: cosine_ndcg@10
233
+ value: 0.7558547240171754
234
+ name: Cosine Ndcg@10
235
+ - type: cosine_mrr@10
236
+ value: 0.7049529408204341
237
+ name: Cosine Mrr@10
238
+ - type: cosine_map@100
239
+ value: 0.7084736712852033
240
+ name: Cosine Map@100
241
+ - task:
242
+ type: information-retrieval
243
+ name: Information Retrieval
244
+ dataset:
245
+ name: dim 192
246
+ type: dim_192
247
+ metrics:
248
+ - type: cosine_accuracy@1
249
+ value: 0.5891325071496664
250
+ name: Cosine Accuracy@1
251
+ - type: cosine_accuracy@3
252
+ value: 0.778836987607245
253
+ name: Cosine Accuracy@3
254
+ - type: cosine_accuracy@5
255
+ value: 0.8331744518589133
256
+ name: Cosine Accuracy@5
257
+ - type: cosine_accuracy@10
258
+ value: 0.90371782650143
259
+ name: Cosine Accuracy@10
260
+ - type: cosine_precision@1
261
+ value: 0.5891325071496664
262
+ name: Cosine Precision@1
263
+ - type: cosine_precision@3
264
+ value: 0.259612329202415
265
+ name: Cosine Precision@3
266
+ - type: cosine_precision@5
267
+ value: 0.16663489037178267
268
+ name: Cosine Precision@5
269
+ - type: cosine_precision@10
270
+ value: 0.090371782650143
271
+ name: Cosine Precision@10
272
+ - type: cosine_recall@1
273
+ value: 0.5891325071496664
274
+ name: Cosine Recall@1
275
+ - type: cosine_recall@3
276
+ value: 0.778836987607245
277
+ name: Cosine Recall@3
278
+ - type: cosine_recall@5
279
+ value: 0.8331744518589133
280
+ name: Cosine Recall@5
281
+ - type: cosine_recall@10
282
+ value: 0.90371782650143
283
+ name: Cosine Recall@10
284
+ - type: cosine_ndcg@10
285
+ value: 0.7467179313530818
286
+ name: Cosine Ndcg@10
287
+ - type: cosine_mrr@10
288
+ value: 0.6964694266648511
289
+ name: Cosine Mrr@10
290
+ - type: cosine_map@100
291
+ value: 0.7004357679049269
292
+ name: Cosine Map@100
293
+ - task:
294
+ type: information-retrieval
295
+ name: Information Retrieval
296
+ dataset:
297
+ name: dim 128
298
+ type: dim_128
299
+ metrics:
300
+ - type: cosine_accuracy@1
301
+ value: 0.5662535748331744
302
+ name: Cosine Accuracy@1
303
+ - type: cosine_accuracy@3
304
+ value: 0.7626310772163966
305
+ name: Cosine Accuracy@3
306
+ - type: cosine_accuracy@5
307
+ value: 0.8265014299332698
308
+ name: Cosine Accuracy@5
309
+ - type: cosine_accuracy@10
310
+ value: 0.8913250714966635
311
+ name: Cosine Accuracy@10
312
+ - type: cosine_precision@1
313
+ value: 0.5662535748331744
314
+ name: Cosine Precision@1
315
+ - type: cosine_precision@3
316
+ value: 0.25421035907213213
317
+ name: Cosine Precision@3
318
+ - type: cosine_precision@5
319
+ value: 0.16530028598665394
320
+ name: Cosine Precision@5
321
+ - type: cosine_precision@10
322
+ value: 0.08913250714966635
323
+ name: Cosine Precision@10
324
+ - type: cosine_recall@1
325
+ value: 0.5662535748331744
326
+ name: Cosine Recall@1
327
+ - type: cosine_recall@3
328
+ value: 0.7626310772163966
329
+ name: Cosine Recall@3
330
+ - type: cosine_recall@5
331
+ value: 0.8265014299332698
332
+ name: Cosine Recall@5
333
+ - type: cosine_recall@10
334
+ value: 0.8913250714966635
335
+ name: Cosine Recall@10
336
+ - type: cosine_ndcg@10
337
+ value: 0.7275517192718437
338
+ name: Cosine Ndcg@10
339
+ - type: cosine_mrr@10
340
+ value: 0.6752375656331816
341
+ name: Cosine Mrr@10
342
+ - type: cosine_map@100
343
+ value: 0.6793502491099088
344
+ name: Cosine Map@100
345
+ - task:
346
+ type: information-retrieval
347
+ name: Information Retrieval
348
+ dataset:
349
+ name: dim 96
350
+ type: dim_96
351
+ metrics:
352
+ - type: cosine_accuracy@1
353
+ value: 0.551954242135367
354
+ name: Cosine Accuracy@1
355
+ - type: cosine_accuracy@3
356
+ value: 0.7416587225929456
357
+ name: Cosine Accuracy@3
358
+ - type: cosine_accuracy@5
359
+ value: 0.8093422306959008
360
+ name: Cosine Accuracy@5
361
+ - type: cosine_accuracy@10
362
+ value: 0.8732125834127741
363
+ name: Cosine Accuracy@10
364
+ - type: cosine_precision@1
365
+ value: 0.551954242135367
366
+ name: Cosine Precision@1
367
+ - type: cosine_precision@3
368
+ value: 0.24721957419764853
369
+ name: Cosine Precision@3
370
+ - type: cosine_precision@5
371
+ value: 0.1618684461391802
372
+ name: Cosine Precision@5
373
+ - type: cosine_precision@10
374
+ value: 0.08732125834127741
375
+ name: Cosine Precision@10
376
+ - type: cosine_recall@1
377
+ value: 0.551954242135367
378
+ name: Cosine Recall@1
379
+ - type: cosine_recall@3
380
+ value: 0.7416587225929456
381
+ name: Cosine Recall@3
382
+ - type: cosine_recall@5
383
+ value: 0.8093422306959008
384
+ name: Cosine Recall@5
385
+ - type: cosine_recall@10
386
+ value: 0.8732125834127741
387
+ name: Cosine Recall@10
388
+ - type: cosine_ndcg@10
389
+ value: 0.7119774118711802
390
+ name: Cosine Ndcg@10
391
+ - type: cosine_mrr@10
392
+ value: 0.660333348464903
393
+ name: Cosine Mrr@10
394
+ - type: cosine_map@100
395
+ value: 0.6648689218069684
396
+ name: Cosine Map@100
397
+ - task:
398
+ type: information-retrieval
399
+ name: Information Retrieval
400
+ dataset:
401
+ name: dim 64
402
+ type: dim_64
403
+ metrics:
404
+ - type: cosine_accuracy@1
405
+ value: 0.5166825548141086
406
+ name: Cosine Accuracy@1
407
+ - type: cosine_accuracy@3
408
+ value: 0.7044804575786463
409
+ name: Cosine Accuracy@3
410
+ - type: cosine_accuracy@5
411
+ value: 0.7683508102955195
412
+ name: Cosine Accuracy@5
413
+ - type: cosine_accuracy@10
414
+ value: 0.8369876072449952
415
+ name: Cosine Accuracy@10
416
+ - type: cosine_precision@1
417
+ value: 0.5166825548141086
418
+ name: Cosine Precision@1
419
+ - type: cosine_precision@3
420
+ value: 0.2348268191928821
421
+ name: Cosine Precision@3
422
+ - type: cosine_precision@5
423
+ value: 0.1536701620591039
424
+ name: Cosine Precision@5
425
+ - type: cosine_precision@10
426
+ value: 0.08369876072449953
427
+ name: Cosine Precision@10
428
+ - type: cosine_recall@1
429
+ value: 0.5166825548141086
430
+ name: Cosine Recall@1
431
+ - type: cosine_recall@3
432
+ value: 0.7044804575786463
433
+ name: Cosine Recall@3
434
+ - type: cosine_recall@5
435
+ value: 0.7683508102955195
436
+ name: Cosine Recall@5
437
+ - type: cosine_recall@10
438
+ value: 0.8369876072449952
439
+ name: Cosine Recall@10
440
+ - type: cosine_ndcg@10
441
+ value: 0.6755211859192654
442
+ name: Cosine Ndcg@10
443
+ - type: cosine_mrr@10
444
+ value: 0.6239059875618503
445
+ name: Cosine Mrr@10
446
+ - type: cosine_map@100
447
+ value: 0.6292715088820261
448
+ name: Cosine Map@100
449
+ ---
450
+
451
+ # MNLP M3 Encoder SciQA
452
+
453
+ This is a [sentence-transformers](https://www.SBERT.net) model fine-tuned from [sentence-transformers/all-MiniLM-L6-v2](https://huggingface.co/sentence-transformers/all-MiniLM-L6-v2) on the `json` dataset (9,432 training pairs with `anchor` and `positive` columns). It maps sentences & paragraphs to a 384-dimensional dense vector space and can be used for semantic textual similarity, semantic search, paraphrase mining, text classification, clustering, and more.
454
+
455
+ ## Model Details
456
+
457
+ ### Model Description
458
+ - **Model Type:** Sentence Transformer
459
+ - **Base model:** [sentence-transformers/all-MiniLM-L6-v2](https://huggingface.co/sentence-transformers/all-MiniLM-L6-v2) <!-- at revision c9745ed1d9f207416be6d2e6f8de32d1f16199bf -->
460
+ - **Maximum Sequence Length:** 256 tokens
461
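+ - **Output Dimensionality:** 384 dimensions (trained with MatryoshkaLoss and also evaluated with embeddings truncated to 256, 192, 128, 96 and 64 dimensions; see Evaluation below)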
462
+ - **Similarity Function:** Cosine Similarity
463
+ - **Training Dataset:**
464
+ - json
465
+ - **Language:** en
466
+ - **License:** apache-2.0
467
+
468
+ ### Model Sources
469
+
470
+ - **Documentation:** [Sentence Transformers Documentation](https://sbert.net)
471
+ - **Repository:** [Sentence Transformers on GitHub](https://github.com/UKPLab/sentence-transformers)
472
+ - **Hugging Face:** [Sentence Transformers on Hugging Face](https://huggingface.co/models?library=sentence-transformers)
473
+
474
+ ### Full Model Architecture
475
+
476
+ ```
477
+ SentenceTransformer(
478
+ (0): Transformer({'max_seq_length': 256, 'do_lower_case': False}) with Transformer model: BertModel
479
+ (1): Pooling({'word_embedding_dimension': 384, 'pooling_mode_cls_token': False, 'pooling_mode_mean_tokens': True, 'pooling_mode_max_tokens': False, 'pooling_mode_mean_sqrt_len_tokens': False, 'pooling_mode_weightedmean_tokens': False, 'pooling_mode_lasttoken': False, 'include_prompt': True})
480
+ (2): Normalize()
481
+ )
482
+ ```
483
+
484
+ ## Usage
485
+
486
+ ### Direct Usage (Sentence Transformers)
487
+
488
+ First install the Sentence Transformers library:
489
+
490
+ ```bash
491
+ pip install -U sentence-transformers
492
+ ```
493
+
494
+ Then you can load this model and run inference.
495
+ ```python
496
+ from sentence_transformers import SentenceTransformer
497
+
498
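+ # Download from the 🤗 Hub ("sentence_transformers_model_id" below is a placeholder; replace it with this model's Hub repository id)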
499
+ model = SentenceTransformer("sentence_transformers_model_id")
500
+ # Run inference
501
+ sentences = [
502
+ 'There are only four possible bases that make up each dna nucleotide: adenine, guanine, thymine, and?',
503
+ 'The only difference between each nucleotide is the identity of the base. There are only four possible bases that make up each DNA nucleotide: adenine (A), guanine (G), thymine (T), and cytosine (C).',
504
+ 'Metamorphism. This long word means “to change form. “ A rock undergoes metamorphism if it is exposed to extreme heat and pressure within the crust. With metamorphism , the rock does not melt all the way. The rock changes due to heat and pressure. A metamorphic rock may have a new mineral composition and/or texture.',
505
+ ]
506
+ embeddings = model.encode(sentences)
507
+ print(embeddings.shape)
508
+ # [3, 384]
509
+
510
+ # Get the similarity scores for the embeddings
511
+ similarities = model.similarity(embeddings, embeddings)
512
+ print(similarities.shape)
513
+ # [3, 3]
514
+ ```
515
+
516
+ <!--
517
+ ### Direct Usage (Transformers)
518
+
519
+ <details><summary>Click to see the direct usage in Transformers</summary>
520
+
521
+ </details>
522
+ -->
523
+
524
+ <!--
525
+ ### Downstream Usage (Sentence Transformers)
526
+
527
+ You can finetune this model on your own dataset.
528
+
529
+ <details><summary>Click to expand</summary>
530
+
531
+ </details>
532
+ -->
533
+
534
+ <!--
535
+ ### Out-of-Scope Use
536
+
537
+ *List how the model may foreseeably be misused and address what users ought not to do with the model.*
538
+ -->
539
+
540
+ ## Evaluation
541
+
542
+ ### Metrics
543
+
544
+ #### Information Retrieval
545
+
546
+ * Datasets: `dim_384`, `dim_256`, `dim_192`, `dim_128`, `dim_96` and `dim_64`
547
+ * Evaluated with [<code>InformationRetrievalEvaluator</code>](https://sbert.net/docs/package_reference/sentence_transformer/evaluation.html#sentence_transformers.evaluation.InformationRetrievalEvaluator)
548
+
549
+ | Metric | dim_384 | dim_256 | dim_192 | dim_128 | dim_96 | dim_64 |
550
+ |:--------------------|:----------|:-----------|:-----------|:-----------|:----------|:-----------|
551
+ | cosine_accuracy@1 | 0.612 | 0.5977 | 0.5891 | 0.5663 | 0.552 | 0.5167 |
552
+ | cosine_accuracy@3 | 0.8017 | 0.7912 | 0.7788 | 0.7626 | 0.7417 | 0.7045 |
553
+ | cosine_accuracy@5 | 0.8541 | 0.8398 | 0.8332 | 0.8265 | 0.8093 | 0.7684 |
554
+ | cosine_accuracy@10 | 0.9276 | 0.9152 | 0.9037 | 0.8913 | 0.8732 | 0.837 |
555
+ | cosine_precision@1 | 0.612 | 0.5977 | 0.5891 | 0.5663 | 0.552 | 0.5167 |
556
+ | cosine_precision@3 | 0.2672 | 0.2637 | 0.2596 | 0.2542 | 0.2472 | 0.2348 |
557
+ | cosine_precision@5 | 0.1708 | 0.168 | 0.1666 | 0.1653 | 0.1619 | 0.1537 |
558
+ | cosine_precision@10 | 0.0928 | 0.0915 | 0.0904 | 0.0891 | 0.0873 | 0.0837 |
559
+ | cosine_recall@1 | 0.612 | 0.5977 | 0.5891 | 0.5663 | 0.552 | 0.5167 |
560
+ | cosine_recall@3 | 0.8017 | 0.7912 | 0.7788 | 0.7626 | 0.7417 | 0.7045 |
561
+ | cosine_recall@5 | 0.8541 | 0.8398 | 0.8332 | 0.8265 | 0.8093 | 0.7684 |
562
+ | cosine_recall@10 | 0.9276 | 0.9152 | 0.9037 | 0.8913 | 0.8732 | 0.837 |
563
+ | **cosine_ndcg@10** | **0.769** | **0.7559** | **0.7467** | **0.7276** | **0.712** | **0.6755** |
564
+ | cosine_mrr@10 | 0.7185 | 0.705 | 0.6965 | 0.6752 | 0.6603 | 0.6239 |
565
+ | cosine_map@100 | 0.721 | 0.7085 | 0.7004 | 0.6794 | 0.6649 | 0.6293 |
566
+
567
+ <!--
568
+ ## Bias, Risks and Limitations
569
+
570
+ *What are the known or foreseeable issues stemming from this model? You could also flag here known failure cases or weaknesses of the model.*
571
+ -->
572
+
573
+ <!--
574
+ ### Recommendations
575
+
576
+ *What are recommendations with respect to the foreseeable issues? For example, filtering explicit content.*
577
+ -->
578
+
579
+ ## Training Details
580
+
581
+ ### Training Dataset
582
+
583
+ #### json
584
+
585
+ * Dataset: json
586
+ * Size: 9,432 training samples
587
+ * Columns: <code>anchor</code> and <code>positive</code>
588
+ * Approximate statistics based on the first 1000 samples:
589
+ | | anchor | positive |
590
+ |:--------|:----------------------------------------------------------------------------------|:------------------------------------------------------------------------------------|
591
+ | type | string | string |
592
+ | details | <ul><li>min: 7 tokens</li><li>mean: 18.15 tokens</li><li>max: 60 tokens</li></ul> | <ul><li>min: 10 tokens</li><li>mean: 94.56 tokens</li><li>max: 256 tokens</li></ul> |
593
+ * Samples:
594
+ | anchor | positive |
595
+ |:-------------------------------------------------------------------------------------------------|:-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
596
+ | <code>What is the term for atherosclerosis of arteries that supply the heart muscle?</code> | <code>Atherosclerosis of arteries that supply the heart muscle is called coronary heart disease . This disease may or may not have symptoms, such as chest pain. As the disease progresses, there is an increased risk of heart attack. A heart attack occurs when the blood supply to part of the heart muscle is blocked and cardiac muscle fibers die. Coronary heart disease is the leading cause of death of adults in the United States.</code> |
597
+ | <code>What term describes a drug that has an effect on the central nervous system?</code> | <code>Caffeine is an example of a psychoactive drug. It is found in coffee and many other products (see Table below ). Caffeine is a central nervous system stimulant . Like other stimulant drugs, it makes you feel more awake and alert. Other psychoactive drugs include alcohol, nicotine, and marijuana. Each has a different effect on the central nervous system. Alcohol, for example, is a depressant . It has the opposite effects of a stimulant like caffeine.</code> |
598
+ | <code>What scale is used to succinctly communicate the acidity or basicity of a solution?</code> | <code>The pH scale is used to succinctly communicate the acidity or basicity of a solution.</code> |
599
+ * Loss: [<code>MatryoshkaLoss</code>](https://sbert.net/docs/package_reference/sentence_transformer/losses.html#matryoshkaloss) with these parameters:
600
+ ```json
601
+ {
602
+ "loss": "MultipleNegativesRankingLoss",
603
+ "matryoshka_dims": [
604
+ 384,
605
+ 256,
606
+ 192,
607
+ 128,
608
+ 96,
609
+ 64
610
+ ],
611
+ "matryoshka_weights": [
612
+ 1,
613
+ 1,
614
+ 1,
615
+ 1,
616
+ 1,
617
+ 1
618
+ ],
619
+ "n_dims_per_step": -1
620
+ }
621
+ ```
622
+
623
+ ### Training Hyperparameters
624
+ #### Non-Default Hyperparameters
625
+
626
+ - `eval_strategy`: epoch
627
+ - `per_device_train_batch_size`: 32
628
+ - `per_device_eval_batch_size`: 16
629
+ - `gradient_accumulation_steps`: 16
630
+ - `learning_rate`: 2e-05
631
+ - `num_train_epochs`: 4
632
+ - `lr_scheduler_type`: cosine
633
+ - `warmup_ratio`: 0.1
634
+ - `bf16`: True
635
+ - `tf32`: True
636
+ - `load_best_model_at_end`: True
637
+ - `optim`: adamw_torch_fused
638
+ - `batch_sampler`: no_duplicates
639
+
640
+ #### All Hyperparameters
641
+ <details><summary>Click to expand</summary>
642
+
643
+ - `overwrite_output_dir`: False
644
+ - `do_predict`: False
645
+ - `eval_strategy`: epoch
646
+ - `prediction_loss_only`: True
647
+ - `per_device_train_batch_size`: 32
648
+ - `per_device_eval_batch_size`: 16
649
+ - `per_gpu_train_batch_size`: None
650
+ - `per_gpu_eval_batch_size`: None
651
+ - `gradient_accumulation_steps`: 16
652
+ - `eval_accumulation_steps`: None
653
+ - `torch_empty_cache_steps`: None
654
+ - `learning_rate`: 2e-05
655
+ - `weight_decay`: 0.0
656
+ - `adam_beta1`: 0.9
657
+ - `adam_beta2`: 0.999
658
+ - `adam_epsilon`: 1e-08
659
+ - `max_grad_norm`: 1.0
660
+ - `num_train_epochs`: 4
661
+ - `max_steps`: -1
662
+ - `lr_scheduler_type`: cosine
663
+ - `lr_scheduler_kwargs`: {}
664
+ - `warmup_ratio`: 0.1
665
+ - `warmup_steps`: 0
666
+ - `log_level`: passive
667
+ - `log_level_replica`: warning
668
+ - `log_on_each_node`: True
669
+ - `logging_nan_inf_filter`: True
670
+ - `save_safetensors`: True
671
+ - `save_on_each_node`: False
672
+ - `save_only_model`: False
673
+ - `restore_callback_states_from_checkpoint`: False
674
+ - `no_cuda`: False
675
+ - `use_cpu`: False
676
+ - `use_mps_device`: False
677
+ - `seed`: 42
678
+ - `data_seed`: None
679
+ - `jit_mode_eval`: False
680
+ - `use_ipex`: False
681
+ - `bf16`: True
682
+ - `fp16`: False
683
+ - `fp16_opt_level`: O1
684
+ - `half_precision_backend`: auto
685
+ - `bf16_full_eval`: False
686
+ - `fp16_full_eval`: False
687
+ - `tf32`: True
688
+ - `local_rank`: 0
689
+ - `ddp_backend`: None
690
+ - `tpu_num_cores`: None
691
+ - `tpu_metrics_debug`: False
692
+ - `debug`: []
693
+ - `dataloader_drop_last`: False
694
+ - `dataloader_num_workers`: 0
695
+ - `dataloader_prefetch_factor`: None
696
+ - `past_index`: -1
697
+ - `disable_tqdm`: False
698
+ - `remove_unused_columns`: True
699
+ - `label_names`: None
700
+ - `load_best_model_at_end`: True
701
+ - `ignore_data_skip`: False
702
+ - `fsdp`: []
703
+ - `fsdp_min_num_params`: 0
704
+ - `fsdp_config`: {'min_num_params': 0, 'xla': False, 'xla_fsdp_v2': False, 'xla_fsdp_grad_ckpt': False}
705
+ - `tp_size`: 0
706
+ - `fsdp_transformer_layer_cls_to_wrap`: None
707
+ - `accelerator_config`: {'split_batches': False, 'dispatch_batches': None, 'even_batches': True, 'use_seedable_sampler': True, 'non_blocking': False, 'gradient_accumulation_kwargs': None}
708
+ - `deepspeed`: None
709
+ - `label_smoothing_factor`: 0.0
710
+ - `optim`: adamw_torch_fused
711
+ - `optim_args`: None
712
+ - `adafactor`: False
713
+ - `group_by_length`: False
714
+ - `length_column_name`: length
715
+ - `ddp_find_unused_parameters`: None
716
+ - `ddp_bucket_cap_mb`: None
717
+ - `ddp_broadcast_buffers`: False
718
+ - `dataloader_pin_memory`: True
719
+ - `dataloader_persistent_workers`: False
720
+ - `skip_memory_metrics`: True
721
+ - `use_legacy_prediction_loop`: False
722
+ - `push_to_hub`: False
723
+ - `resume_from_checkpoint`: None
724
+ - `hub_model_id`: None
725
+ - `hub_strategy`: every_save
726
+ - `hub_private_repo`: None
727
+ - `hub_always_push`: False
728
+ - `gradient_checkpointing`: False
729
+ - `gradient_checkpointing_kwargs`: None
730
+ - `include_inputs_for_metrics`: False
731
+ - `include_for_metrics`: []
732
+ - `eval_do_concat_batches`: True
733
+ - `fp16_backend`: auto
734
+ - `push_to_hub_model_id`: None
735
+ - `push_to_hub_organization`: None
736
+ - `mp_parameters`:
737
+ - `auto_find_batch_size`: False
738
+ - `full_determinism`: False
739
+ - `torchdynamo`: None
740
+ - `ray_scope`: last
741
+ - `ddp_timeout`: 1800
742
+ - `torch_compile`: False
743
+ - `torch_compile_backend`: None
744
+ - `torch_compile_mode`: None
745
+ - `include_tokens_per_second`: False
746
+ - `include_num_input_tokens_seen`: False
747
+ - `neftune_noise_alpha`: None
748
+ - `optim_target_modules`: None
749
+ - `batch_eval_metrics`: False
750
+ - `eval_on_start`: False
751
+ - `use_liger_kernel`: False
752
+ - `eval_use_gather_object`: False
753
+ - `average_tokens_across_devices`: False
754
+ - `prompts`: None
755
+ - `batch_sampler`: no_duplicates
756
+ - `multi_dataset_batch_sampler`: proportional
757
+
758
+ </details>
759
+
760
+ ### Training Logs
761
+ | Epoch | Step | Training Loss | dim_384_cosine_ndcg@10 | dim_256_cosine_ndcg@10 | dim_192_cosine_ndcg@10 | dim_128_cosine_ndcg@10 | dim_96_cosine_ndcg@10 | dim_64_cosine_ndcg@10 |
762
+ |:------:|:----:|:-------------:|:----------------------:|:----------------------:|:----------------------:|:----------------------:|:---------------------:|:---------------------:|
763
+ | 0.5424 | 10 | 22.4049 | - | - | - | - | - | - |
764
+ | 1.0 | 19 | - | 0.7424 | 0.7315 | 0.7263 | 0.7093 | 0.6919 | 0.6575 |
765
+ | 1.0542 | 20 | 16.6616 | - | - | - | - | - | - |
766
+ | 1.5966 | 30 | 16.8367 | - | - | - | - | - | - |
767
+ | 2.0 | 38 | - | 0.7612 | 0.7520 | 0.7431 | 0.7261 | 0.7097 | 0.6708 |
768
+ | 2.1085 | 40 | 12.8169 | - | - | - | - | - | - |
769
+ | 2.6508 | 50 | 13.7826 | - | - | - | - | - | - |
770
+ | 3.0 | 57 | - | 0.7675 | 0.7548 | 0.7477 | 0.7274 | 0.7125 | 0.6756 |
771
+ | 3.1627 | 60 | 12.4455 | - | - | - | - | - | - |
772
+ | 3.7051 | 70 | 12.2968 | - | - | - | - | - | - |
773
+ | 3.8136 | 72 | - | 0.7690 | 0.7559 | 0.7467 | 0.7276 | 0.7120 | 0.6755 |
774
+
775
+
776
+ ### Framework Versions
777
+ - Python: 3.12.8
778
+ - Sentence Transformers: 3.4.1
779
+ - Transformers: 4.51.3
780
+ - PyTorch: 2.5.1+cu124
781
+ - Accelerate: 1.3.0
782
+ - Datasets: 3.6.0
783
+ - Tokenizers: 0.21.0
784
+
785
+ ## Citation
786
+
787
+ ### BibTeX
788
+
789
+ #### Sentence Transformers
790
+ ```bibtex
791
+ @inproceedings{reimers-2019-sentence-bert,
792
+ title = "Sentence-BERT: Sentence Embeddings using Siamese BERT-Networks",
793
+ author = "Reimers, Nils and Gurevych, Iryna",
794
+ booktitle = "Proceedings of the 2019 Conference on Empirical Methods in Natural Language Processing",
795
+ month = "11",
796
+ year = "2019",
797
+ publisher = "Association for Computational Linguistics",
798
+ url = "https://arxiv.org/abs/1908.10084",
799
+ }
800
+ ```
801
+
802
+ #### MatryoshkaLoss
803
+ ```bibtex
804
+ @misc{kusupati2024matryoshka,
805
+ title={Matryoshka Representation Learning},
806
+ author={Aditya Kusupati and Gantavya Bhatt and Aniket Rege and Matthew Wallingford and Aditya Sinha and Vivek Ramanujan and William Howard-Snyder and Kaifeng Chen and Sham Kakade and Prateek Jain and Ali Farhadi},
807
+ year={2024},
808
+ eprint={2205.13147},
809
+ archivePrefix={arXiv},
810
+ primaryClass={cs.LG}
811
+ }
812
+ ```
813
+
814
+ #### MultipleNegativesRankingLoss
815
+ ```bibtex
816
+ @misc{henderson2017efficient,
817
+ title={Efficient Natural Language Response Suggestion for Smart Reply},
818
+ author={Matthew Henderson and Rami Al-Rfou and Brian Strope and Yun-hsuan Sung and Laszlo Lukacs and Ruiqi Guo and Sanjiv Kumar and Balint Miklos and Ray Kurzweil},
819
+ year={2017},
820
+ eprint={1705.00652},
821
+ archivePrefix={arXiv},
822
+ primaryClass={cs.CL}
823
+ }
824
+ ```
825
+
826
+ <!--
827
+ ## Glossary
828
+
829
+ *Clearly define terms in order to be accessible across audiences.*
830
+ -->
831
+
832
+ <!--
833
+ ## Model Card Authors
834
+
835
+ *Lists the people who create the model card, providing recognition and accountability for the detailed work that goes into its construction.*
836
+ -->
837
+
838
+ <!--
839
+ ## Model Card Contact
840
+
841
+ *Provides a way for people who have updates to the Model Card, suggestions, or questions, to contact the Model Card authors.*
842
+ -->
checkpoint-72/config.json ADDED
@@ -0,0 +1,25 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "architectures": [
3
+ "BertModel"
4
+ ],
5
+ "attention_probs_dropout_prob": 0.1,
6
+ "classifier_dropout": null,
7
+ "gradient_checkpointing": false,
8
+ "hidden_act": "gelu",
9
+ "hidden_dropout_prob": 0.1,
10
+ "hidden_size": 384,
11
+ "initializer_range": 0.02,
12
+ "intermediate_size": 1536,
13
+ "layer_norm_eps": 1e-12,
14
+ "max_position_embeddings": 512,
15
+ "model_type": "bert",
16
+ "num_attention_heads": 12,
17
+ "num_hidden_layers": 6,
18
+ "pad_token_id": 0,
19
+ "position_embedding_type": "absolute",
20
+ "torch_dtype": "float32",
21
+ "transformers_version": "4.51.3",
22
+ "type_vocab_size": 2,
23
+ "use_cache": true,
24
+ "vocab_size": 30522
25
+ }
checkpoint-72/config_sentence_transformers.json ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "__version__": {
3
+ "sentence_transformers": "3.4.1",
4
+ "transformers": "4.51.3",
5
+ "pytorch": "2.5.1+cu124"
6
+ },
7
+ "prompts": {},
8
+ "default_prompt_name": null,
9
+ "similarity_fn_name": "cosine"
10
+ }
checkpoint-72/model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7780ff320e3af2bf82dd782d7a2ed67e93016e9de957e0414ad80606a2de56fb
3
+ size 90864192
checkpoint-72/modules.json ADDED
@@ -0,0 +1,20 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [
2
+ {
3
+ "idx": 0,
4
+ "name": "0",
5
+ "path": "",
6
+ "type": "sentence_transformers.models.Transformer"
7
+ },
8
+ {
9
+ "idx": 1,
10
+ "name": "1",
11
+ "path": "1_Pooling",
12
+ "type": "sentence_transformers.models.Pooling"
13
+ },
14
+ {
15
+ "idx": 2,
16
+ "name": "2",
17
+ "path": "2_Normalize",
18
+ "type": "sentence_transformers.models.Normalize"
19
+ }
20
+ ]
checkpoint-72/optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:25b7ee7244cceffc106c594d5c4b03f39f00382a0ab7d54854704343e2826482
3
+ size 180609146
checkpoint-72/rng_state.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a912f9519662c8f3b93f1294e923d8dedbbc9477760e6ce924d59bb9f23e5188
3
+ size 14244
checkpoint-72/scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c0ea1a884b139cef8d4ee3a8825068be6171b4f2afe9b8e0b3f291a200d3aab1
3
+ size 1064
checkpoint-72/sentence_bert_config.json ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ {
2
+ "max_seq_length": 256,
3
+ "do_lower_case": false
4
+ }
checkpoint-72/special_tokens_map.json ADDED
@@ -0,0 +1,37 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "cls_token": {
3
+ "content": "[CLS]",
4
+ "lstrip": false,
5
+ "normalized": false,
6
+ "rstrip": false,
7
+ "single_word": false
8
+ },
9
+ "mask_token": {
10
+ "content": "[MASK]",
11
+ "lstrip": false,
12
+ "normalized": false,
13
+ "rstrip": false,
14
+ "single_word": false
15
+ },
16
+ "pad_token": {
17
+ "content": "[PAD]",
18
+ "lstrip": false,
19
+ "normalized": false,
20
+ "rstrip": false,
21
+ "single_word": false
22
+ },
23
+ "sep_token": {
24
+ "content": "[SEP]",
25
+ "lstrip": false,
26
+ "normalized": false,
27
+ "rstrip": false,
28
+ "single_word": false
29
+ },
30
+ "unk_token": {
31
+ "content": "[UNK]",
32
+ "lstrip": false,
33
+ "normalized": false,
34
+ "rstrip": false,
35
+ "single_word": false
36
+ }
37
+ }
checkpoint-72/tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
checkpoint-72/tokenizer_config.json ADDED
@@ -0,0 +1,65 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "added_tokens_decoder": {
3
+ "0": {
4
+ "content": "[PAD]",
5
+ "lstrip": false,
6
+ "normalized": false,
7
+ "rstrip": false,
8
+ "single_word": false,
9
+ "special": true
10
+ },
11
+ "100": {
12
+ "content": "[UNK]",
13
+ "lstrip": false,
14
+ "normalized": false,
15
+ "rstrip": false,
16
+ "single_word": false,
17
+ "special": true
18
+ },
19
+ "101": {
20
+ "content": "[CLS]",
21
+ "lstrip": false,
22
+ "normalized": false,
23
+ "rstrip": false,
24
+ "single_word": false,
25
+ "special": true
26
+ },
27
+ "102": {
28
+ "content": "[SEP]",
29
+ "lstrip": false,
30
+ "normalized": false,
31
+ "rstrip": false,
32
+ "single_word": false,
33
+ "special": true
34
+ },
35
+ "103": {
36
+ "content": "[MASK]",
37
+ "lstrip": false,
38
+ "normalized": false,
39
+ "rstrip": false,
40
+ "single_word": false,
41
+ "special": true
42
+ }
43
+ },
44
+ "clean_up_tokenization_spaces": false,
45
+ "cls_token": "[CLS]",
46
+ "do_basic_tokenize": true,
47
+ "do_lower_case": true,
48
+ "extra_special_tokens": {},
49
+ "mask_token": "[MASK]",
50
+ "max_length": 128,
51
+ "model_max_length": 256,
52
+ "never_split": null,
53
+ "pad_to_multiple_of": null,
54
+ "pad_token": "[PAD]",
55
+ "pad_token_type_id": 0,
56
+ "padding_side": "right",
57
+ "sep_token": "[SEP]",
58
+ "stride": 0,
59
+ "strip_accents": null,
60
+ "tokenize_chinese_chars": true,
61
+ "tokenizer_class": "BertTokenizer",
62
+ "truncation_side": "right",
63
+ "truncation_strategy": "longest_first",
64
+ "unk_token": "[UNK]"
65
+ }
checkpoint-72/trainer_state.json ADDED
@@ -0,0 +1,475 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_global_step": 72,
3
+ "best_metric": 0.7275517192718437,
4
+ "best_model_checkpoint": "MNLP_M3_document_encoder_sciqa/checkpoint-72",
5
+ "epoch": 3.8135593220338984,
6
+ "eval_steps": 500,
7
+ "global_step": 72,
8
+ "is_hyper_param_search": false,
9
+ "is_local_process_zero": true,
10
+ "is_world_process_zero": true,
11
+ "log_history": [
12
+ {
13
+ "epoch": 0.5423728813559322,
14
+ "grad_norm": 164.72093200683594,
15
+ "learning_rate": 1.9987954562051724e-05,
16
+ "loss": 22.4049,
17
+ "step": 10
18
+ },
19
+ {
20
+ "epoch": 1.0,
21
+ "eval_dim_128_cosine_accuracy@1": 0.5452812202097236,
22
+ "eval_dim_128_cosine_accuracy@10": 0.8760724499523356,
23
+ "eval_dim_128_cosine_accuracy@3": 0.7416587225929456,
24
+ "eval_dim_128_cosine_accuracy@5": 0.8074356530028599,
25
+ "eval_dim_128_cosine_map@100": 0.6605612754102786,
26
+ "eval_dim_128_cosine_mrr@10": 0.6559349796480402,
27
+ "eval_dim_128_cosine_ndcg@10": 0.7092688022688834,
28
+ "eval_dim_128_cosine_precision@1": 0.5452812202097236,
29
+ "eval_dim_128_cosine_precision@10": 0.08760724499523356,
30
+ "eval_dim_128_cosine_precision@3": 0.24721957419764853,
31
+ "eval_dim_128_cosine_precision@5": 0.161487130600572,
32
+ "eval_dim_128_cosine_recall@1": 0.5452812202097236,
33
+ "eval_dim_128_cosine_recall@10": 0.8760724499523356,
34
+ "eval_dim_128_cosine_recall@3": 0.7416587225929456,
35
+ "eval_dim_128_cosine_recall@5": 0.8074356530028599,
36
+ "eval_dim_192_cosine_accuracy@1": 0.5624404194470924,
37
+ "eval_dim_192_cosine_accuracy@10": 0.8932316491897044,
38
+ "eval_dim_192_cosine_accuracy@3": 0.7597712106768351,
39
+ "eval_dim_192_cosine_accuracy@5": 0.8188751191611058,
40
+ "eval_dim_192_cosine_map@100": 0.677245219852975,
41
+ "eval_dim_192_cosine_mrr@10": 0.6730234388003697,
42
+ "eval_dim_192_cosine_ndcg@10": 0.7262712999939527,
43
+ "eval_dim_192_cosine_precision@1": 0.5624404194470924,
44
+ "eval_dim_192_cosine_precision@10": 0.08932316491897044,
45
+ "eval_dim_192_cosine_precision@3": 0.25325707022561167,
46
+ "eval_dim_192_cosine_precision@5": 0.16377502383222117,
47
+ "eval_dim_192_cosine_recall@1": 0.5624404194470924,
48
+ "eval_dim_192_cosine_recall@10": 0.8932316491897044,
49
+ "eval_dim_192_cosine_recall@3": 0.7597712106768351,
50
+ "eval_dim_192_cosine_recall@5": 0.8188751191611058,
51
+ "eval_dim_256_cosine_accuracy@1": 0.5653002859866539,
52
+ "eval_dim_256_cosine_accuracy@10": 0.8960915157292659,
53
+ "eval_dim_256_cosine_accuracy@3": 0.7683508102955195,
54
+ "eval_dim_256_cosine_accuracy@5": 0.8236415633937083,
55
+ "eval_dim_256_cosine_map@100": 0.6831583296339104,
56
+ "eval_dim_256_cosine_mrr@10": 0.6786784844220503,
57
+ "eval_dim_256_cosine_ndcg@10": 0.7314611486548883,
58
+ "eval_dim_256_cosine_precision@1": 0.5653002859866539,
59
+ "eval_dim_256_cosine_precision@10": 0.08960915157292659,
60
+ "eval_dim_256_cosine_precision@3": 0.25611693676517316,
61
+ "eval_dim_256_cosine_precision@5": 0.16472831267874166,
62
+ "eval_dim_256_cosine_recall@1": 0.5653002859866539,
63
+ "eval_dim_256_cosine_recall@10": 0.8960915157292659,
64
+ "eval_dim_256_cosine_recall@3": 0.7683508102955195,
65
+ "eval_dim_256_cosine_recall@5": 0.8236415633937083,
66
+ "eval_dim_384_cosine_accuracy@1": 0.5786463298379408,
67
+ "eval_dim_384_cosine_accuracy@10": 0.9075309818875119,
68
+ "eval_dim_384_cosine_accuracy@3": 0.776930409914204,
69
+ "eval_dim_384_cosine_accuracy@5": 0.8417540514775977,
70
+ "eval_dim_384_cosine_map@100": 0.6932934943306605,
71
+ "eval_dim_384_cosine_mrr@10": 0.6894563227261042,
72
+ "eval_dim_384_cosine_ndcg@10": 0.7423737824827953,
73
+ "eval_dim_384_cosine_precision@1": 0.5786463298379408,
74
+ "eval_dim_384_cosine_precision@10": 0.0907530981887512,
75
+ "eval_dim_384_cosine_precision@3": 0.2589768033047346,
76
+ "eval_dim_384_cosine_precision@5": 0.16835081029551957,
77
+ "eval_dim_384_cosine_recall@1": 0.5786463298379408,
78
+ "eval_dim_384_cosine_recall@10": 0.9075309818875119,
79
+ "eval_dim_384_cosine_recall@3": 0.776930409914204,
80
+ "eval_dim_384_cosine_recall@5": 0.8417540514775977,
81
+ "eval_dim_64_cosine_accuracy@1": 0.49285033365109626,
82
+ "eval_dim_64_cosine_accuracy@10": 0.8274547187797903,
83
+ "eval_dim_64_cosine_accuracy@3": 0.684461391801716,
84
+ "eval_dim_64_cosine_accuracy@5": 0.7578646329837941,
85
+ "eval_dim_64_cosine_map@100": 0.6088952628032813,
86
+ "eval_dim_64_cosine_mrr@10": 0.6032237807738285,
87
+ "eval_dim_64_cosine_ndcg@10": 0.6575406372744073,
88
+ "eval_dim_64_cosine_precision@1": 0.49285033365109626,
89
+ "eval_dim_64_cosine_precision@10": 0.08274547187797902,
90
+ "eval_dim_64_cosine_precision@3": 0.2281537972672386,
91
+ "eval_dim_64_cosine_precision@5": 0.1515729265967588,
92
+ "eval_dim_64_cosine_recall@1": 0.49285033365109626,
93
+ "eval_dim_64_cosine_recall@10": 0.8274547187797903,
94
+ "eval_dim_64_cosine_recall@3": 0.684461391801716,
95
+ "eval_dim_64_cosine_recall@5": 0.7578646329837941,
96
+ "eval_dim_96_cosine_accuracy@1": 0.5214489990467112,
97
+ "eval_dim_96_cosine_accuracy@10": 0.8636796949475691,
98
+ "eval_dim_96_cosine_accuracy@3": 0.7264061010486177,
99
+ "eval_dim_96_cosine_accuracy@5": 0.7893231649189705,
100
+ "eval_dim_96_cosine_map@100": 0.6418431352074736,
101
+ "eval_dim_96_cosine_mrr@10": 0.6369528046363133,
102
+ "eval_dim_96_cosine_ndcg@10": 0.6919097155042885,
103
+ "eval_dim_96_cosine_precision@1": 0.5214489990467112,
104
+ "eval_dim_96_cosine_precision@10": 0.0863679694947569,
105
+ "eval_dim_96_cosine_precision@3": 0.2421353670162059,
106
+ "eval_dim_96_cosine_precision@5": 0.15786463298379408,
107
+ "eval_dim_96_cosine_recall@1": 0.5214489990467112,
108
+ "eval_dim_96_cosine_recall@10": 0.8636796949475691,
109
+ "eval_dim_96_cosine_recall@3": 0.7264061010486177,
110
+ "eval_dim_96_cosine_recall@5": 0.7893231649189705,
111
+ "eval_runtime": 116.4269,
112
+ "eval_samples_per_second": 0.0,
113
+ "eval_sequential_score": 0.6575406372744073,
114
+ "eval_steps_per_second": 0.0,
115
+ "step": 19
116
+ },
117
+ {
118
+ "epoch": 1.0542372881355933,
119
+ "grad_norm": 107.04779815673828,
120
+ "learning_rate": 1.8577286100002723e-05,
121
+ "loss": 16.6616,
122
+ "step": 20
123
+ },
124
+ {
125
+ "epoch": 1.5966101694915253,
126
+ "grad_norm": 97.63832092285156,
127
+ "learning_rate": 1.5141027441932217e-05,
128
+ "loss": 16.8367,
129
+ "step": 30
130
+ },
131
+ {
132
+ "epoch": 2.0,
133
+ "eval_dim_128_cosine_accuracy@1": 0.567206863679695,
134
+ "eval_dim_128_cosine_accuracy@10": 0.886558627264061,
135
+ "eval_dim_128_cosine_accuracy@3": 0.7607244995233555,
136
+ "eval_dim_128_cosine_accuracy@5": 0.8236415633937083,
137
+ "eval_dim_128_cosine_map@100": 0.6790430112153837,
138
+ "eval_dim_128_cosine_mrr@10": 0.6746886679679823,
139
+ "eval_dim_128_cosine_ndcg@10": 0.7260517487265687,
140
+ "eval_dim_128_cosine_precision@1": 0.567206863679695,
141
+ "eval_dim_128_cosine_precision@10": 0.0886558627264061,
142
+ "eval_dim_128_cosine_precision@3": 0.25357483317445184,
143
+ "eval_dim_128_cosine_precision@5": 0.16472831267874166,
144
+ "eval_dim_128_cosine_recall@1": 0.567206863679695,
145
+ "eval_dim_128_cosine_recall@10": 0.886558627264061,
146
+ "eval_dim_128_cosine_recall@3": 0.7607244995233555,
147
+ "eval_dim_128_cosine_recall@5": 0.8236415633937083,
148
+ "eval_dim_192_cosine_accuracy@1": 0.5805529075309819,
149
+ "eval_dim_192_cosine_accuracy@10": 0.9008579599618685,
150
+ "eval_dim_192_cosine_accuracy@3": 0.782650142993327,
151
+ "eval_dim_192_cosine_accuracy@5": 0.8322211630123928,
152
+ "eval_dim_192_cosine_map@100": 0.6964841260809953,
153
+ "eval_dim_192_cosine_mrr@10": 0.6923562879234952,
154
+ "eval_dim_192_cosine_ndcg@10": 0.7430712975035773,
155
+ "eval_dim_192_cosine_precision@1": 0.5805529075309819,
156
+ "eval_dim_192_cosine_precision@10": 0.09008579599618685,
157
+ "eval_dim_192_cosine_precision@3": 0.26088338099777564,
158
+ "eval_dim_192_cosine_precision@5": 0.16644423260247856,
159
+ "eval_dim_192_cosine_recall@1": 0.5805529075309819,
160
+ "eval_dim_192_cosine_recall@10": 0.9008579599618685,
161
+ "eval_dim_192_cosine_recall@3": 0.782650142993327,
162
+ "eval_dim_192_cosine_recall@5": 0.8322211630123928,
163
+ "eval_dim_256_cosine_accuracy@1": 0.5919923736892279,
164
+ "eval_dim_256_cosine_accuracy@10": 0.9142040038131554,
165
+ "eval_dim_256_cosine_accuracy@3": 0.7902764537654909,
166
+ "eval_dim_256_cosine_accuracy@5": 0.8360343183984748,
167
+ "eval_dim_256_cosine_map@100": 0.7038093293311698,
168
+ "eval_dim_256_cosine_mrr@10": 0.700305279404422,
169
+ "eval_dim_256_cosine_ndcg@10": 0.7520267351833514,
170
+ "eval_dim_256_cosine_precision@1": 0.5919923736892279,
171
+ "eval_dim_256_cosine_precision@10": 0.09142040038131555,
172
+ "eval_dim_256_cosine_precision@3": 0.26342548458849696,
173
+ "eval_dim_256_cosine_precision@5": 0.16720686367969492,
174
+ "eval_dim_256_cosine_recall@1": 0.5919923736892279,
175
+ "eval_dim_256_cosine_recall@10": 0.9142040038131554,
176
+ "eval_dim_256_cosine_recall@3": 0.7902764537654909,
177
+ "eval_dim_256_cosine_recall@5": 0.8360343183984748,
178
+ "eval_dim_384_cosine_accuracy@1": 0.6015252621544328,
179
+ "eval_dim_384_cosine_accuracy@10": 0.9199237368922784,
180
+ "eval_dim_384_cosine_accuracy@3": 0.7959961868446139,
181
+ "eval_dim_384_cosine_accuracy@5": 0.8531935176358436,
182
+ "eval_dim_384_cosine_map@100": 0.713601684515785,
183
+ "eval_dim_384_cosine_mrr@10": 0.7104082497314151,
184
+ "eval_dim_384_cosine_ndcg@10": 0.761241503632434,
185
+ "eval_dim_384_cosine_precision@1": 0.6015252621544328,
186
+ "eval_dim_384_cosine_precision@10": 0.09199237368922783,
187
+ "eval_dim_384_cosine_precision@3": 0.26533206228153794,
188
+ "eval_dim_384_cosine_precision@5": 0.17063870352716873,
189
+ "eval_dim_384_cosine_recall@1": 0.6015252621544328,
190
+ "eval_dim_384_cosine_recall@10": 0.9199237368922784,
191
+ "eval_dim_384_cosine_recall@3": 0.7959961868446139,
192
+ "eval_dim_384_cosine_recall@5": 0.8531935176358436,
193
+ "eval_dim_64_cosine_accuracy@1": 0.5138226882745471,
194
+ "eval_dim_64_cosine_accuracy@10": 0.8341277407054337,
195
+ "eval_dim_64_cosine_accuracy@3": 0.7016205910390848,
196
+ "eval_dim_64_cosine_accuracy@5": 0.7645376549094376,
197
+ "eval_dim_64_cosine_map@100": 0.6242158272303533,
198
+ "eval_dim_64_cosine_mrr@10": 0.618670464690484,
199
+ "eval_dim_64_cosine_ndcg@10": 0.6707950308444217,
200
+ "eval_dim_64_cosine_precision@1": 0.5138226882745471,
201
+ "eval_dim_64_cosine_precision@10": 0.08341277407054337,
202
+ "eval_dim_64_cosine_precision@3": 0.2338735303463616,
203
+ "eval_dim_64_cosine_precision@5": 0.1529075309818875,
204
+ "eval_dim_64_cosine_recall@1": 0.5138226882745471,
205
+ "eval_dim_64_cosine_recall@10": 0.8341277407054337,
206
+ "eval_dim_64_cosine_recall@3": 0.7016205910390848,
207
+ "eval_dim_64_cosine_recall@5": 0.7645376549094376,
208
+ "eval_dim_96_cosine_accuracy@1": 0.5471877979027645,
209
+ "eval_dim_96_cosine_accuracy@10": 0.8722592945662536,
210
+ "eval_dim_96_cosine_accuracy@3": 0.7407054337464252,
211
+ "eval_dim_96_cosine_accuracy@5": 0.8017159199237369,
212
+ "eval_dim_96_cosine_map@100": 0.6622003643008398,
213
+ "eval_dim_96_cosine_mrr@10": 0.6576811627097615,
214
+ "eval_dim_96_cosine_ndcg@10": 0.7097194683573752,
215
+ "eval_dim_96_cosine_precision@1": 0.5471877979027645,
216
+ "eval_dim_96_cosine_precision@10": 0.08722592945662536,
217
+ "eval_dim_96_cosine_precision@3": 0.2469018112488084,
218
+ "eval_dim_96_cosine_precision@5": 0.16034318398474737,
219
+ "eval_dim_96_cosine_recall@1": 0.5471877979027645,
220
+ "eval_dim_96_cosine_recall@10": 0.8722592945662536,
221
+ "eval_dim_96_cosine_recall@3": 0.7407054337464252,
222
+ "eval_dim_96_cosine_recall@5": 0.8017159199237369,
223
+ "eval_runtime": 119.8934,
224
+ "eval_samples_per_second": 0.0,
225
+ "eval_sequential_score": 0.6707950308444217,
226
+ "eval_steps_per_second": 0.0,
227
+ "step": 38
228
+ },
229
+ {
230
+ "epoch": 2.1084745762711865,
231
+ "grad_norm": 113.89473724365234,
232
+ "learning_rate": 1.0490676743274181e-05,
233
+ "loss": 12.8169,
234
+ "step": 40
235
+ },
236
+ {
237
+ "epoch": 2.6508474576271186,
238
+ "grad_norm": 81.27326965332031,
239
+ "learning_rate": 5.724449065697182e-06,
240
+ "loss": 13.7826,
241
+ "step": 50
242
+ },
243
+ {
244
+ "epoch": 3.0,
245
+ "eval_dim_128_cosine_accuracy@1": 0.567206863679695,
246
+ "eval_dim_128_cosine_accuracy@10": 0.8903717826501429,
247
+ "eval_dim_128_cosine_accuracy@3": 0.7616777883698761,
248
+ "eval_dim_128_cosine_accuracy@5": 0.8265014299332698,
249
+ "eval_dim_128_cosine_map@100": 0.6794753898354032,
250
+ "eval_dim_128_cosine_mrr@10": 0.6752920392815543,
251
+ "eval_dim_128_cosine_ndcg@10": 0.7273531110418706,
252
+ "eval_dim_128_cosine_precision@1": 0.567206863679695,
253
+ "eval_dim_128_cosine_precision@10": 0.08903717826501431,
254
+ "eval_dim_128_cosine_precision@3": 0.253892596123292,
255
+ "eval_dim_128_cosine_precision@5": 0.16530028598665394,
256
+ "eval_dim_128_cosine_recall@1": 0.567206863679695,
257
+ "eval_dim_128_cosine_recall@10": 0.8903717826501429,
258
+ "eval_dim_128_cosine_recall@3": 0.7616777883698761,
259
+ "eval_dim_128_cosine_recall@5": 0.8265014299332698,
260
+ "eval_dim_192_cosine_accuracy@1": 0.5910390848427073,
261
+ "eval_dim_192_cosine_accuracy@10": 0.9046711153479504,
262
+ "eval_dim_192_cosine_accuracy@3": 0.7778836987607245,
263
+ "eval_dim_192_cosine_accuracy@5": 0.8360343183984748,
264
+ "eval_dim_192_cosine_map@100": 0.7014228144337117,
265
+ "eval_dim_192_cosine_mrr@10": 0.6975449029309853,
266
+ "eval_dim_192_cosine_ndcg@10": 0.7477240665900656,
267
+ "eval_dim_192_cosine_precision@1": 0.5910390848427073,
268
+ "eval_dim_192_cosine_precision@10": 0.09046711153479504,
269
+ "eval_dim_192_cosine_precision@3": 0.25929456625357483,
270
+ "eval_dim_192_cosine_precision@5": 0.16720686367969495,
271
+ "eval_dim_192_cosine_recall@1": 0.5910390848427073,
272
+ "eval_dim_192_cosine_recall@10": 0.9046711153479504,
273
+ "eval_dim_192_cosine_recall@3": 0.7778836987607245,
274
+ "eval_dim_192_cosine_recall@5": 0.8360343183984748,
275
+ "eval_dim_256_cosine_accuracy@1": 0.5948522402287894,
276
+ "eval_dim_256_cosine_accuracy@10": 0.9151572926596759,
277
+ "eval_dim_256_cosine_accuracy@3": 0.792183031458532,
278
+ "eval_dim_256_cosine_accuracy@5": 0.8398474737845567,
279
+ "eval_dim_256_cosine_map@100": 0.7070932589939358,
280
+ "eval_dim_256_cosine_mrr@10": 0.7035797509343749,
281
+ "eval_dim_256_cosine_ndcg@10": 0.7548435122429773,
282
+ "eval_dim_256_cosine_precision@1": 0.5948522402287894,
283
+ "eval_dim_256_cosine_precision@10": 0.09151572926596759,
284
+ "eval_dim_256_cosine_precision@3": 0.2640610104861773,
285
+ "eval_dim_256_cosine_precision@5": 0.16796949475691134,
286
+ "eval_dim_256_cosine_recall@1": 0.5948522402287894,
287
+ "eval_dim_256_cosine_recall@10": 0.9151572926596759,
288
+ "eval_dim_256_cosine_recall@3": 0.792183031458532,
289
+ "eval_dim_256_cosine_recall@5": 0.8398474737845567,
290
+ "eval_dim_384_cosine_accuracy@1": 0.6101048617731173,
291
+ "eval_dim_384_cosine_accuracy@10": 0.9256434699714013,
292
+ "eval_dim_384_cosine_accuracy@3": 0.8007626310772163,
293
+ "eval_dim_384_cosine_accuracy@5": 0.8541468064823642,
294
+ "eval_dim_384_cosine_map@100": 0.7197084605820631,
295
+ "eval_dim_384_cosine_mrr@10": 0.7170116664396936,
296
+ "eval_dim_384_cosine_ndcg@10": 0.7675175612283535,
297
+ "eval_dim_384_cosine_precision@1": 0.6101048617731173,
298
+ "eval_dim_384_cosine_precision@10": 0.09256434699714014,
299
+ "eval_dim_384_cosine_precision@3": 0.2669208770257388,
300
+ "eval_dim_384_cosine_precision@5": 0.17082936129647283,
301
+ "eval_dim_384_cosine_recall@1": 0.6101048617731173,
302
+ "eval_dim_384_cosine_recall@10": 0.9256434699714013,
303
+ "eval_dim_384_cosine_recall@3": 0.8007626310772163,
304
+ "eval_dim_384_cosine_recall@5": 0.8541468064823642,
305
+ "eval_dim_64_cosine_accuracy@1": 0.5166825548141086,
306
+ "eval_dim_64_cosine_accuracy@10": 0.8369876072449952,
307
+ "eval_dim_64_cosine_accuracy@3": 0.7054337464251669,
308
+ "eval_dim_64_cosine_accuracy@5": 0.7673975214489991,
309
+ "eval_dim_64_cosine_map@100": 0.629350282837756,
310
+ "eval_dim_64_cosine_mrr@10": 0.6240088822309986,
311
+ "eval_dim_64_cosine_ndcg@10": 0.6755921916053389,
312
+ "eval_dim_64_cosine_precision@1": 0.5166825548141086,
313
+ "eval_dim_64_cosine_precision@10": 0.08369876072449953,
314
+ "eval_dim_64_cosine_precision@3": 0.23514458214172226,
315
+ "eval_dim_64_cosine_precision@5": 0.1534795042897998,
316
+ "eval_dim_64_cosine_recall@1": 0.5166825548141086,
317
+ "eval_dim_64_cosine_recall@10": 0.8369876072449952,
318
+ "eval_dim_64_cosine_recall@3": 0.7054337464251669,
319
+ "eval_dim_64_cosine_recall@5": 0.7673975214489991,
320
+ "eval_dim_96_cosine_accuracy@1": 0.5529075309818875,
321
+ "eval_dim_96_cosine_accuracy@10": 0.8741658722592945,
322
+ "eval_dim_96_cosine_accuracy@3": 0.7416587225929456,
323
+ "eval_dim_96_cosine_accuracy@5": 0.8093422306959008,
324
+ "eval_dim_96_cosine_map@100": 0.6652525185575742,
325
+ "eval_dim_96_cosine_mrr@10": 0.6608247461679306,
326
+ "eval_dim_96_cosine_ndcg@10": 0.7125237648315317,
327
+ "eval_dim_96_cosine_precision@1": 0.5529075309818875,
328
+ "eval_dim_96_cosine_precision@10": 0.08741658722592945,
329
+ "eval_dim_96_cosine_precision@3": 0.24721957419764853,
330
+ "eval_dim_96_cosine_precision@5": 0.1618684461391802,
331
+ "eval_dim_96_cosine_recall@1": 0.5529075309818875,
332
+ "eval_dim_96_cosine_recall@10": 0.8741658722592945,
333
+ "eval_dim_96_cosine_recall@3": 0.7416587225929456,
334
+ "eval_dim_96_cosine_recall@5": 0.8093422306959008,
335
+ "eval_runtime": 120.2085,
336
+ "eval_samples_per_second": 0.0,
337
+ "eval_sequential_score": 0.6755921916053389,
338
+ "eval_steps_per_second": 0.0,
339
+ "step": 57
340
+ },
341
+ {
342
+ "epoch": 3.1627118644067798,
343
+ "grad_norm": 102.55581665039062,
344
+ "learning_rate": 1.967924685193552e-06,
345
+ "loss": 12.4455,
346
+ "step": 60
347
+ },
348
+ {
349
+ "epoch": 3.705084745762712,
350
+ "grad_norm": 84.57442474365234,
351
+ "learning_rate": 1.0823490035218986e-07,
352
+ "loss": 12.2968,
353
+ "step": 70
354
+ },
355
+ {
356
+ "epoch": 3.8135593220338984,
357
+ "eval_dim_128_cosine_accuracy@1": 0.5662535748331744,
358
+ "eval_dim_128_cosine_accuracy@10": 0.8913250714966635,
359
+ "eval_dim_128_cosine_accuracy@3": 0.7626310772163966,
360
+ "eval_dim_128_cosine_accuracy@5": 0.8265014299332698,
361
+ "eval_dim_128_cosine_map@100": 0.6793502491099088,
362
+ "eval_dim_128_cosine_mrr@10": 0.6752375656331816,
363
+ "eval_dim_128_cosine_ndcg@10": 0.7275517192718437,
364
+ "eval_dim_128_cosine_precision@1": 0.5662535748331744,
365
+ "eval_dim_128_cosine_precision@10": 0.08913250714966635,
366
+ "eval_dim_128_cosine_precision@3": 0.25421035907213213,
367
+ "eval_dim_128_cosine_precision@5": 0.16530028598665394,
368
+ "eval_dim_128_cosine_recall@1": 0.5662535748331744,
369
+ "eval_dim_128_cosine_recall@10": 0.8913250714966635,
370
+ "eval_dim_128_cosine_recall@3": 0.7626310772163966,
371
+ "eval_dim_128_cosine_recall@5": 0.8265014299332698,
372
+ "eval_dim_192_cosine_accuracy@1": 0.5891325071496664,
373
+ "eval_dim_192_cosine_accuracy@10": 0.90371782650143,
374
+ "eval_dim_192_cosine_accuracy@3": 0.778836987607245,
375
+ "eval_dim_192_cosine_accuracy@5": 0.8331744518589133,
376
+ "eval_dim_192_cosine_map@100": 0.7004357679049269,
377
+ "eval_dim_192_cosine_mrr@10": 0.6964694266648511,
378
+ "eval_dim_192_cosine_ndcg@10": 0.7467179313530818,
379
+ "eval_dim_192_cosine_precision@1": 0.5891325071496664,
380
+ "eval_dim_192_cosine_precision@10": 0.090371782650143,
381
+ "eval_dim_192_cosine_precision@3": 0.259612329202415,
382
+ "eval_dim_192_cosine_precision@5": 0.16663489037178267,
383
+ "eval_dim_192_cosine_recall@1": 0.5891325071496664,
384
+ "eval_dim_192_cosine_recall@10": 0.90371782650143,
385
+ "eval_dim_192_cosine_recall@3": 0.778836987607245,
386
+ "eval_dim_192_cosine_recall@5": 0.8331744518589133,
387
+ "eval_dim_256_cosine_accuracy@1": 0.5977121067683508,
388
+ "eval_dim_256_cosine_accuracy@10": 0.9151572926596759,
389
+ "eval_dim_256_cosine_accuracy@3": 0.7912297426120114,
390
+ "eval_dim_256_cosine_accuracy@5": 0.8398474737845567,
391
+ "eval_dim_256_cosine_map@100": 0.7084736712852033,
392
+ "eval_dim_256_cosine_mrr@10": 0.7049529408204341,
393
+ "eval_dim_256_cosine_ndcg@10": 0.7558547240171754,
394
+ "eval_dim_256_cosine_precision@1": 0.5977121067683508,
395
+ "eval_dim_256_cosine_precision@10": 0.09151572926596759,
396
+ "eval_dim_256_cosine_precision@3": 0.26374324753733713,
397
+ "eval_dim_256_cosine_precision@5": 0.16796949475691134,
398
+ "eval_dim_256_cosine_recall@1": 0.5977121067683508,
399
+ "eval_dim_256_cosine_recall@10": 0.9151572926596759,
400
+ "eval_dim_256_cosine_recall@3": 0.7912297426120114,
401
+ "eval_dim_256_cosine_recall@5": 0.8398474737845567,
402
+ "eval_dim_384_cosine_accuracy@1": 0.6120114394661582,
403
+ "eval_dim_384_cosine_accuracy@10": 0.9275500476644424,
404
+ "eval_dim_384_cosine_accuracy@3": 0.8017159199237369,
405
+ "eval_dim_384_cosine_accuracy@5": 0.8541468064823642,
406
+ "eval_dim_384_cosine_map@100": 0.7210073638258574,
407
+ "eval_dim_384_cosine_mrr@10": 0.7184669450875366,
408
+ "eval_dim_384_cosine_ndcg@10": 0.7690377395004954,
409
+ "eval_dim_384_cosine_precision@1": 0.6120114394661582,
410
+ "eval_dim_384_cosine_precision@10": 0.09275500476644424,
411
+ "eval_dim_384_cosine_precision@3": 0.267238639974579,
412
+ "eval_dim_384_cosine_precision@5": 0.17082936129647283,
413
+ "eval_dim_384_cosine_recall@1": 0.6120114394661582,
414
+ "eval_dim_384_cosine_recall@10": 0.9275500476644424,
415
+ "eval_dim_384_cosine_recall@3": 0.8017159199237369,
416
+ "eval_dim_384_cosine_recall@5": 0.8541468064823642,
417
+ "eval_dim_64_cosine_accuracy@1": 0.5166825548141086,
418
+ "eval_dim_64_cosine_accuracy@10": 0.8369876072449952,
419
+ "eval_dim_64_cosine_accuracy@3": 0.7044804575786463,
420
+ "eval_dim_64_cosine_accuracy@5": 0.7683508102955195,
421
+ "eval_dim_64_cosine_map@100": 0.6292715088820261,
422
+ "eval_dim_64_cosine_mrr@10": 0.6239059875618503,
423
+ "eval_dim_64_cosine_ndcg@10": 0.6755211859192654,
424
+ "eval_dim_64_cosine_precision@1": 0.5166825548141086,
425
+ "eval_dim_64_cosine_precision@10": 0.08369876072449953,
426
+ "eval_dim_64_cosine_precision@3": 0.2348268191928821,
427
+ "eval_dim_64_cosine_precision@5": 0.1536701620591039,
428
+ "eval_dim_64_cosine_recall@1": 0.5166825548141086,
429
+ "eval_dim_64_cosine_recall@10": 0.8369876072449952,
430
+ "eval_dim_64_cosine_recall@3": 0.7044804575786463,
431
+ "eval_dim_64_cosine_recall@5": 0.7683508102955195,
432
+ "eval_dim_96_cosine_accuracy@1": 0.551954242135367,
433
+ "eval_dim_96_cosine_accuracy@10": 0.8732125834127741,
434
+ "eval_dim_96_cosine_accuracy@3": 0.7416587225929456,
435
+ "eval_dim_96_cosine_accuracy@5": 0.8093422306959008,
436
+ "eval_dim_96_cosine_map@100": 0.6648689218069684,
437
+ "eval_dim_96_cosine_mrr@10": 0.660333348464903,
438
+ "eval_dim_96_cosine_ndcg@10": 0.7119774118711802,
439
+ "eval_dim_96_cosine_precision@1": 0.551954242135367,
440
+ "eval_dim_96_cosine_precision@10": 0.08732125834127741,
441
+ "eval_dim_96_cosine_precision@3": 0.24721957419764853,
442
+ "eval_dim_96_cosine_precision@5": 0.1618684461391802,
443
+ "eval_dim_96_cosine_recall@1": 0.551954242135367,
444
+ "eval_dim_96_cosine_recall@10": 0.8732125834127741,
445
+ "eval_dim_96_cosine_recall@3": 0.7416587225929456,
446
+ "eval_dim_96_cosine_recall@5": 0.8093422306959008,
447
+ "eval_runtime": 122.4656,
448
+ "eval_samples_per_second": 0.0,
449
+ "eval_sequential_score": 0.6755211859192654,
450
+ "eval_steps_per_second": 0.0,
451
+ "step": 72
452
+ }
453
+ ],
454
+ "logging_steps": 10,
455
+ "max_steps": 72,
456
+ "num_input_tokens_seen": 0,
457
+ "num_train_epochs": 4,
458
+ "save_steps": 500,
459
+ "stateful_callbacks": {
460
+ "TrainerControl": {
461
+ "args": {
462
+ "should_epoch_stop": false,
463
+ "should_evaluate": false,
464
+ "should_log": false,
465
+ "should_save": true,
466
+ "should_training_stop": true
467
+ },
468
+ "attributes": {}
469
+ }
470
+ },
471
+ "total_flos": 0.0,
472
+ "train_batch_size": 32,
473
+ "trial_name": null,
474
+ "trial_params": null
475
+ }
checkpoint-72/training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c7792b3d6d0521f78d711be5b9ea8a9fbf7602d27ea1f1e28ac62990d20c22b2
3
+ size 5624
checkpoint-72/vocab.txt ADDED
The diff for this file is too large to render. See raw diff