meandyou200175 committed on
Commit
a84fe1e
·
verified ·
1 Parent(s): 704c787

Add new SentenceTransformer model

Browse files
.gitattributes CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ tokenizer.json filter=lfs diff=lfs merge=lfs -text
1_Pooling/config.json ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "word_embedding_dimension": 1024,
3
+ "pooling_mode_cls_token": false,
4
+ "pooling_mode_mean_tokens": true,
5
+ "pooling_mode_max_tokens": false,
6
+ "pooling_mode_mean_sqrt_len_tokens": false,
7
+ "pooling_mode_weightedmean_tokens": false,
8
+ "pooling_mode_lasttoken": false,
9
+ "include_prompt": true
10
+ }
README.md ADDED
@@ -0,0 +1,800 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ tags:
3
+ - sentence-transformers
4
+ - sentence-similarity
5
+ - feature-extraction
6
+ - dense
7
+ - generated_from_trainer
8
+ - dataset_size:21620
9
+ - loss:MultipleNegativesRankingLoss
10
+ base_model: meandyou200175/E5_v3_instruct_topic
11
+ widget:
12
+ - source_sentence: 'task: classification | query: hope'
13
+ sentences:
14
+ - Khác
15
+ - Lễ hội
16
+ - Khác
17
+ - source_sentence: 'task: classification | query: Empire State Aerial View'
18
+ sentences:
19
+ - Khác
20
+ - Du lịch Việt Nam|Danh lam thắng cảnh
21
+ - Danh lam thắng cảnh|Du lịch quốc tế
22
+ - source_sentence: 'task: classification | query: Tập 8: Chuyện li kì của Huế : Chùa
23
+ Từ Hiếu - P2. Tại sao người dân không dám ăn cá? Tại sao chưa ai thấy chiếc hồ
24
+ này cạn?'
25
+ sentences:
26
+ - Lịch sử|Du lịch Việt Nam
27
+ - Tôn giáo
28
+ - Khác
29
+ - source_sentence: 'task: classification | query: Nội dung của xương chậu nữ Hiển
30
+ thị các cơ quan chính của xương chậu nữ'
31
+ sentences:
32
+ - sách
33
+ - Danh lam thắng cảnh|Du lịch Việt Nam
34
+ - Y tế
35
+ - source_sentence: 'task: classification | query: Học lịch sử cười đau bụng, vừa vui
36
+ vừa dễ nhớ!'
37
+ sentences:
38
+ - Lịch sử
39
+ - Thủ công mỹ nghệ
40
+ - Âm nhạc
41
+ pipeline_tag: sentence-similarity
42
+ library_name: sentence-transformers
43
+ metrics:
44
+ - cosine_accuracy@1
45
+ - cosine_accuracy@2
46
+ - cosine_accuracy@5
47
+ - cosine_accuracy@10
48
+ - cosine_accuracy@100
49
+ - cosine_precision@1
50
+ - cosine_precision@2
51
+ - cosine_precision@5
52
+ - cosine_precision@10
53
+ - cosine_precision@100
54
+ - cosine_recall@1
55
+ - cosine_recall@2
56
+ - cosine_recall@5
57
+ - cosine_recall@10
58
+ - cosine_recall@100
59
+ - cosine_ndcg@10
60
+ - cosine_mrr@1
61
+ - cosine_mrr@2
62
+ - cosine_mrr@5
63
+ - cosine_mrr@10
64
+ - cosine_mrr@100
65
+ - cosine_map@100
66
+ model-index:
67
+ - name: SentenceTransformer based on meandyou200175/E5_v3_instruct_topic
68
+ results:
69
+ - task:
70
+ type: information-retrieval
71
+ name: Information Retrieval
72
+ dataset:
73
+ name: Unknown
74
+ type: unknown
75
+ metrics:
76
+ - type: cosine_accuracy@1
77
+ value: 0.0328755722014149
78
+ name: Cosine Accuracy@1
79
+ - type: cosine_accuracy@2
80
+ value: 0.06325426550145652
81
+ name: Cosine Accuracy@2
82
+ - type: cosine_accuracy@5
83
+ value: 0.14024136496046608
84
+ name: Cosine Accuracy@5
85
+ - type: cosine_accuracy@10
86
+ value: 0.23928422804827298
87
+ name: Cosine Accuracy@10
88
+ - type: cosine_accuracy@100
89
+ value: 0.7719517270079068
90
+ name: Cosine Accuracy@100
91
+ - type: cosine_precision@1
92
+ value: 0.0328755722014149
93
+ name: Cosine Precision@1
94
+ - type: cosine_precision@2
95
+ value: 0.03162713275072826
96
+ name: Cosine Precision@2
97
+ - type: cosine_precision@5
98
+ value: 0.02804827299209322
99
+ name: Cosine Precision@5
100
+ - type: cosine_precision@10
101
+ value: 0.0239284228048273
102
+ name: Cosine Precision@10
103
+ - type: cosine_precision@100
104
+ value: 0.007719517270079068
105
+ name: Cosine Precision@100
106
+ - type: cosine_recall@1
107
+ value: 0.0328755722014149
108
+ name: Cosine Recall@1
109
+ - type: cosine_recall@2
110
+ value: 0.06325426550145652
111
+ name: Cosine Recall@2
112
+ - type: cosine_recall@5
113
+ value: 0.14024136496046608
114
+ name: Cosine Recall@5
115
+ - type: cosine_recall@10
116
+ value: 0.23928422804827298
117
+ name: Cosine Recall@10
118
+ - type: cosine_recall@100
119
+ value: 0.7719517270079068
120
+ name: Cosine Recall@100
121
+ - type: cosine_ndcg@10
122
+ value: 0.11846399039841277
123
+ name: Cosine Ndcg@10
124
+ - type: cosine_mrr@1
125
+ value: 0.0328755722014149
126
+ name: Cosine Mrr@1
127
+ - type: cosine_mrr@2
128
+ value: 0.048064918851435705
129
+ name: Cosine Mrr@2
130
+ - type: cosine_mrr@5
131
+ value: 0.06906644472187543
132
+ name: Cosine Mrr@5
133
+ - type: cosine_mrr@10
134
+ value: 0.08210801313173335
135
+ name: Cosine Mrr@10
136
+ - type: cosine_mrr@100
137
+ value: 0.10204118490728512
138
+ name: Cosine Mrr@100
139
+ - type: cosine_map@100
140
+ value: 0.10204118490728524
141
+ name: Cosine Map@100
142
+ ---
143
+
144
+ # SentenceTransformer based on meandyou200175/E5_v3_instruct_topic
145
+
146
+ This is a [sentence-transformers](https://www.SBERT.net) model finetuned from [meandyou200175/E5_v3_instruct_topic](https://huggingface.co/meandyou200175/E5_v3_instruct_topic). It maps sentences & paragraphs to a 1024-dimensional dense vector space and can be used for semantic textual similarity, semantic search, paraphrase mining, text classification, clustering, and more.
147
+
148
+ ## Model Details
149
+
150
+ ### Model Description
151
+ - **Model Type:** Sentence Transformer
152
+ - **Base model:** [meandyou200175/E5_v3_instruct_topic](https://huggingface.co/meandyou200175/E5_v3_instruct_topic) <!-- at revision e1cd18d29dcab90869d10fb264523bc44cbe8455 -->
153
+ - **Maximum Sequence Length:** 512 tokens
154
+ - **Output Dimensionality:** 1024 dimensions
155
+ - **Similarity Function:** Cosine Similarity
156
+ <!-- - **Training Dataset:** Unknown -->
157
+ <!-- - **Language:** Unknown -->
158
+ <!-- - **License:** Unknown -->
159
+
160
+ ### Model Sources
161
+
162
+ - **Documentation:** [Sentence Transformers Documentation](https://sbert.net)
163
+ - **Repository:** [Sentence Transformers on GitHub](https://github.com/huggingface/sentence-transformers)
164
+ - **Hugging Face:** [Sentence Transformers on Hugging Face](https://huggingface.co/models?library=sentence-transformers)
165
+
166
+ ### Full Model Architecture
167
+
168
+ ```
169
+ SentenceTransformer(
170
+ (0): Transformer({'max_seq_length': 512, 'do_lower_case': False, 'architecture': 'XLMRobertaModel'})
171
+ (1): Pooling({'word_embedding_dimension': 1024, 'pooling_mode_cls_token': False, 'pooling_mode_mean_tokens': True, 'pooling_mode_max_tokens': False, 'pooling_mode_mean_sqrt_len_tokens': False, 'pooling_mode_weightedmean_tokens': False, 'pooling_mode_lasttoken': False, 'include_prompt': True})
172
+ (2): Normalize()
173
+ )
174
+ ```
175
+
176
+ ## Usage
177
+
178
+ ### Direct Usage (Sentence Transformers)
179
+
180
+ First install the Sentence Transformers library:
181
+
182
+ ```bash
183
+ pip install -U sentence-transformers
184
+ ```
185
+
186
+ Then you can load this model and run inference.
187
+ ```python
188
+ from sentence_transformers import SentenceTransformer
189
+
190
+ # Download from the 🤗 Hub
191
+ model = SentenceTransformer("meandyou200175/E5_v4_instruct_topic_continue")
192
+ # Run inference
193
+ sentences = [
194
+ 'task: classification | query: Học lịch sử cười đau bụng, vừa vui vừa dễ nhớ!',
195
+ 'Lịch sử',
196
+ 'Âm nhạc',
197
+ ]
198
+ embeddings = model.encode(sentences)
199
+ print(embeddings.shape)
200
+ # [3, 1024]
201
+
202
+ # Get the similarity scores for the embeddings
203
+ similarities = model.similarity(embeddings, embeddings)
204
+ print(similarities)
205
+ # tensor([[ 1.0000, 0.7909, -0.1353],
206
+ # [ 0.7909, 1.0000, -0.0556],
207
+ # [-0.1353, -0.0556, 1.0000]])
208
+ ```
209
+
210
+ <!--
211
+ ### Direct Usage (Transformers)
212
+
213
+ <details><summary>Click to see the direct usage in Transformers</summary>
214
+
215
+ </details>
216
+ -->
217
+
218
+ <!--
219
+ ### Downstream Usage (Sentence Transformers)
220
+
221
+ You can finetune this model on your own dataset.
222
+
223
+ <details><summary>Click to expand</summary>
224
+
225
+ </details>
226
+ -->
227
+
228
+ <!--
229
+ ### Out-of-Scope Use
230
+
231
+ *List how the model may foreseeably be misused and address what users ought not to do with the model.*
232
+ -->
233
+
234
+ ## Evaluation
235
+
236
+ ### Metrics
237
+
238
+ #### Information Retrieval
239
+
240
+ * Evaluated with [<code>InformationRetrievalEvaluator</code>](https://sbert.net/docs/package_reference/sentence_transformer/evaluation.html#sentence_transformers.evaluation.InformationRetrievalEvaluator)
241
+
242
+ | Metric | Value |
243
+ |:---------------------|:-----------|
244
+ | cosine_accuracy@1 | 0.0329 |
245
+ | cosine_accuracy@2 | 0.0633 |
246
+ | cosine_accuracy@5 | 0.1402 |
247
+ | cosine_accuracy@10 | 0.2393 |
248
+ | cosine_accuracy@100 | 0.772 |
249
+ | cosine_precision@1 | 0.0329 |
250
+ | cosine_precision@2 | 0.0316 |
251
+ | cosine_precision@5 | 0.028 |
252
+ | cosine_precision@10 | 0.0239 |
253
+ | cosine_precision@100 | 0.0077 |
254
+ | cosine_recall@1 | 0.0329 |
255
+ | cosine_recall@2 | 0.0633 |
256
+ | cosine_recall@5 | 0.1402 |
257
+ | cosine_recall@10 | 0.2393 |
258
+ | cosine_recall@100 | 0.772 |
259
+ | **cosine_ndcg@10** | **0.1185** |
260
+ | cosine_mrr@1 | 0.0329 |
261
+ | cosine_mrr@2 | 0.0481 |
262
+ | cosine_mrr@5 | 0.0691 |
263
+ | cosine_mrr@10 | 0.0821 |
264
+ | cosine_mrr@100 | 0.102 |
265
+ | cosine_map@100 | 0.102 |
266
+
267
+ <!--
268
+ ## Bias, Risks and Limitations
269
+
270
+ *What are the known or foreseeable issues stemming from this model? You could also flag here known failure cases or weaknesses of the model.*
271
+ -->
272
+
273
+ <!--
274
+ ### Recommendations
275
+
276
+ *What are recommendations with respect to the foreseeable issues? For example, filtering explicit content.*
277
+ -->
278
+
279
+ ## Training Details
280
+
281
+ ### Training Dataset
282
+
283
+ #### Unnamed Dataset
284
+
285
+ * Size: 21,620 training samples
286
+ * Columns: <code>anchor</code> and <code>positive</code>
287
+ * Approximate statistics based on the first 1000 samples:
288
+ | | anchor | positive |
289
+ |:--------|:------------------------------------------------------------------------------------|:---------------------------------------------------------------------------------|
290
+ | type | string | string |
291
+ | details | <ul><li>min: 12 tokens</li><li>mean: 49.27 tokens</li><li>max: 389 tokens</li></ul> | <ul><li>min: 3 tokens</li><li>mean: 7.09 tokens</li><li>max: 21 tokens</li></ul> |
292
+ * Samples:
293
+ | anchor | positive |
294
+ |:------------------------------------------------------------------------------------------------------------|:---------------------------------------------------|
295
+ | <code>task: classification \| query: Phở tái lăn bí truyền</code> | <code>Ẩm thực địa phương</code> |
296
+ | <code>task: classification \| query: 100 ngày thay đổi tư duy với DAS - Bí Mật Học Viện Số - Ngày 19</code> | <code>Khám phá kiến thức\|Học tập - Kỹ năng</code> |
297
+ | <code>task: classification \| query: Vacation Houses Nguồn: tourswaparks</code> | <code>Du lịch nghỉ dưỡng</code> |
298
+ * Loss: [<code>MultipleNegativesRankingLoss</code>](https://sbert.net/docs/package_reference/sentence_transformer/losses.html#multiplenegativesrankingloss) with these parameters:
299
+ ```json
300
+ {
301
+ "scale": 20.0,
302
+ "similarity_fct": "cos_sim",
303
+ "gather_across_devices": false
304
+ }
305
+ ```
306
+
307
+ ### Evaluation Dataset
308
+
309
+ #### Unnamed Dataset
310
+
311
+ * Size: 2,403 evaluation samples
312
+ * Columns: <code>anchor</code> and <code>positive</code>
313
+ * Approximate statistics based on the first 1000 samples:
314
+ | | anchor | positive |
315
+ |:--------|:------------------------------------------------------------------------------------|:---------------------------------------------------------------------------------|
316
+ | type | string | string |
317
+ | details | <ul><li>min: 12 tokens</li><li>mean: 45.45 tokens</li><li>max: 267 tokens</li></ul> | <ul><li>min: 3 tokens</li><li>mean: 7.08 tokens</li><li>max: 21 tokens</li></ul> |
318
+ * Samples:
319
+ | anchor | positive |
320
+ |:-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|:-------------------------------------------------------------|
321
+ | <code>task: classification \| query: Yoolife Nền tảng số mang thực phẩm sạch từ Nông Trại đến tận mâm cơm gia đình Việt Làm người nông dân sao mà khổ thế!! lam lũ quoanh năm, thiên tai hạn hán dịch bệnh hoành hành, được mùa thì mất giá được giá thì mất mùa. Thấu hiểu được những khó khăn của các chủ nông trại, Yoolife đã trở thành cầu nối tiêu thụ nông sản sạch, an toàn trên không gian mạng. Thông qua siêu ứng dụng Yooseller, bạn có thể: • Mang thực phẩm sạch đến người dùng, không qua trung gian • Sản phẩm kiểm định chặt chẽ, đạt tiêu chuẩn VietGAP • Tiếp cận hơn 1 triệu người dùng trên nền tảng • Miễn phí cho các nhà cung cấp dịch vụ YooSeller cầu nối giữa các trang trại với người tiêu dùng. Hãy tải ngay siêu ứng dụng để đưa thực phẩm sạch đến gần với người dùng nhé! ---------------------------------</code> | <code>Chuyển đổi số\|Công nghệ thông tin - Viễn thông</code> |
322
+ | <code>task: classification \| query: Nhà máy Sunhouse Sunhouse có tên đầy đủ là Công ty Cổ phần Tập đoàn Sunhouse, tiền thân là Công ty TNHH Phú Thắng. Đây là doanh nghiệp chuyên sản xuất và kinh doanh các thiết bị gia dụng. Công ty chính thức được thành lập vào ngày 22 5 2000. Đến năm 2004, Sunhouse liên doanh với công ty TNHH Sunhouse Hàn Quốc, thành lập nên Công ty TNHH Sunhouse Việt Nam. Sau 19 năm hình thành và phát triển, Sunhouse đã gia nhập vào nhóm những doanh nghiệp nghìn tỷ, với 7 công ty thành viên và 6 cụm nhà máy, tổng diện tích hơn 60 ha. Tổng số cán bộ nhân viên làm việc tại tập đoàn lên đến hơn 2.000 người. Sunhouse được đánh giá là cánh chim đầu đàn của ngành gia dụng Việt Nam. Không chỉ nổi tiếng trong lãnh thổ Việt Nam mà các sản phẩm mang thương hiệu này đã vươn tầm quốc tế. Hiện nay, doanh nghiệp có mạng lưới 50.000 điểm bá</code> | <code>Doanh nghiệp> Thông tin doanh nghiệp</code> |
323
+ | <code>task: classification \| query: Chào các bạn! Hôm nay, mình sẽ chia sẻ những mẹo và kiến thức cực kỳ hữu ích để bạn tự tin hơn khi thi bằng lái xe! P33</code> | <code>Đời sống> Xe</code> |
324
+ * Loss: [<code>MultipleNegativesRankingLoss</code>](https://sbert.net/docs/package_reference/sentence_transformer/losses.html#multiplenegativesrankingloss) with these parameters:
325
+ ```json
326
+ {
327
+ "scale": 20.0,
328
+ "similarity_fct": "cos_sim",
329
+ "gather_across_devices": false
330
+ }
331
+ ```
332
+
333
+ ### Training Hyperparameters
334
+ #### Non-Default Hyperparameters
335
+
336
+ - `eval_strategy`: steps
337
+ - `per_device_train_batch_size`: 4
338
+ - `per_device_eval_batch_size`: 4
339
+ - `learning_rate`: 2e-05
340
+ - `num_train_epochs`: 5
341
+ - `warmup_ratio`: 0.1
342
+ - `fp16`: True
343
+ - `batch_sampler`: no_duplicates
344
+
345
+ #### All Hyperparameters
346
+ <details><summary>Click to expand</summary>
347
+
348
+ - `overwrite_output_dir`: False
349
+ - `do_predict`: False
350
+ - `eval_strategy`: steps
351
+ - `prediction_loss_only`: True
352
+ - `per_device_train_batch_size`: 4
353
+ - `per_device_eval_batch_size`: 4
354
+ - `per_gpu_train_batch_size`: None
355
+ - `per_gpu_eval_batch_size`: None
356
+ - `gradient_accumulation_steps`: 1
357
+ - `eval_accumulation_steps`: None
358
+ - `torch_empty_cache_steps`: None
359
+ - `learning_rate`: 2e-05
360
+ - `weight_decay`: 0.0
361
+ - `adam_beta1`: 0.9
362
+ - `adam_beta2`: 0.999
363
+ - `adam_epsilon`: 1e-08
364
+ - `max_grad_norm`: 1.0
365
+ - `num_train_epochs`: 5
366
+ - `max_steps`: -1
367
+ - `lr_scheduler_type`: linear
368
+ - `lr_scheduler_kwargs`: {}
369
+ - `warmup_ratio`: 0.1
370
+ - `warmup_steps`: 0
371
+ - `log_level`: passive
372
+ - `log_level_replica`: warning
373
+ - `log_on_each_node`: True
374
+ - `logging_nan_inf_filter`: True
375
+ - `save_safetensors`: True
376
+ - `save_on_each_node`: False
377
+ - `save_only_model`: False
378
+ - `restore_callback_states_from_checkpoint`: False
379
+ - `no_cuda`: False
380
+ - `use_cpu`: False
381
+ - `use_mps_device`: False
382
+ - `seed`: 42
383
+ - `data_seed`: None
384
+ - `jit_mode_eval`: False
385
+ - `use_ipex`: False
386
+ - `bf16`: False
387
+ - `fp16`: True
388
+ - `fp16_opt_level`: O1
389
+ - `half_precision_backend`: auto
390
+ - `bf16_full_eval`: False
391
+ - `fp16_full_eval`: False
392
+ - `tf32`: None
393
+ - `local_rank`: 0
394
+ - `ddp_backend`: None
395
+ - `tpu_num_cores`: None
396
+ - `tpu_metrics_debug`: False
397
+ - `debug`: []
398
+ - `dataloader_drop_last`: False
399
+ - `dataloader_num_workers`: 0
400
+ - `dataloader_prefetch_factor`: None
401
+ - `past_index`: -1
402
+ - `disable_tqdm`: False
403
+ - `remove_unused_columns`: True
404
+ - `label_names`: None
405
+ - `load_best_model_at_end`: False
406
+ - `ignore_data_skip`: False
407
+ - `fsdp`: []
408
+ - `fsdp_min_num_params`: 0
409
+ - `fsdp_config`: {'min_num_params': 0, 'xla': False, 'xla_fsdp_v2': False, 'xla_fsdp_grad_ckpt': False}
410
+ - `fsdp_transformer_layer_cls_to_wrap`: None
411
+ - `accelerator_config`: {'split_batches': False, 'dispatch_batches': None, 'even_batches': True, 'use_seedable_sampler': True, 'non_blocking': False, 'gradient_accumulation_kwargs': None}
412
+ - `deepspeed`: None
413
+ - `label_smoothing_factor`: 0.0
414
+ - `optim`: adamw_torch
415
+ - `optim_args`: None
416
+ - `adafactor`: False
417
+ - `group_by_length`: False
418
+ - `length_column_name`: length
419
+ - `ddp_find_unused_parameters`: None
420
+ - `ddp_bucket_cap_mb`: None
421
+ - `ddp_broadcast_buffers`: False
422
+ - `dataloader_pin_memory`: True
423
+ - `dataloader_persistent_workers`: False
424
+ - `skip_memory_metrics`: True
425
+ - `use_legacy_prediction_loop`: False
426
+ - `push_to_hub`: False
427
+ - `resume_from_checkpoint`: None
428
+ - `hub_model_id`: None
429
+ - `hub_strategy`: every_save
430
+ - `hub_private_repo`: None
431
+ - `hub_always_push`: False
432
+ - `hub_revision`: None
433
+ - `gradient_checkpointing`: False
434
+ - `gradient_checkpointing_kwargs`: None
435
+ - `include_inputs_for_metrics`: False
436
+ - `include_for_metrics`: []
437
+ - `eval_do_concat_batches`: True
438
+ - `fp16_backend`: auto
439
+ - `push_to_hub_model_id`: None
440
+ - `push_to_hub_organization`: None
441
+ - `mp_parameters`:
442
+ - `auto_find_batch_size`: False
443
+ - `full_determinism`: False
444
+ - `torchdynamo`: None
445
+ - `ray_scope`: last
446
+ - `ddp_timeout`: 1800
447
+ - `torch_compile`: False
448
+ - `torch_compile_backend`: None
449
+ - `torch_compile_mode`: None
450
+ - `include_tokens_per_second`: False
451
+ - `include_num_input_tokens_seen`: False
452
+ - `neftune_noise_alpha`: None
453
+ - `optim_target_modules`: None
454
+ - `batch_eval_metrics`: False
455
+ - `eval_on_start`: False
456
+ - `use_liger_kernel`: False
457
+ - `liger_kernel_config`: None
458
+ - `eval_use_gather_object`: False
459
+ - `average_tokens_across_devices`: False
460
+ - `prompts`: None
461
+ - `batch_sampler`: no_duplicates
462
+ - `multi_dataset_batch_sampler`: proportional
463
+ - `router_mapping`: {}
464
+ - `learning_rate_mapping`: {}
465
+
466
+ </details>
467
+
468
+ ### Training Logs
469
+ <details><summary>Click to expand</summary>
470
+
471
+ | Epoch | Step | Training Loss | Validation Loss | cosine_ndcg@10 |
472
+ |:------:|:-----:|:-------------:|:---------------:|:--------------:|
473
+ | 0.0185 | 100 | 2.469 | - | - |
474
+ | 0.0370 | 200 | 1.3544 | - | - |
475
+ | 0.0555 | 300 | 0.6754 | - | - |
476
+ | 0.0740 | 400 | 0.6252 | - | - |
477
+ | 0.0925 | 500 | 0.4433 | - | - |
478
+ | 0.1110 | 600 | 0.5628 | - | - |
479
+ | 0.1295 | 700 | 0.3955 | - | - |
480
+ | 0.1480 | 800 | 0.4755 | - | - |
481
+ | 0.1665 | 900 | 0.3112 | - | - |
482
+ | 0.1850 | 1000 | 0.3946 | 0.3953 | 0.0805 |
483
+ | 0.2035 | 1100 | 0.3676 | - | - |
484
+ | 0.2220 | 1200 | 0.3984 | - | - |
485
+ | 0.2405 | 1300 | 0.3767 | - | - |
486
+ | 0.2590 | 1400 | 0.3796 | - | - |
487
+ | 0.2775 | 1500 | 0.3332 | - | - |
488
+ | 0.2960 | 1600 | 0.4419 | - | - |
489
+ | 0.3145 | 1700 | 0.4107 | - | - |
490
+ | 0.3330 | 1800 | 0.3513 | - | - |
491
+ | 0.3515 | 1900 | 0.3502 | - | - |
492
+ | 0.3700 | 2000 | 0.4331 | 0.3697 | 0.0884 |
493
+ | 0.3885 | 2100 | 0.5259 | - | - |
494
+ | 0.4070 | 2200 | 0.4406 | - | - |
495
+ | 0.4255 | 2300 | 0.4705 | - | - |
496
+ | 0.4440 | 2400 | 0.3596 | - | - |
497
+ | 0.4625 | 2500 | 0.2859 | - | - |
498
+ | 0.4810 | 2600 | 0.3895 | - | - |
499
+ | 0.4995 | 2700 | 0.4653 | - | - |
500
+ | 0.5180 | 2800 | 0.3776 | - | - |
501
+ | 0.5365 | 2900 | 0.4929 | - | - |
502
+ | 0.5550 | 3000 | 0.31 | 0.4504 | 0.0847 |
503
+ | 0.5735 | 3100 | 0.3791 | - | - |
504
+ | 0.5920 | 3200 | 0.3522 | - | - |
505
+ | 0.6105 | 3300 | 0.3995 | - | - |
506
+ | 0.6290 | 3400 | 0.3699 | - | - |
507
+ | 0.6475 | 3500 | 0.3751 | - | - |
508
+ | 0.6660 | 3600 | 0.3472 | - | - |
509
+ | 0.6846 | 3700 | 0.3968 | - | - |
510
+ | 0.7031 | 3800 | 0.4328 | - | - |
511
+ | 0.7216 | 3900 | 0.4753 | - | - |
512
+ | 0.7401 | 4000 | 0.3527 | 0.3451 | 0.0974 |
513
+ | 0.7586 | 4100 | 0.506 | - | - |
514
+ | 0.7771 | 4200 | 0.4896 | - | - |
515
+ | 0.7956 | 4300 | 0.4368 | - | - |
516
+ | 0.8141 | 4400 | 0.373 | - | - |
517
+ | 0.8326 | 4500 | 0.3498 | - | - |
518
+ | 0.8511 | 4600 | 0.3926 | - | - |
519
+ | 0.8696 | 4700 | 0.3924 | - | - |
520
+ | 0.8881 | 4800 | 0.4206 | - | - |
521
+ | 0.9066 | 4900 | 0.4101 | - | - |
522
+ | 0.9251 | 5000 | 0.4193 | 0.3383 | 0.0910 |
523
+ | 0.9436 | 5100 | 0.3777 | - | - |
524
+ | 0.9621 | 5200 | 0.3059 | - | - |
525
+ | 0.9806 | 5300 | 0.4198 | - | - |
526
+ | 0.9991 | 5400 | 0.2563 | - | - |
527
+ | 1.0176 | 5500 | 0.225 | - | - |
528
+ | 1.0361 | 5600 | 0.3237 | - | - |
529
+ | 1.0546 | 5700 | 0.2978 | - | - |
530
+ | 1.0731 | 5800 | 0.3044 | - | - |
531
+ | 1.0916 | 5900 | 0.2087 | - | - |
532
+ | 1.1101 | 6000 | 0.2689 | 0.3643 | 0.0988 |
533
+ | 1.1286 | 6100 | 0.3699 | - | - |
534
+ | 1.1471 | 6200 | 0.2942 | - | - |
535
+ | 1.1656 | 6300 | 0.2929 | - | - |
536
+ | 1.1841 | 6400 | 0.3152 | - | - |
537
+ | 1.2026 | 6500 | 0.3352 | - | - |
538
+ | 1.2211 | 6600 | 0.3146 | - | - |
539
+ | 1.2396 | 6700 | 0.3873 | - | - |
540
+ | 1.2581 | 6800 | 0.258 | - | - |
541
+ | 1.2766 | 6900 | 0.1435 | - | - |
542
+ | 1.2951 | 7000 | 0.2508 | 0.3768 | 0.0966 |
543
+ | 1.3136 | 7100 | 0.2884 | - | - |
544
+ | 1.3321 | 7200 | 0.2962 | - | - |
545
+ | 1.3506 | 7300 | 0.1903 | - | - |
546
+ | 1.3691 | 7400 | 0.2946 | - | - |
547
+ | 1.3876 | 7500 | 0.2658 | - | - |
548
+ | 1.4061 | 7600 | 0.2052 | - | - |
549
+ | 1.4246 | 7700 | 0.3019 | - | - |
550
+ | 1.4431 | 7800 | 0.3147 | - | - |
551
+ | 1.4616 | 7900 | 0.4272 | - | - |
552
+ | 1.4801 | 8000 | 0.2707 | 0.3430 | 0.1000 |
553
+ | 1.4986 | 8100 | 0.3127 | - | - |
554
+ | 1.5171 | 8200 | 0.2775 | - | - |
555
+ | 1.5356 | 8300 | 0.2783 | - | - |
556
+ | 1.5541 | 8400 | 0.3092 | - | - |
557
+ | 1.5726 | 8500 | 0.35 | - | - |
558
+ | 1.5911 | 8600 | 0.3076 | - | - |
559
+ | 1.6096 | 8700 | 0.2935 | - | - |
560
+ | 1.6281 | 8800 | 0.3629 | - | - |
561
+ | 1.6466 | 8900 | 0.2885 | - | - |
562
+ | 1.6651 | 9000 | 0.3249 | 0.3294 | 0.0997 |
563
+ | 1.6836 | 9100 | 0.2983 | - | - |
564
+ | 1.7021 | 9200 | 0.3599 | - | - |
565
+ | 1.7206 | 9300 | 0.2341 | - | - |
566
+ | 1.7391 | 9400 | 0.4031 | - | - |
567
+ | 1.7576 | 9500 | 0.3911 | - | - |
568
+ | 1.7761 | 9600 | 0.3025 | - | - |
569
+ | 1.7946 | 9700 | 0.2315 | - | - |
570
+ | 1.8131 | 9800 | 0.2946 | - | - |
571
+ | 1.8316 | 9900 | 0.2679 | - | - |
572
+ | 1.8501 | 10000 | 0.3445 | 0.3247 | 0.1015 |
573
+ | 1.8686 | 10100 | 0.2243 | - | - |
574
+ | 1.8871 | 10200 | 0.3345 | - | - |
575
+ | 1.9056 | 10300 | 0.2642 | - | - |
576
+ | 1.9241 | 10400 | 0.2012 | - | - |
577
+ | 1.9426 | 10500 | 0.211 | - | - |
578
+ | 1.9611 | 10600 | 0.2834 | - | - |
579
+ | 1.9796 | 10700 | 0.2376 | - | - |
580
+ | 1.9981 | 10800 | 0.2351 | - | - |
581
+ | 2.0167 | 10900 | 0.1985 | - | - |
582
+ | 2.0352 | 11000 | 0.2464 | 0.3235 | 0.1079 |
583
+ | 2.0537 | 11100 | 0.2602 | - | - |
584
+ | 2.0722 | 11200 | 0.176 | - | - |
585
+ | 2.0907 | 11300 | 0.2486 | - | - |
586
+ | 2.1092 | 11400 | 0.2541 | - | - |
587
+ | 2.1277 | 11500 | 0.1925 | - | - |
588
+ | 2.1462 | 11600 | 0.2509 | - | - |
589
+ | 2.1647 | 11700 | 0.1799 | - | - |
590
+ | 2.1832 | 11800 | 0.219 | - | - |
591
+ | 2.2017 | 11900 | 0.2076 | - | - |
592
+ | 2.2202 | 12000 | 0.2285 | 0.3028 | 0.1061 |
593
+ | 2.2387 | 12100 | 0.1823 | - | - |
594
+ | 2.2572 | 12200 | 0.1999 | - | - |
595
+ | 2.2757 | 12300 | 0.1392 | - | - |
596
+ | 2.2942 | 12400 | 0.2552 | - | - |
597
+ | 2.3127 | 12500 | 0.2481 | - | - |
598
+ | 2.3312 | 12600 | 0.2164 | - | - |
599
+ | 2.3497 | 12700 | 0.2157 | - | - |
600
+ | 2.3682 | 12800 | 0.1425 | - | - |
601
+ | 2.3867 | 12900 | 0.0909 | - | - |
602
+ | 2.4052 | 13000 | 0.2931 | 0.3439 | 0.1011 |
603
+ | 2.4237 | 13100 | 0.2031 | - | - |
604
+ | 2.4422 | 13200 | 0.0993 | - | - |
605
+ | 2.4607 | 13300 | 0.1865 | - | - |
606
+ | 2.4792 | 13400 | 0.208 | - | - |
607
+ | 2.4977 | 13500 | 0.2853 | - | - |
608
+ | 2.5162 | 13600 | 0.1936 | - | - |
609
+ | 2.5347 | 13700 | 0.1752 | - | - |
610
+ | 2.5532 | 13800 | 0.2559 | - | - |
611
+ | 2.5717 | 13900 | 0.2441 | - | - |
612
+ | 2.5902 | 14000 | 0.2715 | 0.2953 | 0.1098 |
613
+ | 2.6087 | 14100 | 0.196 | - | - |
614
+ | 2.6272 | 14200 | 0.2194 | - | - |
615
+ | 2.6457 | 14300 | 0.2381 | - | - |
616
+ | 2.6642 | 14400 | 0.2637 | - | - |
617
+ | 2.6827 | 14500 | 0.1453 | - | - |
618
+ | 2.7012 | 14600 | 0.2422 | - | - |
619
+ | 2.7197 | 14700 | 0.2159 | - | - |
620
+ | 2.7382 | 14800 | 0.2205 | - | - |
621
+ | 2.7567 | 14900 | 0.1853 | - | - |
622
+ | 2.7752 | 15000 | 0.2028 | 0.2925 | 0.1072 |
623
+ | 2.7937 | 15100 | 0.2016 | - | - |
624
+ | 2.8122 | 15200 | 0.155 | - | - |
625
+ | 2.8307 | 15300 | 0.1925 | - | - |
626
+ | 2.8492 | 15400 | 0.2408 | - | - |
627
+ | 2.8677 | 15500 | 0.1464 | - | - |
628
+ | 2.8862 | 15600 | 0.2035 | - | - |
629
+ | 2.9047 | 15700 | 0.1883 | - | - |
630
+ | 2.9232 | 15800 | 0.1747 | - | - |
631
+ | 2.9417 | 15900 | 0.251 | - | - |
632
+ | 2.9602 | 16000 | 0.2151 | 0.2953 | 0.1117 |
633
+ | 2.9787 | 16100 | 0.226 | - | - |
634
+ | 2.9972 | 16200 | 0.1442 | - | - |
635
+ | 3.0157 | 16300 | 0.191 | - | - |
636
+ | 3.0342 | 16400 | 0.1304 | - | - |
637
+ | 3.0527 | 16500 | 0.2252 | - | - |
638
+ | 3.0712 | 16600 | 0.1846 | - | - |
639
+ | 3.0897 | 16700 | 0.1608 | - | - |
640
+ | 3.1082 | 16800 | 0.1582 | - | - |
641
+ | 3.1267 | 16900 | 0.1602 | - | - |
642
+ | 3.1452 | 17000 | 0.1086 | 0.2637 | 0.1048 |
643
+ | 3.1637 | 17100 | 0.1155 | - | - |
644
+ | 3.1822 | 17200 | 0.113 | - | - |
645
+ | 3.2007 | 17300 | 0.1622 | - | - |
646
+ | 3.2192 | 17400 | 0.1963 | - | - |
647
+ | 3.2377 | 17500 | 0.1556 | - | - |
648
+ | 3.2562 | 17600 | 0.0897 | - | - |
649
+ | 3.2747 | 17700 | 0.0999 | - | - |
650
+ | 3.2932 | 17800 | 0.1499 | - | - |
651
+ | 3.3117 | 17900 | 0.2365 | - | - |
652
+ | 3.3302 | 18000 | 0.146 | 0.2748 | 0.1113 |
653
+ | 3.3488 | 18100 | 0.1591 | - | - |
654
+ | 3.3673 | 18200 | 0.1885 | - | - |
655
+ | 3.3858 | 18300 | 0.1959 | - | - |
656
+ | 3.4043 | 18400 | 0.076 | - | - |
657
+ | 3.4228 | 18500 | 0.176 | - | - |
658
+ | 3.4413 | 18600 | 0.1378 | - | - |
659
+ | 3.4598 | 18700 | 0.0648 | - | - |
660
+ | 3.4783 | 18800 | 0.1488 | - | - |
661
+ | 3.4968 | 18900 | 0.1361 | - | - |
662
+ | 3.5153 | 19000 | 0.1573 | 0.2878 | 0.1096 |
663
+ | 3.5338 | 19100 | 0.2488 | - | - |
664
+ | 3.5523 | 19200 | 0.1086 | - | - |
665
+ | 3.5708 | 19300 | 0.1405 | - | - |
666
+ | 3.5893 | 19400 | 0.0423 | - | - |
667
+ | 3.6078 | 19500 | 0.1069 | - | - |
668
+ | 3.6263 | 19600 | 0.088 | - | - |
669
+ | 3.6448 | 19700 | 0.1489 | - | - |
670
+ | 3.6633 | 19800 | 0.0865 | - | - |
671
+ | 3.6818 | 19900 | 0.1839 | - | - |
672
+ | 3.7003 | 20000 | 0.1476 | 0.2914 | 0.1159 |
673
+ | 3.7188 | 20100 | 0.2212 | - | - |
674
+ | 3.7373 | 20200 | 0.1638 | - | - |
675
+ | 3.7558 | 20300 | 0.0782 | - | - |
676
+ | 3.7743 | 20400 | 0.1215 | - | - |
677
+ | 3.7928 | 20500 | 0.1478 | - | - |
678
+ | 3.8113 | 20600 | 0.1934 | - | - |
679
+ | 3.8298 | 20700 | 0.1594 | - | - |
680
+ | 3.8483 | 20800 | 0.1216 | - | - |
681
+ | 3.8668 | 20900 | 0.2124 | - | - |
682
+ | 3.8853 | 21000 | 0.0981 | 0.2789 | 0.1141 |
683
+ | 3.9038 | 21100 | 0.126 | - | - |
684
+ | 3.9223 | 21200 | 0.1077 | - | - |
685
+ | 3.9408 | 21300 | 0.1176 | - | - |
686
+ | 3.9593 | 21400 | 0.1776 | - | - |
687
+ | 3.9778 | 21500 | 0.094 | - | - |
688
+ | 3.9963 | 21600 | 0.1025 | - | - |
689
+ | 4.0148 | 21700 | 0.1589 | - | - |
690
+ | 4.0333 | 21800 | 0.1142 | - | - |
691
+ | 4.0518 | 21900 | 0.1656 | - | - |
692
+ | 4.0703 | 22000 | 0.0577 | 0.2660 | 0.1105 |
693
+ | 4.0888 | 22100 | 0.0911 | - | - |
694
+ | 4.1073 | 22200 | 0.0844 | - | - |
695
+ | 4.1258 | 22300 | 0.0606 | - | - |
696
+ | 4.1443 | 22400 | 0.1653 | - | - |
697
+ | 4.1628 | 22500 | 0.0968 | - | - |
698
+ | 4.1813 | 22600 | 0.055 | - | - |
699
+ | 4.1998 | 22700 | 0.1013 | - | - |
700
+ | 4.2183 | 22800 | 0.0587 | - | - |
701
+ | 4.2368 | 22900 | 0.1309 | - | - |
702
+ | 4.2553 | 23000 | 0.053 | 0.2554 | 0.1165 |
703
+ | 4.2738 | 23100 | 0.1312 | - | - |
704
+ | 4.2923 | 23200 | 0.1208 | - | - |
705
+ | 4.3108 | 23300 | 0.159 | - | - |
706
+ | 4.3293 | 23400 | 0.1135 | - | - |
707
+ | 4.3478 | 23500 | 0.0956 | - | - |
708
+ | 4.3663 | 23600 | 0.1353 | - | - |
709
+ | 4.3848 | 23700 | 0.1623 | - | - |
710
+ | 4.4033 | 23800 | 0.1296 | - | - |
711
+ | 4.4218 | 23900 | 0.1103 | - | - |
712
+ | 4.4403 | 24000 | 0.0837 | 0.2514 | 0.1175 |
713
+ | 4.4588 | 24100 | 0.1124 | - | - |
714
+ | 4.4773 | 24200 | 0.0893 | - | - |
715
+ | 4.4958 | 24300 | 0.0852 | - | - |
716
+ | 4.5143 | 24400 | 0.152 | - | - |
717
+ | 4.5328 | 24500 | 0.0731 | - | - |
718
+ | 4.5513 | 24600 | 0.1839 | - | - |
719
+ | 4.5698 | 24700 | 0.0393 | - | - |
720
+ | 4.5883 | 24800 | 0.1167 | - | - |
721
+ | 4.6068 | 24900 | 0.0909 | - | - |
722
+ | 4.6253 | 25000 | 0.098 | 0.2621 | 0.1196 |
723
+ | 4.6438 | 25100 | 0.1655 | - | - |
724
+ | 4.6623 | 25200 | 0.1086 | - | - |
725
+ | 4.6809 | 25300 | 0.116 | - | - |
726
+ | 4.6994 | 25400 | 0.0594 | - | - |
727
+ | 4.7179 | 25500 | 0.0677 | - | - |
728
+ | 4.7364 | 25600 | 0.0915 | - | - |
729
+ | 4.7549 | 25700 | 0.0784 | - | - |
730
+ | 4.7734 | 25800 | 0.0746 | - | - |
731
+ | 4.7919 | 25900 | 0.0613 | - | - |
732
+ | 4.8104 | 26000 | 0.0682 | 0.2570 | 0.1189 |
733
+ | 4.8289 | 26100 | 0.1423 | - | - |
734
+ | 4.8474 | 26200 | 0.1023 | - | - |
735
+ | 4.8659 | 26300 | 0.085 | - | - |
736
+ | 4.8844 | 26400 | 0.0916 | - | - |
737
+ | 4.9029 | 26500 | 0.1068 | - | - |
738
+ | 4.9214 | 26600 | 0.1184 | - | - |
739
+ | 4.9399 | 26700 | 0.0873 | - | - |
740
+ | 4.9584 | 26800 | 0.136 | - | - |
741
+ | 4.9769 | 26900 | 0.1196 | - | - |
742
+ | 4.9954 | 27000 | 0.1096 | 0.2472 | 0.1185 |
743
+
744
+ </details>
745
+
746
+ ### Framework Versions
747
+ - Python: 3.11.13
748
+ - Sentence Transformers: 5.1.2
749
+ - Transformers: 4.53.3
750
+ - PyTorch: 2.6.0+cu124
751
+ - Accelerate: 1.9.0
752
+ - Datasets: 4.4.1
753
+ - Tokenizers: 0.21.2
754
+
755
+ ## Citation
756
+
757
+ ### BibTeX
758
+
759
+ #### Sentence Transformers
760
+ ```bibtex
761
+ @inproceedings{reimers-2019-sentence-bert,
762
+ title = "Sentence-BERT: Sentence Embeddings using Siamese BERT-Networks",
763
+ author = "Reimers, Nils and Gurevych, Iryna",
764
+ booktitle = "Proceedings of the 2019 Conference on Empirical Methods in Natural Language Processing",
765
+ month = "11",
766
+ year = "2019",
767
+ publisher = "Association for Computational Linguistics",
768
+ url = "https://arxiv.org/abs/1908.10084",
769
+ }
770
+ ```
771
+
772
+ #### MultipleNegativesRankingLoss
773
+ ```bibtex
774
+ @misc{henderson2017efficient,
775
+ title={Efficient Natural Language Response Suggestion for Smart Reply},
776
+ author={Matthew Henderson and Rami Al-Rfou and Brian Strope and Yun-hsuan Sung and Laszlo Lukacs and Ruiqi Guo and Sanjiv Kumar and Balint Miklos and Ray Kurzweil},
777
+ year={2017},
778
+ eprint={1705.00652},
779
+ archivePrefix={arXiv},
780
+ primaryClass={cs.CL}
781
+ }
782
+ ```
783
+
784
+ <!--
785
+ ## Glossary
786
+
787
+ *Clearly define terms in order to be accessible across audiences.*
788
+ -->
789
+
790
+ <!--
791
+ ## Model Card Authors
792
+
793
+ *Lists the people who create the model card, providing recognition and accountability for the detailed work that goes into its construction.*
794
+ -->
795
+
796
+ <!--
797
+ ## Model Card Contact
798
+
799
+ *Provides a way for people who have updates to the Model Card, suggestions, or questions, to contact the Model Card authors.*
800
+ -->
config.json ADDED
@@ -0,0 +1,27 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "architectures": [
3
+ "XLMRobertaModel"
4
+ ],
5
+ "attention_probs_dropout_prob": 0.1,
6
+ "bos_token_id": 0,
7
+ "classifier_dropout": null,
8
+ "eos_token_id": 2,
9
+ "hidden_act": "gelu",
10
+ "hidden_dropout_prob": 0.1,
11
+ "hidden_size": 1024,
12
+ "initializer_range": 0.02,
13
+ "intermediate_size": 4096,
14
+ "layer_norm_eps": 1e-05,
15
+ "max_position_embeddings": 514,
16
+ "model_type": "xlm-roberta",
17
+ "num_attention_heads": 16,
18
+ "num_hidden_layers": 24,
19
+ "output_past": true,
20
+ "pad_token_id": 1,
21
+ "position_embedding_type": "absolute",
22
+ "torch_dtype": "float32",
23
+ "transformers_version": "4.53.3",
24
+ "type_vocab_size": 1,
25
+ "use_cache": true,
26
+ "vocab_size": 250002
27
+ }
config_sentence_transformers.json ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "__version__": {
3
+ "sentence_transformers": "5.1.2",
4
+ "transformers": "4.53.3",
5
+ "pytorch": "2.6.0+cu124"
6
+ },
7
+ "model_type": "SentenceTransformer",
8
+ "prompts": {
9
+ "query": "",
10
+ "document": ""
11
+ },
12
+ "default_prompt_name": null,
13
+ "similarity_fn_name": "cosine"
14
+ }
model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:36668b7fbed31bc3c6a8558c1432900fd4290b8727aaabab23aa2b774015433f
3
+ size 2239607176
modules.json ADDED
@@ -0,0 +1,20 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [
2
+ {
3
+ "idx": 0,
4
+ "name": "0",
5
+ "path": "",
6
+ "type": "sentence_transformers.models.Transformer"
7
+ },
8
+ {
9
+ "idx": 1,
10
+ "name": "1",
11
+ "path": "1_Pooling",
12
+ "type": "sentence_transformers.models.Pooling"
13
+ },
14
+ {
15
+ "idx": 2,
16
+ "name": "2",
17
+ "path": "2_Normalize",
18
+ "type": "sentence_transformers.models.Normalize"
19
+ }
20
+ ]
sentence_bert_config.json ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ {
2
+ "max_seq_length": 512,
3
+ "do_lower_case": false
4
+ }
sentencepiece.bpe.model ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cfc8146abe2a0488e9e2a0c56de7952f7c11ab059eca145a0a727afce0db2865
3
+ size 5069051
special_tokens_map.json ADDED
@@ -0,0 +1,51 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "bos_token": {
3
+ "content": "<s>",
4
+ "lstrip": false,
5
+ "normalized": false,
6
+ "rstrip": false,
7
+ "single_word": false
8
+ },
9
+ "cls_token": {
10
+ "content": "<s>",
11
+ "lstrip": false,
12
+ "normalized": false,
13
+ "rstrip": false,
14
+ "single_word": false
15
+ },
16
+ "eos_token": {
17
+ "content": "</s>",
18
+ "lstrip": false,
19
+ "normalized": false,
20
+ "rstrip": false,
21
+ "single_word": false
22
+ },
23
+ "mask_token": {
24
+ "content": "<mask>",
25
+ "lstrip": true,
26
+ "normalized": false,
27
+ "rstrip": false,
28
+ "single_word": false
29
+ },
30
+ "pad_token": {
31
+ "content": "<pad>",
32
+ "lstrip": false,
33
+ "normalized": false,
34
+ "rstrip": false,
35
+ "single_word": false
36
+ },
37
+ "sep_token": {
38
+ "content": "</s>",
39
+ "lstrip": false,
40
+ "normalized": false,
41
+ "rstrip": false,
42
+ "single_word": false
43
+ },
44
+ "unk_token": {
45
+ "content": "<unk>",
46
+ "lstrip": false,
47
+ "normalized": false,
48
+ "rstrip": false,
49
+ "single_word": false
50
+ }
51
+ }
tokenizer.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:883b037111086fd4dfebbbc9b7cee11e1517b5e0c0514879478661440f137085
3
+ size 17082987
tokenizer_config.json ADDED
@@ -0,0 +1,63 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "added_tokens_decoder": {
3
+ "0": {
4
+ "content": "<s>",
5
+ "lstrip": false,
6
+ "normalized": false,
7
+ "rstrip": false,
8
+ "single_word": false,
9
+ "special": true
10
+ },
11
+ "1": {
12
+ "content": "<pad>",
13
+ "lstrip": false,
14
+ "normalized": false,
15
+ "rstrip": false,
16
+ "single_word": false,
17
+ "special": true
18
+ },
19
+ "2": {
20
+ "content": "</s>",
21
+ "lstrip": false,
22
+ "normalized": false,
23
+ "rstrip": false,
24
+ "single_word": false,
25
+ "special": true
26
+ },
27
+ "3": {
28
+ "content": "<unk>",
29
+ "lstrip": false,
30
+ "normalized": false,
31
+ "rstrip": false,
32
+ "single_word": false,
33
+ "special": true
34
+ },
35
+ "250001": {
36
+ "content": "<mask>",
37
+ "lstrip": true,
38
+ "normalized": false,
39
+ "rstrip": false,
40
+ "single_word": false,
41
+ "special": true
42
+ }
43
+ },
44
+ "additional_special_tokens": [],
45
+ "bos_token": "<s>",
46
+ "clean_up_tokenization_spaces": true,
47
+ "cls_token": "<s>",
48
+ "eos_token": "</s>",
49
+ "extra_special_tokens": {},
50
+ "mask_token": "<mask>",
51
+ "max_length": 512,
52
+ "model_max_length": 512,
53
+ "pad_to_multiple_of": null,
54
+ "pad_token": "<pad>",
55
+ "pad_token_type_id": 0,
56
+ "padding_side": "right",
57
+ "sep_token": "</s>",
58
+ "stride": 0,
59
+ "tokenizer_class": "XLMRobertaTokenizer",
60
+ "truncation_side": "right",
61
+ "truncation_strategy": "longest_first",
62
+ "unk_token": "<unk>"
63
+ }