LilNomto commited on
Commit
da480d2
·
verified ·
1 Parent(s): fc04f16

Upload folder using huggingface_hub

Browse files
.gitattributes CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ tokenizer.json filter=lfs diff=lfs merge=lfs -text
1_Pooling/config.json ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "word_embedding_dimension": 768,
3
+ "pooling_mode_cls_token": true,
4
+ "pooling_mode_mean_tokens": false,
5
+ "pooling_mode_max_tokens": false,
6
+ "pooling_mode_mean_sqrt_len_tokens": false,
7
+ "pooling_mode_weightedmean_tokens": false,
8
+ "pooling_mode_lasttoken": false,
9
+ "include_prompt": true
10
+ }
2_Dense/config.json ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ {
2
+ "in_features": 768,
3
+ "out_features": 768,
4
+ "bias": true,
5
+ "activation_function": "torch.nn.modules.activation.Tanh"
6
+ }
2_Dense/model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cbca3add16f6ce5eb01f49f13c20c8067073ba0b489f8f113f7f3e2654c141fa
3
+ size 2362528
README.md ADDED
@@ -0,0 +1,617 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ tags:
3
+ - sentence-transformers
4
+ - sentence-similarity
5
+ - feature-extraction
6
+ - dense
7
+ - generated_from_trainer
8
+ - dataset_size:317
9
+ - loss:MultipleNegativesRankingLoss
10
+ base_model: sentence-transformers/LaBSE
11
+ widget:
12
+ - source_sentence: rab 'byor 'di ji snyam du sems| de bzhin gshegs pas de bzhin gshegs
13
+ pa mar me mdzad las gang bla na med pa yang dag par rdzogs pa'i byang chub mngon
14
+ par rdzogs par sangs rgyas pa'i chos de gang yang yod dam|
15
+ sentences:
16
+ - youn-du čü orošil ügei sedkil öüsken üyiledkü.
17
+ - tere youni tula kemēbēsü. subudi tögünčilen boluqsan zöb nomlon sayitur nomlon
18
+ tögünčilen činartu nomloxu bui. tögünčilen boluqsan buruu nomloxu busuyin tula-da.
19
+ - subudi öün-dü you sedkikü. tögünčilen boluqsan dhi paṃ ҟa ra-ēce tögünčilen boluqsani
20
+ ali dēre ügei sayitur dousuqsan bodhidu. ilerkei dousun burxan boluqsan ali
21
+ tere nom bui kemēn sedkikü buyu. teyin kemēn zarliq boluqsan-du. burxan-du
22
+ nasu tögüs subudi eyin kemēn ayiladxabai. ilaγun tögüsüqsen tögünčilen boluqsan
23
+ dhi-paṃ-ҟara burxan-ēce tögünčilen boluqsani ali dēre ügei sayitur dousuqsan
24
+ bodhi-du ilerkei dousun burxan boluqsan nom tere oγōto ügei. eyin kemēn ayiladxaqsan-du. burxan
25
+ nasu tögüs subudidu eyin kemēn zarliq bolboi. subudi tere tögünčilen tere tögünčilen
26
+ küq tögünčilen boluqsan dhi-paṃ-ҟa-raēce tögünčilen boluqsani ali dēre ügei
27
+ sayitur dousuqsan bodhi-du ilerkei dousun burxan boluqsan nom tere oγōto ügei subudi
28
+ kerbe tögünčilen boluqsan ali ilerkei dousun burxan boluqsan nom zarim bui bolxulā.
29
+ tögünčilen boluqsan dhi paṃ ҟa ra. nada biraman küböün či irē ödüi caqtu. tögünčilen
30
+ boluqsan dayini darun sayitur dousuqsan šakyamuni burxan kemēkü bolxu kemēn esi
31
+ ülü üzüülkü atala. subudi ene metü tögünčilen boluqsan ali dēre ügei sayitur dousuqsan
32
+ bodhi-du ilerkei dousun burxan boluqsan nom tere oγōto ügei töüni tula tögünčilen
33
+ boluqsan dhi-paṃ-ҟa-ra. nada biraman küböün či irē ödüi caqtu. tögünčilen boluqsan dayini
34
+ darun sayitur dousuqsan šakyamuni burxan kemēkü bolxu kemēn eši üzüülbei. tere
35
+ youni tula kemēbēsü. subudi tögünčilen boluqsan kemēkü inu. ünen tögünčilen
36
+ činariyin üge xadaqsan müni tulada. subudi ali zarim eyin kemēn. tögünčilen
37
+ boluqsan dayini darun sayitur dousuqsan burxan. dēre ügei sayitur dousuqsan
38
+ bodhi-du ilerkei dousun burxan bolboi kemēn öguulekülē tere buruu ögüülekü mün.
39
+ - source_sentence: 'de ci''i phyir zhe na| rab ''byor gal te byang chub sems dpa''
40
+ sems dpa'' chen po de dag chos su ''du shes ''jug na de nyid de dag gi bdag tu
41
+ ''dzin par ''gyur zhing| sems can du ''dzin pa dang| srog tu ''dzin pa dang| gang
42
+ zag tu ''dzin par ''gyur ba''i phyir ro. '
43
+ sentences:
44
+ - 'tere youni tula kemēbēsü. subudi kerbe bodhi-sadv mahā-sadv tede nom kemēn
45
+ xurān meden üyiledkülē tede töüni bidu barixu bolun. amitan-du barin amin-du barin
46
+ budγali-du barixu bolxuyin tulada. '
47
+ - tere youni tula kemēbēsü. subudi oroni zoҟōl-noγoud oroni zoҟōl-noγoud kemēkü.
48
+ zoҟōl tede ügei kemēn tögünčilen boluqsan nomloqsoni tulada. töüni tula oroni
49
+ zoҟōl-noγoud kemēyü.
50
+ - ilaγün tögüsüqsen Ānanda-du zarliq bolboi
51
+ - source_sentence: bcom ldan 'das kyis bka' stsal pa| yang rab 'byor skyes pa'am|
52
+ bud med gang gis lus gang gā'i klung gi bye ma snyed yongs su gtong ba bas gang
53
+ gis chos kyi rnam grangs 'di las tha na tshig bzhi pa'i tshigs su bcad pa tsam
54
+ bzung ste| gzhan dag la yang bstan na de gzhi de las bsod nams ches mang du grangs
55
+ med dpag tu med pa bskyed do.
56
+ sentences:
57
+ - 'teyin kemēn ayiladxaqsan-du burxan nasu tögüs subudi-du eyin kemēn zarliq
58
+ bolbui. '
59
+ - 'tere youni tula kemēbēsü. subudi alii amitan-du xurān medekülē. töüni bodhi-sadv
60
+ kemēn ülü ögüüleküyin tulada. '
61
+ - burxan zarliq bolboi. subudi ere buyu eme zarim γangγa müreni xumakiyin tödüi
62
+ beye oγōto ögüqsen-ēce. ken nomiyin züyil öünēce yadaba čü dörbön ügetü šülügiyin
63
+ tödüi toqtōǰi. busudtu zöb üzüükülē tere oron töün-ēce tōloši ügei caqlaši
64
+ ügei buyan maši ülemǰi öüskekü..
65
+ - source_sentence: da yang sangs rgyas spyan drangs nas chos thos ma thag tub yang
66
+ chung thob par gyur to
67
+ sentences:
68
+ - tere youni tula kemēbēsü. subudi sedkiliyin ürgülǰi sedkiliyin ürgülǰi kemēküi.
69
+ töüni ürgülǰi ügegüye tögünčilen boluqsan nomloqsoni tulada. töüni tula sedkiliyin
70
+ ürgülǰi kemēn ögüüleyü.
71
+ - 'subudi tögünčilen baroun kigēd šinggeküi zöün kigēd dēdü dorodu züq zügiyin zabsar-luγā
72
+ arban zügiyin oqtorγuyin kemǰē caqla kemǰikü kilbar kemēkü buyu. '
73
+ - 'ödügē basa burxani zalād nom sonosōd saca bodhi-yi olun üyiledbei '
74
+ - source_sentence: 'de ci''i phyir zhe na| rab ''byor gal te byang chub sems dpa''
75
+ sems can du ''du shes ''jug na| de byang chub sems dpa'' zhes mi bya ba''i phyir
76
+ ro. '
77
+ sentences:
78
+ - tere youni tula kemēbēsü. subudi dēdü činadu kürüqsen öüni tögünčilen boluqsan
79
+ zarluq bolboi.
80
+ - ' tere youni tula kemēbēsü. subudi kerbe bodhi-sadvnar amitan-du xurān medekülē.
81
+ töüni bodhi-sadv kemēn ülü ögüüleküyin tulada. '
82
+ - subudi basa nomiyin züyil ene sedkiši ügei adalidxaši ügei. öüni bolbosuraqsan
83
+ üre-yi čü sedkiši ügegüye uxan üyiled. nomiyin züyil öüni dēdü külgün-dü sayitur
84
+ oroqson amitan-noγoudiyin tusa kigēd ketürkei boluqsan külgün-du sayaitur oroqson
85
+ amitan-noγoudiyin tusayin tula tögünčilen boluqsan nomloboi.
86
+ pipeline_tag: sentence-similarity
87
+ library_name: sentence-transformers
88
+ ---
89
+
90
+ # SentenceTransformer based on sentence-transformers/LaBSE
91
+
92
+ This is a [sentence-transformers](https://www.SBERT.net) model finetuned from [sentence-transformers/LaBSE](https://huggingface.co/sentence-transformers/LaBSE). It maps sentences & paragraphs to a 768-dimensional dense vector space and can be used for semantic textual similarity, semantic search, paraphrase mining, text classification, clustering, and more.
93
+
94
+ ## Model Details
95
+
96
+ ### Model Description
97
+ - **Model Type:** Sentence Transformer
98
+ - **Base model:** [sentence-transformers/LaBSE](https://huggingface.co/sentence-transformers/LaBSE) <!-- at revision 836121a0533e5664b21c7aacc5d22951f2b8b25b -->
99
+ - **Maximum Sequence Length:** 256 tokens
100
+ - **Output Dimensionality:** 768 dimensions
101
+ - **Similarity Function:** Cosine Similarity
102
+ <!-- - **Training Dataset:** Unknown -->
103
+ <!-- - **Language:** Unknown -->
104
+ <!-- - **License:** Unknown -->
105
+
106
+ ### Model Sources
107
+
108
+ - **Documentation:** [Sentence Transformers Documentation](https://sbert.net)
109
+ - **Repository:** [Sentence Transformers on GitHub](https://github.com/UKPLab/sentence-transformers)
110
+ - **Hugging Face:** [Sentence Transformers on Hugging Face](https://huggingface.co/models?library=sentence-transformers)
111
+
112
+ ### Full Model Architecture
113
+
114
+ ```
115
+ SentenceTransformer(
116
+ (0): Transformer({'max_seq_length': 256, 'do_lower_case': False, 'architecture': 'BertModel'})
117
+ (1): Pooling({'word_embedding_dimension': 768, 'pooling_mode_cls_token': True, 'pooling_mode_mean_tokens': False, 'pooling_mode_max_tokens': False, 'pooling_mode_mean_sqrt_len_tokens': False, 'pooling_mode_weightedmean_tokens': False, 'pooling_mode_lasttoken': False, 'include_prompt': True})
118
+ (2): Dense({'in_features': 768, 'out_features': 768, 'bias': True, 'activation_function': 'torch.nn.modules.activation.Tanh'})
119
+ (3): Normalize()
120
+ )
121
+ ```
122
+
123
+ ## Usage
124
+
125
+ ### Direct Usage (Sentence Transformers)
126
+
127
+ First install the Sentence Transformers library:
128
+
129
+ ```bash
130
+ pip install -U sentence-transformers
131
+ ```
132
+
133
+ Then you can load this model and run inference.
134
+ ```python
135
+ from sentence_transformers import SentenceTransformer
136
+
137
+ # Download from the 🤗 Hub
138
+ model = SentenceTransformer("sentence_transformers_model_id")  # replace with this model's Hub repo id
139
+ # Run inference
140
+ sentences = [
141
+ "de ci'i phyir zhe na| rab 'byor gal te byang chub sems dpa' sems can du 'du shes 'jug na| de byang chub sems dpa' zhes mi bya ba'i phyir ro. ",
142
+ ' tere youni tula kemēbēsü. subudi kerbe bodhi-sadvnar amitan-du xurān medekülē. töüni bodhi-sadv kemēn ülü ögüüleküyin tulada. ',
143
+ 'tere youni tula kemēbēsü. subudi dēdü činadu kürüqsen öüni tögünčilen boluqsan zarluq bolboi.',
144
+ ]
145
+ embeddings = model.encode(sentences)
146
+ print(embeddings.shape)
147
+ # (3, 768)
148
+
149
+ # Get the similarity scores for the embeddings
150
+ similarities = model.similarity(embeddings, embeddings)
151
+ print(similarities)
152
+ # tensor([[1.0000, 0.7387, 0.5014],
153
+ # [0.7387, 1.0000, 0.6236],
154
+ # [0.5014, 0.6236, 1.0000]])
155
+ ```
156
+
157
+ <!--
158
+ ### Direct Usage (Transformers)
159
+
160
+ <details><summary>Click to see the direct usage in Transformers</summary>
161
+
162
+ </details>
163
+ -->
164
+
165
+ <!--
166
+ ### Downstream Usage (Sentence Transformers)
167
+
168
+ You can finetune this model on your own dataset.
169
+
170
+ <details><summary>Click to expand</summary>
171
+
172
+ </details>
173
+ -->
174
+
175
+ <!--
176
+ ### Out-of-Scope Use
177
+
178
+ *List how the model may foreseeably be misused and address what users ought not to do with the model.*
179
+ -->
180
+
181
+ <!--
182
+ ## Bias, Risks and Limitations
183
+
184
+ *What are the known or foreseeable issues stemming from this model? You could also flag here known failure cases or weaknesses of the model.*
185
+ -->
186
+
187
+ <!--
188
+ ### Recommendations
189
+
190
+ *What are recommendations with respect to the foreseeable issues? For example, filtering explicit content.*
191
+ -->
192
+
193
+ ## Training Details
194
+
195
+ ### Training Dataset
196
+
197
+ #### Unnamed Dataset
198
+
199
+ * Size: 317 training samples
200
+ * Columns: <code>sentence_0</code>, <code>sentence_1</code>, and <code>label</code>
201
+ * Approximate statistics based on the first 317 samples:
202
+ | | sentence_0 | sentence_1 | label |
203
+ |:--------|:------------------------------------------------------------------------------------|:-----------------------------------------------------------------------------------|:--------------------------------------------------------------|
204
+ | type | string | string | float |
205
+ | details | <ul><li>min: 11 tokens</li><li>mean: 62.13 tokens</li><li>max: 256 tokens</li></ul> | <ul><li>min: 9 tokens</li><li>mean: 57.18 tokens</li><li>max: 256 tokens</li></ul> | <ul><li>min: 1.0</li><li>mean: 1.0</li><li>max: 1.0</li></ul> |
206
+ * Samples:
207
+ | sentence_0 | sentence_1 | label |
208
+ |:------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|:------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|:-----------------|
209
+ | <code>de ci'i phyir zhe na\| rab 'byor de bzhin gshegs pas gang bkrol ba'i sems can de dag gang yang med pa'i phyir ro.</code> | <code>tere youni tula kemēbēsü. subudi tögünčilen boluqsani ali tonilγoxui amitan tede aliba ügeyin tulada. subudi kerbe tögünčilen boluqsan ali zarim amitani tonilγoxulā. tögünčilen boluqsan bidü barixu boluyu. amitandu barin amin-du barin budγali barixu boluyu.</code> | <code>1.0</code> |
210
+ | <code>yang rab 'byor ma 'ongs pa'i dus lnga brgya tha ma la dam pa'i chos rab tu rnam par 'jig par 'gyur ba na byang chub sems dpa' sems dpa' chen po tshul khrims dang ldan pa\| yon tan dang ldan pa\| shes rab dang ldan pa dag 'byung ste\| </code> | <code>subudi irē ödüi ecüs tabun zouni caqtu dēdü nom maši ebderekui-dü. šaqšabādtai erdemtei biliq tögüsüqsen bodhi-sadv mahā-sadv-nar törökü. </code> | <code>1.0</code> |
211
+ | <code>bcom ldan 'das kyis bka' stsal pa\| rab 'byor khyod 'di skad du ma 'ongs pa'i dus lnga brgya tha ma la dam pa'i chos rab tu rnam par 'jig par 'gyur ba na sems can gang la la dag 'di lta bu'i mdo sde'i tshig bshad pa 'di la yang dag par 'du shes skyed par 'gyur ba mchis sam zhes ma zer cig\| </code> | <code>burxan zarliq bolboi. subudi či eyin kemēn irē ödüi ēcüs tabun zöüni caqtu dēdü nom maši ebdereküi-dü. ali zarim amitan ene metü suduriyin ayimagiyin üge nomloxuyidu ünēr xurān medeküi öüsken üyiledkü bui buyu kemēn tere metü bu ögüüle. </code> | <code>1.0</code> |
212
+ * Loss: [<code>MultipleNegativesRankingLoss</code>](https://sbert.net/docs/package_reference/sentence_transformer/losses.html#multiplenegativesrankingloss) with these parameters:
213
+ ```json
214
+ {
215
+ "scale": 20.0,
216
+ "similarity_fct": "cos_sim",
217
+ "gather_across_devices": false
218
+ }
219
+ ```
220
+
221
+ ### Training Hyperparameters
222
+ #### Non-Default Hyperparameters
223
+
224
+ - `eval_strategy`: steps
225
+ - `per_device_train_batch_size`: 6
226
+ - `per_device_eval_batch_size`: 6
227
+ - `num_train_epochs`: 25
228
+ - `fp16`: True
229
+ - `multi_dataset_batch_sampler`: round_robin
230
+
231
+ #### All Hyperparameters
232
+ <details><summary>Click to expand</summary>
233
+
234
+ - `overwrite_output_dir`: False
235
+ - `do_predict`: False
236
+ - `eval_strategy`: steps
237
+ - `prediction_loss_only`: True
238
+ - `per_device_train_batch_size`: 6
239
+ - `per_device_eval_batch_size`: 6
240
+ - `per_gpu_train_batch_size`: None
241
+ - `per_gpu_eval_batch_size`: None
242
+ - `gradient_accumulation_steps`: 1
243
+ - `eval_accumulation_steps`: None
244
+ - `torch_empty_cache_steps`: None
245
+ - `learning_rate`: 5e-05
246
+ - `weight_decay`: 0.0
247
+ - `adam_beta1`: 0.9
248
+ - `adam_beta2`: 0.999
249
+ - `adam_epsilon`: 1e-08
250
+ - `max_grad_norm`: 1
251
+ - `num_train_epochs`: 25
252
+ - `max_steps`: -1
253
+ - `lr_scheduler_type`: linear
254
+ - `lr_scheduler_kwargs`: {}
255
+ - `warmup_ratio`: 0.0
256
+ - `warmup_steps`: 0
257
+ - `log_level`: passive
258
+ - `log_level_replica`: warning
259
+ - `log_on_each_node`: True
260
+ - `logging_nan_inf_filter`: True
261
+ - `save_safetensors`: True
262
+ - `save_on_each_node`: False
263
+ - `save_only_model`: False
264
+ - `restore_callback_states_from_checkpoint`: False
265
+ - `no_cuda`: False
266
+ - `use_cpu`: False
267
+ - `use_mps_device`: False
268
+ - `seed`: 42
269
+ - `data_seed`: None
270
+ - `jit_mode_eval`: False
271
+ - `use_ipex`: False
272
+ - `bf16`: False
273
+ - `fp16`: True
274
+ - `fp16_opt_level`: O1
275
+ - `half_precision_backend`: auto
276
+ - `bf16_full_eval`: False
277
+ - `fp16_full_eval`: False
278
+ - `tf32`: None
279
+ - `local_rank`: 0
280
+ - `ddp_backend`: None
281
+ - `tpu_num_cores`: None
282
+ - `tpu_metrics_debug`: False
283
+ - `debug`: []
284
+ - `dataloader_drop_last`: False
285
+ - `dataloader_num_workers`: 0
286
+ - `dataloader_prefetch_factor`: None
287
+ - `past_index`: -1
288
+ - `disable_tqdm`: False
289
+ - `remove_unused_columns`: True
290
+ - `label_names`: None
291
+ - `load_best_model_at_end`: False
292
+ - `ignore_data_skip`: False
293
+ - `fsdp`: []
294
+ - `fsdp_min_num_params`: 0
295
+ - `fsdp_config`: {'min_num_params': 0, 'xla': False, 'xla_fsdp_v2': False, 'xla_fsdp_grad_ckpt': False}
296
+ - `fsdp_transformer_layer_cls_to_wrap`: None
297
+ - `accelerator_config`: {'split_batches': False, 'dispatch_batches': None, 'even_batches': True, 'use_seedable_sampler': True, 'non_blocking': False, 'gradient_accumulation_kwargs': None}
298
+ - `parallelism_config`: None
299
+ - `deepspeed`: None
300
+ - `label_smoothing_factor`: 0.0
301
+ - `optim`: adamw_torch_fused
302
+ - `optim_args`: None
303
+ - `adafactor`: False
304
+ - `group_by_length`: False
305
+ - `length_column_name`: length
306
+ - `ddp_find_unused_parameters`: None
307
+ - `ddp_bucket_cap_mb`: None
308
+ - `ddp_broadcast_buffers`: False
309
+ - `dataloader_pin_memory`: True
310
+ - `dataloader_persistent_workers`: False
311
+ - `skip_memory_metrics`: True
312
+ - `use_legacy_prediction_loop`: False
313
+ - `push_to_hub`: False
314
+ - `resume_from_checkpoint`: None
315
+ - `hub_model_id`: None
316
+ - `hub_strategy`: every_save
317
+ - `hub_private_repo`: None
318
+ - `hub_always_push`: False
319
+ - `hub_revision`: None
320
+ - `gradient_checkpointing`: False
321
+ - `gradient_checkpointing_kwargs`: None
322
+ - `include_inputs_for_metrics`: False
323
+ - `include_for_metrics`: []
324
+ - `eval_do_concat_batches`: True
325
+ - `fp16_backend`: auto
326
+ - `push_to_hub_model_id`: None
327
+ - `push_to_hub_organization`: None
328
+ - `mp_parameters`:
329
+ - `auto_find_batch_size`: False
330
+ - `full_determinism`: False
331
+ - `torchdynamo`: None
332
+ - `ray_scope`: last
333
+ - `ddp_timeout`: 1800
334
+ - `torch_compile`: False
335
+ - `torch_compile_backend`: None
336
+ - `torch_compile_mode`: None
337
+ - `include_tokens_per_second`: False
338
+ - `include_num_input_tokens_seen`: False
339
+ - `neftune_noise_alpha`: None
340
+ - `optim_target_modules`: None
341
+ - `batch_eval_metrics`: False
342
+ - `eval_on_start`: False
343
+ - `use_liger_kernel`: False
344
+ - `liger_kernel_config`: None
345
+ - `eval_use_gather_object`: False
346
+ - `average_tokens_across_devices`: False
347
+ - `prompts`: None
348
+ - `batch_sampler`: batch_sampler
349
+ - `multi_dataset_batch_sampler`: round_robin
350
+ - `router_mapping`: {}
351
+ - `learning_rate_mapping`: {}
352
+
353
+ </details>
354
+
355
+ ### Training Logs
356
+ <details><summary>Click to expand</summary>
357
+
358
+ | Epoch | Step | Training Loss |
359
+ |:-------:|:----:|:-------------:|
360
+ | 0.0566 | 3 | - |
361
+ | 0.1132 | 6 | - |
362
+ | 0.1698 | 9 | - |
363
+ | 0.2264 | 12 | - |
364
+ | 0.2830 | 15 | - |
365
+ | 0.3396 | 18 | - |
366
+ | 0.3962 | 21 | - |
367
+ | 0.4528 | 24 | - |
368
+ | 0.5094 | 27 | - |
369
+ | 0.5660 | 30 | - |
370
+ | 0.6226 | 33 | - |
371
+ | 0.6792 | 36 | - |
372
+ | 0.7358 | 39 | - |
373
+ | 0.7925 | 42 | - |
374
+ | 0.8491 | 45 | - |
375
+ | 0.9057 | 48 | - |
376
+ | 0.9623 | 51 | - |
377
+ | 1.0 | 53 | - |
378
+ | 1.0189 | 54 | - |
379
+ | 1.0755 | 57 | - |
380
+ | 1.1321 | 60 | - |
381
+ | 1.1887 | 63 | - |
382
+ | 1.2453 | 66 | - |
383
+ | 1.3019 | 69 | - |
384
+ | 1.3585 | 72 | - |
385
+ | 1.4151 | 75 | - |
386
+ | 1.4717 | 78 | - |
387
+ | 1.5283 | 81 | - |
388
+ | 1.5849 | 84 | - |
389
+ | 1.6415 | 87 | - |
390
+ | 1.6981 | 90 | - |
391
+ | 1.7547 | 93 | - |
392
+ | 1.8113 | 96 | - |
393
+ | 1.8679 | 99 | - |
394
+ | 1.9245 | 102 | - |
395
+ | 1.9811 | 105 | - |
396
+ | 2.0 | 106 | - |
397
+ | 2.0377 | 108 | - |
398
+ | 2.0943 | 111 | - |
399
+ | 2.1509 | 114 | - |
400
+ | 2.2075 | 117 | - |
401
+ | 2.2642 | 120 | - |
402
+ | 2.3208 | 123 | - |
403
+ | 2.3774 | 126 | - |
404
+ | 2.4340 | 129 | - |
405
+ | 2.4906 | 132 | - |
406
+ | 2.5472 | 135 | - |
407
+ | 2.6038 | 138 | - |
408
+ | 2.6604 | 141 | - |
409
+ | 2.7170 | 144 | - |
410
+ | 2.7736 | 147 | - |
411
+ | 2.8302 | 150 | - |
412
+ | 2.8868 | 153 | - |
413
+ | 2.9434 | 156 | - |
414
+ | 3.0 | 159 | - |
415
+ | 3.0566 | 162 | - |
416
+ | 3.1132 | 165 | - |
417
+ | 3.1698 | 168 | - |
418
+ | 3.2264 | 171 | - |
419
+ | 3.2830 | 174 | - |
420
+ | 3.3396 | 177 | - |
421
+ | 3.3962 | 180 | - |
422
+ | 3.4528 | 183 | - |
423
+ | 3.5094 | 186 | - |
424
+ | 3.5660 | 189 | - |
425
+ | 3.6226 | 192 | - |
426
+ | 3.6792 | 195 | - |
427
+ | 3.7358 | 198 | - |
428
+ | 3.7925 | 201 | - |
429
+ | 3.8491 | 204 | - |
430
+ | 3.9057 | 207 | - |
431
+ | 3.9623 | 210 | - |
432
+ | 4.0 | 212 | - |
433
+ | 4.0189 | 213 | - |
434
+ | 4.0755 | 216 | - |
435
+ | 4.1321 | 219 | - |
436
+ | 4.1887 | 222 | - |
437
+ | 4.2453 | 225 | - |
438
+ | 4.3019 | 228 | - |
439
+ | 4.3585 | 231 | - |
440
+ | 4.4151 | 234 | - |
441
+ | 4.4717 | 237 | - |
442
+ | 4.5283 | 240 | - |
443
+ | 4.5849 | 243 | - |
444
+ | 4.6415 | 246 | - |
445
+ | 4.6981 | 249 | - |
446
+ | 4.7547 | 252 | - |
447
+ | 4.8113 | 255 | - |
448
+ | 4.8679 | 258 | - |
449
+ | 4.9245 | 261 | - |
450
+ | 4.9811 | 264 | - |
451
+ | 5.0 | 265 | - |
452
+ | 5.0377 | 267 | - |
453
+ | 5.0943 | 270 | - |
454
+ | 5.1509 | 273 | - |
455
+ | 5.2075 | 276 | - |
456
+ | 5.2642 | 279 | - |
457
+ | 5.3208 | 282 | - |
458
+ | 5.3774 | 285 | - |
459
+ | 5.4340 | 288 | - |
460
+ | 5.4906 | 291 | - |
461
+ | 5.5472 | 294 | - |
462
+ | 5.6038 | 297 | - |
463
+ | 5.6604 | 300 | - |
464
+ | 5.7170 | 303 | - |
465
+ | 5.7736 | 306 | - |
466
+ | 5.8302 | 309 | - |
467
+ | 5.8868 | 312 | - |
468
+ | 5.9434 | 315 | - |
469
+ | 6.0 | 318 | - |
470
+ | 6.0566 | 321 | - |
471
+ | 6.1132 | 324 | - |
472
+ | 6.1698 | 327 | - |
473
+ | 6.2264 | 330 | - |
474
+ | 6.2830 | 333 | - |
475
+ | 6.3396 | 336 | - |
476
+ | 6.3962 | 339 | - |
477
+ | 6.4528 | 342 | - |
478
+ | 6.5094 | 345 | - |
479
+ | 6.5660 | 348 | - |
480
+ | 6.6226 | 351 | - |
481
+ | 6.6792 | 354 | - |
482
+ | 6.7358 | 357 | - |
483
+ | 6.7925 | 360 | - |
484
+ | 6.8491 | 363 | - |
485
+ | 6.9057 | 366 | - |
486
+ | 6.9623 | 369 | - |
487
+ | 7.0 | 371 | - |
488
+ | 7.0189 | 372 | - |
489
+ | 7.0755 | 375 | - |
490
+ | 7.1321 | 378 | - |
491
+ | 7.1887 | 381 | - |
492
+ | 7.2453 | 384 | - |
493
+ | 7.3019 | 387 | - |
494
+ | 7.3585 | 390 | - |
495
+ | 7.4151 | 393 | - |
496
+ | 7.4717 | 396 | - |
497
+ | 7.5283 | 399 | - |
498
+ | 7.5849 | 402 | - |
499
+ | 7.6415 | 405 | - |
500
+ | 7.6981 | 408 | - |
501
+ | 7.7547 | 411 | - |
502
+ | 7.8113 | 414 | - |
503
+ | 7.8679 | 417 | - |
504
+ | 7.9245 | 420 | - |
505
+ | 7.9811 | 423 | - |
506
+ | 8.0 | 424 | - |
507
+ | 8.0377 | 426 | - |
508
+ | 8.0943 | 429 | - |
509
+ | 8.1509 | 432 | - |
510
+ | 8.2075 | 435 | - |
511
+ | 8.2642 | 438 | - |
512
+ | 8.3208 | 441 | - |
513
+ | 8.3774 | 444 | - |
514
+ | 8.4340 | 447 | - |
515
+ | 8.4906 | 450 | - |
516
+ | 8.5472 | 453 | - |
517
+ | 8.6038 | 456 | - |
518
+ | 8.6604 | 459 | - |
519
+ | 8.7170 | 462 | - |
520
+ | 8.7736 | 465 | - |
521
+ | 8.8302 | 468 | - |
522
+ | 8.8868 | 471 | - |
523
+ | 8.9434 | 474 | - |
524
+ | 9.0 | 477 | - |
525
+ | 9.0566 | 480 | - |
526
+ | 9.1132 | 483 | - |
527
+ | 9.1698 | 486 | - |
528
+ | 9.2264 | 489 | - |
529
+ | 9.2830 | 492 | - |
530
+ | 9.3396 | 495 | - |
531
+ | 9.3962 | 498 | - |
532
+ | 9.4340 | 500 | 0.6328 |
533
+ | 9.4528 | 501 | - |
534
+ | 9.5094 | 504 | - |
535
+ | 9.5660 | 507 | - |
536
+ | 9.6226 | 510 | - |
537
+ | 9.6792 | 513 | - |
538
+ | 9.7358 | 516 | - |
539
+ | 9.7925 | 519 | - |
540
+ | 9.8491 | 522 | - |
541
+ | 9.9057 | 525 | - |
542
+ | 9.9623 | 528 | - |
543
+ | 10.0 | 530 | - |
544
+ | 10.0189 | 531 | - |
545
+ | 10.0755 | 534 | - |
546
+ | 10.1321 | 537 | - |
547
+ | 10.1887 | 540 | - |
548
+ | 10.2453 | 543 | - |
549
+ | 10.3019 | 546 | - |
550
+ | 10.3585 | 549 | - |
551
+ | 10.4151 | 552 | - |
552
+ | 10.4717 | 555 | - |
553
+ | 10.5283 | 558 | - |
554
+ | 10.5849 | 561 | - |
555
+ | 10.6415 | 564 | - |
556
+ | 10.6981 | 567 | - |
557
+ | 10.7547 | 570 | - |
558
+ | 10.8113 | 573 | - |
559
+ | 10.8679 | 576 | - |
560
+
561
+ </details>
562
+
563
+ ### Framework Versions
564
+ - Python: 3.12.11
565
+ - Sentence Transformers: 5.1.0
566
+ - Transformers: 4.56.0
567
+ - PyTorch: 2.8.0+cu126
568
+ - Accelerate: 1.10.1
569
+ - Datasets: 4.0.0
570
+ - Tokenizers: 0.22.0
571
+
572
+ ## Citation
573
+
574
+ ### BibTeX
575
+
576
+ #### Sentence Transformers
577
+ ```bibtex
578
+ @inproceedings{reimers-2019-sentence-bert,
579
+ title = "Sentence-BERT: Sentence Embeddings using Siamese BERT-Networks",
580
+ author = "Reimers, Nils and Gurevych, Iryna",
581
+ booktitle = "Proceedings of the 2019 Conference on Empirical Methods in Natural Language Processing",
582
+ month = "11",
583
+ year = "2019",
584
+ publisher = "Association for Computational Linguistics",
585
+ url = "https://arxiv.org/abs/1908.10084",
586
+ }
587
+ ```
588
+
589
+ #### MultipleNegativesRankingLoss
590
+ ```bibtex
591
+ @misc{henderson2017efficient,
592
+ title={Efficient Natural Language Response Suggestion for Smart Reply},
593
+ author={Matthew Henderson and Rami Al-Rfou and Brian Strope and Yun-hsuan Sung and Laszlo Lukacs and Ruiqi Guo and Sanjiv Kumar and Balint Miklos and Ray Kurzweil},
594
+ year={2017},
595
+ eprint={1705.00652},
596
+ archivePrefix={arXiv},
597
+ primaryClass={cs.CL}
598
+ }
599
+ ```
600
+
601
+ <!--
602
+ ## Glossary
603
+
604
+ *Clearly define terms in order to be accessible across audiences.*
605
+ -->
606
+
607
+ <!--
608
+ ## Model Card Authors
609
+
610
+ *Lists the people who create the model card, providing recognition and accountability for the detailed work that goes into its construction.*
611
+ -->
612
+
613
+ <!--
614
+ ## Model Card Contact
615
+
616
+ *Provides a way for people who have updates to the Model Card, suggestions, or questions, to contact the Model Card authors.*
617
+ -->
config.json ADDED
@@ -0,0 +1,31 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "architectures": [
3
+ "BertModel"
4
+ ],
5
+ "attention_probs_dropout_prob": 0.1,
6
+ "classifier_dropout": null,
7
+ "directionality": "bidi",
8
+ "dtype": "float32",
9
+ "gradient_checkpointing": false,
10
+ "hidden_act": "gelu",
11
+ "hidden_dropout_prob": 0.1,
12
+ "hidden_size": 768,
13
+ "initializer_range": 0.02,
14
+ "intermediate_size": 3072,
15
+ "layer_norm_eps": 1e-12,
16
+ "max_position_embeddings": 512,
17
+ "model_type": "bert",
18
+ "num_attention_heads": 12,
19
+ "num_hidden_layers": 12,
20
+ "pad_token_id": 0,
21
+ "pooler_fc_size": 768,
22
+ "pooler_num_attention_heads": 12,
23
+ "pooler_num_fc_layers": 3,
24
+ "pooler_size_per_head": 128,
25
+ "pooler_type": "first_token_transform",
26
+ "position_embedding_type": "absolute",
27
+ "transformers_version": "4.56.0",
28
+ "type_vocab_size": 2,
29
+ "use_cache": true,
30
+ "vocab_size": 501153
31
+ }
config_sentence_transformers.json ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "__version__": {
3
+ "sentence_transformers": "5.1.0",
4
+ "transformers": "4.56.0",
5
+ "pytorch": "2.8.0+cu126"
6
+ },
7
+ "model_type": "SentenceTransformer",
8
+ "prompts": {
9
+ "query": "",
10
+ "document": ""
11
+ },
12
+ "default_prompt_name": null,
13
+ "similarity_fn_name": "cosine"
14
+ }
model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e6e2951a46ba231936b0b5b4a751869612128ba4461faf827a75a0992d2f952d
3
+ size 1883730160
modules.json ADDED
@@ -0,0 +1,26 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [
2
+ {
3
+ "idx": 0,
4
+ "name": "0",
5
+ "path": "",
6
+ "type": "sentence_transformers.models.Transformer"
7
+ },
8
+ {
9
+ "idx": 1,
10
+ "name": "1",
11
+ "path": "1_Pooling",
12
+ "type": "sentence_transformers.models.Pooling"
13
+ },
14
+ {
15
+ "idx": 2,
16
+ "name": "2",
17
+ "path": "2_Dense",
18
+ "type": "sentence_transformers.models.Dense"
19
+ },
20
+ {
21
+ "idx": 3,
22
+ "name": "3",
23
+ "path": "3_Normalize",
24
+ "type": "sentence_transformers.models.Normalize"
25
+ }
26
+ ]
sentence_bert_config.json ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ {
2
+ "max_seq_length": 256,
3
+ "do_lower_case": false
4
+ }
special_tokens_map.json ADDED
@@ -0,0 +1,37 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "cls_token": {
3
+ "content": "[CLS]",
4
+ "lstrip": false,
5
+ "normalized": false,
6
+ "rstrip": false,
7
+ "single_word": false
8
+ },
9
+ "mask_token": {
10
+ "content": "[MASK]",
11
+ "lstrip": false,
12
+ "normalized": false,
13
+ "rstrip": false,
14
+ "single_word": false
15
+ },
16
+ "pad_token": {
17
+ "content": "[PAD]",
18
+ "lstrip": false,
19
+ "normalized": false,
20
+ "rstrip": false,
21
+ "single_word": false
22
+ },
23
+ "sep_token": {
24
+ "content": "[SEP]",
25
+ "lstrip": false,
26
+ "normalized": false,
27
+ "rstrip": false,
28
+ "single_word": false
29
+ },
30
+ "unk_token": {
31
+ "content": "[UNK]",
32
+ "lstrip": false,
33
+ "normalized": false,
34
+ "rstrip": false,
35
+ "single_word": false
36
+ }
37
+ }
tokenizer.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:92262b29204f8fdc169a63f9005a0e311a16262cef4d96ecfe2a7ed638662ed3
3
+ size 13632172
tokenizer_config.json ADDED
@@ -0,0 +1,59 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "added_tokens_decoder": {
3
+ "0": {
4
+ "content": "[PAD]",
5
+ "lstrip": false,
6
+ "normalized": false,
7
+ "rstrip": false,
8
+ "single_word": false,
9
+ "special": true
10
+ },
11
+ "100": {
12
+ "content": "[UNK]",
13
+ "lstrip": false,
14
+ "normalized": false,
15
+ "rstrip": false,
16
+ "single_word": false,
17
+ "special": true
18
+ },
19
+ "101": {
20
+ "content": "[CLS]",
21
+ "lstrip": false,
22
+ "normalized": false,
23
+ "rstrip": false,
24
+ "single_word": false,
25
+ "special": true
26
+ },
27
+ "102": {
28
+ "content": "[SEP]",
29
+ "lstrip": false,
30
+ "normalized": false,
31
+ "rstrip": false,
32
+ "single_word": false,
33
+ "special": true
34
+ },
35
+ "103": {
36
+ "content": "[MASK]",
37
+ "lstrip": false,
38
+ "normalized": false,
39
+ "rstrip": false,
40
+ "single_word": false,
41
+ "special": true
42
+ }
43
+ },
44
+ "clean_up_tokenization_spaces": false,
45
+ "cls_token": "[CLS]",
46
+ "do_basic_tokenize": true,
47
+ "do_lower_case": false,
48
+ "extra_special_tokens": {},
49
+ "full_tokenizer_file": null,
50
+ "mask_token": "[MASK]",
51
+ "model_max_length": 256,
52
+ "never_split": null,
53
+ "pad_token": "[PAD]",
54
+ "sep_token": "[SEP]",
55
+ "strip_accents": null,
56
+ "tokenize_chinese_chars": true,
57
+ "tokenizer_class": "BertTokenizer",
58
+ "unk_token": "[UNK]"
59
+ }
vocab.txt ADDED
The diff for this file is too large to render. See raw diff