Upload folder using huggingface_hub
Browse files- .gitattributes +1 -0
- 1_Pooling/config.json +10 -0
- 2_Dense/config.json +6 -0
- 2_Dense/model.safetensors +3 -0
- README.md +617 -0
- config.json +31 -0
- config_sentence_transformers.json +14 -0
- model.safetensors +3 -0
- modules.json +26 -0
- sentence_bert_config.json +4 -0
- special_tokens_map.json +37 -0
- tokenizer.json +3 -0
- tokenizer_config.json +59 -0
- vocab.txt +0 -0
.gitattributes
CHANGED
|
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
|
| 33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
| 34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
| 35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
|
|
|
|
|
| 33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
| 34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
| 35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
| 36 |
+
tokenizer.json filter=lfs diff=lfs merge=lfs -text
|
1_Pooling/config.json
ADDED
|
@@ -0,0 +1,10 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"word_embedding_dimension": 768,
|
| 3 |
+
"pooling_mode_cls_token": true,
|
| 4 |
+
"pooling_mode_mean_tokens": false,
|
| 5 |
+
"pooling_mode_max_tokens": false,
|
| 6 |
+
"pooling_mode_mean_sqrt_len_tokens": false,
|
| 7 |
+
"pooling_mode_weightedmean_tokens": false,
|
| 8 |
+
"pooling_mode_lasttoken": false,
|
| 9 |
+
"include_prompt": true
|
| 10 |
+
}
|
2_Dense/config.json
ADDED
|
@@ -0,0 +1,6 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"in_features": 768,
|
| 3 |
+
"out_features": 768,
|
| 4 |
+
"bias": true,
|
| 5 |
+
"activation_function": "torch.nn.modules.activation.Tanh"
|
| 6 |
+
}
|
2_Dense/model.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:cbca3add16f6ce5eb01f49f13c20c8067073ba0b489f8f113f7f3e2654c141fa
|
| 3 |
+
size 2362528
|
README.md
ADDED
|
@@ -0,0 +1,617 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
---
|
| 2 |
+
tags:
|
| 3 |
+
- sentence-transformers
|
| 4 |
+
- sentence-similarity
|
| 5 |
+
- feature-extraction
|
| 6 |
+
- dense
|
| 7 |
+
- generated_from_trainer
|
| 8 |
+
- dataset_size:317
|
| 9 |
+
- loss:MultipleNegativesRankingLoss
|
| 10 |
+
base_model: sentence-transformers/LaBSE
|
| 11 |
+
widget:
|
| 12 |
+
- source_sentence: rab 'byor 'di ji snyam du sems| de bzhin gshegs pas de bzhin gshegs
|
| 13 |
+
pa mar me mdzad las gang bla na med pa yang dag par rdzogs pa'i byang chub mngon
|
| 14 |
+
par rdzogs par sangs rgyas pa'i chos de gang yang yod dam|
|
| 15 |
+
sentences:
|
| 16 |
+
- youn-du čü orošil ügei sedkil öüsken üyiledkü.
|
| 17 |
+
- tere youni tula kemēbēsü. subudi tögünčilen boluqsan zöb nomlon sayitur nomlon
|
| 18 |
+
tögünčilen činartu nomloxu bui. tögünčilen boluqsan buruu nomloxu busuyin tula-da.
|
| 19 |
+
- subudi öün-dü you sedkikü. tögünčilen boluqsan dhi paṃ ҟa ra-ēce tögünčilen boluqsani
|
| 20 |
+
ali dēre ügei sayitur dousuqsan bodhidu. ilerkei dousun burxan boluqsan ali
|
| 21 |
+
tere nom bui kemēn sedkikü buyu. teyin kemēn zarliq boluqsan-du. burxan-du
|
| 22 |
+
nasu tögüs subudi eyin kemēn ayiladxabai. ilaγun tögüsüqsen tögünčilen boluqsan
|
| 23 |
+
dhi-paṃ-ҟara burxan-ēce tögünčilen boluqsani ali dēre ügei sayitur dousuqsan
|
| 24 |
+
bodhi-du ilerkei dousun burxan boluqsan nom tere oγōto ügei. eyin kemēn ayiladxaqsan-du. burxan
|
| 25 |
+
nasu tögüs subudidu eyin kemēn zarliq bolboi. subudi tere tögünčilen tere tögünčilen
|
| 26 |
+
küq tögünčilen boluqsan dhi-paṃ-ҟa-raēce tögünčilen boluqsani ali dēre ügei
|
| 27 |
+
sayitur dousuqsan bodhi-du ilerkei dousun burxan boluqsan nom tere oγōto ügei subudi
|
| 28 |
+
kerbe tögünčilen boluqsan ali ilerkei dousun burxan boluqsan nom zarim bui bolxulā.
|
| 29 |
+
tögünčilen boluqsan dhi paṃ ҟa ra. nada biraman küböün či irē ödüi caqtu. tögünčilen
|
| 30 |
+
boluqsan dayini darun sayitur dousuqsan šakyamuni burxan kemēkü bolxu kemēn esi
|
| 31 |
+
ülü üzüülkü atala. subudi ene metü tögünčilen boluqsan ali dēre ügei sayitur dousuqsan
|
| 32 |
+
bodhi-du ilerkei dousun burxan boluqsan nom tere oγōto ügei töüni tula tögünčilen
|
| 33 |
+
boluqsan dhi-paṃ-ҟa-ra. nada biraman küböün či irē ödüi caqtu. tögünčilen boluqsan dayini
|
| 34 |
+
darun sayitur dousuqsan šakyamuni burxan kemēkü bolxu kemēn eši üzüülbei. tere
|
| 35 |
+
youni tula kemēbēsü. subudi tögünčilen boluqsan kemēkü inu. ünen tögünčilen
|
| 36 |
+
činariyin üge xadaqsan müni tulada. subudi ali zarim eyin kemēn. tögünčilen
|
| 37 |
+
boluqsan dayini darun sayitur dousuqsan burxan. dēre ügei sayitur dousuqsan
|
| 38 |
+
bodhi-du ilerkei dousun burxan bolboi kemēn öguulekülē tere buruu ögüülekü mün.
|
| 39 |
+
- source_sentence: 'de ci''i phyir zhe na| rab ''byor gal te byang chub sems dpa''
|
| 40 |
+
sems dpa'' chen po de dag chos su ''du shes ''jug na de nyid de dag gi bdag tu
|
| 41 |
+
''dzin par ''gyur zhing| sems can du ''dzin pa dang| srog tu ''dzin pa dang| gang
|
| 42 |
+
zag tu ''dzin par ''gyur ba''i phyir ro. '
|
| 43 |
+
sentences:
|
| 44 |
+
- 'tere youni tula kemēbēsü. subudi kerbe bodhi-sadv mahā-sadv tede nom kemēn
|
| 45 |
+
xurān meden üyiledkülē tede töüni bidu barixu bolun. amitan-du barin amin-du barin
|
| 46 |
+
budγali-du barixu bolxuyin tulada. '
|
| 47 |
+
- tere youni tula kemēbēsü. subudi oroni zoҟōl-noγoud oroni zoҟōl-noγoud kemēkü.
|
| 48 |
+
zoҟōl tede ügei kemēn tögünčilen boluqsan nomloqsoni tulada. töüni tula oroni
|
| 49 |
+
zoҟōl-noγoud kemēyü.
|
| 50 |
+
- ilaγün tögüsüqsen Ānanda-du zarliq bolboi
|
| 51 |
+
- source_sentence: bcom ldan 'das kyis bka' stsal pa| yang rab 'byor skyes pa'am|
|
| 52 |
+
bud med gang gis lus gang gā'i klung gi bye ma snyed yongs su gtong ba bas gang
|
| 53 |
+
gis chos kyi rnam grangs 'di las tha na tshig bzhi pa'i tshigs su bcad pa tsam
|
| 54 |
+
bzung ste| gzhan dag la yang bstan na de gzhi de las bsod nams ches mang du grangs
|
| 55 |
+
med dpag tu med pa bskyed do.
|
| 56 |
+
sentences:
|
| 57 |
+
- 'teyin kemēn ayiladxaqsan-du burxan nasu tögüs subudi-du eyin kemēn zarliq
|
| 58 |
+
bolbui. '
|
| 59 |
+
- 'tere youni tula kemēbēsü. subudi alii amitan-du xurān medekülē. töüni bodhi-sadv
|
| 60 |
+
kemēn ülü ögüüleküyin tulada. '
|
| 61 |
+
- burxan zarliq bolboi. subudi ere buyu eme zarim γangγa müreni xumakiyin tödüi
|
| 62 |
+
beye oγōto ögüqsen-ēce. ken nomiyin züyil öünēce yadaba čü dörbön ügetü šülügiyin
|
| 63 |
+
tödüi toqtōǰi. busudtu zöb üzüükülē tere oron töün-ēce tōloši ügei caqlaši
|
| 64 |
+
ügei buyan maši ülemǰi öüskekü..
|
| 65 |
+
- source_sentence: da yang sangs rgyas spyan drangs nas chos thos ma thag tub yang
|
| 66 |
+
chung thob par gyur to
|
| 67 |
+
sentences:
|
| 68 |
+
- tere youni tula kemēbēsü. subudi sedkiliyin ürgülǰi sedkiliyin ürgülǰi kemēküi.
|
| 69 |
+
töüni ürgülǰi ügegüye tögünčilen boluqsan nomloqsoni tulada. töüni tula sedkiliyin
|
| 70 |
+
ürgülǰi kemēn ögüüleyü.
|
| 71 |
+
- 'subudi tögünčilen baroun kigēd šinggeküi zöün kigēd dēdü dorodu züq zügiyin zabsar-luγā
|
| 72 |
+
arban zügiyin oqtorγuyin kemǰē caqla kemǰikü kilbar kemēkü buyu. '
|
| 73 |
+
- 'ödügē basa burxani zalād nom sonosōd saca bodhi-yi olun üyiledbei '
|
| 74 |
+
- source_sentence: 'de ci''i phyir zhe na| rab ''byor gal te byang chub sems dpa''
|
| 75 |
+
sems can du ''du shes ''jug na| de byang chub sems dpa'' zhes mi bya ba''i phyir
|
| 76 |
+
ro. '
|
| 77 |
+
sentences:
|
| 78 |
+
- tere youni tula kemēbēsü. subudi dēdü činadu kürüqsen öüni tögünčilen boluqsan
|
| 79 |
+
zarluq bolboi.
|
| 80 |
+
- ' tere youni tula kemēbēsü. subudi kerbe bodhi-sadvnar amitan-du xurān medekülē.
|
| 81 |
+
töüni bodhi-sadv kemēn ülü ögüüleküyin tulada. '
|
| 82 |
+
- subudi basa nomiyin züyil ene sedkiši ügei adalidxaši ügei. öüni bolbosuraqsan
|
| 83 |
+
üre-yi čü sedkiši ügegüye uxan üyiled. nomiyin züyil öüni dēdü külgün-dü sayitur
|
| 84 |
+
oroqson amitan-noγoudiyin tusa kigēd ketürkei boluqsan külgün-du sayaitur oroqson
|
| 85 |
+
amitan-noγoudiyin tusayin tula tögünčilen boluqsan nomloboi.
|
| 86 |
+
pipeline_tag: sentence-similarity
|
| 87 |
+
library_name: sentence-transformers
|
| 88 |
+
---
|
| 89 |
+
|
| 90 |
+
# SentenceTransformer based on sentence-transformers/LaBSE
|
| 91 |
+
|
| 92 |
+
This is a [sentence-transformers](https://www.SBERT.net) model fine-tuned from [sentence-transformers/LaBSE](https://huggingface.co/sentence-transformers/LaBSE). It maps sentences and paragraphs to a 768-dimensional dense vector space and can be used for semantic textual similarity, semantic search, paraphrase mining, text classification, clustering, and more.
|
| 93 |
+
|
| 94 |
+
## Model Details
|
| 95 |
+
|
| 96 |
+
### Model Description
|
| 97 |
+
- **Model Type:** Sentence Transformer
|
| 98 |
+
- **Base model:** [sentence-transformers/LaBSE](https://huggingface.co/sentence-transformers/LaBSE) <!-- at revision 836121a0533e5664b21c7aacc5d22951f2b8b25b -->
|
| 99 |
+
- **Maximum Sequence Length:** 256 tokens
|
| 100 |
+
- **Output Dimensionality:** 768 dimensions
|
| 101 |
+
- **Similarity Function:** Cosine Similarity
|
| 102 |
+
<!-- - **Training Dataset:** Unknown -->
|
| 103 |
+
<!-- - **Language:** Unknown -->
|
| 104 |
+
<!-- - **License:** Unknown -->
|
| 105 |
+
|
| 106 |
+
### Model Sources
|
| 107 |
+
|
| 108 |
+
- **Documentation:** [Sentence Transformers Documentation](https://sbert.net)
|
| 109 |
+
- **Repository:** [Sentence Transformers on GitHub](https://github.com/UKPLab/sentence-transformers)
|
| 110 |
+
- **Hugging Face:** [Sentence Transformers on Hugging Face](https://huggingface.co/models?library=sentence-transformers)
|
| 111 |
+
|
| 112 |
+
### Full Model Architecture
|
| 113 |
+
|
| 114 |
+
```
|
| 115 |
+
SentenceTransformer(
|
| 116 |
+
(0): Transformer({'max_seq_length': 256, 'do_lower_case': False, 'architecture': 'BertModel'})
|
| 117 |
+
(1): Pooling({'word_embedding_dimension': 768, 'pooling_mode_cls_token': True, 'pooling_mode_mean_tokens': False, 'pooling_mode_max_tokens': False, 'pooling_mode_mean_sqrt_len_tokens': False, 'pooling_mode_weightedmean_tokens': False, 'pooling_mode_lasttoken': False, 'include_prompt': True})
|
| 118 |
+
(2): Dense({'in_features': 768, 'out_features': 768, 'bias': True, 'activation_function': 'torch.nn.modules.activation.Tanh'})
|
| 119 |
+
(3): Normalize()
|
| 120 |
+
)
|
| 121 |
+
```
|
| 122 |
+
|
| 123 |
+
## Usage
|
| 124 |
+
|
| 125 |
+
### Direct Usage (Sentence Transformers)
|
| 126 |
+
|
| 127 |
+
First install the Sentence Transformers library:
|
| 128 |
+
|
| 129 |
+
```bash
|
| 130 |
+
pip install -U sentence-transformers
|
| 131 |
+
```
|
| 132 |
+
|
| 133 |
+
Then you can load this model and run inference.
|
| 134 |
+
```python
|
| 135 |
+
from sentence_transformers import SentenceTransformer
|
| 136 |
+
|
| 137 |
+
# Download from the 🤗 Hub
|
| 138 |
+
model = SentenceTransformer("sentence_transformers_model_id")
|
| 139 |
+
# Run inference
|
| 140 |
+
sentences = [
|
| 141 |
+
"de ci'i phyir zhe na| rab 'byor gal te byang chub sems dpa' sems can du 'du shes 'jug na| de byang chub sems dpa' zhes mi bya ba'i phyir ro. ",
|
| 142 |
+
' tere youni tula kemēbēsü. subudi kerbe bodhi-sadvnar amitan-du xurān medekülē. töüni bodhi-sadv kemēn ülü ögüüleküyin tulada. ',
|
| 143 |
+
'tere youni tula kemēbēsü. subudi dēdü činadu kürüqsen öüni tögünčilen boluqsan zarluq bolboi.',
|
| 144 |
+
]
|
| 145 |
+
embeddings = model.encode(sentences)
|
| 146 |
+
print(embeddings.shape)
|
| 147 |
+
# (3, 768)
|
| 148 |
+
|
| 149 |
+
# Get the similarity scores for the embeddings
|
| 150 |
+
similarities = model.similarity(embeddings, embeddings)
|
| 151 |
+
print(similarities)
|
| 152 |
+
# tensor([[1.0000, 0.7387, 0.5014],
|
| 153 |
+
# [0.7387, 1.0000, 0.6236],
|
| 154 |
+
# [0.5014, 0.6236, 1.0000]])
|
| 155 |
+
```
|
| 156 |
+
|
| 157 |
+
<!--
|
| 158 |
+
### Direct Usage (Transformers)
|
| 159 |
+
|
| 160 |
+
<details><summary>Click to see the direct usage in Transformers</summary>
|
| 161 |
+
|
| 162 |
+
</details>
|
| 163 |
+
-->
|
| 164 |
+
|
| 165 |
+
<!--
|
| 166 |
+
### Downstream Usage (Sentence Transformers)
|
| 167 |
+
|
| 168 |
+
You can finetune this model on your own dataset.
|
| 169 |
+
|
| 170 |
+
<details><summary>Click to expand</summary>
|
| 171 |
+
|
| 172 |
+
</details>
|
| 173 |
+
-->
|
| 174 |
+
|
| 175 |
+
<!--
|
| 176 |
+
### Out-of-Scope Use
|
| 177 |
+
|
| 178 |
+
*List how the model may foreseeably be misused and address what users ought not to do with the model.*
|
| 179 |
+
-->
|
| 180 |
+
|
| 181 |
+
<!--
|
| 182 |
+
## Bias, Risks and Limitations
|
| 183 |
+
|
| 184 |
+
*What are the known or foreseeable issues stemming from this model? You could also flag here known failure cases or weaknesses of the model.*
|
| 185 |
+
-->
|
| 186 |
+
|
| 187 |
+
<!--
|
| 188 |
+
### Recommendations
|
| 189 |
+
|
| 190 |
+
*What are recommendations with respect to the foreseeable issues? For example, filtering explicit content.*
|
| 191 |
+
-->
|
| 192 |
+
|
| 193 |
+
## Training Details
|
| 194 |
+
|
| 195 |
+
### Training Dataset
|
| 196 |
+
|
| 197 |
+
#### Unnamed Dataset
|
| 198 |
+
|
| 199 |
+
* Size: 317 training samples
|
| 200 |
+
* Columns: <code>sentence_0</code>, <code>sentence_1</code>, and <code>label</code>
|
| 201 |
+
* Approximate statistics based on the first 317 samples:
|
| 202 |
+
| | sentence_0 | sentence_1 | label |
|
| 203 |
+
|:--------|:------------------------------------------------------------------------------------|:-----------------------------------------------------------------------------------|:--------------------------------------------------------------|
|
| 204 |
+
| type | string | string | float |
|
| 205 |
+
| details | <ul><li>min: 11 tokens</li><li>mean: 62.13 tokens</li><li>max: 256 tokens</li></ul> | <ul><li>min: 9 tokens</li><li>mean: 57.18 tokens</li><li>max: 256 tokens</li></ul> | <ul><li>min: 1.0</li><li>mean: 1.0</li><li>max: 1.0</li></ul> |
|
| 206 |
+
* Samples:
|
| 207 |
+
| sentence_0 | sentence_1 | label |
|
| 208 |
+
|:------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|:------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|:-----------------|
|
| 209 |
+
| <code>de ci'i phyir zhe na\| rab 'byor de bzhin gshegs pas gang bkrol ba'i sems can de dag gang yang med pa'i phyir ro.</code> | <code>tere youni tula kemēbēsü. subudi tögünčilen boluqsani ali tonilγoxui amitan tede aliba ügeyin tulada. subudi kerbe tögünčilen boluqsan ali zarim amitani tonilγoxulā. tögünčilen boluqsan bidü barixu boluyu. amitandu barin amin-du barin budγali barixu boluyu.</code> | <code>1.0</code> |
|
| 210 |
+
| <code>yang rab 'byor ma 'ongs pa'i dus lnga brgya tha ma la dam pa'i chos rab tu rnam par 'jig par 'gyur ba na byang chub sems dpa' sems dpa' chen po tshul khrims dang ldan pa\| yon tan dang ldan pa\| shes rab dang ldan pa dag 'byung ste\| </code> | <code>subudi irē ödüi ecüs tabun zouni caqtu dēdü nom maši ebderekui-dü. šaqšabādtai erdemtei biliq tögüsüqsen bodhi-sadv mahā-sadv-nar törökü. </code> | <code>1.0</code> |
|
| 211 |
+
| <code>bcom ldan 'das kyis bka' stsal pa\| rab 'byor khyod 'di skad du ma 'ongs pa'i dus lnga brgya tha ma la dam pa'i chos rab tu rnam par 'jig par 'gyur ba na sems can gang la la dag 'di lta bu'i mdo sde'i tshig bshad pa 'di la yang dag par 'du shes skyed par 'gyur ba mchis sam zhes ma zer cig\| </code> | <code>burxan zarliq bolboi. subudi či eyin kemēn irē ödüi ēcüs tabun zöüni caqtu dēdü nom maši ebdereküi-dü. ali zarim amitan ene metü suduriyin ayimagiyin üge nomloxuyidu ünēr xurān medeküi öüsken üyiledkü bui buyu kemēn tere metü bu ögüüle. </code> | <code>1.0</code> |
|
| 212 |
+
* Loss: [<code>MultipleNegativesRankingLoss</code>](https://sbert.net/docs/package_reference/sentence_transformer/losses.html#multiplenegativesrankingloss) with these parameters:
|
| 213 |
+
```json
|
| 214 |
+
{
|
| 215 |
+
"scale": 20.0,
|
| 216 |
+
"similarity_fct": "cos_sim",
|
| 217 |
+
"gather_across_devices": false
|
| 218 |
+
}
|
| 219 |
+
```
|
| 220 |
+
|
| 221 |
+
### Training Hyperparameters
|
| 222 |
+
#### Non-Default Hyperparameters
|
| 223 |
+
|
| 224 |
+
- `eval_strategy`: steps
|
| 225 |
+
- `per_device_train_batch_size`: 6
|
| 226 |
+
- `per_device_eval_batch_size`: 6
|
| 227 |
+
- `num_train_epochs`: 25
|
| 228 |
+
- `fp16`: True
|
| 229 |
+
- `multi_dataset_batch_sampler`: round_robin
|
| 230 |
+
|
| 231 |
+
#### All Hyperparameters
|
| 232 |
+
<details><summary>Click to expand</summary>
|
| 233 |
+
|
| 234 |
+
- `overwrite_output_dir`: False
|
| 235 |
+
- `do_predict`: False
|
| 236 |
+
- `eval_strategy`: steps
|
| 237 |
+
- `prediction_loss_only`: True
|
| 238 |
+
- `per_device_train_batch_size`: 6
|
| 239 |
+
- `per_device_eval_batch_size`: 6
|
| 240 |
+
- `per_gpu_train_batch_size`: None
|
| 241 |
+
- `per_gpu_eval_batch_size`: None
|
| 242 |
+
- `gradient_accumulation_steps`: 1
|
| 243 |
+
- `eval_accumulation_steps`: None
|
| 244 |
+
- `torch_empty_cache_steps`: None
|
| 245 |
+
- `learning_rate`: 5e-05
|
| 246 |
+
- `weight_decay`: 0.0
|
| 247 |
+
- `adam_beta1`: 0.9
|
| 248 |
+
- `adam_beta2`: 0.999
|
| 249 |
+
- `adam_epsilon`: 1e-08
|
| 250 |
+
- `max_grad_norm`: 1
|
| 251 |
+
- `num_train_epochs`: 25
|
| 252 |
+
- `max_steps`: -1
|
| 253 |
+
- `lr_scheduler_type`: linear
|
| 254 |
+
- `lr_scheduler_kwargs`: {}
|
| 255 |
+
- `warmup_ratio`: 0.0
|
| 256 |
+
- `warmup_steps`: 0
|
| 257 |
+
- `log_level`: passive
|
| 258 |
+
- `log_level_replica`: warning
|
| 259 |
+
- `log_on_each_node`: True
|
| 260 |
+
- `logging_nan_inf_filter`: True
|
| 261 |
+
- `save_safetensors`: True
|
| 262 |
+
- `save_on_each_node`: False
|
| 263 |
+
- `save_only_model`: False
|
| 264 |
+
- `restore_callback_states_from_checkpoint`: False
|
| 265 |
+
- `no_cuda`: False
|
| 266 |
+
- `use_cpu`: False
|
| 267 |
+
- `use_mps_device`: False
|
| 268 |
+
- `seed`: 42
|
| 269 |
+
- `data_seed`: None
|
| 270 |
+
- `jit_mode_eval`: False
|
| 271 |
+
- `use_ipex`: False
|
| 272 |
+
- `bf16`: False
|
| 273 |
+
- `fp16`: True
|
| 274 |
+
- `fp16_opt_level`: O1
|
| 275 |
+
- `half_precision_backend`: auto
|
| 276 |
+
- `bf16_full_eval`: False
|
| 277 |
+
- `fp16_full_eval`: False
|
| 278 |
+
- `tf32`: None
|
| 279 |
+
- `local_rank`: 0
|
| 280 |
+
- `ddp_backend`: None
|
| 281 |
+
- `tpu_num_cores`: None
|
| 282 |
+
- `tpu_metrics_debug`: False
|
| 283 |
+
- `debug`: []
|
| 284 |
+
- `dataloader_drop_last`: False
|
| 285 |
+
- `dataloader_num_workers`: 0
|
| 286 |
+
- `dataloader_prefetch_factor`: None
|
| 287 |
+
- `past_index`: -1
|
| 288 |
+
- `disable_tqdm`: False
|
| 289 |
+
- `remove_unused_columns`: True
|
| 290 |
+
- `label_names`: None
|
| 291 |
+
- `load_best_model_at_end`: False
|
| 292 |
+
- `ignore_data_skip`: False
|
| 293 |
+
- `fsdp`: []
|
| 294 |
+
- `fsdp_min_num_params`: 0
|
| 295 |
+
- `fsdp_config`: {'min_num_params': 0, 'xla': False, 'xla_fsdp_v2': False, 'xla_fsdp_grad_ckpt': False}
|
| 296 |
+
- `fsdp_transformer_layer_cls_to_wrap`: None
|
| 297 |
+
- `accelerator_config`: {'split_batches': False, 'dispatch_batches': None, 'even_batches': True, 'use_seedable_sampler': True, 'non_blocking': False, 'gradient_accumulation_kwargs': None}
|
| 298 |
+
- `parallelism_config`: None
|
| 299 |
+
- `deepspeed`: None
|
| 300 |
+
- `label_smoothing_factor`: 0.0
|
| 301 |
+
- `optim`: adamw_torch_fused
|
| 302 |
+
- `optim_args`: None
|
| 303 |
+
- `adafactor`: False
|
| 304 |
+
- `group_by_length`: False
|
| 305 |
+
- `length_column_name`: length
|
| 306 |
+
- `ddp_find_unused_parameters`: None
|
| 307 |
+
- `ddp_bucket_cap_mb`: None
|
| 308 |
+
- `ddp_broadcast_buffers`: False
|
| 309 |
+
- `dataloader_pin_memory`: True
|
| 310 |
+
- `dataloader_persistent_workers`: False
|
| 311 |
+
- `skip_memory_metrics`: True
|
| 312 |
+
- `use_legacy_prediction_loop`: False
|
| 313 |
+
- `push_to_hub`: False
|
| 314 |
+
- `resume_from_checkpoint`: None
|
| 315 |
+
- `hub_model_id`: None
|
| 316 |
+
- `hub_strategy`: every_save
|
| 317 |
+
- `hub_private_repo`: None
|
| 318 |
+
- `hub_always_push`: False
|
| 319 |
+
- `hub_revision`: None
|
| 320 |
+
- `gradient_checkpointing`: False
|
| 321 |
+
- `gradient_checkpointing_kwargs`: None
|
| 322 |
+
- `include_inputs_for_metrics`: False
|
| 323 |
+
- `include_for_metrics`: []
|
| 324 |
+
- `eval_do_concat_batches`: True
|
| 325 |
+
- `fp16_backend`: auto
|
| 326 |
+
- `push_to_hub_model_id`: None
|
| 327 |
+
- `push_to_hub_organization`: None
|
| 328 |
+
- `mp_parameters`:
|
| 329 |
+
- `auto_find_batch_size`: False
|
| 330 |
+
- `full_determinism`: False
|
| 331 |
+
- `torchdynamo`: None
|
| 332 |
+
- `ray_scope`: last
|
| 333 |
+
- `ddp_timeout`: 1800
|
| 334 |
+
- `torch_compile`: False
|
| 335 |
+
- `torch_compile_backend`: None
|
| 336 |
+
- `torch_compile_mode`: None
|
| 337 |
+
- `include_tokens_per_second`: False
|
| 338 |
+
- `include_num_input_tokens_seen`: False
|
| 339 |
+
- `neftune_noise_alpha`: None
|
| 340 |
+
- `optim_target_modules`: None
|
| 341 |
+
- `batch_eval_metrics`: False
|
| 342 |
+
- `eval_on_start`: False
|
| 343 |
+
- `use_liger_kernel`: False
|
| 344 |
+
- `liger_kernel_config`: None
|
| 345 |
+
- `eval_use_gather_object`: False
|
| 346 |
+
- `average_tokens_across_devices`: False
|
| 347 |
+
- `prompts`: None
|
| 348 |
+
- `batch_sampler`: batch_sampler
|
| 349 |
+
- `multi_dataset_batch_sampler`: round_robin
|
| 350 |
+
- `router_mapping`: {}
|
| 351 |
+
- `learning_rate_mapping`: {}
|
| 352 |
+
|
| 353 |
+
</details>
|
| 354 |
+
|
| 355 |
+
### Training Logs
|
| 356 |
+
<details><summary>Click to expand</summary>
|
| 357 |
+
|
| 358 |
+
| Epoch | Step | Training Loss |
|
| 359 |
+
|:-------:|:----:|:-------------:|
|
| 360 |
+
| 0.0566 | 3 | - |
|
| 361 |
+
| 0.1132 | 6 | - |
|
| 362 |
+
| 0.1698 | 9 | - |
|
| 363 |
+
| 0.2264 | 12 | - |
|
| 364 |
+
| 0.2830 | 15 | - |
|
| 365 |
+
| 0.3396 | 18 | - |
|
| 366 |
+
| 0.3962 | 21 | - |
|
| 367 |
+
| 0.4528 | 24 | - |
|
| 368 |
+
| 0.5094 | 27 | - |
|
| 369 |
+
| 0.5660 | 30 | - |
|
| 370 |
+
| 0.6226 | 33 | - |
|
| 371 |
+
| 0.6792 | 36 | - |
|
| 372 |
+
| 0.7358 | 39 | - |
|
| 373 |
+
| 0.7925 | 42 | - |
|
| 374 |
+
| 0.8491 | 45 | - |
|
| 375 |
+
| 0.9057 | 48 | - |
|
| 376 |
+
| 0.9623 | 51 | - |
|
| 377 |
+
| 1.0 | 53 | - |
|
| 378 |
+
| 1.0189 | 54 | - |
|
| 379 |
+
| 1.0755 | 57 | - |
|
| 380 |
+
| 1.1321 | 60 | - |
|
| 381 |
+
| 1.1887 | 63 | - |
|
| 382 |
+
| 1.2453 | 66 | - |
|
| 383 |
+
| 1.3019 | 69 | - |
|
| 384 |
+
| 1.3585 | 72 | - |
|
| 385 |
+
| 1.4151 | 75 | - |
|
| 386 |
+
| 1.4717 | 78 | - |
|
| 387 |
+
| 1.5283 | 81 | - |
|
| 388 |
+
| 1.5849 | 84 | - |
|
| 389 |
+
| 1.6415 | 87 | - |
|
| 390 |
+
| 1.6981 | 90 | - |
|
| 391 |
+
| 1.7547 | 93 | - |
|
| 392 |
+
| 1.8113 | 96 | - |
|
| 393 |
+
| 1.8679 | 99 | - |
|
| 394 |
+
| 1.9245 | 102 | - |
|
| 395 |
+
| 1.9811 | 105 | - |
|
| 396 |
+
| 2.0 | 106 | - |
|
| 397 |
+
| 2.0377 | 108 | - |
|
| 398 |
+
| 2.0943 | 111 | - |
|
| 399 |
+
| 2.1509 | 114 | - |
|
| 400 |
+
| 2.2075 | 117 | - |
|
| 401 |
+
| 2.2642 | 120 | - |
|
| 402 |
+
| 2.3208 | 123 | - |
|
| 403 |
+
| 2.3774 | 126 | - |
|
| 404 |
+
| 2.4340 | 129 | - |
|
| 405 |
+
| 2.4906 | 132 | - |
|
| 406 |
+
| 2.5472 | 135 | - |
|
| 407 |
+
| 2.6038 | 138 | - |
|
| 408 |
+
| 2.6604 | 141 | - |
|
| 409 |
+
| 2.7170 | 144 | - |
|
| 410 |
+
| 2.7736 | 147 | - |
|
| 411 |
+
| 2.8302 | 150 | - |
|
| 412 |
+
| 2.8868 | 153 | - |
|
| 413 |
+
| 2.9434 | 156 | - |
|
| 414 |
+
| 3.0 | 159 | - |
|
| 415 |
+
| 3.0566 | 162 | - |
|
| 416 |
+
| 3.1132 | 165 | - |
|
| 417 |
+
| 3.1698 | 168 | - |
|
| 418 |
+
| 3.2264 | 171 | - |
|
| 419 |
+
| 3.2830 | 174 | - |
|
| 420 |
+
| 3.3396 | 177 | - |
|
| 421 |
+
| 3.3962 | 180 | - |
|
| 422 |
+
| 3.4528 | 183 | - |
|
| 423 |
+
| 3.5094 | 186 | - |
|
| 424 |
+
| 3.5660 | 189 | - |
|
| 425 |
+
| 3.6226 | 192 | - |
|
| 426 |
+
| 3.6792 | 195 | - |
|
| 427 |
+
| 3.7358 | 198 | - |
|
| 428 |
+
| 3.7925 | 201 | - |
|
| 429 |
+
| 3.8491 | 204 | - |
|
| 430 |
+
| 3.9057 | 207 | - |
|
| 431 |
+
| 3.9623 | 210 | - |
|
| 432 |
+
| 4.0 | 212 | - |
|
| 433 |
+
| 4.0189 | 213 | - |
|
| 434 |
+
| 4.0755 | 216 | - |
|
| 435 |
+
| 4.1321 | 219 | - |
|
| 436 |
+
| 4.1887 | 222 | - |
|
| 437 |
+
| 4.2453 | 225 | - |
|
| 438 |
+
| 4.3019 | 228 | - |
|
| 439 |
+
| 4.3585 | 231 | - |
|
| 440 |
+
| 4.4151 | 234 | - |
|
| 441 |
+
| 4.4717 | 237 | - |
|
| 442 |
+
| 4.5283 | 240 | - |
|
| 443 |
+
| 4.5849 | 243 | - |
|
| 444 |
+
| 4.6415 | 246 | - |
|
| 445 |
+
| 4.6981 | 249 | - |
|
| 446 |
+
| 4.7547 | 252 | - |
|
| 447 |
+
| 4.8113 | 255 | - |
|
| 448 |
+
| 4.8679 | 258 | - |
|
| 449 |
+
| 4.9245 | 261 | - |
|
| 450 |
+
| 4.9811 | 264 | - |
|
| 451 |
+
| 5.0 | 265 | - |
|
| 452 |
+
| 5.0377 | 267 | - |
|
| 453 |
+
| 5.0943 | 270 | - |
|
| 454 |
+
| 5.1509 | 273 | - |
|
| 455 |
+
| 5.2075 | 276 | - |
|
| 456 |
+
| 5.2642 | 279 | - |
|
| 457 |
+
| 5.3208 | 282 | - |
|
| 458 |
+
| 5.3774 | 285 | - |
|
| 459 |
+
| 5.4340 | 288 | - |
|
| 460 |
+
| 5.4906 | 291 | - |
|
| 461 |
+
| 5.5472 | 294 | - |
|
| 462 |
+
| 5.6038 | 297 | - |
|
| 463 |
+
| 5.6604 | 300 | - |
|
| 464 |
+
| 5.7170 | 303 | - |
|
| 465 |
+
| 5.7736 | 306 | - |
|
| 466 |
+
| 5.8302 | 309 | - |
|
| 467 |
+
| 5.8868 | 312 | - |
|
| 468 |
+
| 5.9434 | 315 | - |
|
| 469 |
+
| 6.0 | 318 | - |
|
| 470 |
+
| 6.0566 | 321 | - |
|
| 471 |
+
| 6.1132 | 324 | - |
|
| 472 |
+
| 6.1698 | 327 | - |
|
| 473 |
+
| 6.2264 | 330 | - |
|
| 474 |
+
| 6.2830 | 333 | - |
|
| 475 |
+
| 6.3396 | 336 | - |
|
| 476 |
+
| 6.3962 | 339 | - |
|
| 477 |
+
| 6.4528 | 342 | - |
|
| 478 |
+
| 6.5094 | 345 | - |
|
| 479 |
+
| 6.5660 | 348 | - |
|
| 480 |
+
| 6.6226 | 351 | - |
|
| 481 |
+
| 6.6792 | 354 | - |
|
| 482 |
+
| 6.7358 | 357 | - |
|
| 483 |
+
| 6.7925 | 360 | - |
|
| 484 |
+
| 6.8491 | 363 | - |
|
| 485 |
+
| 6.9057 | 366 | - |
|
| 486 |
+
| 6.9623 | 369 | - |
|
| 487 |
+
| 7.0 | 371 | - |
|
| 488 |
+
| 7.0189 | 372 | - |
|
| 489 |
+
| 7.0755 | 375 | - |
|
| 490 |
+
| 7.1321 | 378 | - |
|
| 491 |
+
| 7.1887 | 381 | - |
|
| 492 |
+
| 7.2453 | 384 | - |
|
| 493 |
+
| 7.3019 | 387 | - |
|
| 494 |
+
| 7.3585 | 390 | - |
|
| 495 |
+
| 7.4151 | 393 | - |
|
| 496 |
+
| 7.4717 | 396 | - |
|
| 497 |
+
| 7.5283 | 399 | - |
|
| 498 |
+
| 7.5849 | 402 | - |
|
| 499 |
+
| 7.6415 | 405 | - |
|
| 500 |
+
| 7.6981 | 408 | - |
|
| 501 |
+
| 7.7547 | 411 | - |
|
| 502 |
+
| 7.8113 | 414 | - |
|
| 503 |
+
| 7.8679 | 417 | - |
|
| 504 |
+
| 7.9245 | 420 | - |
|
| 505 |
+
| 7.9811 | 423 | - |
|
| 506 |
+
| 8.0 | 424 | - |
|
| 507 |
+
| 8.0377 | 426 | - |
|
| 508 |
+
| 8.0943 | 429 | - |
|
| 509 |
+
| 8.1509 | 432 | - |
|
| 510 |
+
| 8.2075 | 435 | - |
|
| 511 |
+
| 8.2642 | 438 | - |
|
| 512 |
+
| 8.3208 | 441 | - |
|
| 513 |
+
| 8.3774 | 444 | - |
|
| 514 |
+
| 8.4340 | 447 | - |
|
| 515 |
+
| 8.4906 | 450 | - |
|
| 516 |
+
| 8.5472 | 453 | - |
|
| 517 |
+
| 8.6038 | 456 | - |
|
| 518 |
+
| 8.6604 | 459 | - |
|
| 519 |
+
| 8.7170 | 462 | - |
|
| 520 |
+
| 8.7736 | 465 | - |
|
| 521 |
+
| 8.8302 | 468 | - |
|
| 522 |
+
| 8.8868 | 471 | - |
|
| 523 |
+
| 8.9434 | 474 | - |
|
| 524 |
+
| 9.0 | 477 | - |
|
| 525 |
+
| 9.0566 | 480 | - |
|
| 526 |
+
| 9.1132 | 483 | - |
|
| 527 |
+
| 9.1698 | 486 | - |
|
| 528 |
+
| 9.2264 | 489 | - |
|
| 529 |
+
| 9.2830 | 492 | - |
|
| 530 |
+
| 9.3396 | 495 | - |
|
| 531 |
+
| 9.3962 | 498 | - |
|
| 532 |
+
| 9.4340 | 500 | 0.6328 |
|
| 533 |
+
| 9.4528 | 501 | - |
|
| 534 |
+
| 9.5094 | 504 | - |
|
| 535 |
+
| 9.5660 | 507 | - |
|
| 536 |
+
| 9.6226 | 510 | - |
|
| 537 |
+
| 9.6792 | 513 | - |
|
| 538 |
+
| 9.7358 | 516 | - |
|
| 539 |
+
| 9.7925 | 519 | - |
|
| 540 |
+
| 9.8491 | 522 | - |
|
| 541 |
+
| 9.9057 | 525 | - |
|
| 542 |
+
| 9.9623 | 528 | - |
|
| 543 |
+
| 10.0 | 530 | - |
|
| 544 |
+
| 10.0189 | 531 | - |
|
| 545 |
+
| 10.0755 | 534 | - |
|
| 546 |
+
| 10.1321 | 537 | - |
|
| 547 |
+
| 10.1887 | 540 | - |
|
| 548 |
+
| 10.2453 | 543 | - |
|
| 549 |
+
| 10.3019 | 546 | - |
|
| 550 |
+
| 10.3585 | 549 | - |
|
| 551 |
+
| 10.4151 | 552 | - |
|
| 552 |
+
| 10.4717 | 555 | - |
|
| 553 |
+
| 10.5283 | 558 | - |
|
| 554 |
+
| 10.5849 | 561 | - |
|
| 555 |
+
| 10.6415 | 564 | - |
|
| 556 |
+
| 10.6981 | 567 | - |
|
| 557 |
+
| 10.7547 | 570 | - |
|
| 558 |
+
| 10.8113 | 573 | - |
|
| 559 |
+
| 10.8679 | 576 | - |
|
| 560 |
+
|
| 561 |
+
</details>
|
| 562 |
+
|
| 563 |
+
### Framework Versions
|
| 564 |
+
- Python: 3.12.11
|
| 565 |
+
- Sentence Transformers: 5.1.0
|
| 566 |
+
- Transformers: 4.56.0
|
| 567 |
+
- PyTorch: 2.8.0+cu126
|
| 568 |
+
- Accelerate: 1.10.1
|
| 569 |
+
- Datasets: 4.0.0
|
| 570 |
+
- Tokenizers: 0.22.0
|
| 571 |
+
|
| 572 |
+
## Citation
|
| 573 |
+
|
| 574 |
+
### BibTeX
|
| 575 |
+
|
| 576 |
+
#### Sentence Transformers
|
| 577 |
+
```bibtex
|
| 578 |
+
@inproceedings{reimers-2019-sentence-bert,
|
| 579 |
+
title = "Sentence-BERT: Sentence Embeddings using Siamese BERT-Networks",
|
| 580 |
+
author = "Reimers, Nils and Gurevych, Iryna",
|
| 581 |
+
booktitle = "Proceedings of the 2019 Conference on Empirical Methods in Natural Language Processing",
|
| 582 |
+
month = "11",
|
| 583 |
+
year = "2019",
|
| 584 |
+
publisher = "Association for Computational Linguistics",
|
| 585 |
+
url = "https://arxiv.org/abs/1908.10084",
|
| 586 |
+
}
|
| 587 |
+
```
|
| 588 |
+
|
| 589 |
+
#### MultipleNegativesRankingLoss
|
| 590 |
+
```bibtex
|
| 591 |
+
@misc{henderson2017efficient,
|
| 592 |
+
title={Efficient Natural Language Response Suggestion for Smart Reply},
|
| 593 |
+
author={Matthew Henderson and Rami Al-Rfou and Brian Strope and Yun-hsuan Sung and Laszlo Lukacs and Ruiqi Guo and Sanjiv Kumar and Balint Miklos and Ray Kurzweil},
|
| 594 |
+
year={2017},
|
| 595 |
+
eprint={1705.00652},
|
| 596 |
+
archivePrefix={arXiv},
|
| 597 |
+
primaryClass={cs.CL}
|
| 598 |
+
}
|
| 599 |
+
```
|
| 600 |
+
|
| 601 |
+
<!--
|
| 602 |
+
## Glossary
|
| 603 |
+
|
| 604 |
+
*Clearly define terms in order to be accessible across audiences.*
|
| 605 |
+
-->
|
| 606 |
+
|
| 607 |
+
<!--
|
| 608 |
+
## Model Card Authors
|
| 609 |
+
|
| 610 |
+
*Lists the people who create the model card, providing recognition and accountability for the detailed work that goes into its construction.*
|
| 611 |
+
-->
|
| 612 |
+
|
| 613 |
+
<!--
|
| 614 |
+
## Model Card Contact
|
| 615 |
+
|
| 616 |
+
*Provides a way for people who have updates to the Model Card, suggestions, or questions, to contact the Model Card authors.*
|
| 617 |
+
-->
|
config.json
ADDED
|
@@ -0,0 +1,31 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"architectures": [
|
| 3 |
+
"BertModel"
|
| 4 |
+
],
|
| 5 |
+
"attention_probs_dropout_prob": 0.1,
|
| 6 |
+
"classifier_dropout": null,
|
| 7 |
+
"directionality": "bidi",
|
| 8 |
+
"dtype": "float32",
|
| 9 |
+
"gradient_checkpointing": false,
|
| 10 |
+
"hidden_act": "gelu",
|
| 11 |
+
"hidden_dropout_prob": 0.1,
|
| 12 |
+
"hidden_size": 768,
|
| 13 |
+
"initializer_range": 0.02,
|
| 14 |
+
"intermediate_size": 3072,
|
| 15 |
+
"layer_norm_eps": 1e-12,
|
| 16 |
+
"max_position_embeddings": 512,
|
| 17 |
+
"model_type": "bert",
|
| 18 |
+
"num_attention_heads": 12,
|
| 19 |
+
"num_hidden_layers": 12,
|
| 20 |
+
"pad_token_id": 0,
|
| 21 |
+
"pooler_fc_size": 768,
|
| 22 |
+
"pooler_num_attention_heads": 12,
|
| 23 |
+
"pooler_num_fc_layers": 3,
|
| 24 |
+
"pooler_size_per_head": 128,
|
| 25 |
+
"pooler_type": "first_token_transform",
|
| 26 |
+
"position_embedding_type": "absolute",
|
| 27 |
+
"transformers_version": "4.56.0",
|
| 28 |
+
"type_vocab_size": 2,
|
| 29 |
+
"use_cache": true,
|
| 30 |
+
"vocab_size": 501153
|
| 31 |
+
}
|
config_sentence_transformers.json
ADDED
|
@@ -0,0 +1,14 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"__version__": {
|
| 3 |
+
"sentence_transformers": "5.1.0",
|
| 4 |
+
"transformers": "4.56.0",
|
| 5 |
+
"pytorch": "2.8.0+cu126"
|
| 6 |
+
},
|
| 7 |
+
"model_type": "SentenceTransformer",
|
| 8 |
+
"prompts": {
|
| 9 |
+
"query": "",
|
| 10 |
+
"document": ""
|
| 11 |
+
},
|
| 12 |
+
"default_prompt_name": null,
|
| 13 |
+
"similarity_fn_name": "cosine"
|
| 14 |
+
}
|
model.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:e6e2951a46ba231936b0b5b4a751869612128ba4461faf827a75a0992d2f952d
|
| 3 |
+
size 1883730160
|
modules.json
ADDED
|
@@ -0,0 +1,26 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
[
|
| 2 |
+
{
|
| 3 |
+
"idx": 0,
|
| 4 |
+
"name": "0",
|
| 5 |
+
"path": "",
|
| 6 |
+
"type": "sentence_transformers.models.Transformer"
|
| 7 |
+
},
|
| 8 |
+
{
|
| 9 |
+
"idx": 1,
|
| 10 |
+
"name": "1",
|
| 11 |
+
"path": "1_Pooling",
|
| 12 |
+
"type": "sentence_transformers.models.Pooling"
|
| 13 |
+
},
|
| 14 |
+
{
|
| 15 |
+
"idx": 2,
|
| 16 |
+
"name": "2",
|
| 17 |
+
"path": "2_Dense",
|
| 18 |
+
"type": "sentence_transformers.models.Dense"
|
| 19 |
+
},
|
| 20 |
+
{
|
| 21 |
+
"idx": 3,
|
| 22 |
+
"name": "3",
|
| 23 |
+
"path": "3_Normalize",
|
| 24 |
+
"type": "sentence_transformers.models.Normalize"
|
| 25 |
+
}
|
| 26 |
+
]
|
sentence_bert_config.json
ADDED
|
@@ -0,0 +1,4 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"max_seq_length": 256,
|
| 3 |
+
"do_lower_case": false
|
| 4 |
+
}
|
special_tokens_map.json
ADDED
|
@@ -0,0 +1,37 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"cls_token": {
|
| 3 |
+
"content": "[CLS]",
|
| 4 |
+
"lstrip": false,
|
| 5 |
+
"normalized": false,
|
| 6 |
+
"rstrip": false,
|
| 7 |
+
"single_word": false
|
| 8 |
+
},
|
| 9 |
+
"mask_token": {
|
| 10 |
+
"content": "[MASK]",
|
| 11 |
+
"lstrip": false,
|
| 12 |
+
"normalized": false,
|
| 13 |
+
"rstrip": false,
|
| 14 |
+
"single_word": false
|
| 15 |
+
},
|
| 16 |
+
"pad_token": {
|
| 17 |
+
"content": "[PAD]",
|
| 18 |
+
"lstrip": false,
|
| 19 |
+
"normalized": false,
|
| 20 |
+
"rstrip": false,
|
| 21 |
+
"single_word": false
|
| 22 |
+
},
|
| 23 |
+
"sep_token": {
|
| 24 |
+
"content": "[SEP]",
|
| 25 |
+
"lstrip": false,
|
| 26 |
+
"normalized": false,
|
| 27 |
+
"rstrip": false,
|
| 28 |
+
"single_word": false
|
| 29 |
+
},
|
| 30 |
+
"unk_token": {
|
| 31 |
+
"content": "[UNK]",
|
| 32 |
+
"lstrip": false,
|
| 33 |
+
"normalized": false,
|
| 34 |
+
"rstrip": false,
|
| 35 |
+
"single_word": false
|
| 36 |
+
}
|
| 37 |
+
}
|
tokenizer.json
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:92262b29204f8fdc169a63f9005a0e311a16262cef4d96ecfe2a7ed638662ed3
|
| 3 |
+
size 13632172
|
tokenizer_config.json
ADDED
|
@@ -0,0 +1,59 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"added_tokens_decoder": {
|
| 3 |
+
"0": {
|
| 4 |
+
"content": "[PAD]",
|
| 5 |
+
"lstrip": false,
|
| 6 |
+
"normalized": false,
|
| 7 |
+
"rstrip": false,
|
| 8 |
+
"single_word": false,
|
| 9 |
+
"special": true
|
| 10 |
+
},
|
| 11 |
+
"100": {
|
| 12 |
+
"content": "[UNK]",
|
| 13 |
+
"lstrip": false,
|
| 14 |
+
"normalized": false,
|
| 15 |
+
"rstrip": false,
|
| 16 |
+
"single_word": false,
|
| 17 |
+
"special": true
|
| 18 |
+
},
|
| 19 |
+
"101": {
|
| 20 |
+
"content": "[CLS]",
|
| 21 |
+
"lstrip": false,
|
| 22 |
+
"normalized": false,
|
| 23 |
+
"rstrip": false,
|
| 24 |
+
"single_word": false,
|
| 25 |
+
"special": true
|
| 26 |
+
},
|
| 27 |
+
"102": {
|
| 28 |
+
"content": "[SEP]",
|
| 29 |
+
"lstrip": false,
|
| 30 |
+
"normalized": false,
|
| 31 |
+
"rstrip": false,
|
| 32 |
+
"single_word": false,
|
| 33 |
+
"special": true
|
| 34 |
+
},
|
| 35 |
+
"103": {
|
| 36 |
+
"content": "[MASK]",
|
| 37 |
+
"lstrip": false,
|
| 38 |
+
"normalized": false,
|
| 39 |
+
"rstrip": false,
|
| 40 |
+
"single_word": false,
|
| 41 |
+
"special": true
|
| 42 |
+
}
|
| 43 |
+
},
|
| 44 |
+
"clean_up_tokenization_spaces": false,
|
| 45 |
+
"cls_token": "[CLS]",
|
| 46 |
+
"do_basic_tokenize": true,
|
| 47 |
+
"do_lower_case": false,
|
| 48 |
+
"extra_special_tokens": {},
|
| 49 |
+
"full_tokenizer_file": null,
|
| 50 |
+
"mask_token": "[MASK]",
|
| 51 |
+
"model_max_length": 256,
|
| 52 |
+
"never_split": null,
|
| 53 |
+
"pad_token": "[PAD]",
|
| 54 |
+
"sep_token": "[SEP]",
|
| 55 |
+
"strip_accents": null,
|
| 56 |
+
"tokenize_chinese_chars": true,
|
| 57 |
+
"tokenizer_class": "BertTokenizer",
|
| 58 |
+
"unk_token": "[UNK]"
|
| 59 |
+
}
|
vocab.txt
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|