LaBSE
Browse filesThis view is limited to 50 files because it contains too many changes.
See raw diff
- .gitattributes +12 -0
- LABSE-Vit-L-14/.gitattributes +143 -0
- LABSE-Vit-L-14/README.md +11 -0
- LABSE-Vit-L-14/config.json +1 -0
- LABSE-Vit-L-14/source.txt +1 -0
- LABSE-Vit-L-14/textual/model.onnx +3 -0
- LABSE-Vit-L-14/textual/rknpu/rk3566/model.rknn +3 -0
- LABSE-Vit-L-14/textual/rknpu/rk3568/model.rknn +3 -0
- LABSE-Vit-L-14/textual/rknpu/rk3576/model.rknn +3 -0
- LABSE-Vit-L-14/textual/rknpu/rk3588/model.rknn +3 -0
- LABSE-Vit-L-14/textual/special_tokens_map.json +37 -0
- LABSE-Vit-L-14/textual/tokenizer.json +3 -0
- LABSE-Vit-L-14/textual/tokenizer_config.json +59 -0
- LABSE-Vit-L-14/textual/vocab.txt +0 -0
- LABSE-Vit-L-14/visual/model.onnx +3 -0
- LABSE-Vit-L-14/visual/preprocess_cfg.json +1 -0
- LABSE-Vit-L-14/visual/rknpu/rk3566/model.rknn +3 -0
- LABSE-Vit-L-14/visual/rknpu/rk3568/model.rknn +3 -0
- LABSE-Vit-L-14/visual/rknpu/rk3576/model.rknn +3 -0
- LABSE-Vit-L-14/visual/rknpu/rk3588/model.rknn +3 -0
- LaBSE-en-ru/.gitattributes +17 -0
- LaBSE-en-ru/1_Pooling/config.json +7 -0
- LaBSE-en-ru/2_Dense/config.json +1 -0
- LaBSE-en-ru/2_Dense/pytorch_model.bin +3 -0
- LaBSE-en-ru/README.md +35 -0
- LaBSE-en-ru/config.json +30 -0
- LaBSE-en-ru/model.safetensors +3 -0
- LaBSE-en-ru/modules.json +26 -0
- LaBSE-en-ru/pytorch_model.bin +3 -0
- LaBSE-en-ru/sentence_bert_config.json +4 -0
- LaBSE-en-ru/source.txt +1 -0
- LaBSE-en-ru/special_tokens_map.json +1 -0
- LaBSE-en-ru/tf_model.h5 +3 -0
- LaBSE-en-ru/tokenizer_config.json +1 -0
- LaBSE-en-ru/vocab.txt +0 -0
- LaBSE-fr-de/.gitattributes +34 -0
- LaBSE-fr-de/README.md +31 -0
- LaBSE-fr-de/config.json +32 -0
- LaBSE-fr-de/model.safetensors +3 -0
- LaBSE-fr-de/pytorch_model.bin +3 -0
- LaBSE-fr-de/source.txt +1 -0
- LaBSE-fr-de/special_tokens_map.json +7 -0
- LaBSE-fr-de/tokenizer_config.json +17 -0
- LaBSE-fr-de/vocab.txt +0 -0
- LaBSE-onnx/.gitattributes +36 -0
- LaBSE-onnx/1_Pooling/config.json +7 -0
- LaBSE-onnx/config.json +31 -0
- LaBSE-onnx/model.onnx +3 -0
- LaBSE-onnx/source.txt +1 -0
- LaBSE-onnx/special_tokens_map.json +37 -0
.gitattributes
CHANGED
|
@@ -33,3 +33,15 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
|
| 33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
| 34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
| 35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
| 34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
| 35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
| 36 |
+
LaBSE-onnx/tokenizer.json filter=lfs diff=lfs merge=lfs -text
|
| 37 |
+
LABSE-Vit-L-14/textual/rknpu/rk3566/model.rknn filter=lfs diff=lfs merge=lfs -text
|
| 38 |
+
LABSE-Vit-L-14/textual/rknpu/rk3568/model.rknn filter=lfs diff=lfs merge=lfs -text
|
| 39 |
+
LABSE-Vit-L-14/textual/rknpu/rk3576/model.rknn filter=lfs diff=lfs merge=lfs -text
|
| 40 |
+
LABSE-Vit-L-14/textual/rknpu/rk3588/model.rknn filter=lfs diff=lfs merge=lfs -text
|
| 41 |
+
LABSE-Vit-L-14/textual/tokenizer.json filter=lfs diff=lfs merge=lfs -text
|
| 42 |
+
LABSE-Vit-L-14/visual/rknpu/rk3566/model.rknn filter=lfs diff=lfs merge=lfs -text
|
| 43 |
+
LABSE-Vit-L-14/visual/rknpu/rk3568/model.rknn filter=lfs diff=lfs merge=lfs -text
|
| 44 |
+
LABSE-Vit-L-14/visual/rknpu/rk3576/model.rknn filter=lfs diff=lfs merge=lfs -text
|
| 45 |
+
LABSE-Vit-L-14/visual/rknpu/rk3588/model.rknn filter=lfs diff=lfs merge=lfs -text
|
| 46 |
+
Language-agnostic[[:space:]]BERT[[:space:]]Sentence[[:space:]]Embedding.pdf filter=lfs diff=lfs merge=lfs -text
|
| 47 |
+
sbert-LaBSE-onnx/tokenizer.json filter=lfs diff=lfs merge=lfs -text
|
LABSE-Vit-L-14/.gitattributes
ADDED
|
@@ -0,0 +1,143 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
*.7z filter=lfs diff=lfs merge=lfs -text
|
| 2 |
+
*.arrow filter=lfs diff=lfs merge=lfs -text
|
| 3 |
+
*.bin filter=lfs diff=lfs merge=lfs -text
|
| 4 |
+
*.bz2 filter=lfs diff=lfs merge=lfs -text
|
| 5 |
+
*.ckpt filter=lfs diff=lfs merge=lfs -text
|
| 6 |
+
*.ftz filter=lfs diff=lfs merge=lfs -text
|
| 7 |
+
*.gz filter=lfs diff=lfs merge=lfs -text
|
| 8 |
+
*.h5 filter=lfs diff=lfs merge=lfs -text
|
| 9 |
+
*.joblib filter=lfs diff=lfs merge=lfs -text
|
| 10 |
+
*.lfs.* filter=lfs diff=lfs merge=lfs -text
|
| 11 |
+
*.mlmodel filter=lfs diff=lfs merge=lfs -text
|
| 12 |
+
*.model filter=lfs diff=lfs merge=lfs -text
|
| 13 |
+
*.msgpack filter=lfs diff=lfs merge=lfs -text
|
| 14 |
+
*.npy filter=lfs diff=lfs merge=lfs -text
|
| 15 |
+
*.npz filter=lfs diff=lfs merge=lfs -text
|
| 16 |
+
*.onnx filter=lfs diff=lfs merge=lfs -text
|
| 17 |
+
*.ot filter=lfs diff=lfs merge=lfs -text
|
| 18 |
+
*.parquet filter=lfs diff=lfs merge=lfs -text
|
| 19 |
+
*.pb filter=lfs diff=lfs merge=lfs -text
|
| 20 |
+
*.pickle filter=lfs diff=lfs merge=lfs -text
|
| 21 |
+
*.pkl filter=lfs diff=lfs merge=lfs -text
|
| 22 |
+
*.pt filter=lfs diff=lfs merge=lfs -text
|
| 23 |
+
*.pth filter=lfs diff=lfs merge=lfs -text
|
| 24 |
+
*.rar filter=lfs diff=lfs merge=lfs -text
|
| 25 |
+
*.safetensors filter=lfs diff=lfs merge=lfs -text
|
| 26 |
+
saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
| 27 |
+
*.tar.* filter=lfs diff=lfs merge=lfs -text
|
| 28 |
+
*.tar filter=lfs diff=lfs merge=lfs -text
|
| 29 |
+
*.tflite filter=lfs diff=lfs merge=lfs -text
|
| 30 |
+
*.tgz filter=lfs diff=lfs merge=lfs -text
|
| 31 |
+
*.wasm filter=lfs diff=lfs merge=lfs -text
|
| 32 |
+
*.xz filter=lfs diff=lfs merge=lfs -text
|
| 33 |
+
*.zip filter=lfs diff=lfs merge=lfs -text
|
| 34 |
+
*.zst filter=lfs diff=lfs merge=lfs -text
|
| 35 |
+
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
| 36 |
+
textual/tokenizer.json filter=lfs diff=lfs merge=lfs -text
|
| 37 |
+
visual/Constant_1562_attr__value filter=lfs diff=lfs merge=lfs -text
|
| 38 |
+
visual/Constant_1600_attr__value filter=lfs diff=lfs merge=lfs -text
|
| 39 |
+
visual/Constant_1632_attr__value filter=lfs diff=lfs merge=lfs -text
|
| 40 |
+
visual/Constant_1745_attr__value filter=lfs diff=lfs merge=lfs -text
|
| 41 |
+
visual/Constant_1764_attr__value filter=lfs diff=lfs merge=lfs -text
|
| 42 |
+
visual/Constant_1772_attr__value filter=lfs diff=lfs merge=lfs -text
|
| 43 |
+
visual/Constant_1801_attr__value filter=lfs diff=lfs merge=lfs -text
|
| 44 |
+
visual/Constant_1914_attr__value filter=lfs diff=lfs merge=lfs -text
|
| 45 |
+
visual/Constant_1933_attr__value filter=lfs diff=lfs merge=lfs -text
|
| 46 |
+
visual/Constant_1941_attr__value filter=lfs diff=lfs merge=lfs -text
|
| 47 |
+
visual/Constant_1970_attr__value filter=lfs diff=lfs merge=lfs -text
|
| 48 |
+
visual/Constant_2083_attr__value filter=lfs diff=lfs merge=lfs -text
|
| 49 |
+
visual/Constant_2102_attr__value filter=lfs diff=lfs merge=lfs -text
|
| 50 |
+
visual/Constant_2110_attr__value filter=lfs diff=lfs merge=lfs -text
|
| 51 |
+
visual/Constant_2139_attr__value filter=lfs diff=lfs merge=lfs -text
|
| 52 |
+
visual/Constant_2252_attr__value filter=lfs diff=lfs merge=lfs -text
|
| 53 |
+
visual/Constant_2271_attr__value filter=lfs diff=lfs merge=lfs -text
|
| 54 |
+
visual/Constant_2279_attr__value filter=lfs diff=lfs merge=lfs -text
|
| 55 |
+
visual/Constant_2308_attr__value filter=lfs diff=lfs merge=lfs -text
|
| 56 |
+
visual/Constant_2421_attr__value filter=lfs diff=lfs merge=lfs -text
|
| 57 |
+
visual/Constant_2440_attr__value filter=lfs diff=lfs merge=lfs -text
|
| 58 |
+
visual/Constant_2448_attr__value filter=lfs diff=lfs merge=lfs -text
|
| 59 |
+
visual/Constant_2477_attr__value filter=lfs diff=lfs merge=lfs -text
|
| 60 |
+
visual/Constant_2590_attr__value filter=lfs diff=lfs merge=lfs -text
|
| 61 |
+
visual/Constant_2609_attr__value filter=lfs diff=lfs merge=lfs -text
|
| 62 |
+
visual/Constant_2617_attr__value filter=lfs diff=lfs merge=lfs -text
|
| 63 |
+
visual/Constant_2646_attr__value filter=lfs diff=lfs merge=lfs -text
|
| 64 |
+
visual/Constant_2759_attr__value filter=lfs diff=lfs merge=lfs -text
|
| 65 |
+
visual/Constant_2778_attr__value filter=lfs diff=lfs merge=lfs -text
|
| 66 |
+
visual/Constant_2786_attr__value filter=lfs diff=lfs merge=lfs -text
|
| 67 |
+
visual/Constant_2815_attr__value filter=lfs diff=lfs merge=lfs -text
|
| 68 |
+
visual/Constant_2928_attr__value filter=lfs diff=lfs merge=lfs -text
|
| 69 |
+
visual/Constant_2947_attr__value filter=lfs diff=lfs merge=lfs -text
|
| 70 |
+
visual/Constant_2955_attr__value filter=lfs diff=lfs merge=lfs -text
|
| 71 |
+
visual/Constant_2984_attr__value filter=lfs diff=lfs merge=lfs -text
|
| 72 |
+
visual/Constant_3097_attr__value filter=lfs diff=lfs merge=lfs -text
|
| 73 |
+
visual/Constant_3116_attr__value filter=lfs diff=lfs merge=lfs -text
|
| 74 |
+
visual/Constant_3124_attr__value filter=lfs diff=lfs merge=lfs -text
|
| 75 |
+
visual/Constant_3153_attr__value filter=lfs diff=lfs merge=lfs -text
|
| 76 |
+
visual/Constant_3266_attr__value filter=lfs diff=lfs merge=lfs -text
|
| 77 |
+
visual/Constant_3285_attr__value filter=lfs diff=lfs merge=lfs -text
|
| 78 |
+
visual/Constant_3293_attr__value filter=lfs diff=lfs merge=lfs -text
|
| 79 |
+
visual/Constant_3322_attr__value filter=lfs diff=lfs merge=lfs -text
|
| 80 |
+
visual/Constant_3435_attr__value filter=lfs diff=lfs merge=lfs -text
|
| 81 |
+
visual/Constant_3454_attr__value filter=lfs diff=lfs merge=lfs -text
|
| 82 |
+
visual/Constant_3462_attr__value filter=lfs diff=lfs merge=lfs -text
|
| 83 |
+
visual/Constant_3491_attr__value filter=lfs diff=lfs merge=lfs -text
|
| 84 |
+
visual/Constant_3604_attr__value filter=lfs diff=lfs merge=lfs -text
|
| 85 |
+
visual/Constant_3623_attr__value filter=lfs diff=lfs merge=lfs -text
|
| 86 |
+
visual/Constant_3631_attr__value filter=lfs diff=lfs merge=lfs -text
|
| 87 |
+
visual/Constant_3660_attr__value filter=lfs diff=lfs merge=lfs -text
|
| 88 |
+
visual/Constant_3773_attr__value filter=lfs diff=lfs merge=lfs -text
|
| 89 |
+
visual/Constant_3792_attr__value filter=lfs diff=lfs merge=lfs -text
|
| 90 |
+
visual/Constant_3800_attr__value filter=lfs diff=lfs merge=lfs -text
|
| 91 |
+
visual/Constant_3829_attr__value filter=lfs diff=lfs merge=lfs -text
|
| 92 |
+
visual/Constant_3942_attr__value filter=lfs diff=lfs merge=lfs -text
|
| 93 |
+
visual/Constant_3961_attr__value filter=lfs diff=lfs merge=lfs -text
|
| 94 |
+
visual/Constant_3969_attr__value filter=lfs diff=lfs merge=lfs -text
|
| 95 |
+
visual/Constant_3998_attr__value filter=lfs diff=lfs merge=lfs -text
|
| 96 |
+
visual/Constant_4111_attr__value filter=lfs diff=lfs merge=lfs -text
|
| 97 |
+
visual/Constant_4130_attr__value filter=lfs diff=lfs merge=lfs -text
|
| 98 |
+
visual/Constant_4138_attr__value filter=lfs diff=lfs merge=lfs -text
|
| 99 |
+
visual/Constant_4167_attr__value filter=lfs diff=lfs merge=lfs -text
|
| 100 |
+
visual/Constant_4280_attr__value filter=lfs diff=lfs merge=lfs -text
|
| 101 |
+
visual/Constant_4299_attr__value filter=lfs diff=lfs merge=lfs -text
|
| 102 |
+
visual/Constant_4307_attr__value filter=lfs diff=lfs merge=lfs -text
|
| 103 |
+
visual/Constant_4336_attr__value filter=lfs diff=lfs merge=lfs -text
|
| 104 |
+
visual/Constant_4449_attr__value filter=lfs diff=lfs merge=lfs -text
|
| 105 |
+
visual/Constant_4468_attr__value filter=lfs diff=lfs merge=lfs -text
|
| 106 |
+
visual/Constant_4476_attr__value filter=lfs diff=lfs merge=lfs -text
|
| 107 |
+
visual/Constant_4505_attr__value filter=lfs diff=lfs merge=lfs -text
|
| 108 |
+
visual/Constant_4618_attr__value filter=lfs diff=lfs merge=lfs -text
|
| 109 |
+
visual/Constant_4637_attr__value filter=lfs diff=lfs merge=lfs -text
|
| 110 |
+
visual/Constant_4645_attr__value filter=lfs diff=lfs merge=lfs -text
|
| 111 |
+
visual/Constant_4674_attr__value filter=lfs diff=lfs merge=lfs -text
|
| 112 |
+
visual/Constant_4787_attr__value filter=lfs diff=lfs merge=lfs -text
|
| 113 |
+
visual/Constant_4806_attr__value filter=lfs diff=lfs merge=lfs -text
|
| 114 |
+
visual/Constant_4814_attr__value filter=lfs diff=lfs merge=lfs -text
|
| 115 |
+
visual/Constant_4843_attr__value filter=lfs diff=lfs merge=lfs -text
|
| 116 |
+
visual/Constant_4956_attr__value filter=lfs diff=lfs merge=lfs -text
|
| 117 |
+
visual/Constant_4975_attr__value filter=lfs diff=lfs merge=lfs -text
|
| 118 |
+
visual/Constant_4983_attr__value filter=lfs diff=lfs merge=lfs -text
|
| 119 |
+
visual/Constant_5012_attr__value filter=lfs diff=lfs merge=lfs -text
|
| 120 |
+
visual/Constant_5125_attr__value filter=lfs diff=lfs merge=lfs -text
|
| 121 |
+
visual/Constant_5144_attr__value filter=lfs diff=lfs merge=lfs -text
|
| 122 |
+
visual/Constant_5152_attr__value filter=lfs diff=lfs merge=lfs -text
|
| 123 |
+
visual/Constant_5181_attr__value filter=lfs diff=lfs merge=lfs -text
|
| 124 |
+
visual/Constant_5294_attr__value filter=lfs diff=lfs merge=lfs -text
|
| 125 |
+
visual/Constant_5313_attr__value filter=lfs diff=lfs merge=lfs -text
|
| 126 |
+
visual/Constant_5321_attr__value filter=lfs diff=lfs merge=lfs -text
|
| 127 |
+
visual/Constant_5350_attr__value filter=lfs diff=lfs merge=lfs -text
|
| 128 |
+
visual/Constant_5463_attr__value filter=lfs diff=lfs merge=lfs -text
|
| 129 |
+
visual/Constant_5482_attr__value filter=lfs diff=lfs merge=lfs -text
|
| 130 |
+
visual/Constant_5490_attr__value filter=lfs diff=lfs merge=lfs -text
|
| 131 |
+
visual/Constant_5519_attr__value filter=lfs diff=lfs merge=lfs -text
|
| 132 |
+
visual/Constant_5632_attr__value filter=lfs diff=lfs merge=lfs -text
|
| 133 |
+
visual/Constant_5651_attr__value filter=lfs diff=lfs merge=lfs -text
|
| 134 |
+
visual/Constant_5659_attr__value filter=lfs diff=lfs merge=lfs -text
|
| 135 |
+
visual/Constant_5670_attr__value filter=lfs diff=lfs merge=lfs -text
|
| 136 |
+
textual/rknpu/rk3566/model.rknn filter=lfs diff=lfs merge=lfs -text
|
| 137 |
+
textual/rknpu/rk3576/model.rknn filter=lfs diff=lfs merge=lfs -text
|
| 138 |
+
textual/rknpu/rk3588/model.rknn filter=lfs diff=lfs merge=lfs -text
|
| 139 |
+
visual/rknpu/rk3566/model.rknn filter=lfs diff=lfs merge=lfs -text
|
| 140 |
+
visual/rknpu/rk3576/model.rknn filter=lfs diff=lfs merge=lfs -text
|
| 141 |
+
visual/rknpu/rk3588/model.rknn filter=lfs diff=lfs merge=lfs -text
|
| 142 |
+
textual/rknpu/rk3568/model.rknn filter=lfs diff=lfs merge=lfs -text
|
| 143 |
+
visual/rknpu/rk3568/model.rknn filter=lfs diff=lfs merge=lfs -text
|
LABSE-Vit-L-14/README.md
ADDED
|
@@ -0,0 +1,11 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
---
|
| 2 |
+
tags:
|
| 3 |
+
- immich
|
| 4 |
+
- clip
|
| 5 |
+
- multilingual
|
| 6 |
+
---
|
| 7 |
+
# Model Description
|
| 8 |
+
|
| 9 |
+
This repo contains ONNX exports for the associated CLIP model by M-CLIP. See the [M-CLIP](https://huggingface.co/M-CLIP) repo for more info.
|
| 10 |
+
|
| 11 |
+
This repo is specifically intended for use with [Immich](https://immich.app/), a self-hosted photo library.
|
LABSE-Vit-L-14/config.json
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
{"embed_dim": 768, "vision_cfg": {"image_size": 224, "layers": 24, "width": 1024, "patch_size": 14}, "text_cfg": {"context_length": 77, "vocab_size": 49408, "width": 768, "heads": 12, "layers": 12}}
|
LABSE-Vit-L-14/source.txt
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
https://huggingface.co/immich-app/LABSE-Vit-L-14
|
LABSE-Vit-L-14/textual/model.onnx
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:77333b8ed51cb501b0217c172c4c75bdd37ee15915335f3242dae063bf0f82b4
|
| 3 |
+
size 1883874543
|
LABSE-Vit-L-14/textual/rknpu/rk3566/model.rknn
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:3f942de9726c58fea2b153a24f24f061e797af72a07ef366cdb6c479261b5a2b
|
| 3 |
+
size 945356824
|
LABSE-Vit-L-14/textual/rknpu/rk3568/model.rknn
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:5bad1a83b0d6ad86f66409365694b36d9a327bcf8a9fe2090e97f18004a28184
|
| 3 |
+
size 945356824
|
LABSE-Vit-L-14/textual/rknpu/rk3576/model.rknn
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:036857011e24e80fd2df6398ac332ead044e6a96b0474d739b37321b30c1d926
|
| 3 |
+
size 950513624
|
LABSE-Vit-L-14/textual/rknpu/rk3588/model.rknn
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:3aed2ead635849e9a7ef90beb8f4f039dddef5bf8a66c0fb9f6ebdd2188bb9d3
|
| 3 |
+
size 944952920
|
LABSE-Vit-L-14/textual/special_tokens_map.json
ADDED
|
@@ -0,0 +1,37 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"cls_token": {
|
| 3 |
+
"content": "[CLS]",
|
| 4 |
+
"lstrip": false,
|
| 5 |
+
"normalized": false,
|
| 6 |
+
"rstrip": false,
|
| 7 |
+
"single_word": false
|
| 8 |
+
},
|
| 9 |
+
"mask_token": {
|
| 10 |
+
"content": "[MASK]",
|
| 11 |
+
"lstrip": false,
|
| 12 |
+
"normalized": false,
|
| 13 |
+
"rstrip": false,
|
| 14 |
+
"single_word": false
|
| 15 |
+
},
|
| 16 |
+
"pad_token": {
|
| 17 |
+
"content": "[PAD]",
|
| 18 |
+
"lstrip": false,
|
| 19 |
+
"normalized": false,
|
| 20 |
+
"rstrip": false,
|
| 21 |
+
"single_word": false
|
| 22 |
+
},
|
| 23 |
+
"sep_token": {
|
| 24 |
+
"content": "[SEP]",
|
| 25 |
+
"lstrip": false,
|
| 26 |
+
"normalized": false,
|
| 27 |
+
"rstrip": false,
|
| 28 |
+
"single_word": false
|
| 29 |
+
},
|
| 30 |
+
"unk_token": {
|
| 31 |
+
"content": "[UNK]",
|
| 32 |
+
"lstrip": false,
|
| 33 |
+
"normalized": false,
|
| 34 |
+
"rstrip": false,
|
| 35 |
+
"single_word": false
|
| 36 |
+
}
|
| 37 |
+
}
|
LABSE-Vit-L-14/textual/tokenizer.json
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:09216b42d2697b7b4a26ac05ff09ba8bf52dc19b896c5ceee8bbff9f39055322
|
| 3 |
+
size 13631919
|
LABSE-Vit-L-14/textual/tokenizer_config.json
ADDED
|
@@ -0,0 +1,59 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"added_tokens_decoder": {
|
| 3 |
+
"0": {
|
| 4 |
+
"content": "[PAD]",
|
| 5 |
+
"lstrip": false,
|
| 6 |
+
"normalized": false,
|
| 7 |
+
"rstrip": false,
|
| 8 |
+
"single_word": false,
|
| 9 |
+
"special": true
|
| 10 |
+
},
|
| 11 |
+
"100": {
|
| 12 |
+
"content": "[UNK]",
|
| 13 |
+
"lstrip": false,
|
| 14 |
+
"normalized": false,
|
| 15 |
+
"rstrip": false,
|
| 16 |
+
"single_word": false,
|
| 17 |
+
"special": true
|
| 18 |
+
},
|
| 19 |
+
"101": {
|
| 20 |
+
"content": "[CLS]",
|
| 21 |
+
"lstrip": false,
|
| 22 |
+
"normalized": false,
|
| 23 |
+
"rstrip": false,
|
| 24 |
+
"single_word": false,
|
| 25 |
+
"special": true
|
| 26 |
+
},
|
| 27 |
+
"102": {
|
| 28 |
+
"content": "[SEP]",
|
| 29 |
+
"lstrip": false,
|
| 30 |
+
"normalized": false,
|
| 31 |
+
"rstrip": false,
|
| 32 |
+
"single_word": false,
|
| 33 |
+
"special": true
|
| 34 |
+
},
|
| 35 |
+
"103": {
|
| 36 |
+
"content": "[MASK]",
|
| 37 |
+
"lstrip": false,
|
| 38 |
+
"normalized": false,
|
| 39 |
+
"rstrip": false,
|
| 40 |
+
"single_word": false,
|
| 41 |
+
"special": true
|
| 42 |
+
}
|
| 43 |
+
},
|
| 44 |
+
"clean_up_tokenization_spaces": false,
|
| 45 |
+
"cls_token": "[CLS]",
|
| 46 |
+
"do_basic_tokenize": true,
|
| 47 |
+
"do_lower_case": false,
|
| 48 |
+
"extra_special_tokens": {},
|
| 49 |
+
"full_tokenizer_file": null,
|
| 50 |
+
"mask_token": "[MASK]",
|
| 51 |
+
"model_max_length": 512,
|
| 52 |
+
"never_split": null,
|
| 53 |
+
"pad_token": "[PAD]",
|
| 54 |
+
"sep_token": "[SEP]",
|
| 55 |
+
"strip_accents": null,
|
| 56 |
+
"tokenize_chinese_chars": true,
|
| 57 |
+
"tokenizer_class": "BertTokenizer",
|
| 58 |
+
"unk_token": "[UNK]"
|
| 59 |
+
}
|
LABSE-Vit-L-14/textual/vocab.txt
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
LABSE-Vit-L-14/visual/model.onnx
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:2b02d572f59c509f4b97b9c54a868453cca1a652cd5d60e1d51d0052f055cb8c
|
| 3 |
+
size 1216297719
|
LABSE-Vit-L-14/visual/preprocess_cfg.json
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
{"size": [224, 224], "mode": "RGB", "mean": [0.48145466, 0.4578275, 0.40821073], "std": [0.26862954, 0.26130258, 0.27577711], "interpolation": "bicubic", "resize_mode": "shortest", "fill_color": 0}
|
LABSE-Vit-L-14/visual/rknpu/rk3566/model.rknn
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:ec804bed50a21bd3774271613eb8479ad847cee6b4211a1e41c79c3ef5853f7e
|
| 3 |
+
size 644018803
|
LABSE-Vit-L-14/visual/rknpu/rk3568/model.rknn
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:cfd435401c68bfe4942017528d7cd1ea096e87d9fa934e7aa1e648687b7e5a64
|
| 3 |
+
size 644018803
|
LABSE-Vit-L-14/visual/rknpu/rk3576/model.rknn
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:95675ecdd458ae23484e301cfae400f647d042eb77ccf604315ab879091390a9
|
| 3 |
+
size 645072115
|
LABSE-Vit-L-14/visual/rknpu/rk3588/model.rknn
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:ef6067e14f92f4025eac000a7552309784288c1d3c75cccbed2c0d2334aff54b
|
| 3 |
+
size 626315187
|
LaBSE-en-ru/.gitattributes
ADDED
|
@@ -0,0 +1,17 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
*.bin.* filter=lfs diff=lfs merge=lfs -text
|
| 2 |
+
*.lfs.* filter=lfs diff=lfs merge=lfs -text
|
| 3 |
+
*.bin filter=lfs diff=lfs merge=lfs -text
|
| 4 |
+
*.h5 filter=lfs diff=lfs merge=lfs -text
|
| 5 |
+
*.tflite filter=lfs diff=lfs merge=lfs -text
|
| 6 |
+
*.tar.gz filter=lfs diff=lfs merge=lfs -text
|
| 7 |
+
*.ot filter=lfs diff=lfs merge=lfs -text
|
| 8 |
+
*.onnx filter=lfs diff=lfs merge=lfs -text
|
| 9 |
+
*.arrow filter=lfs diff=lfs merge=lfs -text
|
| 10 |
+
*.ftz filter=lfs diff=lfs merge=lfs -text
|
| 11 |
+
*.joblib filter=lfs diff=lfs merge=lfs -text
|
| 12 |
+
*.model filter=lfs diff=lfs merge=lfs -text
|
| 13 |
+
*.msgpack filter=lfs diff=lfs merge=lfs -text
|
| 14 |
+
*.pb filter=lfs diff=lfs merge=lfs -text
|
| 15 |
+
*.pt filter=lfs diff=lfs merge=lfs -text
|
| 16 |
+
*.pth filter=lfs diff=lfs merge=lfs -text
|
| 17 |
+
model.safetensors filter=lfs diff=lfs merge=lfs -text
|
LaBSE-en-ru/1_Pooling/config.json
ADDED
|
@@ -0,0 +1,7 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"word_embedding_dimension": 768,
|
| 3 |
+
"pooling_mode_cls_token": true,
|
| 4 |
+
"pooling_mode_mean_tokens": false,
|
| 5 |
+
"pooling_mode_max_tokens": false,
|
| 6 |
+
"pooling_mode_mean_sqrt_len_tokens": false
|
| 7 |
+
}
|
LaBSE-en-ru/2_Dense/config.json
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
{"in_features": 768, "out_features": 768, "bias": true, "activation_function": "torch.nn.modules.activation.Tanh"}
|
LaBSE-en-ru/2_Dense/pytorch_model.bin
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:06fb85120e40adf0ab188c4f0cc7684f702cb2023532947d1b85f325b0a3645c
|
| 3 |
+
size 2363431
|
LaBSE-en-ru/README.md
ADDED
|
@@ -0,0 +1,35 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
---
|
| 2 |
+
language: ["ru", "en"]
|
| 3 |
+
tags:
|
| 4 |
+
- feature-extraction
|
| 5 |
+
- embeddings
|
| 6 |
+
- sentence-similarity
|
| 7 |
+
---
|
| 8 |
+
# LaBSE for English and Russian
|
| 9 |
+
This is a truncated version of [sentence-transformers/LaBSE](https://huggingface.co/sentence-transformers/LaBSE), which is, in turn, a port of [LaBSE](https://tfhub.dev/google/LaBSE/1) by Google.
|
| 10 |
+
|
| 11 |
+
The current model has only English and Russian tokens left in the vocabulary.
|
| 12 |
+
Thus, the vocabulary is 10% of the original, and the number of parameters in the whole model is 27% of the original, without any loss in the quality of English and Russian embeddings.
|
| 13 |
+
|
| 14 |
+
To get the sentence embeddings, you can use the following code:
|
| 15 |
+
```python
|
| 16 |
+
import torch
|
| 17 |
+
from transformers import AutoTokenizer, AutoModel
|
| 18 |
+
tokenizer = AutoTokenizer.from_pretrained("cointegrated/LaBSE-en-ru")
|
| 19 |
+
model = AutoModel.from_pretrained("cointegrated/LaBSE-en-ru")
|
| 20 |
+
sentences = ["Hello World", "Привет Мир"]
|
| 21 |
+
encoded_input = tokenizer(sentences, padding=True, truncation=True, max_length=64, return_tensors='pt')
|
| 22 |
+
with torch.no_grad():
|
| 23 |
+
model_output = model(**encoded_input)
|
| 24 |
+
embeddings = model_output.pooler_output
|
| 25 |
+
embeddings = torch.nn.functional.normalize(embeddings)
|
| 26 |
+
print(embeddings)
|
| 27 |
+
```
|
| 28 |
+
|
| 29 |
+
The model has been truncated in [this notebook](https://colab.research.google.com/drive/1dnPRn0-ugj3vZgSpyCC9sgslM2SuSfHy?usp=sharing).
|
| 30 |
+
You can adapt it for other languages (like [EIStakovskii/LaBSE-fr-de](https://huggingface.co/EIStakovskii/LaBSE-fr-de)), models or datasets.
|
| 31 |
+
|
| 32 |
+
## Reference:
|
| 33 |
+
Fangxiaoyu Feng, Yinfei Yang, Daniel Cer, Naveen Arivazhagan, Wei Wang. [Language-agnostic BERT Sentence Embedding](https://arxiv.org/abs/2007.01852). July 2020
|
| 34 |
+
|
| 35 |
+
License: [https://tfhub.dev/google/LaBSE/1](https://tfhub.dev/google/LaBSE/1)
|
LaBSE-en-ru/config.json
ADDED
|
@@ -0,0 +1,30 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"_name_or_path": "cointegrated/LaBSE-en-ru",
|
| 3 |
+
"architectures": [
|
| 4 |
+
"BertForPreTraining"
|
| 5 |
+
],
|
| 6 |
+
"attention_probs_dropout_prob": 0.1,
|
| 7 |
+
"directionality": "bidi",
|
| 8 |
+
"gradient_checkpointing": false,
|
| 9 |
+
"hidden_act": "gelu",
|
| 10 |
+
"hidden_dropout_prob": 0.1,
|
| 11 |
+
"hidden_size": 768,
|
| 12 |
+
"initializer_range": 0.02,
|
| 13 |
+
"intermediate_size": 3072,
|
| 14 |
+
"layer_norm_eps": 1e-12,
|
| 15 |
+
"max_position_embeddings": 512,
|
| 16 |
+
"model_type": "bert",
|
| 17 |
+
"num_attention_heads": 12,
|
| 18 |
+
"num_hidden_layers": 12,
|
| 19 |
+
"pad_token_id": 0,
|
| 20 |
+
"pooler_fc_size": 768,
|
| 21 |
+
"pooler_num_attention_heads": 12,
|
| 22 |
+
"pooler_num_fc_layers": 3,
|
| 23 |
+
"pooler_size_per_head": 128,
|
| 24 |
+
"pooler_type": "first_token_transform",
|
| 25 |
+
"position_embedding_type": "absolute",
|
| 26 |
+
"transformers_version": "4.5.1",
|
| 27 |
+
"type_vocab_size": 2,
|
| 28 |
+
"use_cache": true,
|
| 29 |
+
"vocab_size": 55083
|
| 30 |
+
}
|
LaBSE-en-ru/model.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:25678be7d335a05ceec55a820c49e9ec0d637c0d469ef09d5f7155ef42a41bd3
|
| 3 |
+
size 516003632
|
LaBSE-en-ru/modules.json
ADDED
|
@@ -0,0 +1,26 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
[
|
| 2 |
+
{
|
| 3 |
+
"idx": 0,
|
| 4 |
+
"name": "0",
|
| 5 |
+
"path": "",
|
| 6 |
+
"type": "sentence_transformers.models.Transformer"
|
| 7 |
+
},
|
| 8 |
+
{
|
| 9 |
+
"idx": 1,
|
| 10 |
+
"name": "1",
|
| 11 |
+
"path": "1_Pooling",
|
| 12 |
+
"type": "sentence_transformers.models.Pooling"
|
| 13 |
+
},
|
| 14 |
+
{
|
| 15 |
+
"idx": 2,
|
| 16 |
+
"name": "2",
|
| 17 |
+
"path": "2_Dense",
|
| 18 |
+
"type": "sentence_transformers.models.Dense"
|
| 19 |
+
},
|
| 20 |
+
{
|
| 21 |
+
"idx": 3,
|
| 22 |
+
"name": "3",
|
| 23 |
+
"path": "3_Normalize",
|
| 24 |
+
"type": "sentence_transformers.models.Normalize"
|
| 25 |
+
}
|
| 26 |
+
]
|
LaBSE-en-ru/pytorch_model.bin
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:d929e16c4cc9b40cdd96219e8ce3c1084129798435b3c67212efd68fa018673b
|
| 3 |
+
size 516063655
|
LaBSE-en-ru/sentence_bert_config.json
ADDED
|
@@ -0,0 +1,4 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"max_seq_length": 512,
|
| 3 |
+
"do_lower_case": false
|
| 4 |
+
}
|
LaBSE-en-ru/source.txt
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
https://huggingface.co/cointegrated/LaBSE-en-ru
|
LaBSE-en-ru/special_tokens_map.json
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
{"unk_token": "[UNK]", "sep_token": "[SEP]", "pad_token": "[PAD]", "cls_token": "[CLS]", "mask_token": "[MASK]"}
|
LaBSE-en-ru/tf_model.h5
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:78711a9ec0426e02e001f890ac237d7cb0022cf14941dc45495019a595effa7e
|
| 3 |
+
size 687064460
|
LaBSE-en-ru/tokenizer_config.json
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
{"do_lower_case": false, "model_max_length": 512}
|
LaBSE-en-ru/vocab.txt
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
LaBSE-fr-de/.gitattributes
ADDED
|
@@ -0,0 +1,34 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
*.7z filter=lfs diff=lfs merge=lfs -text
|
| 2 |
+
*.arrow filter=lfs diff=lfs merge=lfs -text
|
| 3 |
+
*.bin filter=lfs diff=lfs merge=lfs -text
|
| 4 |
+
*.bz2 filter=lfs diff=lfs merge=lfs -text
|
| 5 |
+
*.ckpt filter=lfs diff=lfs merge=lfs -text
|
| 6 |
+
*.ftz filter=lfs diff=lfs merge=lfs -text
|
| 7 |
+
*.gz filter=lfs diff=lfs merge=lfs -text
|
| 8 |
+
*.h5 filter=lfs diff=lfs merge=lfs -text
|
| 9 |
+
*.joblib filter=lfs diff=lfs merge=lfs -text
|
| 10 |
+
*.lfs.* filter=lfs diff=lfs merge=lfs -text
|
| 11 |
+
*.mlmodel filter=lfs diff=lfs merge=lfs -text
|
| 12 |
+
*.model filter=lfs diff=lfs merge=lfs -text
|
| 13 |
+
*.msgpack filter=lfs diff=lfs merge=lfs -text
|
| 14 |
+
*.npy filter=lfs diff=lfs merge=lfs -text
|
| 15 |
+
*.npz filter=lfs diff=lfs merge=lfs -text
|
| 16 |
+
*.onnx filter=lfs diff=lfs merge=lfs -text
|
| 17 |
+
*.ot filter=lfs diff=lfs merge=lfs -text
|
| 18 |
+
*.parquet filter=lfs diff=lfs merge=lfs -text
|
| 19 |
+
*.pb filter=lfs diff=lfs merge=lfs -text
|
| 20 |
+
*.pickle filter=lfs diff=lfs merge=lfs -text
|
| 21 |
+
*.pkl filter=lfs diff=lfs merge=lfs -text
|
| 22 |
+
*.pt filter=lfs diff=lfs merge=lfs -text
|
| 23 |
+
*.pth filter=lfs diff=lfs merge=lfs -text
|
| 24 |
+
*.rar filter=lfs diff=lfs merge=lfs -text
|
| 25 |
+
*.safetensors filter=lfs diff=lfs merge=lfs -text
|
| 26 |
+
saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
| 27 |
+
*.tar.* filter=lfs diff=lfs merge=lfs -text
|
| 28 |
+
*.tflite filter=lfs diff=lfs merge=lfs -text
|
| 29 |
+
*.tgz filter=lfs diff=lfs merge=lfs -text
|
| 30 |
+
*.wasm filter=lfs diff=lfs merge=lfs -text
|
| 31 |
+
*.xz filter=lfs diff=lfs merge=lfs -text
|
| 32 |
+
*.zip filter=lfs diff=lfs merge=lfs -text
|
| 33 |
+
*.zst filter=lfs diff=lfs merge=lfs -text
|
| 34 |
+
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
LaBSE-fr-de/README.md
ADDED
|
@@ -0,0 +1,31 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
---
|
| 2 |
+
language: ["fr", "de"]
|
| 3 |
+
tags:
|
| 4 |
+
- feature-extraction
|
| 5 |
+
- embeddings
|
| 6 |
+
- sentence-similarity
|
| 7 |
+
---
|
| 8 |
+
# LaBSE for French and German
|
| 9 |
+
This is a shortened version of [sentence-transformers/LaBSE](https://huggingface.co/sentence-transformers/LaBSE). The model was prepared with the direct help of [cointegrated](https://huggingface.co/cointegrated), the author of the [LaBSE-en-ru model](https://huggingface.co/cointegrated/LaBSE-en-ru).
|
| 10 |
+
|
| 11 |
+
The current model includes only French and German tokens, and the vocabulary is thus 10% of the original, while the number of parameters in the whole model is 27% of the original.
|
| 12 |
+
|
| 13 |
+
To get the sentence embeddings, you can use the following code:
|
| 14 |
+
```python
|
| 15 |
+
import torch
|
| 16 |
+
from transformers import AutoTokenizer, AutoModel
|
| 17 |
+
tokenizer = AutoTokenizer.from_pretrained("EIStakovskii/LaBSE-fr-de")
|
| 18 |
+
model = AutoModel.from_pretrained("EIStakovskii/LaBSE-fr-de")
|
| 19 |
+
sentences = ["Wie geht es dir?", "Comment vas-tu?"]
|
| 20 |
+
encoded_input = tokenizer(sentences, padding=True, truncation=True, max_length=64, return_tensors='pt')
|
| 21 |
+
with torch.no_grad():
|
| 22 |
+
    model_output = model(**encoded_input)
|
| 23 |
+
embeddings = model_output.pooler_output
|
| 24 |
+
embeddings = torch.nn.functional.normalize(embeddings)
|
| 25 |
+
print(embeddings)
|
| 26 |
+
```
|
| 27 |
+
|
| 28 |
+
## Reference:
|
| 29 |
+
Fangxiaoyu Feng, Yinfei Yang, Daniel Cer, Naveen Arivazhagan, Wei Wang. [Language-agnostic BERT Sentence Embedding](https://arxiv.org/abs/2007.01852). July 2020
|
| 30 |
+
|
| 31 |
+
License: [https://tfhub.dev/google/LaBSE/1](https://tfhub.dev/google/LaBSE/1)
|
LaBSE-fr-de/config.json
ADDED
|
@@ -0,0 +1,32 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"_name_or_path": "sentence-transformers/LaBSE",
|
| 3 |
+
"architectures": [
|
| 4 |
+
"BertForPreTraining"
|
| 5 |
+
],
|
| 6 |
+
"attention_probs_dropout_prob": 0.1,
|
| 7 |
+
"classifier_dropout": null,
|
| 8 |
+
"directionality": "bidi",
|
| 9 |
+
"gradient_checkpointing": false,
|
| 10 |
+
"hidden_act": "gelu",
|
| 11 |
+
"hidden_dropout_prob": 0.1,
|
| 12 |
+
"hidden_size": 768,
|
| 13 |
+
"initializer_range": 0.02,
|
| 14 |
+
"intermediate_size": 3072,
|
| 15 |
+
"layer_norm_eps": 1e-12,
|
| 16 |
+
"max_position_embeddings": 512,
|
| 17 |
+
"model_type": "bert",
|
| 18 |
+
"num_attention_heads": 12,
|
| 19 |
+
"num_hidden_layers": 12,
|
| 20 |
+
"pad_token_id": 0,
|
| 21 |
+
"pooler_fc_size": 768,
|
| 22 |
+
"pooler_num_attention_heads": 12,
|
| 23 |
+
"pooler_num_fc_layers": 3,
|
| 24 |
+
"pooler_size_per_head": 128,
|
| 25 |
+
"pooler_type": "first_token_transform",
|
| 26 |
+
"position_embedding_type": "absolute",
|
| 27 |
+
"torch_dtype": "float32",
|
| 28 |
+
"transformers_version": "4.20.1",
|
| 29 |
+
"type_vocab_size": 2,
|
| 30 |
+
"use_cache": true,
|
| 31 |
+
"vocab_size": 21754
|
| 32 |
+
}
|
LaBSE-fr-de/model.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:a9d871e6e6336064e8260303b679b4da9fb530c3a853fe43a1443ccf5fbf5433
|
| 3 |
+
size 413483584
|
LaBSE-fr-de/pytorch_model.bin
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:2186e5f6fd3a8d517610234dcf8b7b4522767ad2a7ada93e48cad3d2da3dafbd
|
| 3 |
+
size 413525283
|
LaBSE-fr-de/source.txt
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
https://huggingface.co/EIStakovskii/LaBSE-fr-de
|
LaBSE-fr-de/special_tokens_map.json
ADDED
|
@@ -0,0 +1,7 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"cls_token": "[CLS]",
|
| 3 |
+
"mask_token": "[MASK]",
|
| 4 |
+
"pad_token": "[PAD]",
|
| 5 |
+
"sep_token": "[SEP]",
|
| 6 |
+
"unk_token": "[UNK]"
|
| 7 |
+
}
|
LaBSE-fr-de/tokenizer_config.json
ADDED
|
@@ -0,0 +1,17 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"cls_token": "[CLS]",
|
| 3 |
+
"do_basic_tokenize": true,
|
| 4 |
+
"do_lower_case": false,
|
| 5 |
+
"full_tokenizer_file": null,
|
| 6 |
+
"mask_token": "[MASK]",
|
| 7 |
+
"model_max_length": 512,
|
| 8 |
+
"name_or_path": "sentence-transformers/LaBSE",
|
| 9 |
+
"never_split": null,
|
| 10 |
+
"pad_token": "[PAD]",
|
| 11 |
+
"sep_token": "[SEP]",
|
| 12 |
+
"special_tokens_map_file": "labse-pytorch/special_tokens_map.json",
|
| 13 |
+
"strip_accents": null,
|
| 14 |
+
"tokenize_chinese_chars": true,
|
| 15 |
+
"tokenizer_class": "BertTokenizer",
|
| 16 |
+
"unk_token": "[UNK]"
|
| 17 |
+
}
|
LaBSE-fr-de/vocab.txt
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
LaBSE-onnx/.gitattributes
ADDED
|
@@ -0,0 +1,36 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
*.7z filter=lfs diff=lfs merge=lfs -text
|
| 2 |
+
*.arrow filter=lfs diff=lfs merge=lfs -text
|
| 3 |
+
*.bin filter=lfs diff=lfs merge=lfs -text
|
| 4 |
+
*.bz2 filter=lfs diff=lfs merge=lfs -text
|
| 5 |
+
*.ckpt filter=lfs diff=lfs merge=lfs -text
|
| 6 |
+
*.ftz filter=lfs diff=lfs merge=lfs -text
|
| 7 |
+
*.gz filter=lfs diff=lfs merge=lfs -text
|
| 8 |
+
*.h5 filter=lfs diff=lfs merge=lfs -text
|
| 9 |
+
*.joblib filter=lfs diff=lfs merge=lfs -text
|
| 10 |
+
*.lfs.* filter=lfs diff=lfs merge=lfs -text
|
| 11 |
+
*.mlmodel filter=lfs diff=lfs merge=lfs -text
|
| 12 |
+
*.model filter=lfs diff=lfs merge=lfs -text
|
| 13 |
+
*.msgpack filter=lfs diff=lfs merge=lfs -text
|
| 14 |
+
*.npy filter=lfs diff=lfs merge=lfs -text
|
| 15 |
+
*.npz filter=lfs diff=lfs merge=lfs -text
|
| 16 |
+
*.onnx filter=lfs diff=lfs merge=lfs -text
|
| 17 |
+
*.ot filter=lfs diff=lfs merge=lfs -text
|
| 18 |
+
*.parquet filter=lfs diff=lfs merge=lfs -text
|
| 19 |
+
*.pb filter=lfs diff=lfs merge=lfs -text
|
| 20 |
+
*.pickle filter=lfs diff=lfs merge=lfs -text
|
| 21 |
+
*.pkl filter=lfs diff=lfs merge=lfs -text
|
| 22 |
+
*.pt filter=lfs diff=lfs merge=lfs -text
|
| 23 |
+
*.pth filter=lfs diff=lfs merge=lfs -text
|
| 24 |
+
*.rar filter=lfs diff=lfs merge=lfs -text
|
| 25 |
+
*.safetensors filter=lfs diff=lfs merge=lfs -text
|
| 26 |
+
saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
| 27 |
+
*.tar.* filter=lfs diff=lfs merge=lfs -text
|
| 28 |
+
*.tar filter=lfs diff=lfs merge=lfs -text
|
| 29 |
+
*.tflite filter=lfs diff=lfs merge=lfs -text
|
| 30 |
+
*.tgz filter=lfs diff=lfs merge=lfs -text
|
| 31 |
+
*.wasm filter=lfs diff=lfs merge=lfs -text
|
| 32 |
+
*.xz filter=lfs diff=lfs merge=lfs -text
|
| 33 |
+
*.zip filter=lfs diff=lfs merge=lfs -text
|
| 34 |
+
*.zst filter=lfs diff=lfs merge=lfs -text
|
| 35 |
+
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
| 36 |
+
tokenizer.json filter=lfs diff=lfs merge=lfs -text
|
LaBSE-onnx/1_Pooling/config.json
ADDED
|
@@ -0,0 +1,7 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"word_embedding_dimension": 768,
|
| 3 |
+
"pooling_mode_cls_token": true,
|
| 4 |
+
"pooling_mode_mean_tokens": false,
|
| 5 |
+
"pooling_mode_max_tokens": false,
|
| 6 |
+
"pooling_mode_mean_sqrt_len_tokens": false
|
| 7 |
+
}
|
LaBSE-onnx/config.json
ADDED
|
@@ -0,0 +1,31 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"_name_or_path": "/Users/Shared/Libs/sentence_transformers/models--sentence-transformers--LaBSE/snapshots/e34fab64a3011d2176c99545a93d5cbddc9a91b7/",
|
| 3 |
+
"architectures": [
|
| 4 |
+
"BertModel"
|
| 5 |
+
],
|
| 6 |
+
"attention_probs_dropout_prob": 0.1,
|
| 7 |
+
"classifier_dropout": null,
|
| 8 |
+
"directionality": "bidi",
|
| 9 |
+
"gradient_checkpointing": false,
|
| 10 |
+
"hidden_act": "gelu",
|
| 11 |
+
"hidden_dropout_prob": 0.1,
|
| 12 |
+
"hidden_size": 768,
|
| 13 |
+
"initializer_range": 0.02,
|
| 14 |
+
"intermediate_size": 3072,
|
| 15 |
+
"layer_norm_eps": 1e-12,
|
| 16 |
+
"max_position_embeddings": 512,
|
| 17 |
+
"model_type": "bert",
|
| 18 |
+
"num_attention_heads": 12,
|
| 19 |
+
"num_hidden_layers": 12,
|
| 20 |
+
"pad_token_id": 0,
|
| 21 |
+
"pooler_fc_size": 768,
|
| 22 |
+
"pooler_num_attention_heads": 12,
|
| 23 |
+
"pooler_num_fc_layers": 3,
|
| 24 |
+
"pooler_size_per_head": 128,
|
| 25 |
+
"pooler_type": "first_token_transform",
|
| 26 |
+
"position_embedding_type": "absolute",
|
| 27 |
+
"transformers_version": "4.42.3",
|
| 28 |
+
"type_vocab_size": 2,
|
| 29 |
+
"use_cache": true,
|
| 30 |
+
"vocab_size": 501153
|
| 31 |
+
}
|
LaBSE-onnx/model.onnx
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:75801bb2ed33e25d24efe96f2bf7f7ef6d109778fc48ada8aa3b1fbd8969743c
|
| 3 |
+
size 1881599307
|
LaBSE-onnx/source.txt
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
https://huggingface.co/LightEmbed/LaBSE-onnx
|
LaBSE-onnx/special_tokens_map.json
ADDED
|
@@ -0,0 +1,37 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"cls_token": {
|
| 3 |
+
"content": "[CLS]",
|
| 4 |
+
"lstrip": false,
|
| 5 |
+
"normalized": false,
|
| 6 |
+
"rstrip": false,
|
| 7 |
+
"single_word": false
|
| 8 |
+
},
|
| 9 |
+
"mask_token": {
|
| 10 |
+
"content": "[MASK]",
|
| 11 |
+
"lstrip": false,
|
| 12 |
+
"normalized": false,
|
| 13 |
+
"rstrip": false,
|
| 14 |
+
"single_word": false
|
| 15 |
+
},
|
| 16 |
+
"pad_token": {
|
| 17 |
+
"content": "[PAD]",
|
| 18 |
+
"lstrip": false,
|
| 19 |
+
"normalized": false,
|
| 20 |
+
"rstrip": false,
|
| 21 |
+
"single_word": false
|
| 22 |
+
},
|
| 23 |
+
"sep_token": {
|
| 24 |
+
"content": "[SEP]",
|
| 25 |
+
"lstrip": false,
|
| 26 |
+
"normalized": false,
|
| 27 |
+
"rstrip": false,
|
| 28 |
+
"single_word": false
|
| 29 |
+
},
|
| 30 |
+
"unk_token": {
|
| 31 |
+
"content": "[UNK]",
|
| 32 |
+
"lstrip": false,
|
| 33 |
+
"normalized": false,
|
| 34 |
+
"rstrip": false,
|
| 35 |
+
"single_word": false
|
| 36 |
+
}
|
| 37 |
+
}
|