Update the models
Browse files
This view is limited to 50 files because it contains too many changes.
See raw diff
- .gitattributes +3 -18
- README.md +15 -59
- models/minishlab/potion-multilingual-128M/README.md +128 -0
- models/minishlab/potion-multilingual-128M/fp16.d128.npy +3 -0
- models/minishlab/potion-multilingual-128M/fp16.d128.npy.zst +3 -0
- models/minishlab/potion-multilingual-128M/fp16.d256.npy +3 -0
- models/minishlab/potion-multilingual-128M/fp16.d256.npy.zst +3 -0
- models/minishlab/potion-multilingual-128M/fp16.d32.npy +3 -0
- models/minishlab/potion-multilingual-128M/fp16.d32.npy.zst +3 -0
- models/minishlab/potion-multilingual-128M/fp16.d64.npy +3 -0
- models/minishlab/potion-multilingual-128M/fp16.d64.npy.zst +3 -0
- models/minishlab/potion-multilingual-128M/fp32.d128.npy +3 -0
- models/minishlab/potion-multilingual-128M/fp32.d128.npy.zst +3 -0
- models/minishlab/potion-multilingual-128M/fp32.d256.npy +3 -0
- models/minishlab/potion-multilingual-128M/fp32.d256.npy.zst +3 -0
- models/minishlab/potion-multilingual-128M/fp32.d32.npy +3 -0
- models/minishlab/potion-multilingual-128M/fp32.d32.npy.zst +3 -0
- models/minishlab/potion-multilingual-128M/fp32.d64.npy +3 -0
- models/minishlab/potion-multilingual-128M/fp32.d64.npy.zst +3 -0
- models/minishlab/potion-multilingual-128M/fp8_e4m3.d128.npy +3 -0
- models/minishlab/potion-multilingual-128M/fp8_e4m3.d128.npy.zst +3 -0
- models/minishlab/potion-multilingual-128M/fp8_e4m3.d256.npy +3 -0
- models/minishlab/potion-multilingual-128M/fp8_e4m3.d256.npy.zst +3 -0
- models/minishlab/potion-multilingual-128M/fp8_e4m3.d32.npy +3 -0
- models/minishlab/potion-multilingual-128M/fp8_e4m3.d32.npy.zst +3 -0
- models/minishlab/potion-multilingual-128M/fp8_e4m3.d64.npy +3 -0
- models/minishlab/potion-multilingual-128M/fp8_e4m3.d64.npy.zst +3 -0
- models/minishlab/potion-multilingual-128M/fp8_e5m2.d128.npy +3 -0
- models/minishlab/potion-multilingual-128M/fp8_e5m2.d128.npy.zst +3 -0
- models/minishlab/potion-multilingual-128M/fp8_e5m2.d256.npy +3 -0
- models/minishlab/potion-multilingual-128M/fp8_e5m2.d256.npy.zst +3 -0
- models/minishlab/potion-multilingual-128M/fp8_e5m2.d32.npy +3 -0
- models/minishlab/potion-multilingual-128M/fp8_e5m2.d32.npy.zst +3 -0
- models/minishlab/potion-multilingual-128M/fp8_e5m2.d64.npy +3 -0
- models/minishlab/potion-multilingual-128M/fp8_e5m2.d64.npy.zst +3 -0
- models/minishlab/potion-multilingual-128M/tokenizer.json +3 -0
- models/minishlab/potion-multilingual-128M/tokenizer.json.zst +3 -0
- models/minishlab/potion-retrieval-32M/README.md +128 -0
- models/minishlab/potion-retrieval-32M/fp16.d128.npy +3 -0
- models/minishlab/potion-retrieval-32M/fp16.d128.npy.zst +3 -0
- models/minishlab/potion-retrieval-32M/fp16.d256.npy +3 -0
- models/minishlab/potion-retrieval-32M/fp16.d256.npy.zst +3 -0
- models/minishlab/potion-retrieval-32M/fp16.d32.npy +3 -0
- models/minishlab/potion-retrieval-32M/fp16.d32.npy.zst +3 -0
- models/minishlab/potion-retrieval-32M/fp16.d512.npy +3 -0
- models/minishlab/potion-retrieval-32M/fp16.d512.npy.zst +3 -0
- models/minishlab/potion-retrieval-32M/fp16.d64.npy +3 -0
- models/minishlab/potion-retrieval-32M/fp16.d64.npy.zst +3 -0
- models/minishlab/potion-retrieval-32M/fp32.d128.npy +3 -0
- models/minishlab/potion-retrieval-32M/fp32.d128.npy.zst +3 -0
.gitattributes
CHANGED
|
@@ -1,18 +1,3 @@
|
|
| 1 |
-
|
| 2 |
-
|
| 3 |
-
|
| 4 |
-
model/static-embeddings.512.fp16.npy.zst filter=lfs diff=lfs merge=lfs -text
|
| 5 |
-
model/tokenizer.json.zst filter=lfs diff=lfs merge=lfs -text
|
| 6 |
-
model/static-embeddings.1024.fp16.npy.zst filter=lfs diff=lfs merge=lfs -text
|
| 7 |
-
model/static-embeddings.256.fp16.npy.zst filter=lfs diff=lfs merge=lfs -text
|
| 8 |
-
model/static-embeddings.384.fp16.npy.zst filter=lfs diff=lfs merge=lfs -text
|
| 9 |
-
model/static-embeddings.512.fp32.npy.zst filter=lfs diff=lfs merge=lfs -text
|
| 10 |
-
model/static-embeddings.1024.fp32.npy.zst filter=lfs diff=lfs merge=lfs -text
|
| 11 |
-
model/static-embeddings.128.fp16.npy.zst filter=lfs diff=lfs merge=lfs -text
|
| 12 |
-
model/static-embeddings.128.int8.npy.zst filter=lfs diff=lfs merge=lfs -text
|
| 13 |
-
model/static-embeddings.384.fp32.npy.zst filter=lfs diff=lfs merge=lfs -text
|
| 14 |
-
model/static-embeddings.1024.int8.npy.zst filter=lfs diff=lfs merge=lfs -text
|
| 15 |
-
model/static-embeddings.256.fp32.npy.zst filter=lfs diff=lfs merge=lfs -text
|
| 16 |
-
model/static-embeddings.256.int8.npy.zst filter=lfs diff=lfs merge=lfs -text
|
| 17 |
-
model/static-embeddings.384.int8.npy.zst filter=lfs diff=lfs merge=lfs -text
|
| 18 |
-
model/static-embeddings.512.int8.npy.zst filter=lfs diff=lfs merge=lfs -text
|
|
|
|
| 1 |
+
models/**/*.npy filter=lfs diff=lfs merge=lfs -text
|
| 2 |
+
models/**/*.zst filter=lfs diff=lfs merge=lfs -text
|
| 3 |
+
models/**/*.json filter=lfs diff=lfs merge=lfs -text
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
README.md
CHANGED
|
@@ -1,66 +1,22 @@
|
|
| 1 |
-
---
|
| 2 |
-
language:
|
| 3 |
-
- en
|
| 4 |
-
- ar
|
| 5 |
-
- bg
|
| 6 |
-
- ca
|
| 7 |
-
- cs
|
| 8 |
-
- da
|
| 9 |
-
- de
|
| 10 |
-
- el
|
| 11 |
-
- es
|
| 12 |
-
- et
|
| 13 |
-
- fa
|
| 14 |
-
- fi
|
| 15 |
-
- fr
|
| 16 |
-
- gl
|
| 17 |
-
- gu
|
| 18 |
-
- he
|
| 19 |
-
- hi
|
| 20 |
-
- hu
|
| 21 |
-
- hy
|
| 22 |
-
- id
|
| 23 |
-
- it
|
| 24 |
-
- ja
|
| 25 |
-
- ka
|
| 26 |
-
- ko
|
| 27 |
-
- ku
|
| 28 |
-
- lt
|
| 29 |
-
- lv
|
| 30 |
-
- mk
|
| 31 |
-
- mn
|
| 32 |
-
- mr
|
| 33 |
-
- ms
|
| 34 |
-
- my
|
| 35 |
-
- nb
|
| 36 |
-
- nl
|
| 37 |
-
- pl
|
| 38 |
-
- pt
|
| 39 |
-
- ro
|
| 40 |
-
- ru
|
| 41 |
-
- sk
|
| 42 |
-
- sl
|
| 43 |
-
- sq
|
| 44 |
-
- sr
|
| 45 |
-
- sv
|
| 46 |
-
- th
|
| 47 |
-
- tr
|
| 48 |
-
- uk
|
| 49 |
-
- ur
|
| 50 |
-
- vi
|
| 51 |
-
- zh
|
| 52 |
-
- hr
|
| 53 |
-
license: apache-2.0
|
| 54 |
-
---
|
| 55 |
# Static Embeddings
|
| 56 |
|
| 57 |
This project contains multilingual static embeddings that are appropriate for generating
|
| 58 |
quick embeddings on edge devices. They are re-packaged from other projects into
|
| 59 |
production-ready assets.
|
| 60 |
|
| 61 |
-
##
|
| 62 |
|
| 63 |
-
|
| 64 |
-
|
| 65 |
-
|
| 66 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
# Static Embeddings
|
| 2 |
|
| 3 |
This project contains multilingual static embeddings that are appropriate for generating
|
| 4 |
quick embeddings on edge devices. They are re-packaged from other projects into
|
| 5 |
production-ready assets.
|
| 6 |
|
| 7 |
+
## Models
|
| 8 |
|
| 9 |
+
* [minishlab/potion-retrieval-32M/](models/minishlab/potion-retrieval-32M/README.md)
|
| 10 |
+
* [minishlab/potion-multilingual-128M/](models/minishlab/potion-multilingual-128M/README.md)
|
| 11 |
+
* [sentence-transformers/static-retrieval-mrl-en-v1/](models/sentence-transformers/static-retrieval-mrl-en-v1/README.md)
|
| 12 |
+
* [sentence-transformers/static-similarity-mrl-multilingual-v1/](models/sentence-transformers/static-similarity-mrl-multilingual-v1/README.md)
|
| 13 |
+
|
| 14 |
+
## Updating
|
| 15 |
+
|
| 16 |
+
Add models to `scripts/build_models.py`.
|
| 17 |
+
|
| 18 |
+
```sh
|
| 19 |
+
pipx install huggingface_hub
|
| 20 |
+
huggingface-cli login
|
| 21 |
+
uv run scripts/build_models.py
|
| 22 |
+
```
|
models/minishlab/potion-multilingual-128M/README.md
ADDED
|
@@ -0,0 +1,128 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# [minishlab/potion-multilingual-128M](https://huggingface.co/minishlab/potion-multilingual-128M)
|
| 2 |
+
|
| 3 |
+
License: [mit](https://choosealicense.com/licenses/mit/)
|
| 4 |
+
|
| 5 |
+
A multilingual embedder. The details are a bit scant on how it's trained as
|
| 6 |
+
there is no source code for it. However, it likely has an architecture close
|
| 7 |
+
to that of the potion-retrieval-32M model, but trained on Common Crawl data.
|
| 8 |
+
|
| 9 |
+
The 128M references the number of parameters in the embeddings:
|
| 10 |
+
|
| 11 |
+
256 dimensions * 500,353 vocab.
|
| 12 |
+
|
| 13 |
+
## Model Stats
|
| 14 |
+
|
| 15 |
+
Stats that describe the embeddings tensor shapes and value distribution.
|
| 16 |
+
|
| 17 |
+
| item | metric | value |
|
| 18 |
+
| --------------| ----------------------- | ----- |
|
| 19 |
+
| vocab | size | 500,353 |
|
| 20 |
+
| embedding | dimensions | 256 |
|
| 21 |
+
| vector length | mean | 12.73 |
|
| 22 |
+
| vector length | median | 11.94 |
|
| 23 |
+
| vector length | stddev | 5.12 |
|
| 24 |
+
| values | mean | -0.00 |
|
| 25 |
+
| values | median | -0.00 |
|
| 26 |
+
| values | stddev | 0.86 |
|
| 27 |
+
|
| 28 |
+
## Mean Pooled Quantization Loss
|
| 29 |
+
|
| 30 |
+
This test roundtrips the vectors through quantization, but performs the
|
| 31 |
+
mean pooling arithmetic in float32 space. The quantized and unquantized
|
| 32 |
+
mean pooled vectors are compared to each other to determine their cosine
|
| 33 |
+
similarity, to show how much the meaning of the vector has changed due
|
| 34 |
+
to quantization.
|
| 35 |
+
|
| 36 |
+
| Precision | Cosine Similarity |
|
| 37 |
+
| ------------- | ----------------- |
|
| 38 |
+
| fp16 | 1.00000 |
|
| 39 |
+
| fp8 e4m3 | 0.99993 |
|
| 40 |
+
| fp8 e5m2 | 0.99973 |
|
| 41 |
+
|
| 42 |
+
## Quantization Loss Per Vector
|
| 43 |
+
|
| 44 |
+
While ultimately the embedding vectors will be mean pooled together, it's
|
| 45 |
+
still useful to look at the loss per-vector in the embedding table to see
|
| 46 |
+
which quantization strategies retain the most vector meaning.
|
| 47 |
+
|
| 48 |
+
- **Cosine Similarity** — measures how well the *direction* of embedding vectors
|
| 49 |
+
is preserved after quantization, independent of scale. This is especially
|
| 50 |
+
relevant when embeddings are used for similarity search or retrieval.
|
| 51 |
+
- **MSE (Mean Squared Error)** — emphasizes large errors by squaring the
|
| 52 |
+
differences. Useful for detecting whether any values are badly distorted.
|
| 53 |
+
- **MAE (Mean Absolute Error)** — the average absolute difference between
|
| 54 |
+
original and quantized values. Easier to interpret, less sensitive to outliers.
|
| 55 |
+
|
| 56 |
+
| Precision | Metric | Value |
|
| 57 |
+
| ------------- | ------ | ----- |
|
| 58 |
+
| fp16 | cosine similarity | 1.00000 |
|
| 59 |
+
| fp8 e4m3 | cosine similarity | 0.99965 |
|
| 60 |
+
| fp8 e5m2 | cosine similarity | 0.99863 |
|
| 61 |
+
| fp16 | MSE | 0.00000 |
|
| 62 |
+
| fp8 e4m3 | MSE | 0.00052 |
|
| 63 |
+
| fp8 e5m2 | MSE | 0.00205 |
|
| 64 |
+
| fp16 | MAE | 0.00011 |
|
| 65 |
+
| fp8 e4m3 | MAE | 0.01364 |
|
| 66 |
+
| fp8 e5m2 | MAE | 0.02717 |
|
| 67 |
+
|
| 68 |
+
## Tokenizer Examples
|
| 69 |
+
|
| 70 |
+
**Input:** This is an example of encoding<br/>
|
| 71 |
+
**Tokens**: `▁This` `▁is` `▁an` `▁example` `▁of` `▁encoding`
|
| 72 |
+
|
| 73 |
+
**Input:** The quick brown fox jumps over the lazy dog.<br/>
|
| 74 |
+
**Tokens**: `▁The` `▁quick` `▁brown` `▁fox` `▁jumps` `▁over` `▁the` `▁lazy` `▁dog` `▁` `.`
|
| 75 |
+
|
| 76 |
+
**Input:** Curaçao, naïve fiancé, jalapeño, déjà vu.<br/>
|
| 77 |
+
**Tokens**: `▁Cura` `ça` `o` `▁` `,` `▁na` `ï` `ve` `▁fiancé` `▁` `,` `▁ja` `lap` `eño` `▁` `,` `▁déjà` `▁vu` `▁` `.`
|
| 78 |
+
|
| 79 |
+
**Input:** Привет, как дела?<br/>
|
| 80 |
+
**Tokens**: `▁При` `вет` `▁` `,` `▁как` `▁дела` `▁?`
|
| 81 |
+
|
| 82 |
+
**Input:** Бързата кафява лисица прескача мързеливото куче.<br/>
|
| 83 |
+
**Tokens**: `▁Бър` `за` `та` `▁кафяв` `а` `▁лис` `ица` `▁пре` `ска` `ча` `▁` `мър` `зе` `ливо` `то` `▁куче` `▁` `.`
|
| 84 |
+
|
| 85 |
+
**Input:** Γρήγορη καφέ αλεπού πηδάει πάνω από τον τεμπέλη σκύλο.<br/>
|
| 86 |
+
**Tokens**: `▁Γ` `ρή` `γο` `ρη` `▁καφέ` `▁α` `λε` `πού` `▁` `πη` `δά` `ει` `▁πάνω` `▁από` `▁τον` `▁τε` `μπ` `έλη` `▁σκύλο` `▁` `.`
|
| 87 |
+
|
| 88 |
+
**Input:** اللغة العربية جميلة وغنية بالتاريخ.<br/>
|
| 89 |
+
**Tokens**: `▁اللغة` `▁العربية` `▁جميلة` `▁وغ` `نية` `▁بال` `تاريخ` `▁` `.`
|
| 90 |
+
|
| 91 |
+
**Input:** مرحبا بالعالم!<br/>
|
| 92 |
+
**Tokens**: `▁مرحبا` `▁بالعالم` `▁!`
|
| 93 |
+
|
| 94 |
+
**Input:** Simplified: 快速的棕色狐狸跳过懒狗。<br/>
|
| 95 |
+
**Tokens**: `▁Simp` `l` `ified` `▁:` `▁` `快速` `的` `棕` `色` `狐` `狸` `跳` `过` `懒` `狗` `。`
|
| 96 |
+
|
| 97 |
+
**Input:** Traditional: 快速的棕色狐狸跳過懶狗。<br/>
|
| 98 |
+
**Tokens**: `▁Tradition` `al` `▁:` `▁` `快速` `的` `棕` `色` `狐` `狸` `跳` `過` `懶` `狗` `。`
|
| 99 |
+
|
| 100 |
+
**Input:** 素早い茶色の狐が怠け者の犬を飛び越える。<br/>
|
| 101 |
+
**Tokens**: `▁素` `早い` `茶` `色` `の` `狐` `が` `怠` `け` `者の` `犬` `を` `飛び` `越` `える` `。`
|
| 102 |
+
|
| 103 |
+
**Input:** コンピュータープログラミング<br/>
|
| 104 |
+
**Tokens**: `▁` `コンピュータ` `ー` `プロ` `グラ` `ミ` `ング`
|
| 105 |
+
|
| 106 |
+
**Input:** 빠른 갈색 여우가 게으른 개를 뛰어넘습니다.<br/>
|
| 107 |
+
**Tokens**: `▁빠른` `▁갈` `색` `▁여` `우` `가` `▁게` `으` `른` `▁` `개를` `▁뛰어` `넘` `습니다` `▁` `.`
|
| 108 |
+
|
| 109 |
+
**Input:** तेज़ भूरी लोमड़ी आलसी कुत्ते के ऊपर कूदती है।<br/>
|
| 110 |
+
**Tokens**: `▁तेज़` `▁भू` `री` `▁लो` `म` `ड़ी` `▁आ` `ल` `सी` `▁कुत्ते` `▁के` `▁ऊपर` `▁` `कू` `द` `ती` `▁है` `।`
|
| 111 |
+
|
| 112 |
+
**Input:** দ্রুত বাদামী শিয়াল অলস কুকুরের উপর দিয়ে লাফ দেয়।<br/>
|
| 113 |
+
**Tokens**: `▁দ্রুত` `▁বাদাম` `ী` `▁শি` `য়াল` `▁অ` `ল` `স` `▁কু` `কুর` `ের` `▁উপর` `▁দিয়ে` `▁লা` `ফ` `▁দেয়` `।`
|
| 114 |
+
|
| 115 |
+
**Input:** வேகமான பழுப்பு நரி சோம்பேறி நாயின் மேல் குதிக்கிறது.<br/>
|
| 116 |
+
**Tokens**: `▁வேக` `மான` `▁பழ` `ு` `ப்பு` `▁ந` `ரி` `▁சோ` `ம்` `பே` `றி` `▁நா` `யின்` `▁மேல்` `▁கு` `தி` `க்கிறது` `▁` `.`
|
| 117 |
+
|
| 118 |
+
**Input:** สุนัขจิ้งจอกสีน้ำตาลกระโดดข้ามสุนัขขี้เกียจ.<br/>
|
| 119 |
+
**Tokens**: `▁` `สุนัข` `จิ` `้ง` `จอ` `ก` `สีน้ําตาล` `กระโดด` `ข้าม` `สุนัข` `ขี้` `เกีย` `จ` `▁` `.`
|
| 120 |
+
|
| 121 |
+
**Input:** ብሩክ ቡናማ ቀበሮ ሰነፍ ውሻን ተዘልሏል።<br/>
|
| 122 |
+
**Tokens**: `▁` `ብሩ` `ክ` `▁ቡና` `ማ` `▁` `ቀበ` `ሮ` `▁ሰ` `ነፍ` `▁` `ው` `ሻ` `ን` `▁ተ` `ዘ` `ል` `ሏል` `።`
|
| 123 |
+
|
| 124 |
+
**Input:** Hello 世界 مرحبا 🌍<br/>
|
| 125 |
+
**Tokens**: `▁Hello` `▁世界` `▁مرحبا` `▁🌍`
|
| 126 |
+
|
| 127 |
+
**Input:** 123, αβγ, абв, العربية, 中文, हिन्दी.<br/>
|
| 128 |
+
**Tokens**: `▁123` `▁` `,` `▁α` `β` `γ` `▁` `,` `▁аб` `в` `▁` `,` `▁العربية` `▁` `,` `▁中文` `▁` `,` `▁हिन्दी` `▁` `.`
|
models/minishlab/potion-multilingual-128M/fp16.d128.npy
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:cc887899f84cbcecb758e9a937b7af684599f3c02335c5b16571290282fff374
|
| 3 |
+
size 128090496
|
models/minishlab/potion-multilingual-128M/fp16.d128.npy.zst
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:cd2fb1e3464c24178d68712c2d4e54d60725b0fc71e3f01d247e7f5e738368b3
|
| 3 |
+
size 118744364
|
models/minishlab/potion-multilingual-128M/fp16.d256.npy
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:1e48643af3158e696580c6f2f0fca2512359a70823bd3f0b1326149debb7c629
|
| 3 |
+
size 256180864
|
models/minishlab/potion-multilingual-128M/fp16.d256.npy.zst
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:a5130b79d6c3d133be5abfaa771833cce819baea7c04551a308111734e1e91e7
|
| 3 |
+
size 237834592
|
models/minishlab/potion-multilingual-128M/fp16.d32.npy
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:5ad56f8b6e159fe2702416bbd03a5996bcbb97c1d445090d43d511eae5b2c6ec
|
| 3 |
+
size 32022720
|
models/minishlab/potion-multilingual-128M/fp16.d32.npy.zst
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:eaa759ab9044e1ac476a9e7592fed79238fa10e6b6a143cd2ac6fc66fe8472b9
|
| 3 |
+
size 29649845
|
models/minishlab/potion-multilingual-128M/fp16.d64.npy
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:d30d2784a45676324a1cf641a4f57796efc735a30ab9d20539e84cbc8fb8ee80
|
| 3 |
+
size 64045312
|
models/minishlab/potion-multilingual-128M/fp16.d64.npy.zst
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:595743a291bb571772a6d6e37e1dda91191e5ebd24e96dfcadde3ee810d917ac
|
| 3 |
+
size 59310057
|
models/minishlab/potion-multilingual-128M/fp32.d128.npy
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:53f4486ffb084395a22b5042eb01131d30985603a123718a3337639f7b561af5
|
| 3 |
+
size 256180864
|
models/minishlab/potion-multilingual-128M/fp32.d128.npy.zst
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:9cf09adbfe40db684bdd54eaa5a3c0faf39d1885b60b6d6224869459e0b0e5a6
|
| 3 |
+
size 238079206
|
models/minishlab/potion-multilingual-128M/fp32.d256.npy
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:6387c770f72908c601f9cb98c7dc1334c230acafb46693749fe564810b7d46e8
|
| 3 |
+
size 512361600
|
models/minishlab/potion-multilingual-128M/fp32.d256.npy.zst
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:9ba369e4e097bdb169082fd3dbfcf7f6c4b3a96ef7b398f50d917943243323db
|
| 3 |
+
size 476624553
|
models/minishlab/potion-multilingual-128M/fp32.d32.npy
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:92d9d6f564521fb0e9bdf3659720367611ee18c667d44662f1d76f5a152046f9
|
| 3 |
+
size 64045312
|
models/minishlab/potion-multilingual-128M/fp32.d32.npy.zst
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:0c542379a851c35da71bac51d102c74679714f0e16f1826eee841f4ade6a0c88
|
| 3 |
+
size 59489696
|
models/minishlab/potion-multilingual-128M/fp32.d64.npy
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:1d32bc6682e92c085a05a949bb12124463f81fa8fe13070c2f02eb27cca1c6ad
|
| 3 |
+
size 128090496
|
models/minishlab/potion-multilingual-128M/fp32.d64.npy.zst
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:91613a230a226d52659d4237c945b999dc4c169a4a726c2d2c24710440716caa
|
| 3 |
+
size 118969845
|
models/minishlab/potion-multilingual-128M/fp8_e4m3.d128.npy
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:96f86c14faebe685d9278a0a08d2a1753331b4877f1571ec26733cd82fbc0d5c
|
| 3 |
+
size 64045312
|
models/minishlab/potion-multilingual-128M/fp8_e4m3.d128.npy.zst
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:c248a4ee3b127b3e3444b3a18d4350d8d263b3b0a60adc7a1d0c6c870a6e4c51
|
| 3 |
+
size 53423849
|
models/minishlab/potion-multilingual-128M/fp8_e4m3.d256.npy
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:edb6d2d54ff0bbc239efe60f94522328cb22dfc2722a6dacf367ff5148a18297
|
| 3 |
+
size 128090496
|
models/minishlab/potion-multilingual-128M/fp8_e4m3.d256.npy.zst
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:d3a59f9eb2f29aa58975d6392c4267fd7319114840cf4ad817c0bb776672a071
|
| 3 |
+
size 107325555
|
models/minishlab/potion-multilingual-128M/fp8_e4m3.d32.npy
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:32e38e83ccf6239d1d756d4a0ad6cc5506fd3140f1ea501cc9c73f8f6853027c
|
| 3 |
+
size 16011424
|
models/minishlab/potion-multilingual-128M/fp8_e4m3.d32.npy.zst
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:2adeecb3aa5bc959bab52e5c191f6fa0ac06abef0cc61e10f8ab228c57af6057
|
| 3 |
+
size 13341806
|
models/minishlab/potion-multilingual-128M/fp8_e4m3.d64.npy
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:fa0967f31ca4f78b0d904c734712612e68e88eeb95777ee70be0dff52680cdd2
|
| 3 |
+
size 32022720
|
models/minishlab/potion-multilingual-128M/fp8_e4m3.d64.npy.zst
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:97a1b36d41adbbb4ee5b2a5d4d5ba69547764296bba6ed2f849011de7311663c
|
| 3 |
+
size 26689571
|
models/minishlab/potion-multilingual-128M/fp8_e5m2.d128.npy
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:c5b5ec3646db046b96f8f40ae5fc9b7a4fb9734763920bbe64cddb848609bc37
|
| 3 |
+
size 64045312
|
models/minishlab/potion-multilingual-128M/fp8_e5m2.d128.npy.zst
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:e03b8845a0e901c39d5a967b0e8dc768e89f4d10d71c044077dc0fb9cf57b1ae
|
| 3 |
+
size 45893916
|
models/minishlab/potion-multilingual-128M/fp8_e5m2.d256.npy
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:592264948feedc3655f90340c2dfa6712286380f04c3486172c2c14eb065eacf
|
| 3 |
+
size 128090496
|
models/minishlab/potion-multilingual-128M/fp8_e5m2.d256.npy.zst
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:b262539251e46cce6c0b10434c22f4e8435cadf317a6db40e94c726b14bfe139
|
| 3 |
+
size 92364240
|
models/minishlab/potion-multilingual-128M/fp8_e5m2.d32.npy
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:731d0b29eb4f9209f6a56b1a2d431d6913dc80e44db0b05448ccf79b1f3c9c69
|
| 3 |
+
size 16011424
|
models/minishlab/potion-multilingual-128M/fp8_e5m2.d32.npy.zst
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:a24917c635581f9b60edf8c757d40804c61fbbdaa07d77c1083d9dacc6c65ff2
|
| 3 |
+
size 11441672
|
models/minishlab/potion-multilingual-128M/fp8_e5m2.d64.npy
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:ee50d08a8ac4292e3a6e47ec64da23a053f638520df8085504f2962d57801368
|
| 3 |
+
size 32022720
|
models/minishlab/potion-multilingual-128M/fp8_e5m2.d64.npy.zst
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:b2496ef9bac8c4131f43492f7eb8166b2690ac8bb57c6fd9f075c197c1e0768d
|
| 3 |
+
size 22899505
|
models/minishlab/potion-multilingual-128M/tokenizer.json
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:370e450e45668b38f46c2ac780839caab94d260fe9dc448e9872cb110f9c868d
|
| 3 |
+
size 34630288
|
models/minishlab/potion-multilingual-128M/tokenizer.json.zst
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:1e06d472444bdc08af554a4296cf6b29f6c4f37f29eaabbffc2df48b730442b1
|
| 3 |
+
size 5382872
|
models/minishlab/potion-retrieval-32M/README.md
ADDED
|
@@ -0,0 +1,128 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# [minishlab/potion-retrieval-32M](https://huggingface.co/minishlab/potion-retrieval-32M)
|
| 2 |
+
|
| 3 |
+
License: [mit](https://choosealicense.com/licenses/mit/)
|
| 4 |
+
|
| 5 |
+
The token embeddings from a monolingual English 32M parameter model that was
|
| 6 |
+
distilled from embeddings that were initialized from the multi-domain
|
| 7 |
+
[BAAI/bge-base-en-v1.5](https://huggingface.co/BAAI/bge-base-en-v1.5) model.
|
| 8 |
+
|
| 9 |
+
The 32M references the number of parameters in the embeddings:
|
| 10 |
+
|
| 11 |
+
512 dimensions * 63,091 vocab.
|
| 12 |
+
|
| 13 |
+
## Model Stats
|
| 14 |
+
|
| 15 |
+
Stats that describe the embeddings tensor shapes and value distribution.
|
| 16 |
+
|
| 17 |
+
| item | metric | value |
|
| 18 |
+
| --------------| ----------------------- | ----- |
|
| 19 |
+
| vocab | size | 63,091 |
|
| 20 |
+
| embedding | dimensions | 512 |
|
| 21 |
+
| vector length | mean | 130.27 |
|
| 22 |
+
| vector length | median | 130.39 |
|
| 23 |
+
| vector length | stddev | 30.43 |
|
| 24 |
+
| values | mean | 0.01 |
|
| 25 |
+
| values | median | 0.01 |
|
| 26 |
+
| values | stddev | 5.91 |
|
| 27 |
+
|
| 28 |
+
## Mean Pooled Quantization Loss
|
| 29 |
+
|
| 30 |
+
This test roundtrips the vectors through quantization, but performs the
|
| 31 |
+
mean pooling arithmetic in float32 space. The quantized and unquantized
|
| 32 |
+
mean pooled vectors are compared to each other to determine their cosine
|
| 33 |
+
similarity, to show how much the meaning of the vector has changed due
|
| 34 |
+
to quantization.
|
| 35 |
+
|
| 36 |
+
| Precision | Cosine Similarity |
|
| 37 |
+
| ------------- | ----------------- |
|
| 38 |
+
| fp16 | 1.00000 |
|
| 39 |
+
| fp8 e4m3 | 0.99970 |
|
| 40 |
+
| fp8 e5m2 | 0.99887 |
|
| 41 |
+
|
| 42 |
+
## Quantization Loss Per Vector
|
| 43 |
+
|
| 44 |
+
While ultimately the embedding vectors will be mean pooled together, it's
|
| 45 |
+
still useful to look at the loss per-vector in the embedding table to see
|
| 46 |
+
which quantization strategies retain the most vector meaning.
|
| 47 |
+
|
| 48 |
+
- **Cosine Similarity** — measures how well the *direction* of embedding vectors
|
| 49 |
+
is preserved after quantization, independent of scale. This is especially
|
| 50 |
+
relevant when embeddings are used for similarity search or retrieval.
|
| 51 |
+
- **MSE (Mean Squared Error)** — emphasizes large errors by squaring the
|
| 52 |
+
differences. Useful for detecting whether any values are badly distorted.
|
| 53 |
+
- **MAE (Mean Absolute Error)** — the average absolute difference between
|
| 54 |
+
original and quantized values. Easier to interpret, less sensitive to outliers.
|
| 55 |
+
|
| 56 |
+
| Precision | Metric | Value |
|
| 57 |
+
| ------------- | ------ | ----- |
|
| 58 |
+
| fp16 | cosine similarity | 1.00000 |
|
| 59 |
+
| fp8 e4m3 | cosine similarity | 0.99965 |
|
| 60 |
+
| fp8 e5m2 | cosine similarity | 0.99862 |
|
| 61 |
+
| fp16 | MSE | 0.00000 |
|
| 62 |
+
| fp8 e4m3 | MSE | 0.02454 |
|
| 63 |
+
| fp8 e5m2 | MSE | 0.09720 |
|
| 64 |
+
| fp16 | MAE | 0.00076 |
|
| 65 |
+
| fp8 e4m3 | MAE | 0.09763 |
|
| 66 |
+
| fp8 e5m2 | MAE | 0.19461 |
|
| 67 |
+
|
| 68 |
+
## Tokenizer Examples
|
| 69 |
+
|
| 70 |
+
**Input:** This is an example of encoding<br/>
|
| 71 |
+
**Tokens**: `[CLS]` `this` `is` `an` `example` `of` `encoding` `[SEP]`
|
| 72 |
+
|
| 73 |
+
**Input:** The quick brown fox jumps over the lazy dog.<br/>
|
| 74 |
+
**Tokens**: `[CLS]` `the` `quick` `brown` `fox` `jumps` `over` `the` `lazy` `dog` `.` `[SEP]`
|
| 75 |
+
|
| 76 |
+
**Input:** Curaçao, naïve fiancé, jalapeño, déjà vu.<br/>
|
| 77 |
+
**Tokens**: `[CLS]` `curacao` `,` `naive` `fiance` `,` `jalapeno` `,` `deja` `vu` `.` `[SEP]`
|
| 78 |
+
|
| 79 |
+
**Input:** Привет, как дела?<br/>
|
| 80 |
+
**Tokens**: `[CLS]` `п` `##р` `##и` `##в` `##е` `##т` `,` `как` `д` `##е` `##л` `##а` `?` `[SEP]`
|
| 81 |
+
|
| 82 |
+
**Input:** Бързата кафява лисица прескача мързеливото куче.<br/>
|
| 83 |
+
**Tokens**: `[CLS]` `б` `##ъ` `##р` `##з` `##а` `##т` `##а` `к` `##а` `##ф` `##я` `##в` `##а` `л` `##и` `##с` `##и` `##ц` `##а` `п` `##р` `##е` `##с` `##ка` `##ч` `##а` `м` `##ъ` `##р` `##з` `##е` `##л` `##и` `##в` `##о` `##т` `##о` `к` `##у` `##ч` `##е` `.` `[SEP]`
|
| 84 |
+
|
| 85 |
+
**Input:** Γρήγορη καφέ αλεπού πηδάει πάνω από τον τεμπέλη σκύλο.<br/>
|
| 86 |
+
**Tokens**: `[CLS]` `γ` `##ρ` `##η` `##γ` `##ο` `##ρ` `##η` `κ` `##α` `##φ` `##ε` `α` `##λ` `##ε` `##π` `##ου` `π` `##η` `##δ` `##α` `##ε` `##ι` `π` `##α` `##ν` `##ω` `α` `##π` `##ο` `τ` `##ο` `##ν` `τ` `##ε` `##μ` `##π` `##ε` `##λ` `##η` `σ` `##κ` `##υ` `##λ` `##ο` `.` `[SEP]`
|
| 87 |
+
|
| 88 |
+
**Input:** اللغة العربية جميلة وغنية بالتاريخ.<br/>
|
| 89 |
+
**Tokens**: `[CLS]` `ا` `##ل` `##ل` `##غ` `##ة` `ا` `##ل` `##ع` `##ر` `##ب` `##ي` `##ة` `ج` `##م` `##ي` `##ل` `##ة` `و` `##غ` `##ن` `##ي` `##ة` `با` `##ل` `##ت` `##ا` `##ر` `##ي` `##خ` `.` `[SEP]`
|
| 90 |
+
|
| 91 |
+
**Input:** مرحبا بالعالم!<br/>
|
| 92 |
+
**Tokens**: `[CLS]` `م` `##ر` `##ح` `##ب` `##ا` `با` `##ل` `##ع` `##ا` `##ل` `##م` `!` `[SEP]`
|
| 93 |
+
|
| 94 |
+
**Input:** Simplified: 快速的棕色狐狸跳过懒狗。<br/>
|
| 95 |
+
**Tokens**: `[CLS]` `simplified` `:` `[UNK]` `[UNK]` `的` `[UNK]` `[UNK]` `[UNK]` `[UNK]` `[UNK]` `[UNK]` `[UNK]` `[UNK]` `。` `[SEP]`
|
| 96 |
+
|
| 97 |
+
**Input:** Traditional: 快速的棕色狐狸跳過懶狗。<br/>
|
| 98 |
+
**Tokens**: `[CLS]` `traditional` `:` `[UNK]` `[UNK]` `的` `[UNK]` `[UNK]` `[UNK]` `[UNK]` `[UNK]` `[UNK]` `[UNK]` `[UNK]` `。` `[SEP]`
|
| 99 |
+
|
| 100 |
+
**Input:** 素早い茶色の狐が怠け者の犬を飛び越える。<br/>
|
| 101 |
+
**Tokens**: `[CLS]` `[UNK]` `[UNK]` `い` `[UNK]` `[UNK]` `の` `[UNK]` `か` `[UNK]` `け` `[UNK]` `の` `犬` `を` `[UNK]` `ひ` `[UNK]` `え` `##る` `。` `[SEP]`
|
| 102 |
+
|
| 103 |
+
**Input:** コンピュータープログラミング<br/>
|
| 104 |
+
**Tokens**: `[CLS]` `コ` `##ン` `##ヒ` `##ュ` `##ー` `##タ` `##ー` `##フ` `##ロ` `##ク` `##ラ` `##ミ` `##ン` `##ク` `[SEP]`
|
| 105 |
+
|
| 106 |
+
**Input:** 빠른 갈색 여우가 게으른 개를 뛰어넘습니다.<br/>
|
| 107 |
+
**Tokens**: `[CLS]` `[UNK]` `ᄀ` `##ᅡ` `##ᆯ` `##ᄉ` `##ᅢ` `##ᆨ` `ᄋ` `##ᅧ` `##ᄋ` `##ᅮ` `##ᄀ` `##ᅡ` `ᄀ` `##ᅦ` `##ᄋ` `##ᅳ` `##ᄅ` `##ᅳ` `##ᆫ` `ᄀ` `##ᅢ` `##ᄅ` `##ᅳ` `##ᆯ` `[UNK]` `.` `[SEP]`
|
| 108 |
+
|
| 109 |
+
**Input:** तेज़ भूरी लोमड़ी आलसी कुत्ते के ऊपर कूदती है।<br/>
|
| 110 |
+
**Tokens**: `[CLS]` `त` `##ज` `भ` `##र` `##ी` `ल` `##ो` `##म` `##ड` `##ी` `आ` `##ल` `##स` `##ी` `क` `##त` `##त` `क` `[UNK]` `क` `##द` `##त` `##ी` `ह` `।` `[SEP]`
|
| 111 |
+
|
| 112 |
+
**Input:** দ্রুত বাদামী শিয়াল অলস কুকুরের উপর দিয়ে লাফ দেয়।<br/>
|
| 113 |
+
**Tokens**: `[CLS]` `দ` `##র` `##ত` `ব` `##া` `##দ` `##া` `##ম` `##ী` `শ` `##ি` `##য` `##া` `##ল` `অ` `##ল` `##স` `ক` `##ক` `##র` `##ে` `##র` `উ` `##প` `##র` `দ` `##ি` `##য` `##ে` `[UNK]` `দ` `##ে` `##য` `।` `[SEP]`
|
| 114 |
+
|
| 115 |
+
**Input:** வேகமான பழுப்பு நரி சோம்பேறி நாயின் மேல் குதிக்கிறது.<br/>
|
| 116 |
+
**Tokens**: `[CLS]` `வ` `##ே` `##க` `##ம` `##ா` `##ன` `[UNK]` `ந` `##ர` `##ி` `[UNK]` `ந` `##ா` `##ய` `##ி` `##ன` `ம` `##ே` `##ல` `[UNK]` `.` `[SEP]`
|
| 117 |
+
|
| 118 |
+
**Input:** สุนัขจิ้งจอกสีน้ำตาลกระโดดข้ามสุนัขขี้เกียจ.<br/>
|
| 119 |
+
**Tokens**: `[CLS]` `[UNK]` `.` `[SEP]`
|
| 120 |
+
|
| 121 |
+
**Input:** ብሩክ ቡናማ ቀበሮ ሰነፍ ውሻን ተዘልሏል።<br/>
|
| 122 |
+
**Tokens**: `[CLS]` `[UNK]` `[UNK]` `[UNK]` `[UNK]` `[UNK]` `[UNK]` `[UNK]` `[SEP]`
|
| 123 |
+
|
| 124 |
+
**Input:** Hello 世界 مرحبا 🌍<br/>
|
| 125 |
+
**Tokens**: `[CLS]` `hello` `世` `[UNK]` `م` `##ر` `##ح` `##ب` `##ا` `[UNK]` `[SEP]`
|
| 126 |
+
|
| 127 |
+
**Input:** 123, αβγ, абв, العربية, 中文, हिन्दी.<br/>
|
| 128 |
+
**Tokens**: `[CLS]` `123` `,` `α` `##β` `##γ` `,` `а` `##б` `##в` `,` `ا` `##ل` `##ع` `##ر` `##ب` `##ي` `##ة` `,` `中` `文` `,` `ह` `##ि` `##न` `##द` `##ी` `.` `[SEP]`
|
models/minishlab/potion-retrieval-32M/fp16.d128.npy
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:d39bf6868e526c83f2e77e474bea989fa3cec4e8eb0bd72efa3208efdc28ee4b
|
| 3 |
+
size 16151424
|
models/minishlab/potion-retrieval-32M/fp16.d128.npy.zst
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:cfcf9586043e994e52667dc360b2979d1400f996c8055e79be402eddf98c3766
|
| 3 |
+
size 14900440
|
models/minishlab/potion-retrieval-32M/fp16.d256.npy
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:d599424bfb205652a334abbcb84fdcf59ce5c9c13847e617fafe4b1ec7c71fe3
|
| 3 |
+
size 32302720
|
models/minishlab/potion-retrieval-32M/fp16.d256.npy.zst
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:e194ae0eeaeaef1f13f913d10d8c7d04b35eca44ce2e0937b6d4cac83b27a048
|
| 3 |
+
size 29836775
|
models/minishlab/potion-retrieval-32M/fp16.d32.npy
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:4c33b943f6deee8af7f5b49d96fc7dd5b31573149f6d0652b500178aef779310
|
| 3 |
+
size 4037952
|
models/minishlab/potion-retrieval-32M/fp16.d32.npy.zst
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:5318a29adc2011a4708db66e1e0e2ddf5b196b7b9b8ee3e40ff4e2cad0c79c44
|
| 3 |
+
size 3724961
|
models/minishlab/potion-retrieval-32M/fp16.d512.npy
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:307f4a3953c913da8ebee260bfc1c3eb509ebcac2e6d6ad5001e209aed0de676
|
| 3 |
+
size 64605312
|
models/minishlab/potion-retrieval-32M/fp16.d512.npy.zst
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:660a269222ee454b3908fbf4333dcd379a3d797910d1098528e7918ad9c1f8e7
|
| 3 |
+
size 59832428
|
models/minishlab/potion-retrieval-32M/fp16.d64.npy
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:fd8d09f3d66df3b228d267eee03868179f52b0ddcb0a291781c65e6768092da1
|
| 3 |
+
size 8075776
|
models/minishlab/potion-retrieval-32M/fp16.d64.npy.zst
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:751256692ba27fdb3a181df3af218f9bf136a97559a6e38e9fb0f9a542254ea3
|
| 3 |
+
size 7449228
|
models/minishlab/potion-retrieval-32M/fp32.d128.npy
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:f8f10c71cf475e65ce14ddd2cf423f2a45804b64d71243521f0e7a5189c350e0
|
| 3 |
+
size 32302720
|
models/minishlab/potion-retrieval-32M/fp32.d128.npy.zst
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:a5fe0561f1e6c9fbb188a3a4cef3a2dbd754d6e0799dbe2306670d78fb32c68b
|
| 3 |
+
size 29981217
|