niobures committed on
Commit
8c0ceed
·
verified ·
1 Parent(s): 5306eab

E5 (code, models, paper)

Browse files
This view is limited to 50 files because it contains too many changes. See raw diff
Files changed (50) hide show
  1. .gitattributes +1 -0
  2. Text Embeddings by Weakly-Supervised Contrastive Pre-training.pdf +3 -0
  3. e5-base-v2/.gitattributes +35 -0
  4. e5-base-v2/README.md +24 -0
  5. e5-base-v2/config.json +25 -0
  6. e5-base-v2/onnx/model.onnx +3 -0
  7. e5-base-v2/onnx/model_bnb4.onnx +3 -0
  8. e5-base-v2/onnx/model_fp16.onnx +3 -0
  9. e5-base-v2/onnx/model_int8.onnx +3 -0
  10. e5-base-v2/onnx/model_q4.onnx +3 -0
  11. e5-base-v2/onnx/model_q4f16.onnx +3 -0
  12. e5-base-v2/onnx/model_quantized.onnx +3 -0
  13. e5-base-v2/onnx/model_uint8.onnx +3 -0
  14. e5-base-v2/quant_config.json +30 -0
  15. e5-base-v2/source.txt +1 -0
  16. e5-base-v2/special_tokens_map.json +7 -0
  17. e5-base-v2/tokenizer.json +0 -0
  18. e5-base-v2/tokenizer_config.json +13 -0
  19. e5-base-v2/vocab.txt +0 -0
  20. e5-small-Quran/.gitattributes +34 -0
  21. e5-small-Quran/README.md +41 -0
  22. e5-small-Quran/config.json +25 -0
  23. e5-small-Quran/pytorch_model.bin +3 -0
  24. e5-small-Quran/source.txt +1 -0
  25. e5-small-Quran/special_tokens_map.json +7 -0
  26. e5-small-Quran/tokenizer.json +0 -0
  27. e5-small-Quran/tokenizer_config.json +15 -0
  28. e5-small-Quran/vocab.txt +0 -0
  29. e5-small-v2-quantized/.gitattributes +35 -0
  30. e5-small-v2-quantized/README.md +3 -0
  31. e5-small-v2-quantized/config.json +24 -0
  32. e5-small-v2-quantized/onnx/model.onnx +3 -0
  33. e5-small-v2-quantized/onnx/model_quantized.onnx +3 -0
  34. e5-small-v2-quantized/quantize_config.json +30 -0
  35. e5-small-v2-quantized/source.txt +1 -0
  36. e5-small-v2-quantized/special_tokens_map.json +7 -0
  37. e5-small-v2-quantized/tokenizer.json +0 -0
  38. e5-small-v2-quantized/tokenizer_config.json +15 -0
  39. e5-small-v2-quantized/vocab.txt +0 -0
  40. e5-small-v2/.gitattributes +34 -0
  41. e5-small-v2/1_Pooling/config.json +7 -0
  42. e5-small-v2/README.md +2686 -0
  43. e5-small-v2/config.json +25 -0
  44. e5-small-v2/modules.json +20 -0
  45. e5-small-v2/pytorch_model.bin +3 -0
  46. e5-small-v2/sentence_bert_config.json +4 -0
  47. e5-small-v2/source.txt +1 -0
  48. e5-small-v2/special_tokens_map.json +7 -0
  49. e5-small-v2/tokenizer.json +0 -0
  50. e5-small-v2/tokenizer_config.json +15 -0
.gitattributes CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ Text[[:space:]]Embeddings[[:space:]]by[[:space:]]Weakly-Supervised[[:space:]]Contrastive[[:space:]]Pre-training.pdf filter=lfs diff=lfs merge=lfs -text
Text Embeddings by Weakly-Supervised Contrastive Pre-training.pdf ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2963a8af050d983f287edef210a525a69b7071847978451399ccaa3ac584be70
3
+ size 484956
e5-base-v2/.gitattributes ADDED
@@ -0,0 +1,35 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ *.7z filter=lfs diff=lfs merge=lfs -text
2
+ *.arrow filter=lfs diff=lfs merge=lfs -text
3
+ *.bin filter=lfs diff=lfs merge=lfs -text
4
+ *.bz2 filter=lfs diff=lfs merge=lfs -text
5
+ *.ckpt filter=lfs diff=lfs merge=lfs -text
6
+ *.ftz filter=lfs diff=lfs merge=lfs -text
7
+ *.gz filter=lfs diff=lfs merge=lfs -text
8
+ *.h5 filter=lfs diff=lfs merge=lfs -text
9
+ *.joblib filter=lfs diff=lfs merge=lfs -text
10
+ *.lfs.* filter=lfs diff=lfs merge=lfs -text
11
+ *.mlmodel filter=lfs diff=lfs merge=lfs -text
12
+ *.model filter=lfs diff=lfs merge=lfs -text
13
+ *.msgpack filter=lfs diff=lfs merge=lfs -text
14
+ *.npy filter=lfs diff=lfs merge=lfs -text
15
+ *.npz filter=lfs diff=lfs merge=lfs -text
16
+ *.onnx filter=lfs diff=lfs merge=lfs -text
17
+ *.ot filter=lfs diff=lfs merge=lfs -text
18
+ *.parquet filter=lfs diff=lfs merge=lfs -text
19
+ *.pb filter=lfs diff=lfs merge=lfs -text
20
+ *.pickle filter=lfs diff=lfs merge=lfs -text
21
+ *.pkl filter=lfs diff=lfs merge=lfs -text
22
+ *.pt filter=lfs diff=lfs merge=lfs -text
23
+ *.pth filter=lfs diff=lfs merge=lfs -text
24
+ *.rar filter=lfs diff=lfs merge=lfs -text
25
+ *.safetensors filter=lfs diff=lfs merge=lfs -text
26
+ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
27
+ *.tar.* filter=lfs diff=lfs merge=lfs -text
28
+ *.tar filter=lfs diff=lfs merge=lfs -text
29
+ *.tflite filter=lfs diff=lfs merge=lfs -text
30
+ *.tgz filter=lfs diff=lfs merge=lfs -text
31
+ *.wasm filter=lfs diff=lfs merge=lfs -text
32
+ *.xz filter=lfs diff=lfs merge=lfs -text
33
+ *.zip filter=lfs diff=lfs merge=lfs -text
34
+ *.zst filter=lfs diff=lfs merge=lfs -text
35
+ *tfevents* filter=lfs diff=lfs merge=lfs -text
e5-base-v2/README.md ADDED
@@ -0,0 +1,24 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ base_model: intfloat/e5-base-v2
3
+ library_name: transformers.js
4
+ ---
5
+
6
+ https://huggingface.co/intfloat/e5-base-v2 with ONNX weights to be compatible with Transformers.js.
7
+
8
+ ## Usage (Transformers.js)
9
+
10
+ If you haven't already, you can install the [Transformers.js](https://huggingface.co/docs/transformers.js) JavaScript library from [NPM](https://www.npmjs.com/package/@huggingface/transformers) using:
11
+ ```bash
12
+ npm i @huggingface/transformers
13
+ ```
14
+
15
+ **Example:** Run feature extraction.
16
+
17
+ ```js
18
+ import { pipeline } from '@huggingface/transformers';
19
+
20
+ const extractor = await pipeline('feature-extraction', 'Xenova/e5-base-v2');
21
+ const output = await extractor('This is a simple test.');
22
+ ```
23
+
24
+ Note: Having a separate repo for ONNX weights is intended to be a temporary solution until WebML gains more traction. If you would like to make your models web-ready, we recommend converting to ONNX using [🤗 Optimum](https://huggingface.co/docs/optimum/index) and structuring your repo like this one (with ONNX weights located in a subfolder named `onnx`).
e5-base-v2/config.json ADDED
@@ -0,0 +1,25 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "intfloat/e5-base-v2",
3
+ "architectures": [
4
+ "BertModel"
5
+ ],
6
+ "attention_probs_dropout_prob": 0.1,
7
+ "classifier_dropout": null,
8
+ "gradient_checkpointing": false,
9
+ "hidden_act": "gelu",
10
+ "hidden_dropout_prob": 0.1,
11
+ "hidden_size": 768,
12
+ "initializer_range": 0.02,
13
+ "intermediate_size": 3072,
14
+ "layer_norm_eps": 1e-12,
15
+ "max_position_embeddings": 512,
16
+ "model_type": "bert",
17
+ "num_attention_heads": 12,
18
+ "num_hidden_layers": 12,
19
+ "pad_token_id": 0,
20
+ "position_embedding_type": "absolute",
21
+ "transformers_version": "4.30.2",
22
+ "type_vocab_size": 2,
23
+ "use_cache": true,
24
+ "vocab_size": 30522
25
+ }
e5-base-v2/onnx/model.onnx ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:157f97ef1957d34f52efa26f8031371bf9043acc45460cec7ebe94631ac0e96b
3
+ size 435811516
e5-base-v2/onnx/model_bnb4.onnx ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f2800b59d479d2f840424cd9415424c909b273653d8e95b7f39d20430fa92a44
3
+ size 143893470
e5-base-v2/onnx/model_fp16.onnx ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:823ade37a9d175334898ece493e2987984a56a5e3cf1b8c331bd8a6fabe9869c
3
+ size 218108177
e5-base-v2/onnx/model_int8.onnx ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a643759f4408882d6522618412f2b563a69b4179c96b58dd09f78fa5641507a6
3
+ size 109622402
e5-base-v2/onnx/model_q4.onnx ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ab9603ba5c19f81f90ab5baf329d42542bf3fd9ce5d7f43993cb2bb16591c759
3
+ size 149201358
e5-base-v2/onnx/model_q4f16.onnx ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a176a44398f27ac003cc6d09a39a3065768500417221b57288fb28cf2793c3e7
3
+ size 95979131
e5-base-v2/onnx/model_quantized.onnx ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d0a7aadf8227f5212695488d99de54523678f5f9ecfb070f2b58cc76cd84d882
3
+ size 110083279
e5-base-v2/onnx/model_uint8.onnx ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7010a2555644b687afaea396e2af3b335cdf676427920bd0c449d3c9c41ee17d
3
+ size 109622437
e5-base-v2/quant_config.json ADDED
@@ -0,0 +1,30 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "per_channel": true,
3
+ "reduce_range": true,
4
+ "per_model_config": {
5
+ "model": {
6
+ "op_types": [
7
+ "Concat",
8
+ "Shape",
9
+ "Erf",
10
+ "Mul",
11
+ "Reshape",
12
+ "Pow",
13
+ "Sqrt",
14
+ "ReduceMean",
15
+ "Gather",
16
+ "Div",
17
+ "Cast",
18
+ "Transpose",
19
+ "Softmax",
20
+ "Slice",
21
+ "MatMul",
22
+ "Constant",
23
+ "Unsqueeze",
24
+ "Add",
25
+ "Sub"
26
+ ],
27
+ "weight_type": "QInt8"
28
+ }
29
+ }
30
+ }
e5-base-v2/source.txt ADDED
@@ -0,0 +1 @@
 
 
1
+ https://huggingface.co/Xenova/e5-base-v2
e5-base-v2/special_tokens_map.json ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ {
2
+ "cls_token": "[CLS]",
3
+ "mask_token": "[MASK]",
4
+ "pad_token": "[PAD]",
5
+ "sep_token": "[SEP]",
6
+ "unk_token": "[UNK]"
7
+ }
e5-base-v2/tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
e5-base-v2/tokenizer_config.json ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "clean_up_tokenization_spaces": true,
3
+ "cls_token": "[CLS]",
4
+ "do_lower_case": true,
5
+ "mask_token": "[MASK]",
6
+ "model_max_length": 512,
7
+ "pad_token": "[PAD]",
8
+ "sep_token": "[SEP]",
9
+ "strip_accents": null,
10
+ "tokenize_chinese_chars": true,
11
+ "tokenizer_class": "BertTokenizer",
12
+ "unk_token": "[UNK]"
13
+ }
e5-base-v2/vocab.txt ADDED
The diff for this file is too large to render. See raw diff
 
e5-small-Quran/.gitattributes ADDED
@@ -0,0 +1,34 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ *.7z filter=lfs diff=lfs merge=lfs -text
2
+ *.arrow filter=lfs diff=lfs merge=lfs -text
3
+ *.bin filter=lfs diff=lfs merge=lfs -text
4
+ *.bz2 filter=lfs diff=lfs merge=lfs -text
5
+ *.ckpt filter=lfs diff=lfs merge=lfs -text
6
+ *.ftz filter=lfs diff=lfs merge=lfs -text
7
+ *.gz filter=lfs diff=lfs merge=lfs -text
8
+ *.h5 filter=lfs diff=lfs merge=lfs -text
9
+ *.joblib filter=lfs diff=lfs merge=lfs -text
10
+ *.lfs.* filter=lfs diff=lfs merge=lfs -text
11
+ *.mlmodel filter=lfs diff=lfs merge=lfs -text
12
+ *.model filter=lfs diff=lfs merge=lfs -text
13
+ *.msgpack filter=lfs diff=lfs merge=lfs -text
14
+ *.npy filter=lfs diff=lfs merge=lfs -text
15
+ *.npz filter=lfs diff=lfs merge=lfs -text
16
+ *.onnx filter=lfs diff=lfs merge=lfs -text
17
+ *.ot filter=lfs diff=lfs merge=lfs -text
18
+ *.parquet filter=lfs diff=lfs merge=lfs -text
19
+ *.pb filter=lfs diff=lfs merge=lfs -text
20
+ *.pickle filter=lfs diff=lfs merge=lfs -text
21
+ *.pkl filter=lfs diff=lfs merge=lfs -text
22
+ *.pt filter=lfs diff=lfs merge=lfs -text
23
+ *.pth filter=lfs diff=lfs merge=lfs -text
24
+ *.rar filter=lfs diff=lfs merge=lfs -text
25
+ *.safetensors filter=lfs diff=lfs merge=lfs -text
26
+ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
27
+ *.tar.* filter=lfs diff=lfs merge=lfs -text
28
+ *.tflite filter=lfs diff=lfs merge=lfs -text
29
+ *.tgz filter=lfs diff=lfs merge=lfs -text
30
+ *.wasm filter=lfs diff=lfs merge=lfs -text
31
+ *.xz filter=lfs diff=lfs merge=lfs -text
32
+ *.zip filter=lfs diff=lfs merge=lfs -text
33
+ *.zst filter=lfs diff=lfs merge=lfs -text
34
+ *tfevents* filter=lfs diff=lfs merge=lfs -text
e5-small-Quran/README.md ADDED
@@ -0,0 +1,41 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ license: mit
3
+ datasets:
4
+ - M-AI-C/quran-en-tafssirs
5
+ language:
6
+ - en
7
+ ---
8
+ ```python
9
+ import torch.nn.functional as F
10
+
11
+ from torch import Tensor
12
+ from transformers import AutoTokenizer, AutoModel
13
+
14
+
15
+ def average_pool(last_hidden_states: Tensor,
16
+ attention_mask: Tensor) -> Tensor:
17
+ last_hidden = last_hidden_states.masked_fill(~attention_mask[..., None].bool(), 0.0)
18
+ return last_hidden.sum(dim=1) / attention_mask.sum(dim=1)[..., None]
19
+
20
+
21
+ # Each input text should start with "query: " or "passage: ".
22
+ # For tasks other than retrieval, you can simply use the "query: " prefix.
23
+ input_texts = ['query: Who is prophet known for patience',
24
+ 'query: Who is moses',
25
+ "passage: passage 1",
26
+ "passage: passage 2"]
27
+
28
+ tokenizer = AutoTokenizer.from_pretrained('intfloat/e5-small')
29
+ model = AutoModel.from_pretrained('intfloat/e5-small')
30
+
31
+ # Tokenize the input texts
32
+ batch_dict = tokenizer(input_texts, max_length=512, padding=True, truncation=True, return_tensors='pt')
33
+
34
+ outputs = model(**batch_dict)
35
+ embeddings = average_pool(outputs.last_hidden_state, batch_dict['attention_mask'])
36
+
37
+ # (Optionally) normalize embeddings
38
+ embeddings = F.normalize(embeddings, p=2, dim=1)
39
+ scores = (embeddings[:2] @ embeddings[2:].T) * 100
40
+ print(scores.tolist())
41
+ ```
e5-small-Quran/config.json ADDED
@@ -0,0 +1,25 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "intfloat/e5-small",
3
+ "architectures": [
4
+ "BertModel"
5
+ ],
6
+ "attention_probs_dropout_prob": 0.1,
7
+ "classifier_dropout": null,
8
+ "hidden_act": "gelu",
9
+ "hidden_dropout_prob": 0.1,
10
+ "hidden_size": 384,
11
+ "initializer_range": 0.02,
12
+ "intermediate_size": 1536,
13
+ "layer_norm_eps": 1e-12,
14
+ "max_position_embeddings": 512,
15
+ "model_type": "bert",
16
+ "num_attention_heads": 12,
17
+ "num_hidden_layers": 12,
18
+ "pad_token_id": 0,
19
+ "position_embedding_type": "absolute",
20
+ "torch_dtype": "float32",
21
+ "transformers_version": "4.27.4",
22
+ "type_vocab_size": 2,
23
+ "use_cache": true,
24
+ "vocab_size": 30522
25
+ }
e5-small-Quran/pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fad09317c5f9b844024d0b4219bf4e006dc5a7a958079b18cb3300d363d0f4d6
3
+ size 133511213
e5-small-Quran/source.txt ADDED
@@ -0,0 +1 @@
 
 
1
+ https://huggingface.co/mustapha/e5-small-Quran
e5-small-Quran/special_tokens_map.json ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ {
2
+ "cls_token": "[CLS]",
3
+ "mask_token": "[MASK]",
4
+ "pad_token": "[PAD]",
5
+ "sep_token": "[SEP]",
6
+ "unk_token": "[UNK]"
7
+ }
e5-small-Quran/tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
e5-small-Quran/tokenizer_config.json ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "cls_token": "[CLS]",
3
+ "do_basic_tokenize": true,
4
+ "do_lower_case": true,
5
+ "mask_token": "[MASK]",
6
+ "model_max_length": 1000000000000000019884624838656,
7
+ "never_split": null,
8
+ "pad_token": "[PAD]",
9
+ "sep_token": "[SEP]",
10
+ "special_tokens_map_file": null,
11
+ "strip_accents": null,
12
+ "tokenize_chinese_chars": true,
13
+ "tokenizer_class": "BertTokenizer",
14
+ "unk_token": "[UNK]"
15
+ }
e5-small-Quran/vocab.txt ADDED
The diff for this file is too large to render. See raw diff
 
e5-small-v2-quantized/.gitattributes ADDED
@@ -0,0 +1,35 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ *.7z filter=lfs diff=lfs merge=lfs -text
2
+ *.arrow filter=lfs diff=lfs merge=lfs -text
3
+ *.bin filter=lfs diff=lfs merge=lfs -text
4
+ *.bz2 filter=lfs diff=lfs merge=lfs -text
5
+ *.ckpt filter=lfs diff=lfs merge=lfs -text
6
+ *.ftz filter=lfs diff=lfs merge=lfs -text
7
+ *.gz filter=lfs diff=lfs merge=lfs -text
8
+ *.h5 filter=lfs diff=lfs merge=lfs -text
9
+ *.joblib filter=lfs diff=lfs merge=lfs -text
10
+ *.lfs.* filter=lfs diff=lfs merge=lfs -text
11
+ *.mlmodel filter=lfs diff=lfs merge=lfs -text
12
+ *.model filter=lfs diff=lfs merge=lfs -text
13
+ *.msgpack filter=lfs diff=lfs merge=lfs -text
14
+ *.npy filter=lfs diff=lfs merge=lfs -text
15
+ *.npz filter=lfs diff=lfs merge=lfs -text
16
+ *.onnx filter=lfs diff=lfs merge=lfs -text
17
+ *.ot filter=lfs diff=lfs merge=lfs -text
18
+ *.parquet filter=lfs diff=lfs merge=lfs -text
19
+ *.pb filter=lfs diff=lfs merge=lfs -text
20
+ *.pickle filter=lfs diff=lfs merge=lfs -text
21
+ *.pkl filter=lfs diff=lfs merge=lfs -text
22
+ *.pt filter=lfs diff=lfs merge=lfs -text
23
+ *.pth filter=lfs diff=lfs merge=lfs -text
24
+ *.rar filter=lfs diff=lfs merge=lfs -text
25
+ *.safetensors filter=lfs diff=lfs merge=lfs -text
26
+ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
27
+ *.tar.* filter=lfs diff=lfs merge=lfs -text
28
+ *.tar filter=lfs diff=lfs merge=lfs -text
29
+ *.tflite filter=lfs diff=lfs merge=lfs -text
30
+ *.tgz filter=lfs diff=lfs merge=lfs -text
31
+ *.wasm filter=lfs diff=lfs merge=lfs -text
32
+ *.xz filter=lfs diff=lfs merge=lfs -text
33
+ *.zip filter=lfs diff=lfs merge=lfs -text
34
+ *.zst filter=lfs diff=lfs merge=lfs -text
35
+ *tfevents* filter=lfs diff=lfs merge=lfs -text
e5-small-v2-quantized/README.md ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ ---
2
+ license: mit
3
+ ---
e5-small-v2-quantized/config.json ADDED
@@ -0,0 +1,24 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "ggrn/e5-small-v2",
3
+ "architectures": [
4
+ "BertModel"
5
+ ],
6
+ "attention_probs_dropout_prob": 0.1,
7
+ "classifier_dropout": null,
8
+ "hidden_act": "gelu",
9
+ "hidden_dropout_prob": 0.1,
10
+ "hidden_size": 384,
11
+ "initializer_range": 0.02,
12
+ "intermediate_size": 1536,
13
+ "layer_norm_eps": 1e-12,
14
+ "max_position_embeddings": 512,
15
+ "model_type": "bert",
16
+ "num_attention_heads": 12,
17
+ "num_hidden_layers": 12,
18
+ "pad_token_id": 0,
19
+ "position_embedding_type": "absolute",
20
+ "transformers_version": "4.31.0.dev0",
21
+ "type_vocab_size": 2,
22
+ "use_cache": true,
23
+ "vocab_size": 30522
24
+ }
e5-small-v2-quantized/onnx/model.onnx ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:20a0dbefd78b3e8a62955c0c95650907d7a0b8ccb5e1b052ce3e26c54900233b
3
+ size 133093490
e5-small-v2-quantized/onnx/model_quantized.onnx ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c5bb8cbf8f1c2ab993e6385918e202d746d7678a10d50dfaae74b2086ea282b0
3
+ size 34014426
e5-small-v2-quantized/quantize_config.json ADDED
@@ -0,0 +1,30 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "per_channel": true,
3
+ "reduce_range": true,
4
+ "per_model_config": {
5
+ "model": {
6
+ "op_types": [
7
+ "Cast",
8
+ "Concat",
9
+ "Add",
10
+ "Constant",
11
+ "Div",
12
+ "ReduceMean",
13
+ "Softmax",
14
+ "Erf",
15
+ "Unsqueeze",
16
+ "Transpose",
17
+ "Slice",
18
+ "Shape",
19
+ "MatMul",
20
+ "Pow",
21
+ "Sub",
22
+ "Sqrt",
23
+ "Reshape",
24
+ "Gather",
25
+ "Mul"
26
+ ],
27
+ "weight_type": "QInt8"
28
+ }
29
+ }
30
+ }
e5-small-v2-quantized/source.txt ADDED
@@ -0,0 +1 @@
 
 
1
+ https://huggingface.co/ankitgupta8768/e5-small-v2-quantized
e5-small-v2-quantized/special_tokens_map.json ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ {
2
+ "cls_token": "[CLS]",
3
+ "mask_token": "[MASK]",
4
+ "pad_token": "[PAD]",
5
+ "sep_token": "[SEP]",
6
+ "unk_token": "[UNK]"
7
+ }
e5-small-v2-quantized/tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
e5-small-v2-quantized/tokenizer_config.json ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "clean_up_tokenization_spaces": true,
3
+ "cls_token": "[CLS]",
4
+ "do_basic_tokenize": true,
5
+ "do_lower_case": true,
6
+ "mask_token": "[MASK]",
7
+ "model_max_length": 1000000000000000019884624838656,
8
+ "never_split": null,
9
+ "pad_token": "[PAD]",
10
+ "sep_token": "[SEP]",
11
+ "strip_accents": null,
12
+ "tokenize_chinese_chars": true,
13
+ "tokenizer_class": "BertTokenizer",
14
+ "unk_token": "[UNK]"
15
+ }
e5-small-v2-quantized/vocab.txt ADDED
The diff for this file is too large to render. See raw diff
 
e5-small-v2/.gitattributes ADDED
@@ -0,0 +1,34 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ *.7z filter=lfs diff=lfs merge=lfs -text
2
+ *.arrow filter=lfs diff=lfs merge=lfs -text
3
+ *.bin filter=lfs diff=lfs merge=lfs -text
4
+ *.bz2 filter=lfs diff=lfs merge=lfs -text
5
+ *.ckpt filter=lfs diff=lfs merge=lfs -text
6
+ *.ftz filter=lfs diff=lfs merge=lfs -text
7
+ *.gz filter=lfs diff=lfs merge=lfs -text
8
+ *.h5 filter=lfs diff=lfs merge=lfs -text
9
+ *.joblib filter=lfs diff=lfs merge=lfs -text
10
+ *.lfs.* filter=lfs diff=lfs merge=lfs -text
11
+ *.mlmodel filter=lfs diff=lfs merge=lfs -text
12
+ *.model filter=lfs diff=lfs merge=lfs -text
13
+ *.msgpack filter=lfs diff=lfs merge=lfs -text
14
+ *.npy filter=lfs diff=lfs merge=lfs -text
15
+ *.npz filter=lfs diff=lfs merge=lfs -text
16
+ *.onnx filter=lfs diff=lfs merge=lfs -text
17
+ *.ot filter=lfs diff=lfs merge=lfs -text
18
+ *.parquet filter=lfs diff=lfs merge=lfs -text
19
+ *.pb filter=lfs diff=lfs merge=lfs -text
20
+ *.pickle filter=lfs diff=lfs merge=lfs -text
21
+ *.pkl filter=lfs diff=lfs merge=lfs -text
22
+ *.pt filter=lfs diff=lfs merge=lfs -text
23
+ *.pth filter=lfs diff=lfs merge=lfs -text
24
+ *.rar filter=lfs diff=lfs merge=lfs -text
25
+ *.safetensors filter=lfs diff=lfs merge=lfs -text
26
+ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
27
+ *.tar.* filter=lfs diff=lfs merge=lfs -text
28
+ *.tflite filter=lfs diff=lfs merge=lfs -text
29
+ *.tgz filter=lfs diff=lfs merge=lfs -text
30
+ *.wasm filter=lfs diff=lfs merge=lfs -text
31
+ *.xz filter=lfs diff=lfs merge=lfs -text
32
+ *.zip filter=lfs diff=lfs merge=lfs -text
33
+ *.zst filter=lfs diff=lfs merge=lfs -text
34
+ *tfevents* filter=lfs diff=lfs merge=lfs -text
e5-small-v2/1_Pooling/config.json ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ {
2
+ "word_embedding_dimension": 384,
3
+ "pooling_mode_cls_token": false,
4
+ "pooling_mode_mean_tokens": true,
5
+ "pooling_mode_max_tokens": false,
6
+ "pooling_mode_mean_sqrt_len_tokens": false
7
+ }
e5-small-v2/README.md ADDED
@@ -0,0 +1,2686 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ language:
3
+ - en
4
+ license: mit
5
+ library_name: sentence-transformers
6
+ pipeline_tag: feature-extraction
7
+ tags:
8
+ - mteb
9
+ model-index:
10
+ - name: e5-small-v2
11
+ results:
12
+ - task:
13
+ type: Classification
14
+ dataset:
15
+ type: mteb/amazon_counterfactual
16
+ name: MTEB AmazonCounterfactualClassification (en)
17
+ config: en
18
+ split: test
19
+ revision: e8379541af4e31359cca9fbcf4b00f2671dba205
20
+ metrics:
21
+ - type: accuracy
22
+ value: 77.59701492537313
23
+ - type: ap
24
+ value: 41.67064885731708
25
+ - type: f1
26
+ value: 71.86465946398573
27
+ - task:
28
+ type: Classification
29
+ dataset:
30
+ type: mteb/amazon_polarity
31
+ name: MTEB AmazonPolarityClassification
32
+ config: default
33
+ split: test
34
+ revision: e2d317d38cd51312af73b3d32a06d1a08b442046
35
+ metrics:
36
+ - type: accuracy
37
+ value: 91.265875
38
+ - type: ap
39
+ value: 87.67633085349644
40
+ - type: f1
41
+ value: 91.24297521425744
42
+ - task:
43
+ type: Classification
44
+ dataset:
45
+ type: mteb/amazon_reviews_multi
46
+ name: MTEB AmazonReviewsClassification (en)
47
+ config: en
48
+ split: test
49
+ revision: 1399c76144fd37290681b995c656ef9b2e06e26d
50
+ metrics:
51
+ - type: accuracy
52
+ value: 45.882000000000005
53
+ - type: f1
54
+ value: 45.08058870381236
55
+ - task:
56
+ type: Retrieval
57
+ dataset:
58
+ type: arguana
59
+ name: MTEB ArguAna
60
+ config: default
61
+ split: test
62
+ revision: None
63
+ metrics:
64
+ - type: map_at_1
65
+ value: 20.697
66
+ - type: map_at_10
67
+ value: 33.975
68
+ - type: map_at_100
69
+ value: 35.223
70
+ - type: map_at_1000
71
+ value: 35.260000000000005
72
+ - type: map_at_3
73
+ value: 29.776999999999997
74
+ - type: map_at_5
75
+ value: 32.035000000000004
76
+ - type: mrr_at_1
77
+ value: 20.982
78
+ - type: mrr_at_10
79
+ value: 34.094
80
+ - type: mrr_at_100
81
+ value: 35.343
82
+ - type: mrr_at_1000
83
+ value: 35.38
84
+ - type: mrr_at_3
85
+ value: 29.884
86
+ - type: mrr_at_5
87
+ value: 32.141999999999996
88
+ - type: ndcg_at_1
89
+ value: 20.697
90
+ - type: ndcg_at_10
91
+ value: 41.668
92
+ - type: ndcg_at_100
93
+ value: 47.397
94
+ - type: ndcg_at_1000
95
+ value: 48.305
96
+ - type: ndcg_at_3
97
+ value: 32.928000000000004
98
+ - type: ndcg_at_5
99
+ value: 36.998999999999995
100
+ - type: precision_at_1
101
+ value: 20.697
102
+ - type: precision_at_10
103
+ value: 6.636
104
+ - type: precision_at_100
105
+ value: 0.924
106
+ - type: precision_at_1000
107
+ value: 0.099
108
+ - type: precision_at_3
109
+ value: 14.035
110
+ - type: precision_at_5
111
+ value: 10.398
112
+ - type: recall_at_1
113
+ value: 20.697
114
+ - type: recall_at_10
115
+ value: 66.35799999999999
116
+ - type: recall_at_100
117
+ value: 92.39
118
+ - type: recall_at_1000
119
+ value: 99.36
120
+ - type: recall_at_3
121
+ value: 42.105
122
+ - type: recall_at_5
123
+ value: 51.991
124
+ - task:
125
+ type: Clustering
126
+ dataset:
127
+ type: mteb/arxiv-clustering-p2p
128
+ name: MTEB ArxivClusteringP2P
129
+ config: default
130
+ split: test
131
+ revision: a122ad7f3f0291bf49cc6f4d32aa80929df69d5d
132
+ metrics:
133
+ - type: v_measure
134
+ value: 42.1169517447068
135
+ - task:
136
+ type: Clustering
137
+ dataset:
138
+ type: mteb/arxiv-clustering-s2s
139
+ name: MTEB ArxivClusteringS2S
140
+ config: default
141
+ split: test
142
+ revision: f910caf1a6075f7329cdf8c1a6135696f37dbd53
143
+ metrics:
144
+ - type: v_measure
145
+ value: 34.79553720107097
146
+ - task:
147
+ type: Reranking
148
+ dataset:
149
+ type: mteb/askubuntudupquestions-reranking
150
+ name: MTEB AskUbuntuDupQuestions
151
+ config: default
152
+ split: test
153
+ revision: 2000358ca161889fa9c082cb41daa8dcfb161a54
154
+ metrics:
155
+ - type: map
156
+ value: 58.10811337308168
157
+ - type: mrr
158
+ value: 71.56410763751482
159
+ - task:
160
+ type: STS
161
+ dataset:
162
+ type: mteb/biosses-sts
163
+ name: MTEB BIOSSES
164
+ config: default
165
+ split: test
166
+ revision: d3fb88f8f02e40887cd149695127462bbcf29b4a
167
+ metrics:
168
+ - type: cos_sim_pearson
169
+ value: 78.46834918248696
170
+ - type: cos_sim_spearman
171
+ value: 79.4289182755206
172
+ - type: euclidean_pearson
173
+ value: 76.26662973727008
174
+ - type: euclidean_spearman
175
+ value: 78.11744260952536
176
+ - type: manhattan_pearson
177
+ value: 76.08175262609434
178
+ - type: manhattan_spearman
179
+ value: 78.29395265552289
180
+ - task:
181
+ type: Classification
182
+ dataset:
183
+ type: mteb/banking77
184
+ name: MTEB Banking77Classification
185
+ config: default
186
+ split: test
187
+ revision: 0fd18e25b25c072e09e0d92ab615fda904d66300
188
+ metrics:
189
+ - type: accuracy
190
+ value: 81.63636363636364
191
+ - type: f1
192
+ value: 81.55779952376953
193
+ - task:
194
+ type: Clustering
195
+ dataset:
196
+ type: mteb/biorxiv-clustering-p2p
197
+ name: MTEB BiorxivClusteringP2P
198
+ config: default
199
+ split: test
200
+ revision: 65b79d1d13f80053f67aca9498d9402c2d9f1f40
201
+ metrics:
202
+ - type: v_measure
203
+ value: 35.88541137137571
204
+ - task:
205
+ type: Clustering
206
+ dataset:
207
+ type: mteb/biorxiv-clustering-s2s
208
+ name: MTEB BiorxivClusteringS2S
209
+ config: default
210
+ split: test
211
+ revision: 258694dd0231531bc1fd9de6ceb52a0853c6d908
212
+ metrics:
213
+ - type: v_measure
214
+ value: 30.05205685274407
215
+ - task:
216
+ type: Retrieval
217
+ dataset:
218
+ type: BeIR/cqadupstack
219
+ name: MTEB CQADupstackAndroidRetrieval
220
+ config: default
221
+ split: test
222
+ revision: None
223
+ metrics:
224
+ - type: map_at_1
225
+ value: 30.293999999999997
226
+ - type: map_at_10
227
+ value: 39.876
228
+ - type: map_at_100
229
+ value: 41.315000000000005
230
+ - type: map_at_1000
231
+ value: 41.451
232
+ - type: map_at_3
233
+ value: 37.194
234
+ - type: map_at_5
235
+ value: 38.728
236
+ - type: mrr_at_1
237
+ value: 37.053000000000004
238
+ - type: mrr_at_10
239
+ value: 45.281
240
+ - type: mrr_at_100
241
+ value: 46.188
242
+ - type: mrr_at_1000
243
+ value: 46.245999999999995
244
+ - type: mrr_at_3
245
+ value: 43.228
246
+ - type: mrr_at_5
247
+ value: 44.366
248
+ - type: ndcg_at_1
249
+ value: 37.053000000000004
250
+ - type: ndcg_at_10
251
+ value: 45.086
252
+ - type: ndcg_at_100
253
+ value: 50.756
254
+ - type: ndcg_at_1000
255
+ value: 53.123
256
+ - type: ndcg_at_3
257
+ value: 41.416
258
+ - type: ndcg_at_5
259
+ value: 43.098
260
+ - type: precision_at_1
261
+ value: 37.053000000000004
262
+ - type: precision_at_10
263
+ value: 8.34
264
+ - type: precision_at_100
265
+ value: 1.346
266
+ - type: precision_at_1000
267
+ value: 0.186
268
+ - type: precision_at_3
269
+ value: 19.647000000000002
270
+ - type: precision_at_5
271
+ value: 13.877
272
+ - type: recall_at_1
273
+ value: 30.293999999999997
274
+ - type: recall_at_10
275
+ value: 54.309
276
+ - type: recall_at_100
277
+ value: 78.59
278
+ - type: recall_at_1000
279
+ value: 93.82300000000001
280
+ - type: recall_at_3
281
+ value: 43.168
282
+ - type: recall_at_5
283
+ value: 48.192
284
+ - task:
285
+ type: Retrieval
286
+ dataset:
287
+ type: BeIR/cqadupstack
288
+ name: MTEB CQADupstackEnglishRetrieval
289
+ config: default
290
+ split: test
291
+ revision: None
292
+ metrics:
293
+ - type: map_at_1
294
+ value: 28.738000000000003
295
+ - type: map_at_10
296
+ value: 36.925999999999995
297
+ - type: map_at_100
298
+ value: 38.017
299
+ - type: map_at_1000
300
+ value: 38.144
301
+ - type: map_at_3
302
+ value: 34.446
303
+ - type: map_at_5
304
+ value: 35.704
305
+ - type: mrr_at_1
306
+ value: 35.478
307
+ - type: mrr_at_10
308
+ value: 42.786
309
+ - type: mrr_at_100
310
+ value: 43.458999999999996
311
+ - type: mrr_at_1000
312
+ value: 43.507
313
+ - type: mrr_at_3
314
+ value: 40.648
315
+ - type: mrr_at_5
316
+ value: 41.804
317
+ - type: ndcg_at_1
318
+ value: 35.478
319
+ - type: ndcg_at_10
320
+ value: 42.044
321
+ - type: ndcg_at_100
322
+ value: 46.249
323
+ - type: ndcg_at_1000
324
+ value: 48.44
325
+ - type: ndcg_at_3
326
+ value: 38.314
327
+ - type: ndcg_at_5
328
+ value: 39.798
329
+ - type: precision_at_1
330
+ value: 35.478
331
+ - type: precision_at_10
332
+ value: 7.764
333
+ - type: precision_at_100
334
+ value: 1.253
335
+ - type: precision_at_1000
336
+ value: 0.174
337
+ - type: precision_at_3
338
+ value: 18.047
339
+ - type: precision_at_5
340
+ value: 12.637
341
+ - type: recall_at_1
342
+ value: 28.738000000000003
343
+ - type: recall_at_10
344
+ value: 50.659
345
+ - type: recall_at_100
346
+ value: 68.76299999999999
347
+ - type: recall_at_1000
348
+ value: 82.811
349
+ - type: recall_at_3
350
+ value: 39.536
351
+ - type: recall_at_5
352
+ value: 43.763999999999996
353
+ - task:
354
+ type: Retrieval
355
+ dataset:
356
+ type: BeIR/cqadupstack
357
+ name: MTEB CQADupstackGamingRetrieval
358
+ config: default
359
+ split: test
360
+ revision: None
361
+ metrics:
362
+ - type: map_at_1
363
+ value: 38.565
364
+ - type: map_at_10
365
+ value: 50.168
366
+ - type: map_at_100
367
+ value: 51.11
368
+ - type: map_at_1000
369
+ value: 51.173
370
+ - type: map_at_3
371
+ value: 47.044000000000004
372
+ - type: map_at_5
373
+ value: 48.838
374
+ - type: mrr_at_1
375
+ value: 44.201
376
+ - type: mrr_at_10
377
+ value: 53.596999999999994
378
+ - type: mrr_at_100
379
+ value: 54.211
380
+ - type: mrr_at_1000
381
+ value: 54.247
382
+ - type: mrr_at_3
383
+ value: 51.202000000000005
384
+ - type: mrr_at_5
385
+ value: 52.608999999999995
386
+ - type: ndcg_at_1
387
+ value: 44.201
388
+ - type: ndcg_at_10
389
+ value: 55.694
390
+ - type: ndcg_at_100
391
+ value: 59.518
392
+ - type: ndcg_at_1000
393
+ value: 60.907
394
+ - type: ndcg_at_3
395
+ value: 50.395999999999994
396
+ - type: ndcg_at_5
397
+ value: 53.022999999999996
398
+ - type: precision_at_1
399
+ value: 44.201
400
+ - type: precision_at_10
401
+ value: 8.84
402
+ - type: precision_at_100
403
+ value: 1.162
404
+ - type: precision_at_1000
405
+ value: 0.133
406
+ - type: precision_at_3
407
+ value: 22.153
408
+ - type: precision_at_5
409
+ value: 15.260000000000002
410
+ - type: recall_at_1
411
+ value: 38.565
412
+ - type: recall_at_10
413
+ value: 68.65
414
+ - type: recall_at_100
415
+ value: 85.37400000000001
416
+ - type: recall_at_1000
417
+ value: 95.37400000000001
418
+ - type: recall_at_3
419
+ value: 54.645999999999994
420
+ - type: recall_at_5
421
+ value: 60.958
422
+ - task:
423
+ type: Retrieval
424
+ dataset:
425
+ type: BeIR/cqadupstack
426
+ name: MTEB CQADupstackGisRetrieval
427
+ config: default
428
+ split: test
429
+ revision: None
430
+ metrics:
431
+ - type: map_at_1
432
+ value: 23.945
433
+ - type: map_at_10
434
+ value: 30.641000000000002
435
+ - type: map_at_100
436
+ value: 31.599
437
+ - type: map_at_1000
438
+ value: 31.691000000000003
439
+ - type: map_at_3
440
+ value: 28.405
441
+ - type: map_at_5
442
+ value: 29.704000000000004
443
+ - type: mrr_at_1
444
+ value: 25.537
445
+ - type: mrr_at_10
446
+ value: 32.22
447
+ - type: mrr_at_100
448
+ value: 33.138
449
+ - type: mrr_at_1000
450
+ value: 33.214
451
+ - type: mrr_at_3
452
+ value: 30.151
453
+ - type: mrr_at_5
454
+ value: 31.298
455
+ - type: ndcg_at_1
456
+ value: 25.537
457
+ - type: ndcg_at_10
458
+ value: 34.638000000000005
459
+ - type: ndcg_at_100
460
+ value: 39.486
461
+ - type: ndcg_at_1000
462
+ value: 41.936
463
+ - type: ndcg_at_3
464
+ value: 30.333
465
+ - type: ndcg_at_5
466
+ value: 32.482
467
+ - type: precision_at_1
468
+ value: 25.537
469
+ - type: precision_at_10
470
+ value: 5.153
471
+ - type: precision_at_100
472
+ value: 0.7929999999999999
473
+ - type: precision_at_1000
474
+ value: 0.104
475
+ - type: precision_at_3
476
+ value: 12.429
477
+ - type: precision_at_5
478
+ value: 8.723
479
+ - type: recall_at_1
480
+ value: 23.945
481
+ - type: recall_at_10
482
+ value: 45.412
483
+ - type: recall_at_100
484
+ value: 67.836
485
+ - type: recall_at_1000
486
+ value: 86.467
487
+ - type: recall_at_3
488
+ value: 34.031
489
+ - type: recall_at_5
490
+ value: 39.039
491
+ - task:
492
+ type: Retrieval
493
+ dataset:
494
+ type: BeIR/cqadupstack
495
+ name: MTEB CQADupstackMathematicaRetrieval
496
+ config: default
497
+ split: test
498
+ revision: None
499
+ metrics:
500
+ - type: map_at_1
501
+ value: 14.419
502
+ - type: map_at_10
503
+ value: 20.858999999999998
504
+ - type: map_at_100
505
+ value: 22.067999999999998
506
+ - type: map_at_1000
507
+ value: 22.192
508
+ - type: map_at_3
509
+ value: 18.673000000000002
510
+ - type: map_at_5
511
+ value: 19.968
512
+ - type: mrr_at_1
513
+ value: 17.785999999999998
514
+ - type: mrr_at_10
515
+ value: 24.878
516
+ - type: mrr_at_100
517
+ value: 26.021
518
+ - type: mrr_at_1000
519
+ value: 26.095000000000002
520
+ - type: mrr_at_3
521
+ value: 22.616
522
+ - type: mrr_at_5
523
+ value: 23.785
524
+ - type: ndcg_at_1
525
+ value: 17.785999999999998
526
+ - type: ndcg_at_10
527
+ value: 25.153
528
+ - type: ndcg_at_100
529
+ value: 31.05
530
+ - type: ndcg_at_1000
531
+ value: 34.052
532
+ - type: ndcg_at_3
533
+ value: 21.117
534
+ - type: ndcg_at_5
535
+ value: 23.048
536
+ - type: precision_at_1
537
+ value: 17.785999999999998
538
+ - type: precision_at_10
539
+ value: 4.590000000000001
540
+ - type: precision_at_100
541
+ value: 0.864
542
+ - type: precision_at_1000
543
+ value: 0.125
544
+ - type: precision_at_3
545
+ value: 9.908999999999999
546
+ - type: precision_at_5
547
+ value: 7.313
548
+ - type: recall_at_1
549
+ value: 14.419
550
+ - type: recall_at_10
551
+ value: 34.477999999999994
552
+ - type: recall_at_100
553
+ value: 60.02499999999999
554
+ - type: recall_at_1000
555
+ value: 81.646
556
+ - type: recall_at_3
557
+ value: 23.515
558
+ - type: recall_at_5
559
+ value: 28.266999999999996
560
+ - task:
561
+ type: Retrieval
562
+ dataset:
563
+ type: BeIR/cqadupstack
564
+ name: MTEB CQADupstackPhysicsRetrieval
565
+ config: default
566
+ split: test
567
+ revision: None
568
+ metrics:
569
+ - type: map_at_1
570
+ value: 26.268
571
+ - type: map_at_10
572
+ value: 35.114000000000004
573
+ - type: map_at_100
574
+ value: 36.212
575
+ - type: map_at_1000
576
+ value: 36.333
577
+ - type: map_at_3
578
+ value: 32.436
579
+ - type: map_at_5
580
+ value: 33.992
581
+ - type: mrr_at_1
582
+ value: 31.761
583
+ - type: mrr_at_10
584
+ value: 40.355999999999995
585
+ - type: mrr_at_100
586
+ value: 41.125
587
+ - type: mrr_at_1000
588
+ value: 41.186
589
+ - type: mrr_at_3
590
+ value: 37.937
591
+ - type: mrr_at_5
592
+ value: 39.463
593
+ - type: ndcg_at_1
594
+ value: 31.761
595
+ - type: ndcg_at_10
596
+ value: 40.422000000000004
597
+ - type: ndcg_at_100
598
+ value: 45.458999999999996
599
+ - type: ndcg_at_1000
600
+ value: 47.951
601
+ - type: ndcg_at_3
602
+ value: 35.972
603
+ - type: ndcg_at_5
604
+ value: 38.272
605
+ - type: precision_at_1
606
+ value: 31.761
607
+ - type: precision_at_10
608
+ value: 7.103
609
+ - type: precision_at_100
610
+ value: 1.133
611
+ - type: precision_at_1000
612
+ value: 0.152
613
+ - type: precision_at_3
614
+ value: 16.779
615
+ - type: precision_at_5
616
+ value: 11.877
617
+ - type: recall_at_1
618
+ value: 26.268
619
+ - type: recall_at_10
620
+ value: 51.053000000000004
621
+ - type: recall_at_100
622
+ value: 72.702
623
+ - type: recall_at_1000
624
+ value: 89.521
625
+ - type: recall_at_3
626
+ value: 38.619
627
+ - type: recall_at_5
628
+ value: 44.671
629
+ - task:
630
+ type: Retrieval
631
+ dataset:
632
+ type: BeIR/cqadupstack
633
+ name: MTEB CQADupstackProgrammersRetrieval
634
+ config: default
635
+ split: test
636
+ revision: None
637
+ metrics:
638
+ - type: map_at_1
639
+ value: 25.230999999999998
640
+ - type: map_at_10
641
+ value: 34.227000000000004
642
+ - type: map_at_100
643
+ value: 35.370000000000005
644
+ - type: map_at_1000
645
+ value: 35.488
646
+ - type: map_at_3
647
+ value: 31.496000000000002
648
+ - type: map_at_5
649
+ value: 33.034
650
+ - type: mrr_at_1
651
+ value: 30.822
652
+ - type: mrr_at_10
653
+ value: 39.045
654
+ - type: mrr_at_100
655
+ value: 39.809
656
+ - type: mrr_at_1000
657
+ value: 39.873
658
+ - type: mrr_at_3
659
+ value: 36.663000000000004
660
+ - type: mrr_at_5
661
+ value: 37.964
662
+ - type: ndcg_at_1
663
+ value: 30.822
664
+ - type: ndcg_at_10
665
+ value: 39.472
666
+ - type: ndcg_at_100
667
+ value: 44.574999999999996
668
+ - type: ndcg_at_1000
669
+ value: 47.162
670
+ - type: ndcg_at_3
671
+ value: 34.929
672
+ - type: ndcg_at_5
673
+ value: 37.002
674
+ - type: precision_at_1
675
+ value: 30.822
676
+ - type: precision_at_10
677
+ value: 7.055
678
+ - type: precision_at_100
679
+ value: 1.124
680
+ - type: precision_at_1000
681
+ value: 0.152
682
+ - type: precision_at_3
683
+ value: 16.591
684
+ - type: precision_at_5
685
+ value: 11.667
686
+ - type: recall_at_1
687
+ value: 25.230999999999998
688
+ - type: recall_at_10
689
+ value: 50.42100000000001
690
+ - type: recall_at_100
691
+ value: 72.685
692
+ - type: recall_at_1000
693
+ value: 90.469
694
+ - type: recall_at_3
695
+ value: 37.503
696
+ - type: recall_at_5
697
+ value: 43.123
698
+ - task:
699
+ type: Retrieval
700
+ dataset:
701
+ type: BeIR/cqadupstack
702
+ name: MTEB CQADupstackRetrieval
703
+ config: default
704
+ split: test
705
+ revision: None
706
+ metrics:
707
+ - type: map_at_1
708
+ value: 24.604166666666664
709
+ - type: map_at_10
710
+ value: 32.427166666666665
711
+ - type: map_at_100
712
+ value: 33.51474999999999
713
+ - type: map_at_1000
714
+ value: 33.6345
715
+ - type: map_at_3
716
+ value: 30.02366666666667
717
+ - type: map_at_5
718
+ value: 31.382333333333328
719
+ - type: mrr_at_1
720
+ value: 29.001166666666666
721
+ - type: mrr_at_10
722
+ value: 36.3315
723
+ - type: mrr_at_100
724
+ value: 37.16683333333333
725
+ - type: mrr_at_1000
726
+ value: 37.23341666666668
727
+ - type: mrr_at_3
728
+ value: 34.19916666666667
729
+ - type: mrr_at_5
730
+ value: 35.40458333333334
731
+ - type: ndcg_at_1
732
+ value: 29.001166666666666
733
+ - type: ndcg_at_10
734
+ value: 37.06883333333334
735
+ - type: ndcg_at_100
736
+ value: 41.95816666666666
737
+ - type: ndcg_at_1000
738
+ value: 44.501583333333336
739
+ - type: ndcg_at_3
740
+ value: 32.973499999999994
741
+ - type: ndcg_at_5
742
+ value: 34.90833333333334
743
+ - type: precision_at_1
744
+ value: 29.001166666666666
745
+ - type: precision_at_10
746
+ value: 6.336
747
+ - type: precision_at_100
748
+ value: 1.0282499999999999
749
+ - type: precision_at_1000
750
+ value: 0.14391666666666664
751
+ - type: precision_at_3
752
+ value: 14.932499999999996
753
+ - type: precision_at_5
754
+ value: 10.50825
755
+ - type: recall_at_1
756
+ value: 24.604166666666664
757
+ - type: recall_at_10
758
+ value: 46.9525
759
+ - type: recall_at_100
760
+ value: 68.67816666666667
761
+ - type: recall_at_1000
762
+ value: 86.59783333333334
763
+ - type: recall_at_3
764
+ value: 35.49783333333333
765
+ - type: recall_at_5
766
+ value: 40.52525000000001
767
+ - task:
768
+ type: Retrieval
769
+ dataset:
770
+ type: BeIR/cqadupstack
771
+ name: MTEB CQADupstackStatsRetrieval
772
+ config: default
773
+ split: test
774
+ revision: None
775
+ metrics:
776
+ - type: map_at_1
777
+ value: 23.559
778
+ - type: map_at_10
779
+ value: 29.023
780
+ - type: map_at_100
781
+ value: 29.818
782
+ - type: map_at_1000
783
+ value: 29.909000000000002
784
+ - type: map_at_3
785
+ value: 27.037
786
+ - type: map_at_5
787
+ value: 28.225
788
+ - type: mrr_at_1
789
+ value: 26.994
790
+ - type: mrr_at_10
791
+ value: 31.962000000000003
792
+ - type: mrr_at_100
793
+ value: 32.726
794
+ - type: mrr_at_1000
795
+ value: 32.800000000000004
796
+ - type: mrr_at_3
797
+ value: 30.266
798
+ - type: mrr_at_5
799
+ value: 31.208999999999996
800
+ - type: ndcg_at_1
801
+ value: 26.994
802
+ - type: ndcg_at_10
803
+ value: 32.53
804
+ - type: ndcg_at_100
805
+ value: 36.758
806
+ - type: ndcg_at_1000
807
+ value: 39.362
808
+ - type: ndcg_at_3
809
+ value: 28.985
810
+ - type: ndcg_at_5
811
+ value: 30.757
812
+ - type: precision_at_1
813
+ value: 26.994
814
+ - type: precision_at_10
815
+ value: 4.968999999999999
816
+ - type: precision_at_100
817
+ value: 0.759
818
+ - type: precision_at_1000
819
+ value: 0.106
820
+ - type: precision_at_3
821
+ value: 12.219
822
+ - type: precision_at_5
823
+ value: 8.527999999999999
824
+ - type: recall_at_1
825
+ value: 23.559
826
+ - type: recall_at_10
827
+ value: 40.585
828
+ - type: recall_at_100
829
+ value: 60.306000000000004
830
+ - type: recall_at_1000
831
+ value: 80.11
832
+ - type: recall_at_3
833
+ value: 30.794
834
+ - type: recall_at_5
835
+ value: 35.186
836
+ - task:
837
+ type: Retrieval
838
+ dataset:
839
+ type: BeIR/cqadupstack
840
+ name: MTEB CQADupstackTexRetrieval
841
+ config: default
842
+ split: test
843
+ revision: None
844
+ metrics:
845
+ - type: map_at_1
846
+ value: 16.384999999999998
847
+ - type: map_at_10
848
+ value: 22.142
849
+ - type: map_at_100
850
+ value: 23.057
851
+ - type: map_at_1000
852
+ value: 23.177
853
+ - type: map_at_3
854
+ value: 20.29
855
+ - type: map_at_5
856
+ value: 21.332
857
+ - type: mrr_at_1
858
+ value: 19.89
859
+ - type: mrr_at_10
860
+ value: 25.771
861
+ - type: mrr_at_100
862
+ value: 26.599
863
+ - type: mrr_at_1000
864
+ value: 26.680999999999997
865
+ - type: mrr_at_3
866
+ value: 23.962
867
+ - type: mrr_at_5
868
+ value: 24.934
869
+ - type: ndcg_at_1
870
+ value: 19.89
871
+ - type: ndcg_at_10
872
+ value: 25.97
873
+ - type: ndcg_at_100
874
+ value: 30.605
875
+ - type: ndcg_at_1000
876
+ value: 33.619
877
+ - type: ndcg_at_3
878
+ value: 22.704
879
+ - type: ndcg_at_5
880
+ value: 24.199
881
+ - type: precision_at_1
882
+ value: 19.89
883
+ - type: precision_at_10
884
+ value: 4.553
885
+ - type: precision_at_100
886
+ value: 0.8049999999999999
887
+ - type: precision_at_1000
888
+ value: 0.122
889
+ - type: precision_at_3
890
+ value: 10.541
891
+ - type: precision_at_5
892
+ value: 7.46
893
+ - type: recall_at_1
894
+ value: 16.384999999999998
895
+ - type: recall_at_10
896
+ value: 34.001
897
+ - type: recall_at_100
898
+ value: 55.17100000000001
899
+ - type: recall_at_1000
900
+ value: 77.125
901
+ - type: recall_at_3
902
+ value: 24.618000000000002
903
+ - type: recall_at_5
904
+ value: 28.695999999999998
905
+ - task:
906
+ type: Retrieval
907
+ dataset:
908
+ type: BeIR/cqadupstack
909
+ name: MTEB CQADupstackUnixRetrieval
910
+ config: default
911
+ split: test
912
+ revision: None
913
+ metrics:
914
+ - type: map_at_1
915
+ value: 23.726
916
+ - type: map_at_10
917
+ value: 31.227
918
+ - type: map_at_100
919
+ value: 32.311
920
+ - type: map_at_1000
921
+ value: 32.419
922
+ - type: map_at_3
923
+ value: 28.765
924
+ - type: map_at_5
925
+ value: 30.229
926
+ - type: mrr_at_1
927
+ value: 27.705000000000002
928
+ - type: mrr_at_10
929
+ value: 35.085
930
+ - type: mrr_at_100
931
+ value: 35.931000000000004
932
+ - type: mrr_at_1000
933
+ value: 36
934
+ - type: mrr_at_3
935
+ value: 32.603
936
+ - type: mrr_at_5
937
+ value: 34.117999999999995
938
+ - type: ndcg_at_1
939
+ value: 27.705000000000002
940
+ - type: ndcg_at_10
941
+ value: 35.968
942
+ - type: ndcg_at_100
943
+ value: 41.197
944
+ - type: ndcg_at_1000
945
+ value: 43.76
946
+ - type: ndcg_at_3
947
+ value: 31.304
948
+ - type: ndcg_at_5
949
+ value: 33.661
950
+ - type: precision_at_1
951
+ value: 27.705000000000002
952
+ - type: precision_at_10
953
+ value: 5.942
954
+ - type: precision_at_100
955
+ value: 0.964
956
+ - type: precision_at_1000
957
+ value: 0.13
958
+ - type: precision_at_3
959
+ value: 13.868
960
+ - type: precision_at_5
961
+ value: 9.944
962
+ - type: recall_at_1
963
+ value: 23.726
964
+ - type: recall_at_10
965
+ value: 46.786
966
+ - type: recall_at_100
967
+ value: 70.072
968
+ - type: recall_at_1000
969
+ value: 88.2
970
+ - type: recall_at_3
971
+ value: 33.981
972
+ - type: recall_at_5
973
+ value: 39.893
974
+ - task:
975
+ type: Retrieval
976
+ dataset:
977
+ type: BeIR/cqadupstack
978
+ name: MTEB CQADupstackWebmastersRetrieval
979
+ config: default
980
+ split: test
981
+ revision: None
982
+ metrics:
983
+ - type: map_at_1
984
+ value: 23.344
985
+ - type: map_at_10
986
+ value: 31.636999999999997
987
+ - type: map_at_100
988
+ value: 33.065
989
+ - type: map_at_1000
990
+ value: 33.300000000000004
991
+ - type: map_at_3
992
+ value: 29.351
993
+ - type: map_at_5
994
+ value: 30.432
995
+ - type: mrr_at_1
996
+ value: 27.866000000000003
997
+ - type: mrr_at_10
998
+ value: 35.587
999
+ - type: mrr_at_100
1000
+ value: 36.52
1001
+ - type: mrr_at_1000
1002
+ value: 36.597
1003
+ - type: mrr_at_3
1004
+ value: 33.696
1005
+ - type: mrr_at_5
1006
+ value: 34.713
1007
+ - type: ndcg_at_1
1008
+ value: 27.866000000000003
1009
+ - type: ndcg_at_10
1010
+ value: 36.61
1011
+ - type: ndcg_at_100
1012
+ value: 41.88
1013
+ - type: ndcg_at_1000
1014
+ value: 45.105000000000004
1015
+ - type: ndcg_at_3
1016
+ value: 33.038000000000004
1017
+ - type: ndcg_at_5
1018
+ value: 34.331
1019
+ - type: precision_at_1
1020
+ value: 27.866000000000003
1021
+ - type: precision_at_10
1022
+ value: 6.917
1023
+ - type: precision_at_100
1024
+ value: 1.3599999999999999
1025
+ - type: precision_at_1000
1026
+ value: 0.233
1027
+ - type: precision_at_3
1028
+ value: 15.547
1029
+ - type: precision_at_5
1030
+ value: 10.791
1031
+ - type: recall_at_1
1032
+ value: 23.344
1033
+ - type: recall_at_10
1034
+ value: 45.782000000000004
1035
+ - type: recall_at_100
1036
+ value: 69.503
1037
+ - type: recall_at_1000
1038
+ value: 90.742
1039
+ - type: recall_at_3
1040
+ value: 35.160000000000004
1041
+ - type: recall_at_5
1042
+ value: 39.058
1043
+ - task:
1044
+ type: Retrieval
1045
+ dataset:
1046
+ type: BeIR/cqadupstack
1047
+ name: MTEB CQADupstackWordpressRetrieval
1048
+ config: default
1049
+ split: test
1050
+ revision: None
1051
+ metrics:
1052
+ - type: map_at_1
1053
+ value: 20.776
1054
+ - type: map_at_10
1055
+ value: 27.285999999999998
1056
+ - type: map_at_100
1057
+ value: 28.235
1058
+ - type: map_at_1000
1059
+ value: 28.337
1060
+ - type: map_at_3
1061
+ value: 25.147000000000002
1062
+ - type: map_at_5
1063
+ value: 26.401999999999997
1064
+ - type: mrr_at_1
1065
+ value: 22.921
1066
+ - type: mrr_at_10
1067
+ value: 29.409999999999997
1068
+ - type: mrr_at_100
1069
+ value: 30.275000000000002
1070
+ - type: mrr_at_1000
1071
+ value: 30.354999999999997
1072
+ - type: mrr_at_3
1073
+ value: 27.418
1074
+ - type: mrr_at_5
1075
+ value: 28.592000000000002
1076
+ - type: ndcg_at_1
1077
+ value: 22.921
1078
+ - type: ndcg_at_10
1079
+ value: 31.239
1080
+ - type: ndcg_at_100
1081
+ value: 35.965
1082
+ - type: ndcg_at_1000
1083
+ value: 38.602
1084
+ - type: ndcg_at_3
1085
+ value: 27.174
1086
+ - type: ndcg_at_5
1087
+ value: 29.229
1088
+ - type: precision_at_1
1089
+ value: 22.921
1090
+ - type: precision_at_10
1091
+ value: 4.806
1092
+ - type: precision_at_100
1093
+ value: 0.776
1094
+ - type: precision_at_1000
1095
+ value: 0.11
1096
+ - type: precision_at_3
1097
+ value: 11.459999999999999
1098
+ - type: precision_at_5
1099
+ value: 8.022
1100
+ - type: recall_at_1
1101
+ value: 20.776
1102
+ - type: recall_at_10
1103
+ value: 41.294
1104
+ - type: recall_at_100
1105
+ value: 63.111
1106
+ - type: recall_at_1000
1107
+ value: 82.88600000000001
1108
+ - type: recall_at_3
1109
+ value: 30.403000000000002
1110
+ - type: recall_at_5
1111
+ value: 35.455999999999996
1112
+ - task:
1113
+ type: Retrieval
1114
+ dataset:
1115
+ type: climate-fever
1116
+ name: MTEB ClimateFEVER
1117
+ config: default
1118
+ split: test
1119
+ revision: None
1120
+ metrics:
1121
+ - type: map_at_1
1122
+ value: 9.376
1123
+ - type: map_at_10
1124
+ value: 15.926000000000002
1125
+ - type: map_at_100
1126
+ value: 17.585
1127
+ - type: map_at_1000
1128
+ value: 17.776
1129
+ - type: map_at_3
1130
+ value: 13.014000000000001
1131
+ - type: map_at_5
1132
+ value: 14.417
1133
+ - type: mrr_at_1
1134
+ value: 20.195
1135
+ - type: mrr_at_10
1136
+ value: 29.95
1137
+ - type: mrr_at_100
1138
+ value: 31.052000000000003
1139
+ - type: mrr_at_1000
1140
+ value: 31.108000000000004
1141
+ - type: mrr_at_3
1142
+ value: 26.667
1143
+ - type: mrr_at_5
1144
+ value: 28.458
1145
+ - type: ndcg_at_1
1146
+ value: 20.195
1147
+ - type: ndcg_at_10
1148
+ value: 22.871
1149
+ - type: ndcg_at_100
1150
+ value: 29.921999999999997
1151
+ - type: ndcg_at_1000
1152
+ value: 33.672999999999995
1153
+ - type: ndcg_at_3
1154
+ value: 17.782999999999998
1155
+ - type: ndcg_at_5
1156
+ value: 19.544
1157
+ - type: precision_at_1
1158
+ value: 20.195
1159
+ - type: precision_at_10
1160
+ value: 7.394
1161
+ - type: precision_at_100
1162
+ value: 1.493
1163
+ - type: precision_at_1000
1164
+ value: 0.218
1165
+ - type: precision_at_3
1166
+ value: 13.073
1167
+ - type: precision_at_5
1168
+ value: 10.436
1169
+ - type: recall_at_1
1170
+ value: 9.376
1171
+ - type: recall_at_10
1172
+ value: 28.544999999999998
1173
+ - type: recall_at_100
1174
+ value: 53.147999999999996
1175
+ - type: recall_at_1000
1176
+ value: 74.62
1177
+ - type: recall_at_3
1178
+ value: 16.464000000000002
1179
+ - type: recall_at_5
1180
+ value: 21.004
1181
+ - task:
1182
+ type: Retrieval
1183
+ dataset:
1184
+ type: dbpedia-entity
1185
+ name: MTEB DBPedia
1186
+ config: default
1187
+ split: test
1188
+ revision: None
1189
+ metrics:
1190
+ - type: map_at_1
1191
+ value: 8.415000000000001
1192
+ - type: map_at_10
1193
+ value: 18.738
1194
+ - type: map_at_100
1195
+ value: 27.291999999999998
1196
+ - type: map_at_1000
1197
+ value: 28.992
1198
+ - type: map_at_3
1199
+ value: 13.196
1200
+ - type: map_at_5
1201
+ value: 15.539
1202
+ - type: mrr_at_1
1203
+ value: 66.5
1204
+ - type: mrr_at_10
1205
+ value: 74.518
1206
+ - type: mrr_at_100
1207
+ value: 74.86
1208
+ - type: mrr_at_1000
1209
+ value: 74.87
1210
+ - type: mrr_at_3
1211
+ value: 72.375
1212
+ - type: mrr_at_5
1213
+ value: 73.86200000000001
1214
+ - type: ndcg_at_1
1215
+ value: 54.37499999999999
1216
+ - type: ndcg_at_10
1217
+ value: 41.317
1218
+ - type: ndcg_at_100
1219
+ value: 45.845
1220
+ - type: ndcg_at_1000
1221
+ value: 52.92
1222
+ - type: ndcg_at_3
1223
+ value: 44.983000000000004
1224
+ - type: ndcg_at_5
1225
+ value: 42.989
1226
+ - type: precision_at_1
1227
+ value: 66.5
1228
+ - type: precision_at_10
1229
+ value: 33.6
1230
+ - type: precision_at_100
1231
+ value: 10.972999999999999
1232
+ - type: precision_at_1000
1233
+ value: 2.214
1234
+ - type: precision_at_3
1235
+ value: 48.583
1236
+ - type: precision_at_5
1237
+ value: 42.15
1238
+ - type: recall_at_1
1239
+ value: 8.415000000000001
1240
+ - type: recall_at_10
1241
+ value: 24.953
1242
+ - type: recall_at_100
1243
+ value: 52.48199999999999
1244
+ - type: recall_at_1000
1245
+ value: 75.093
1246
+ - type: recall_at_3
1247
+ value: 14.341000000000001
1248
+ - type: recall_at_5
1249
+ value: 18.468
1250
+ - task:
1251
+ type: Classification
1252
+ dataset:
1253
+ type: mteb/emotion
1254
+ name: MTEB EmotionClassification
1255
+ config: default
1256
+ split: test
1257
+ revision: 4f58c6b202a23cf9a4da393831edf4f9183cad37
1258
+ metrics:
1259
+ - type: accuracy
1260
+ value: 47.06499999999999
1261
+ - type: f1
1262
+ value: 41.439327599975385
1263
+ - task:
1264
+ type: Retrieval
1265
+ dataset:
1266
+ type: fever
1267
+ name: MTEB FEVER
1268
+ config: default
1269
+ split: test
1270
+ revision: None
1271
+ metrics:
1272
+ - type: map_at_1
1273
+ value: 66.02
1274
+ - type: map_at_10
1275
+ value: 76.68599999999999
1276
+ - type: map_at_100
1277
+ value: 76.959
1278
+ - type: map_at_1000
1279
+ value: 76.972
1280
+ - type: map_at_3
1281
+ value: 75.024
1282
+ - type: map_at_5
1283
+ value: 76.153
1284
+ - type: mrr_at_1
1285
+ value: 71.197
1286
+ - type: mrr_at_10
1287
+ value: 81.105
1288
+ - type: mrr_at_100
1289
+ value: 81.232
1290
+ - type: mrr_at_1000
1291
+ value: 81.233
1292
+ - type: mrr_at_3
1293
+ value: 79.758
1294
+ - type: mrr_at_5
1295
+ value: 80.69
1296
+ - type: ndcg_at_1
1297
+ value: 71.197
1298
+ - type: ndcg_at_10
1299
+ value: 81.644
1300
+ - type: ndcg_at_100
1301
+ value: 82.645
1302
+ - type: ndcg_at_1000
1303
+ value: 82.879
1304
+ - type: ndcg_at_3
1305
+ value: 78.792
1306
+ - type: ndcg_at_5
1307
+ value: 80.528
1308
+ - type: precision_at_1
1309
+ value: 71.197
1310
+ - type: precision_at_10
1311
+ value: 10.206999999999999
1312
+ - type: precision_at_100
1313
+ value: 1.093
1314
+ - type: precision_at_1000
1315
+ value: 0.11299999999999999
1316
+ - type: precision_at_3
1317
+ value: 30.868000000000002
1318
+ - type: precision_at_5
1319
+ value: 19.559
1320
+ - type: recall_at_1
1321
+ value: 66.02
1322
+ - type: recall_at_10
1323
+ value: 92.50699999999999
1324
+ - type: recall_at_100
1325
+ value: 96.497
1326
+ - type: recall_at_1000
1327
+ value: 97.956
1328
+ - type: recall_at_3
1329
+ value: 84.866
1330
+ - type: recall_at_5
1331
+ value: 89.16199999999999
1332
+ - task:
1333
+ type: Retrieval
1334
+ dataset:
1335
+ type: fiqa
1336
+ name: MTEB FiQA2018
1337
+ config: default
1338
+ split: test
1339
+ revision: None
1340
+ metrics:
1341
+ - type: map_at_1
1342
+ value: 17.948
1343
+ - type: map_at_10
1344
+ value: 29.833
1345
+ - type: map_at_100
1346
+ value: 31.487
1347
+ - type: map_at_1000
1348
+ value: 31.674000000000003
1349
+ - type: map_at_3
1350
+ value: 26.029999999999998
1351
+ - type: map_at_5
1352
+ value: 28.038999999999998
1353
+ - type: mrr_at_1
1354
+ value: 34.721999999999994
1355
+ - type: mrr_at_10
1356
+ value: 44.214999999999996
1357
+ - type: mrr_at_100
1358
+ value: 44.994
1359
+ - type: mrr_at_1000
1360
+ value: 45.051
1361
+ - type: mrr_at_3
1362
+ value: 41.667
1363
+ - type: mrr_at_5
1364
+ value: 43.032
1365
+ - type: ndcg_at_1
1366
+ value: 34.721999999999994
1367
+ - type: ndcg_at_10
1368
+ value: 37.434
1369
+ - type: ndcg_at_100
1370
+ value: 43.702000000000005
1371
+ - type: ndcg_at_1000
1372
+ value: 46.993
1373
+ - type: ndcg_at_3
1374
+ value: 33.56
1375
+ - type: ndcg_at_5
1376
+ value: 34.687
1377
+ - type: precision_at_1
1378
+ value: 34.721999999999994
1379
+ - type: precision_at_10
1380
+ value: 10.401
1381
+ - type: precision_at_100
1382
+ value: 1.7049999999999998
1383
+ - type: precision_at_1000
1384
+ value: 0.22799999999999998
1385
+ - type: precision_at_3
1386
+ value: 22.531000000000002
1387
+ - type: precision_at_5
1388
+ value: 16.42
1389
+ - type: recall_at_1
1390
+ value: 17.948
1391
+ - type: recall_at_10
1392
+ value: 45.062999999999995
1393
+ - type: recall_at_100
1394
+ value: 68.191
1395
+ - type: recall_at_1000
1396
+ value: 87.954
1397
+ - type: recall_at_3
1398
+ value: 31.112000000000002
1399
+ - type: recall_at_5
1400
+ value: 36.823
1401
+ - task:
1402
+ type: Retrieval
1403
+ dataset:
1404
+ type: hotpotqa
1405
+ name: MTEB HotpotQA
1406
+ config: default
1407
+ split: test
1408
+ revision: None
1409
+ metrics:
1410
+ - type: map_at_1
1411
+ value: 36.644
1412
+ - type: map_at_10
1413
+ value: 57.658
1414
+ - type: map_at_100
1415
+ value: 58.562000000000005
1416
+ - type: map_at_1000
1417
+ value: 58.62500000000001
1418
+ - type: map_at_3
1419
+ value: 54.022999999999996
1420
+ - type: map_at_5
1421
+ value: 56.293000000000006
1422
+ - type: mrr_at_1
1423
+ value: 73.288
1424
+ - type: mrr_at_10
1425
+ value: 80.51700000000001
1426
+ - type: mrr_at_100
1427
+ value: 80.72
1428
+ - type: mrr_at_1000
1429
+ value: 80.728
1430
+ - type: mrr_at_3
1431
+ value: 79.33200000000001
1432
+ - type: mrr_at_5
1433
+ value: 80.085
1434
+ - type: ndcg_at_1
1435
+ value: 73.288
1436
+ - type: ndcg_at_10
1437
+ value: 66.61
1438
+ - type: ndcg_at_100
1439
+ value: 69.723
1440
+ - type: ndcg_at_1000
1441
+ value: 70.96000000000001
1442
+ - type: ndcg_at_3
1443
+ value: 61.358999999999995
1444
+ - type: ndcg_at_5
1445
+ value: 64.277
1446
+ - type: precision_at_1
1447
+ value: 73.288
1448
+ - type: precision_at_10
1449
+ value: 14.17
1450
+ - type: precision_at_100
1451
+ value: 1.659
1452
+ - type: precision_at_1000
1453
+ value: 0.182
1454
+ - type: precision_at_3
1455
+ value: 39.487
1456
+ - type: precision_at_5
1457
+ value: 25.999
1458
+ - type: recall_at_1
1459
+ value: 36.644
1460
+ - type: recall_at_10
1461
+ value: 70.851
1462
+ - type: recall_at_100
1463
+ value: 82.94399999999999
1464
+ - type: recall_at_1000
1465
+ value: 91.134
1466
+ - type: recall_at_3
1467
+ value: 59.230000000000004
1468
+ - type: recall_at_5
1469
+ value: 64.997
1470
+ - task:
1471
+ type: Classification
1472
+ dataset:
1473
+ type: mteb/imdb
1474
+ name: MTEB ImdbClassification
1475
+ config: default
1476
+ split: test
1477
+ revision: 3d86128a09e091d6018b6d26cad27f2739fc2db7
1478
+ metrics:
1479
+ - type: accuracy
1480
+ value: 86.00280000000001
1481
+ - type: ap
1482
+ value: 80.46302061021223
1483
+ - type: f1
1484
+ value: 85.9592921596419
1485
+ - task:
1486
+ type: Retrieval
1487
+ dataset:
1488
+ type: msmarco
1489
+ name: MTEB MSMARCO
1490
+ config: default
1491
+ split: dev
1492
+ revision: None
1493
+ metrics:
1494
+ - type: map_at_1
1495
+ value: 22.541
1496
+ - type: map_at_10
1497
+ value: 34.625
1498
+ - type: map_at_100
1499
+ value: 35.785
1500
+ - type: map_at_1000
1501
+ value: 35.831
1502
+ - type: map_at_3
1503
+ value: 30.823
1504
+ - type: map_at_5
1505
+ value: 32.967999999999996
1506
+ - type: mrr_at_1
1507
+ value: 23.180999999999997
1508
+ - type: mrr_at_10
1509
+ value: 35.207
1510
+ - type: mrr_at_100
1511
+ value: 36.315
1512
+ - type: mrr_at_1000
1513
+ value: 36.355
1514
+ - type: mrr_at_3
1515
+ value: 31.483
1516
+ - type: mrr_at_5
1517
+ value: 33.589999999999996
1518
+ - type: ndcg_at_1
1519
+ value: 23.195
1520
+ - type: ndcg_at_10
1521
+ value: 41.461
1522
+ - type: ndcg_at_100
1523
+ value: 47.032000000000004
1524
+ - type: ndcg_at_1000
1525
+ value: 48.199999999999996
1526
+ - type: ndcg_at_3
1527
+ value: 33.702
1528
+ - type: ndcg_at_5
1529
+ value: 37.522
1530
+ - type: precision_at_1
1531
+ value: 23.195
1532
+ - type: precision_at_10
1533
+ value: 6.526999999999999
1534
+ - type: precision_at_100
1535
+ value: 0.932
1536
+ - type: precision_at_1000
1537
+ value: 0.10300000000000001
1538
+ - type: precision_at_3
1539
+ value: 14.308000000000002
1540
+ - type: precision_at_5
1541
+ value: 10.507
1542
+ - type: recall_at_1
1543
+ value: 22.541
1544
+ - type: recall_at_10
1545
+ value: 62.524
1546
+ - type: recall_at_100
1547
+ value: 88.228
1548
+ - type: recall_at_1000
1549
+ value: 97.243
1550
+ - type: recall_at_3
1551
+ value: 41.38
1552
+ - type: recall_at_5
1553
+ value: 50.55
1554
+ - task:
1555
+ type: Classification
1556
+ dataset:
1557
+ type: mteb/mtop_domain
1558
+ name: MTEB MTOPDomainClassification (en)
1559
+ config: en
1560
+ split: test
1561
+ revision: d80d48c1eb48d3562165c59d59d0034df9fff0bf
1562
+ metrics:
1563
+ - type: accuracy
1564
+ value: 92.69949840401279
1565
+ - type: f1
1566
+ value: 92.54141471311786
1567
+ - task:
1568
+ type: Classification
1569
+ dataset:
1570
+ type: mteb/mtop_intent
1571
+ name: MTEB MTOPIntentClassification (en)
1572
+ config: en
1573
+ split: test
1574
+ revision: ae001d0e6b1228650b7bd1c2c65fb50ad11a8aba
1575
+ metrics:
1576
+ - type: accuracy
1577
+ value: 72.56041951664386
1578
+ - type: f1
1579
+ value: 55.88499977508287
1580
+ - task:
1581
+ type: Classification
1582
+ dataset:
1583
+ type: mteb/amazon_massive_intent
1584
+ name: MTEB MassiveIntentClassification (en)
1585
+ config: en
1586
+ split: test
1587
+ revision: 31efe3c427b0bae9c22cbb560b8f15491cc6bed7
1588
+ metrics:
1589
+ - type: accuracy
1590
+ value: 71.62071284465365
1591
+ - type: f1
1592
+ value: 69.36717546572152
1593
+ - task:
1594
+ type: Classification
1595
+ dataset:
1596
+ type: mteb/amazon_massive_scenario
1597
+ name: MTEB MassiveScenarioClassification (en)
1598
+ config: en
1599
+ split: test
1600
+ revision: 7d571f92784cd94a019292a1f45445077d0ef634
1601
+ metrics:
1602
+ - type: accuracy
1603
+ value: 76.35843981170142
1604
+ - type: f1
1605
+ value: 76.15496453538884
1606
+ - task:
1607
+ type: Clustering
1608
+ dataset:
1609
+ type: mteb/medrxiv-clustering-p2p
1610
+ name: MTEB MedrxivClusteringP2P
1611
+ config: default
1612
+ split: test
1613
+ revision: e7a26af6f3ae46b30dde8737f02c07b1505bcc73
1614
+ metrics:
1615
+ - type: v_measure
1616
+ value: 31.33664956793118
1617
+ - task:
1618
+ type: Clustering
1619
+ dataset:
1620
+ type: mteb/medrxiv-clustering-s2s
1621
+ name: MTEB MedrxivClusteringS2S
1622
+ config: default
1623
+ split: test
1624
+ revision: 35191c8c0dca72d8ff3efcd72aa802307d469663
1625
+ metrics:
1626
+ - type: v_measure
1627
+ value: 27.883839621715524
1628
+ - task:
1629
+ type: Reranking
1630
+ dataset:
1631
+ type: mteb/mind_small
1632
+ name: MTEB MindSmallReranking
1633
+ config: default
1634
+ split: test
1635
+ revision: 3bdac13927fdc888b903db93b2ffdbd90b295a69
1636
+ metrics:
1637
+ - type: map
1638
+ value: 30.096874986740758
1639
+ - type: mrr
1640
+ value: 30.97300481932132
1641
+ - task:
1642
+ type: Retrieval
1643
+ dataset:
1644
+ type: nfcorpus
1645
+ name: MTEB NFCorpus
1646
+ config: default
1647
+ split: test
1648
+ revision: None
1649
+ metrics:
1650
+ - type: map_at_1
1651
+ value: 5.4
1652
+ - type: map_at_10
1653
+ value: 11.852
1654
+ - type: map_at_100
1655
+ value: 14.758
1656
+ - type: map_at_1000
1657
+ value: 16.134
1658
+ - type: map_at_3
1659
+ value: 8.558
1660
+ - type: map_at_5
1661
+ value: 10.087
1662
+ - type: mrr_at_1
1663
+ value: 44.272
1664
+ - type: mrr_at_10
1665
+ value: 52.05800000000001
1666
+ - type: mrr_at_100
1667
+ value: 52.689
1668
+ - type: mrr_at_1000
1669
+ value: 52.742999999999995
1670
+ - type: mrr_at_3
1671
+ value: 50.205999999999996
1672
+ - type: mrr_at_5
1673
+ value: 51.367
1674
+ - type: ndcg_at_1
1675
+ value: 42.57
1676
+ - type: ndcg_at_10
1677
+ value: 32.449
1678
+ - type: ndcg_at_100
1679
+ value: 29.596
1680
+ - type: ndcg_at_1000
1681
+ value: 38.351
1682
+ - type: ndcg_at_3
1683
+ value: 37.044
1684
+ - type: ndcg_at_5
1685
+ value: 35.275
1686
+ - type: precision_at_1
1687
+ value: 44.272
1688
+ - type: precision_at_10
1689
+ value: 23.87
1690
+ - type: precision_at_100
1691
+ value: 7.625
1692
+ - type: precision_at_1000
1693
+ value: 2.045
1694
+ - type: precision_at_3
1695
+ value: 34.365
1696
+ - type: precision_at_5
1697
+ value: 30.341
1698
+ - type: recall_at_1
1699
+ value: 5.4
1700
+ - type: recall_at_10
1701
+ value: 15.943999999999999
1702
+ - type: recall_at_100
1703
+ value: 29.805
1704
+ - type: recall_at_1000
1705
+ value: 61.695
1706
+ - type: recall_at_3
1707
+ value: 9.539
1708
+ - type: recall_at_5
1709
+ value: 12.127
1710
+ - task:
1711
+ type: Retrieval
1712
+ dataset:
1713
+ type: nq
1714
+ name: MTEB NQ
1715
+ config: default
1716
+ split: test
1717
+ revision: None
1718
+ metrics:
1719
+ - type: map_at_1
1720
+ value: 36.047000000000004
1721
+ - type: map_at_10
1722
+ value: 51.6
1723
+ - type: map_at_100
1724
+ value: 52.449999999999996
1725
+ - type: map_at_1000
1726
+ value: 52.476
1727
+ - type: map_at_3
1728
+ value: 47.452
1729
+ - type: map_at_5
1730
+ value: 49.964
1731
+ - type: mrr_at_1
1732
+ value: 40.382
1733
+ - type: mrr_at_10
1734
+ value: 54.273
1735
+ - type: mrr_at_100
1736
+ value: 54.859
1737
+ - type: mrr_at_1000
1738
+ value: 54.876000000000005
1739
+ - type: mrr_at_3
1740
+ value: 51.014
1741
+ - type: mrr_at_5
1742
+ value: 52.983999999999995
1743
+ - type: ndcg_at_1
1744
+ value: 40.353
1745
+ - type: ndcg_at_10
1746
+ value: 59.11300000000001
1747
+ - type: ndcg_at_100
1748
+ value: 62.604000000000006
1749
+ - type: ndcg_at_1000
1750
+ value: 63.187000000000005
1751
+ - type: ndcg_at_3
1752
+ value: 51.513
1753
+ - type: ndcg_at_5
1754
+ value: 55.576
1755
+ - type: precision_at_1
1756
+ value: 40.353
1757
+ - type: precision_at_10
1758
+ value: 9.418
1759
+ - type: precision_at_100
1760
+ value: 1.1440000000000001
1761
+ - type: precision_at_1000
1762
+ value: 0.12
1763
+ - type: precision_at_3
1764
+ value: 23.078000000000003
1765
+ - type: precision_at_5
1766
+ value: 16.250999999999998
1767
+ - type: recall_at_1
1768
+ value: 36.047000000000004
1769
+ - type: recall_at_10
1770
+ value: 79.22200000000001
1771
+ - type: recall_at_100
1772
+ value: 94.23
1773
+ - type: recall_at_1000
1774
+ value: 98.51100000000001
1775
+ - type: recall_at_3
1776
+ value: 59.678
1777
+ - type: recall_at_5
1778
+ value: 68.967
1779
+ - task:
1780
+ type: Retrieval
1781
+ dataset:
1782
+ type: quora
1783
+ name: MTEB QuoraRetrieval
1784
+ config: default
1785
+ split: test
1786
+ revision: None
1787
+ metrics:
1788
+ - type: map_at_1
1789
+ value: 68.232
1790
+ - type: map_at_10
1791
+ value: 81.674
1792
+ - type: map_at_100
1793
+ value: 82.338
1794
+ - type: map_at_1000
1795
+ value: 82.36099999999999
1796
+ - type: map_at_3
1797
+ value: 78.833
1798
+ - type: map_at_5
1799
+ value: 80.58
1800
+ - type: mrr_at_1
1801
+ value: 78.64
1802
+ - type: mrr_at_10
1803
+ value: 85.164
1804
+ - type: mrr_at_100
1805
+ value: 85.317
1806
+ - type: mrr_at_1000
1807
+ value: 85.319
1808
+ - type: mrr_at_3
1809
+ value: 84.127
1810
+ - type: mrr_at_5
1811
+ value: 84.789
1812
+ - type: ndcg_at_1
1813
+ value: 78.63
1814
+ - type: ndcg_at_10
1815
+ value: 85.711
1816
+ - type: ndcg_at_100
1817
+ value: 87.238
1818
+ - type: ndcg_at_1000
1819
+ value: 87.444
1820
+ - type: ndcg_at_3
1821
+ value: 82.788
1822
+ - type: ndcg_at_5
1823
+ value: 84.313
1824
+ - type: precision_at_1
1825
+ value: 78.63
1826
+ - type: precision_at_10
1827
+ value: 12.977
1828
+ - type: precision_at_100
1829
+ value: 1.503
1830
+ - type: precision_at_1000
1831
+ value: 0.156
1832
+ - type: precision_at_3
1833
+ value: 36.113
1834
+ - type: precision_at_5
1835
+ value: 23.71
1836
+ - type: recall_at_1
1837
+ value: 68.232
1838
+ - type: recall_at_10
1839
+ value: 93.30199999999999
1840
+ - type: recall_at_100
1841
+ value: 98.799
1842
+ - type: recall_at_1000
1843
+ value: 99.885
1844
+ - type: recall_at_3
1845
+ value: 84.827
1846
+ - type: recall_at_5
1847
+ value: 89.188
1848
+ - task:
1849
+ type: Clustering
1850
+ dataset:
1851
+ type: mteb/reddit-clustering
1852
+ name: MTEB RedditClustering
1853
+ config: default
1854
+ split: test
1855
+ revision: 24640382cdbf8abc73003fb0fa6d111a705499eb
1856
+ metrics:
1857
+ - type: v_measure
1858
+ value: 45.71879170816294
1859
+ - task:
1860
+ type: Clustering
1861
+ dataset:
1862
+ type: mteb/reddit-clustering-p2p
1863
+ name: MTEB RedditClusteringP2P
1864
+ config: default
1865
+ split: test
1866
+ revision: 282350215ef01743dc01b456c7f5241fa8937f16
1867
+ metrics:
1868
+ - type: v_measure
1869
+ value: 59.65866311751794
1870
+ - task:
1871
+ type: Retrieval
1872
+ dataset:
1873
+ type: scidocs
1874
+ name: MTEB SCIDOCS
1875
+ config: default
1876
+ split: test
1877
+ revision: None
1878
+ metrics:
1879
+ - type: map_at_1
1880
+ value: 4.218
1881
+ - type: map_at_10
1882
+ value: 10.337
1883
+ - type: map_at_100
1884
+ value: 12.131
1885
+ - type: map_at_1000
1886
+ value: 12.411
1887
+ - type: map_at_3
1888
+ value: 7.4270000000000005
1889
+ - type: map_at_5
1890
+ value: 8.913
1891
+ - type: mrr_at_1
1892
+ value: 20.8
1893
+ - type: mrr_at_10
1894
+ value: 30.868000000000002
1895
+ - type: mrr_at_100
1896
+ value: 31.903
1897
+ - type: mrr_at_1000
1898
+ value: 31.972
1899
+ - type: mrr_at_3
1900
+ value: 27.367
1901
+ - type: mrr_at_5
1902
+ value: 29.372
1903
+ - type: ndcg_at_1
1904
+ value: 20.8
1905
+ - type: ndcg_at_10
1906
+ value: 17.765
1907
+ - type: ndcg_at_100
1908
+ value: 24.914
1909
+ - type: ndcg_at_1000
1910
+ value: 30.206
1911
+ - type: ndcg_at_3
1912
+ value: 16.64
1913
+ - type: ndcg_at_5
1914
+ value: 14.712
1915
+ - type: precision_at_1
1916
+ value: 20.8
1917
+ - type: precision_at_10
1918
+ value: 9.24
1919
+ - type: precision_at_100
1920
+ value: 1.9560000000000002
1921
+ - type: precision_at_1000
1922
+ value: 0.32299999999999995
1923
+ - type: precision_at_3
1924
+ value: 15.467
1925
+ - type: precision_at_5
1926
+ value: 12.94
1927
+ - type: recall_at_1
1928
+ value: 4.218
1929
+ - type: recall_at_10
1930
+ value: 18.752
1931
+ - type: recall_at_100
1932
+ value: 39.7
1933
+ - type: recall_at_1000
1934
+ value: 65.57300000000001
1935
+ - type: recall_at_3
1936
+ value: 9.428
1937
+ - type: recall_at_5
1938
+ value: 13.133000000000001
1939
+ - task:
1940
+ type: STS
1941
+ dataset:
1942
+ type: mteb/sickr-sts
1943
+ name: MTEB SICK-R
1944
+ config: default
1945
+ split: test
1946
+ revision: a6ea5a8cab320b040a23452cc28066d9beae2cee
1947
+ metrics:
1948
+ - type: cos_sim_pearson
1949
+ value: 83.04338850207233
1950
+ - type: cos_sim_spearman
1951
+ value: 78.5054651430423
1952
+ - type: euclidean_pearson
1953
+ value: 80.30739451228612
1954
+ - type: euclidean_spearman
1955
+ value: 78.48377464299097
1956
+ - type: manhattan_pearson
1957
+ value: 80.40795049052781
1958
+ - type: manhattan_spearman
1959
+ value: 78.49506205443114
1960
+ - task:
1961
+ type: STS
1962
+ dataset:
1963
+ type: mteb/sts12-sts
1964
+ name: MTEB STS12
1965
+ config: default
1966
+ split: test
1967
+ revision: a0d554a64d88156834ff5ae9920b964011b16384
1968
+ metrics:
1969
+ - type: cos_sim_pearson
1970
+ value: 84.11596224442962
1971
+ - type: cos_sim_spearman
1972
+ value: 76.20997388935461
1973
+ - type: euclidean_pearson
1974
+ value: 80.56858451349109
1975
+ - type: euclidean_spearman
1976
+ value: 75.92659183871186
1977
+ - type: manhattan_pearson
1978
+ value: 80.60246102203844
1979
+ - type: manhattan_spearman
1980
+ value: 76.03018971432664
1981
+ - task:
1982
+ type: STS
1983
+ dataset:
1984
+ type: mteb/sts13-sts
1985
+ name: MTEB STS13
1986
+ config: default
1987
+ split: test
1988
+ revision: 7e90230a92c190f1bf69ae9002b8cea547a64cca
1989
+ metrics:
1990
+ - type: cos_sim_pearson
1991
+ value: 81.34691640755737
1992
+ - type: cos_sim_spearman
1993
+ value: 82.4018369631579
1994
+ - type: euclidean_pearson
1995
+ value: 81.87673092245366
1996
+ - type: euclidean_spearman
1997
+ value: 82.3671489960678
1998
+ - type: manhattan_pearson
1999
+ value: 81.88222387719948
2000
+ - type: manhattan_spearman
2001
+ value: 82.3816590344736
2002
+ - task:
2003
+ type: STS
2004
+ dataset:
2005
+ type: mteb/sts14-sts
2006
+ name: MTEB STS14
2007
+ config: default
2008
+ split: test
2009
+ revision: 6031580fec1f6af667f0bd2da0a551cf4f0b2375
2010
+ metrics:
2011
+ - type: cos_sim_pearson
2012
+ value: 81.2836092579524
2013
+ - type: cos_sim_spearman
2014
+ value: 78.99982781772064
2015
+ - type: euclidean_pearson
2016
+ value: 80.5184271010527
2017
+ - type: euclidean_spearman
2018
+ value: 78.89777392101904
2019
+ - type: manhattan_pearson
2020
+ value: 80.53585705018664
2021
+ - type: manhattan_spearman
2022
+ value: 78.92898405472994
2023
+ - task:
2024
+ type: STS
2025
+ dataset:
2026
+ type: mteb/sts15-sts
2027
+ name: MTEB STS15
2028
+ config: default
2029
+ split: test
2030
+ revision: ae752c7c21bf194d8b67fd573edf7ae58183cbe3
2031
+ metrics:
2032
+ - type: cos_sim_pearson
2033
+ value: 86.7349907750784
2034
+ - type: cos_sim_spearman
2035
+ value: 87.7611234446225
2036
+ - type: euclidean_pearson
2037
+ value: 86.98759326731624
2038
+ - type: euclidean_spearman
2039
+ value: 87.58321319424618
2040
+ - type: manhattan_pearson
2041
+ value: 87.03483090370842
2042
+ - type: manhattan_spearman
2043
+ value: 87.63278333060288
2044
+ - task:
2045
+ type: STS
2046
+ dataset:
2047
+ type: mteb/sts16-sts
2048
+ name: MTEB STS16
2049
+ config: default
2050
+ split: test
2051
+ revision: 4d8694f8f0e0100860b497b999b3dbed754a0513
2052
+ metrics:
2053
+ - type: cos_sim_pearson
2054
+ value: 81.75873694924825
2055
+ - type: cos_sim_spearman
2056
+ value: 83.80237999094724
2057
+ - type: euclidean_pearson
2058
+ value: 83.55023725861537
2059
+ - type: euclidean_spearman
2060
+ value: 84.12744338577744
2061
+ - type: manhattan_pearson
2062
+ value: 83.58816983036232
2063
+ - type: manhattan_spearman
2064
+ value: 84.18520748676501
2065
+ - task:
2066
+ type: STS
2067
+ dataset:
2068
+ type: mteb/sts17-crosslingual-sts
2069
+ name: MTEB STS17 (en-en)
2070
+ config: en-en
2071
+ split: test
2072
+ revision: af5e6fb845001ecf41f4c1e033ce921939a2a68d
2073
+ metrics:
2074
+ - type: cos_sim_pearson
2075
+ value: 87.21630882940174
2076
+ - type: cos_sim_spearman
2077
+ value: 87.72382883437031
2078
+ - type: euclidean_pearson
2079
+ value: 88.69933350930333
2080
+ - type: euclidean_spearman
2081
+ value: 88.24660814383081
2082
+ - type: manhattan_pearson
2083
+ value: 88.77331018833499
2084
+ - type: manhattan_spearman
2085
+ value: 88.26109989380632
2086
+ - task:
2087
+ type: STS
2088
+ dataset:
2089
+ type: mteb/sts22-crosslingual-sts
2090
+ name: MTEB STS22 (en)
2091
+ config: en
2092
+ split: test
2093
+ revision: 6d1ba47164174a496b7fa5d3569dae26a6813b80
2094
+ metrics:
2095
+ - type: cos_sim_pearson
2096
+ value: 61.11854063060489
2097
+ - type: cos_sim_spearman
2098
+ value: 63.14678634195072
2099
+ - type: euclidean_pearson
2100
+ value: 61.679090067000864
2101
+ - type: euclidean_spearman
2102
+ value: 62.28876589509653
2103
+ - type: manhattan_pearson
2104
+ value: 62.082324165511004
2105
+ - type: manhattan_spearman
2106
+ value: 62.56030932816679
2107
+ - task:
2108
+ type: STS
2109
+ dataset:
2110
+ type: mteb/stsbenchmark-sts
2111
+ name: MTEB STSBenchmark
2112
+ config: default
2113
+ split: test
2114
+ revision: b0fddb56ed78048fa8b90373c8a3cfc37b684831
2115
+ metrics:
2116
+ - type: cos_sim_pearson
2117
+ value: 84.00319882832645
2118
+ - type: cos_sim_spearman
2119
+ value: 85.94529772647257
2120
+ - type: euclidean_pearson
2121
+ value: 85.6661390122756
2122
+ - type: euclidean_spearman
2123
+ value: 85.97747815545827
2124
+ - type: manhattan_pearson
2125
+ value: 85.58422770541893
2126
+ - type: manhattan_spearman
2127
+ value: 85.9237139181532
2128
+ - task:
2129
+ type: Reranking
2130
+ dataset:
2131
+ type: mteb/scidocs-reranking
2132
+ name: MTEB SciDocsRR
2133
+ config: default
2134
+ split: test
2135
+ revision: d3c5e1fc0b855ab6097bf1cda04dd73947d7caab
2136
+ metrics:
2137
+ - type: map
2138
+ value: 79.16198731863916
2139
+ - type: mrr
2140
+ value: 94.25202702163487
2141
+ - task:
2142
+ type: Retrieval
2143
+ dataset:
2144
+ type: scifact
2145
+ name: MTEB SciFact
2146
+ config: default
2147
+ split: test
2148
+ revision: None
2149
+ metrics:
2150
+ - type: map_at_1
2151
+ value: 54.761
2152
+ - type: map_at_10
2153
+ value: 64.396
2154
+ - type: map_at_100
2155
+ value: 65.07
2156
+ - type: map_at_1000
2157
+ value: 65.09899999999999
2158
+ - type: map_at_3
2159
+ value: 61.846000000000004
2160
+ - type: map_at_5
2161
+ value: 63.284
2162
+ - type: mrr_at_1
2163
+ value: 57.667
2164
+ - type: mrr_at_10
2165
+ value: 65.83099999999999
2166
+ - type: mrr_at_100
2167
+ value: 66.36800000000001
2168
+ - type: mrr_at_1000
2169
+ value: 66.39399999999999
2170
+ - type: mrr_at_3
2171
+ value: 64.056
2172
+ - type: mrr_at_5
2173
+ value: 65.206
2174
+ - type: ndcg_at_1
2175
+ value: 57.667
2176
+ - type: ndcg_at_10
2177
+ value: 68.854
2178
+ - type: ndcg_at_100
2179
+ value: 71.59100000000001
2180
+ - type: ndcg_at_1000
2181
+ value: 72.383
2182
+ - type: ndcg_at_3
2183
+ value: 64.671
2184
+ - type: ndcg_at_5
2185
+ value: 66.796
2186
+ - type: precision_at_1
2187
+ value: 57.667
2188
+ - type: precision_at_10
2189
+ value: 9.167
2190
+ - type: precision_at_100
2191
+ value: 1.053
2192
+ - type: precision_at_1000
2193
+ value: 0.11199999999999999
2194
+ - type: precision_at_3
2195
+ value: 25.444
2196
+ - type: precision_at_5
2197
+ value: 16.667
2198
+ - type: recall_at_1
2199
+ value: 54.761
2200
+ - type: recall_at_10
2201
+ value: 80.9
2202
+ - type: recall_at_100
2203
+ value: 92.767
2204
+ - type: recall_at_1000
2205
+ value: 99
2206
+ - type: recall_at_3
2207
+ value: 69.672
2208
+ - type: recall_at_5
2209
+ value: 75.083
2210
+ - task:
2211
+ type: PairClassification
2212
+ dataset:
2213
+ type: mteb/sprintduplicatequestions-pairclassification
2214
+ name: MTEB SprintDuplicateQuestions
2215
+ config: default
2216
+ split: test
2217
+ revision: d66bd1f72af766a5cc4b0ca5e00c162f89e8cc46
2218
+ metrics:
2219
+ - type: cos_sim_accuracy
2220
+ value: 99.8079207920792
2221
+ - type: cos_sim_ap
2222
+ value: 94.88470927617445
2223
+ - type: cos_sim_f1
2224
+ value: 90.08179959100204
2225
+ - type: cos_sim_precision
2226
+ value: 92.15481171548117
2227
+ - type: cos_sim_recall
2228
+ value: 88.1
2229
+ - type: dot_accuracy
2230
+ value: 99.58613861386138
2231
+ - type: dot_ap
2232
+ value: 82.94822578881316
2233
+ - type: dot_f1
2234
+ value: 77.33333333333333
2235
+ - type: dot_precision
2236
+ value: 79.36842105263158
2237
+ - type: dot_recall
2238
+ value: 75.4
2239
+ - type: euclidean_accuracy
2240
+ value: 99.8069306930693
2241
+ - type: euclidean_ap
2242
+ value: 94.81367858031837
2243
+ - type: euclidean_f1
2244
+ value: 90.01009081735621
2245
+ - type: euclidean_precision
2246
+ value: 90.83503054989816
2247
+ - type: euclidean_recall
2248
+ value: 89.2
2249
+ - type: manhattan_accuracy
2250
+ value: 99.81188118811882
2251
+ - type: manhattan_ap
2252
+ value: 94.91405337220161
2253
+ - type: manhattan_f1
2254
+ value: 90.2763561924258
2255
+ - type: manhattan_precision
2256
+ value: 92.45283018867924
2257
+ - type: manhattan_recall
2258
+ value: 88.2
2259
+ - type: max_accuracy
2260
+ value: 99.81188118811882
2261
+ - type: max_ap
2262
+ value: 94.91405337220161
2263
+ - type: max_f1
2264
+ value: 90.2763561924258
2265
+ - task:
2266
+ type: Clustering
2267
+ dataset:
2268
+ type: mteb/stackexchange-clustering
2269
+ name: MTEB StackExchangeClustering
2270
+ config: default
2271
+ split: test
2272
+ revision: 6cbc1f7b2bc0622f2e39d2c77fa502909748c259
2273
+ metrics:
2274
+ - type: v_measure
2275
+ value: 58.511599500053094
2276
+ - task:
2277
+ type: Clustering
2278
+ dataset:
2279
+ type: mteb/stackexchange-clustering-p2p
2280
+ name: MTEB StackExchangeClusteringP2P
2281
+ config: default
2282
+ split: test
2283
+ revision: 815ca46b2622cec33ccafc3735d572c266efdb44
2284
+ metrics:
2285
+ - type: v_measure
2286
+ value: 31.984728147814707
2287
+ - task:
2288
+ type: Reranking
2289
+ dataset:
2290
+ type: mteb/stackoverflowdupquestions-reranking
2291
+ name: MTEB StackOverflowDupQuestions
2292
+ config: default
2293
+ split: test
2294
+ revision: e185fbe320c72810689fc5848eb6114e1ef5ec69
2295
+ metrics:
2296
+ - type: map
2297
+ value: 49.93428193939015
2298
+ - type: mrr
2299
+ value: 50.916557911043206
2300
+ - task:
2301
+ type: Summarization
2302
+ dataset:
2303
+ type: mteb/summeval
2304
+ name: MTEB SummEval
2305
+ config: default
2306
+ split: test
2307
+ revision: cda12ad7615edc362dbf25a00fdd61d3b1eaf93c
2308
+ metrics:
2309
+ - type: cos_sim_pearson
2310
+ value: 31.562500894537145
2311
+ - type: cos_sim_spearman
2312
+ value: 31.162587976726307
2313
+ - type: dot_pearson
2314
+ value: 22.633662187735762
2315
+ - type: dot_spearman
2316
+ value: 22.723000282378962
2317
+ - task:
2318
+ type: Retrieval
2319
+ dataset:
2320
+ type: trec-covid
2321
+ name: MTEB TRECCOVID
2322
+ config: default
2323
+ split: test
2324
+ revision: None
2325
+ metrics:
2326
+ - type: map_at_1
2327
+ value: 0.219
2328
+ - type: map_at_10
2329
+ value: 1.871
2330
+ - type: map_at_100
2331
+ value: 10.487
2332
+ - type: map_at_1000
2333
+ value: 25.122
2334
+ - type: map_at_3
2335
+ value: 0.657
2336
+ - type: map_at_5
2337
+ value: 1.0699999999999998
2338
+ - type: mrr_at_1
2339
+ value: 84
2340
+ - type: mrr_at_10
2341
+ value: 89.567
2342
+ - type: mrr_at_100
2343
+ value: 89.748
2344
+ - type: mrr_at_1000
2345
+ value: 89.748
2346
+ - type: mrr_at_3
2347
+ value: 88.667
2348
+ - type: mrr_at_5
2349
+ value: 89.567
2350
+ - type: ndcg_at_1
2351
+ value: 80
2352
+ - type: ndcg_at_10
2353
+ value: 74.533
2354
+ - type: ndcg_at_100
2355
+ value: 55.839000000000006
2356
+ - type: ndcg_at_1000
2357
+ value: 49.748
2358
+ - type: ndcg_at_3
2359
+ value: 79.53099999999999
2360
+ - type: ndcg_at_5
2361
+ value: 78.245
2362
+ - type: precision_at_1
2363
+ value: 84
2364
+ - type: precision_at_10
2365
+ value: 78.4
2366
+ - type: precision_at_100
2367
+ value: 56.99999999999999
2368
+ - type: precision_at_1000
2369
+ value: 21.98
2370
+ - type: precision_at_3
2371
+ value: 85.333
2372
+ - type: precision_at_5
2373
+ value: 84.8
2374
+ - type: recall_at_1
2375
+ value: 0.219
2376
+ - type: recall_at_10
2377
+ value: 2.02
2378
+ - type: recall_at_100
2379
+ value: 13.555
2380
+ - type: recall_at_1000
2381
+ value: 46.739999999999995
2382
+ - type: recall_at_3
2383
+ value: 0.685
2384
+ - type: recall_at_5
2385
+ value: 1.13
2386
+ - task:
2387
+ type: Retrieval
2388
+ dataset:
2389
+ type: webis-touche2020
2390
+ name: MTEB Touche2020
2391
+ config: default
2392
+ split: test
2393
+ revision: None
2394
+ metrics:
2395
+ - type: map_at_1
2396
+ value: 3.5029999999999997
2397
+ - type: map_at_10
2398
+ value: 11.042
2399
+ - type: map_at_100
2400
+ value: 16.326999999999998
2401
+ - type: map_at_1000
2402
+ value: 17.836
2403
+ - type: map_at_3
2404
+ value: 6.174
2405
+ - type: map_at_5
2406
+ value: 7.979
2407
+ - type: mrr_at_1
2408
+ value: 42.857
2409
+ - type: mrr_at_10
2410
+ value: 52.617000000000004
2411
+ - type: mrr_at_100
2412
+ value: 53.351000000000006
2413
+ - type: mrr_at_1000
2414
+ value: 53.351000000000006
2415
+ - type: mrr_at_3
2416
+ value: 46.939
2417
+ - type: mrr_at_5
2418
+ value: 50.714000000000006
2419
+ - type: ndcg_at_1
2420
+ value: 38.775999999999996
2421
+ - type: ndcg_at_10
2422
+ value: 27.125
2423
+ - type: ndcg_at_100
2424
+ value: 35.845
2425
+ - type: ndcg_at_1000
2426
+ value: 47.377
2427
+ - type: ndcg_at_3
2428
+ value: 29.633
2429
+ - type: ndcg_at_5
2430
+ value: 28.378999999999998
2431
+ - type: precision_at_1
2432
+ value: 42.857
2433
+ - type: precision_at_10
2434
+ value: 24.082
2435
+ - type: precision_at_100
2436
+ value: 6.877999999999999
2437
+ - type: precision_at_1000
2438
+ value: 1.463
2439
+ - type: precision_at_3
2440
+ value: 29.932
2441
+ - type: precision_at_5
2442
+ value: 28.571
2443
+ - type: recall_at_1
2444
+ value: 3.5029999999999997
2445
+ - type: recall_at_10
2446
+ value: 17.068
2447
+ - type: recall_at_100
2448
+ value: 43.361
2449
+ - type: recall_at_1000
2450
+ value: 78.835
2451
+ - type: recall_at_3
2452
+ value: 6.821000000000001
2453
+ - type: recall_at_5
2454
+ value: 10.357
2455
+ - task:
2456
+ type: Classification
2457
+ dataset:
2458
+ type: mteb/toxic_conversations_50k
2459
+ name: MTEB ToxicConversationsClassification
2460
+ config: default
2461
+ split: test
2462
+ revision: d7c0de2777da35d6aae2200a62c6e0e5af397c4c
2463
+ metrics:
2464
+ - type: accuracy
2465
+ value: 71.0954
2466
+ - type: ap
2467
+ value: 14.216844153511959
2468
+ - type: f1
2469
+ value: 54.63687418565117
2470
+ - task:
2471
+ type: Classification
2472
+ dataset:
2473
+ type: mteb/tweet_sentiment_extraction
2474
+ name: MTEB TweetSentimentExtractionClassification
2475
+ config: default
2476
+ split: test
2477
+ revision: d604517c81ca91fe16a244d1248fc021f9ecee7a
2478
+ metrics:
2479
+ - type: accuracy
2480
+ value: 61.46293152235427
2481
+ - type: f1
2482
+ value: 61.744177921638645
2483
+ - task:
2484
+ type: Clustering
2485
+ dataset:
2486
+ type: mteb/twentynewsgroups-clustering
2487
+ name: MTEB TwentyNewsgroupsClustering
2488
+ config: default
2489
+ split: test
2490
+ revision: 6125ec4e24fa026cec8a478383ee943acfbd5449
2491
+ metrics:
2492
+ - type: v_measure
2493
+ value: 41.12708617788644
2494
+ - task:
2495
+ type: PairClassification
2496
+ dataset:
2497
+ type: mteb/twittersemeval2015-pairclassification
2498
+ name: MTEB TwitterSemEval2015
2499
+ config: default
2500
+ split: test
2501
+ revision: 70970daeab8776df92f5ea462b6173c0b46fd2d1
2502
+ metrics:
2503
+ - type: cos_sim_accuracy
2504
+ value: 85.75430649102938
2505
+ - type: cos_sim_ap
2506
+ value: 73.34252536948081
2507
+ - type: cos_sim_f1
2508
+ value: 67.53758935173774
2509
+ - type: cos_sim_precision
2510
+ value: 63.3672525439408
2511
+ - type: cos_sim_recall
2512
+ value: 72.29551451187335
2513
+ - type: dot_accuracy
2514
+ value: 81.71305954580676
2515
+ - type: dot_ap
2516
+ value: 59.5532209082386
2517
+ - type: dot_f1
2518
+ value: 56.18466898954705
2519
+ - type: dot_precision
2520
+ value: 47.830923248053395
2521
+ - type: dot_recall
2522
+ value: 68.07387862796834
2523
+ - type: euclidean_accuracy
2524
+ value: 85.81987244441795
2525
+ - type: euclidean_ap
2526
+ value: 73.34325409809446
2527
+ - type: euclidean_f1
2528
+ value: 67.83451360417443
2529
+ - type: euclidean_precision
2530
+ value: 64.09955388588871
2531
+ - type: euclidean_recall
2532
+ value: 72.0316622691293
2533
+ - type: manhattan_accuracy
2534
+ value: 85.68277999642368
2535
+ - type: manhattan_ap
2536
+ value: 73.1535450121903
2537
+ - type: manhattan_f1
2538
+ value: 67.928237896289
2539
+ - type: manhattan_precision
2540
+ value: 63.56945722171113
2541
+ - type: manhattan_recall
2542
+ value: 72.9287598944591
2543
+ - type: max_accuracy
2544
+ value: 85.81987244441795
2545
+ - type: max_ap
2546
+ value: 73.34325409809446
2547
+ - type: max_f1
2548
+ value: 67.928237896289
2549
+ - task:
2550
+ type: PairClassification
2551
+ dataset:
2552
+ type: mteb/twitterurlcorpus-pairclassification
2553
+ name: MTEB TwitterURLCorpus
2554
+ config: default
2555
+ split: test
2556
+ revision: 8b6510b0b1fa4e4c4f879467980e9be563ec1cdf
2557
+ metrics:
2558
+ - type: cos_sim_accuracy
2559
+ value: 88.90441262079403
2560
+ - type: cos_sim_ap
2561
+ value: 85.79331880741438
2562
+ - type: cos_sim_f1
2563
+ value: 78.31563529842548
2564
+ - type: cos_sim_precision
2565
+ value: 74.6683424102779
2566
+ - type: cos_sim_recall
2567
+ value: 82.33754234678165
2568
+ - type: dot_accuracy
2569
+ value: 84.89928978926534
2570
+ - type: dot_ap
2571
+ value: 75.25819218316
2572
+ - type: dot_f1
2573
+ value: 69.88730119720536
2574
+ - type: dot_precision
2575
+ value: 64.23362374959665
2576
+ - type: dot_recall
2577
+ value: 76.63227594702803
2578
+ - type: euclidean_accuracy
2579
+ value: 89.01695967710637
2580
+ - type: euclidean_ap
2581
+ value: 85.98986606038852
2582
+ - type: euclidean_f1
2583
+ value: 78.5277880014722
2584
+ - type: euclidean_precision
2585
+ value: 75.22211253701876
2586
+ - type: euclidean_recall
2587
+ value: 82.13735756082538
2588
+ - type: manhattan_accuracy
2589
+ value: 88.99561454573679
2590
+ - type: manhattan_ap
2591
+ value: 85.92262421793953
2592
+ - type: manhattan_f1
2593
+ value: 78.38866094740769
2594
+ - type: manhattan_precision
2595
+ value: 76.02373028505282
2596
+ - type: manhattan_recall
2597
+ value: 80.9054511857099
2598
+ - type: max_accuracy
2599
+ value: 89.01695967710637
2600
+ - type: max_ap
2601
+ value: 85.98986606038852
2602
+ - type: max_f1
2603
+ value: 78.5277880014722
2604
+ ---
2605
+
2606
+ # E5-small-v2
2607
+
2608
+ [Text Embeddings by Weakly-Supervised Contrastive Pre-training](https://arxiv.org/pdf/2212.03533.pdf).
2609
+ Liang Wang, Nan Yang, Xiaolong Huang, Binxing Jiao, Linjun Yang, Daxin Jiang, Rangan Majumder, Furu Wei, arXiv 2022
2610
+
2611
+ This model has 12 layers and the embedding size is 384.
2612
+
2613
+ ## Usage
2614
+
2615
+ Below is an example to encode queries and passages from the MS-MARCO passage ranking dataset.
2616
+
2617
+ ```python
2618
+ import torch.nn.functional as F
2619
+
2620
+ from torch import Tensor
2621
+ from transformers import AutoTokenizer, AutoModel
2622
+
2623
+
2624
+ def average_pool(last_hidden_states: Tensor,
2625
+ attention_mask: Tensor) -> Tensor:
2626
+ last_hidden = last_hidden_states.masked_fill(~attention_mask[..., None].bool(), 0.0)
2627
+ return last_hidden.sum(dim=1) / attention_mask.sum(dim=1)[..., None]
2628
+
2629
+
2630
+ # Each input text should start with "query: " or "passage: ".
2631
+ # For tasks other than retrieval, you can simply use the "query: " prefix.
2632
+ input_texts = ['query: how much protein should a female eat',
2633
+ 'query: summit define',
2634
+ "passage: As a general guideline, the CDC's average requirement of protein for women ages 19 to 70 is 46 grams per day. But, as you can see from this chart, you'll need to increase that if you're expecting or training for a marathon. Check out the chart below to see how much protein you should be eating each day.",
2635
+ "passage: Definition of summit for English Language Learners. : 1 the highest point of a mountain : the top of a mountain. : 2 the highest level. : 3 a meeting or series of meetings between the leaders of two or more governments."]
2636
+
2637
+ tokenizer = AutoTokenizer.from_pretrained('ggrn/e5-small-v2')
2638
+ model = AutoModel.from_pretrained('ggrn/e5-small-v2')
2639
+
2640
+ # Tokenize the input texts
2641
+ batch_dict = tokenizer(input_texts, max_length=512, padding=True, truncation=True, return_tensors='pt')
2642
+
2643
+ outputs = model(**batch_dict)
2644
+ embeddings = average_pool(outputs.last_hidden_state, batch_dict['attention_mask'])
2645
+
2646
+ # (Optionally) normalize embeddings
2647
+ embeddings = F.normalize(embeddings, p=2, dim=1)
2648
+ scores = (embeddings[:2] @ embeddings[2:].T) * 100
2649
+ print(scores.tolist())
2650
+ ```
2651
+
2652
+ ## Training Details
2653
+
2654
+ Please refer to our paper at [https://arxiv.org/pdf/2212.03533.pdf](https://arxiv.org/pdf/2212.03533.pdf).
2655
+
2656
+ ## Benchmark Evaluation
2657
+
2658
+ Check out [unilm/e5](https://github.com/microsoft/unilm/tree/master/e5) to reproduce evaluation results
2659
+ on the [BEIR](https://arxiv.org/abs/2104.08663) and [MTEB benchmark](https://arxiv.org/abs/2210.07316).
2660
+
2661
+ ## Citation
2662
+
2663
+ If you find our paper or models helpful, please consider cite as follows:
2664
+
2665
+ ```
2666
+ @article{wang2022text,
2667
+ title={Text Embeddings by Weakly-Supervised Contrastive Pre-training},
2668
+ author={Wang, Liang and Yang, Nan and Huang, Xiaolong and Jiao, Binxing and Yang, Linjun and Jiang, Daxin and Majumder, Rangan and Wei, Furu},
2669
+ journal={arXiv preprint arXiv:2212.03533},
2670
+ year={2022}
2671
+ }
2672
+ ```
2673
+
2674
+ ## Limitations
2675
+
2676
+ This model only works for English texts. Long texts will be truncated to at most 512 tokens.
2677
+
2678
+ ## Sentence Transformers
2679
+
2680
+ Below is an example for usage with sentence_transformers. `pip install sentence_transformers~=2.2.2`
2681
+ This is community contributed, and results may vary up to numerical precision.
2682
+ ```python
2683
+ from sentence_transformers import SentenceTransformer
2684
+ model = SentenceTransformer('ggrn/e5-small-v2')
2685
+ embeddings = model.encode(input_texts, normalize_embeddings=True)
2686
+ ```
e5-small-v2/config.json ADDED
@@ -0,0 +1,25 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "tmp/",
3
+ "architectures": [
4
+ "BertModel"
5
+ ],
6
+ "attention_probs_dropout_prob": 0.1,
7
+ "classifier_dropout": null,
8
+ "hidden_act": "gelu",
9
+ "hidden_dropout_prob": 0.1,
10
+ "hidden_size": 384,
11
+ "initializer_range": 0.02,
12
+ "intermediate_size": 1536,
13
+ "layer_norm_eps": 1e-12,
14
+ "max_position_embeddings": 512,
15
+ "model_type": "bert",
16
+ "num_attention_heads": 12,
17
+ "num_hidden_layers": 12,
18
+ "pad_token_id": 0,
19
+ "position_embedding_type": "absolute",
20
+ "torch_dtype": "float32",
21
+ "transformers_version": "4.29.0.dev0",
22
+ "type_vocab_size": 2,
23
+ "use_cache": true,
24
+ "vocab_size": 30522
25
+ }
e5-small-v2/modules.json ADDED
@@ -0,0 +1,20 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [
2
+ {
3
+ "idx": 0,
4
+ "name": "0",
5
+ "path": "",
6
+ "type": "sentence_transformers.models.Transformer"
7
+ },
8
+ {
9
+ "idx": 1,
10
+ "name": "1",
11
+ "path": "1_Pooling",
12
+ "type": "sentence_transformers.models.Pooling"
13
+ },
14
+ {
15
+ "idx": 2,
16
+ "name": "2",
17
+ "path": "2_Normalize",
18
+ "type": "sentence_transformers.models.Normalize"
19
+ }
20
+ ]
e5-small-v2/pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4790fed2919e70bff573d01cd3aede75970f219ab4c0b0aeadd0f4b98084a17d
3
+ size 133508397
e5-small-v2/sentence_bert_config.json ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ {
2
+ "max_seq_length": 512,
3
+ "do_lower_case": false
4
+ }
e5-small-v2/source.txt ADDED
@@ -0,0 +1 @@
 
 
1
+ https://huggingface.co/ggrn/e5-small-v2
e5-small-v2/special_tokens_map.json ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ {
2
+ "cls_token": "[CLS]",
3
+ "mask_token": "[MASK]",
4
+ "pad_token": "[PAD]",
5
+ "sep_token": "[SEP]",
6
+ "unk_token": "[UNK]"
7
+ }
e5-small-v2/tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
e5-small-v2/tokenizer_config.json ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "clean_up_tokenization_spaces": true,
3
+ "cls_token": "[CLS]",
4
+ "do_basic_tokenize": true,
5
+ "do_lower_case": true,
6
+ "mask_token": "[MASK]",
7
+ "model_max_length": 1000000000000000019884624838656,
8
+ "never_split": null,
9
+ "pad_token": "[PAD]",
10
+ "sep_token": "[SEP]",
11
+ "strip_accents": null,
12
+ "tokenize_chinese_chars": true,
13
+ "tokenizer_class": "BertTokenizer",
14
+ "unk_token": "[UNK]"
15
+ }